diff --git a/inception/inception-kb/pom.xml b/inception/inception-kb/pom.xml index 20ad5bfd749..3f85079dc65 100644 --- a/inception/inception-kb/pom.xml +++ b/inception/inception-kb/pom.xml @@ -263,13 +263,17 @@ jackson-annotations + + org.apache.httpcomponents + httpclient + org.apache.httpcomponents httpcore org.apache.httpcomponents - httpclient + httpmime diff --git a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/IriConstants.java b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/IriConstants.java index d18670a6f41..09beb355b79 100644 --- a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/IriConstants.java +++ b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/IriConstants.java @@ -48,6 +48,7 @@ public class IriConstants public static final String PREFIX_MWAPI = "https://www.mediawiki.org/ontology#API/"; public static final String PREFIX_STARDOG = "tag:stardog:api:search:"; public static final String PREFIX_BLAZEGRAPH = "http://www.bigdata.com/rdf/search#"; + public static final String PREFIX_GRAPHDB = "http://www.ontotext.com/"; public static final String UKP_WIKIDATA_SPARQL_ENDPOINT = "http://knowledgebase.ukp.informatik.tu-darmstadt.de:8890/sparql"; public static final Set IMPLICIT_NAMESPACES = Set.of(RDF.NAMESPACE, RDFS.NAMESPACE, @@ -90,6 +91,7 @@ public class IriConstants public static final IRI FTS_WIKIDATA; public static final IRI FTS_STARDOG; public static final IRI FTS_BLAZEGRAPH; + public static final IRI FTS_GRAPHDB; public static final IRI FTS_NONE; public static final List CLASS_IRIS; @@ -121,6 +123,7 @@ public class IriConstants FTS_WIKIDATA = vf.createIRI(PREFIX_MWAPI, "search"); FTS_STARDOG = vf.createIRI(PREFIX_STARDOG, "textMatch"); FTS_BLAZEGRAPH = vf.createIRI(PREFIX_BLAZEGRAPH, "search"); + FTS_GRAPHDB = vf.createIRI(PREFIX_GRAPHDB, "fts"); FTS_NONE = vf.createIRI("FTS:NONE"); CLASS_IRIS = asList(RDFS.CLASS, OWL.CLASS, WIKIDATA_CLASS, SKOS.CONCEPT); @@ 
-147,6 +150,10 @@ public static String getFtsBackendName(String aFTS)
             return "Blazegraph DB";
         }
 
+        if (FTS_GRAPHDB.stringValue().equals(aFTS)) {
+            return "GraphDB";
+        }
+
         if (FTS_VIRTUOSO.stringValue().equals(aFTS)) {
             return "Virtuoso";
         }
diff --git a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/FtsAdapterGraphDb.java b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/FtsAdapterGraphDb.java
new file mode 100644
index 00000000000..a36471f0e10
--- /dev/null
+++ b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/FtsAdapterGraphDb.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.inception.kb.querybuilder;
+
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.GraphDbFtsQuery.PREFIX_GRAPHDB_FTS;
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.VAR_MATCH_TERM;
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.VAR_MATCH_TERM_PROPERTY;
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.VAR_SCORE;
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.VAR_SUBJECT;
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.convertToRequiredTokenPrefixMatchingQuery;
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.Priority.PRIMARY;
+import static org.apache.commons.lang3.StringUtils.isBlank;
+import static org.eclipse.rdf4j.sparqlbuilder.constraint.Expressions.and;
+import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.and;
+import static org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns.union;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;
+
+/**
+ * {@link FtsAdapter} which searches labels through the GraphDB full text search (FTS) by
+ * means of {@link GraphDbFtsQuery}.
+ * <p>
+ * All methods sanitize the given value(s) for the FTS, use the FTS to locate candidate match
+ * terms and then narrow these down using a regular SPARQL filter. The resulting pattern is
+ * added to the {@link SPARQLQueryBuilder} with {@code PRIMARY} priority.
+ */
+public class FtsAdapterGraphDb
+    implements FtsAdapter
+{
+    private static final String MULTI_CHAR_WILDCARD = "*";
+
+    private final SPARQLQueryBuilder builder;
+
+    /**
+     * Creates the adapter and registers the GraphDB FTS prefix with the given builder.
+     *
+     * @param aBuilder
+     *            the query builder which receives the prefix, projections and patterns.
+     */
+    public FtsAdapterGraphDb(SPARQLQueryBuilder aBuilder)
+    {
+        builder = aBuilder;
+        builder.addPrefix(PREFIX_GRAPHDB_FTS);
+    }
+
+    /**
+     * Matches subjects having a label which equals any of the given values. Values which are
+     * blank after sanitization are skipped.
+     */
+    @Override
+    public void withLabelMatchingExactlyAnyOf(String... aValues)
+    {
+        var kb = builder.getKnowledgeBase();
+
+        var valuePatterns = new ArrayList<GraphPattern>();
+        for (var value : aValues) {
+            var sanitizedValue = builder.sanitizeQueryString_FTS(value);
+
+            if (isBlank(sanitizedValue)) {
+                continue;
+            }
+
+            builder.addProjection(VAR_SCORE);
+
+            // The FTS gets the sanitized value while the filter checks the original value
+            valuePatterns.add(ftsQuery(sanitizedValue) //
+                    .filter(builder.equalsPattern(VAR_MATCH_TERM, value, kb)));
+        }
+
+        addUnionOf(valuePatterns);
+    }
+
+    /**
+     * Matches subjects having a label which contains any of the given values. Values which
+     * are blank after sanitization are skipped.
+     */
+    @Override
+    public void withLabelContainingAnyOf(String... aValues)
+    {
+        var valuePatterns = new ArrayList<GraphPattern>();
+        for (var value : aValues) {
+            var sanitizedValue = builder.sanitizeQueryString_FTS(value);
+
+            if (isBlank(sanitizedValue)) {
+                continue;
+            }
+
+            builder.addProjection(VAR_SCORE);
+
+            valuePatterns.add(ftsQuery(sanitizedValue) //
+                    .filter(builder.containsPattern(VAR_MATCH_TERM, value)));
+        }
+
+        addUnionOf(valuePatterns);
+    }
+
+    /**
+     * Matches subjects having a label which starts with the given prefix.
+     */
+    @Override
+    public void withLabelStartingWith(String aPrefixQuery)
+    {
+        // Strip single quotes and asterisks because they have special semantics
+        var queryString = builder.sanitizeQueryString_FTS(aPrefixQuery);
+
+        // NOTE(review): the pattern below is still added after noResult() - presumably the
+        // builder then skips the query altogether; confirm.
+        if (isBlank(queryString)) {
+            builder.noResult();
+        }
+
+        // If the query string entered by the user does not end with a space character, then
+        // we assume that the user may not yet have finished writing the word and add a
+        // wildcard
+        if (!aPrefixQuery.endsWith(" ")) {
+            queryString += MULTI_CHAR_WILDCARD;
+        }
+
+        builder.addProjection(VAR_SCORE);
+
+        // Locate all entries where the label contains the prefix (using the FTS) and then
+        // filter them by those which actually start with the prefix.
+        builder.addPattern(PRIMARY, and( //
+                builder.bindMatchTermProperties(VAR_MATCH_TERM_PROPERTY), //
+                ftsQuery(queryString) //
+                        .filter(builder.startsWithPattern(VAR_MATCH_TERM, aPrefixQuery))));
+    }
+
+    /**
+     * Matches subjects having a label which the FTS finds for any of the given values. Each
+     * value is turned into a token prefix query and the matches are filtered through
+     * {@code matchKbLanguage}. Values which yield a blank query are skipped.
+     */
+    @Override
+    public void withLabelMatchingAnyOf(String... aValues)
+    {
+        var valuePatterns = new ArrayList<GraphPattern>();
+        for (var value : aValues) {
+            var sanitizedValue = builder.sanitizeQueryString_FTS(value);
+
+            if (isBlank(sanitizedValue)) {
+                continue;
+            }
+
+            var fuzzyQuery = convertToRequiredTokenPrefixMatchingQuery(sanitizedValue, "",
+                    MULTI_CHAR_WILDCARD);
+
+            if (isBlank(fuzzyQuery)) {
+                continue;
+            }
+
+            builder.addProjection(VAR_SCORE);
+
+            valuePatterns.add(ftsQuery(fuzzyQuery) //
+                    .filter(and(builder.matchKbLanguage(VAR_MATCH_TERM))));
+        }
+
+        addUnionOf(valuePatterns);
+    }
+
+    /**
+     * Creates an FTS query for the given query string on the variables shared with the
+     * builder, limited by the limit of the builder.
+     */
+    private GraphDbFtsQuery ftsQuery(String aQuery)
+    {
+        return new GraphDbFtsQuery(VAR_SUBJECT, VAR_SCORE, VAR_MATCH_TERM,
+                VAR_MATCH_TERM_PROPERTY, aQuery) //
+                        .withLimit(builder.getLimit());
+    }
+
+    /**
+     * Adds the union of the given patterns as primary pattern. If there are no patterns, the
+     * builder is notified that there is no result.
+     */
+    private void addUnionOf(List<GraphPattern> aValuePatterns)
+    {
+        if (aValuePatterns.isEmpty()) {
+            builder.noResult();
+        }
+
+        // NOTE(review): the (then empty) union is still added after noResult() - presumably
+        // the builder then skips the query altogether; confirm.
+        builder.addPattern(PRIMARY, and( //
+                builder.bindMatchTermProperties(VAR_MATCH_TERM_PROPERTY), //
+                union(aValuePatterns.toArray(GraphPattern[]::new))));
+    }
+}
diff --git a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/GraphDbFtsQuery.java b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/GraphDbFtsQuery.java
new file mode 100644
index 00000000000..6e2e734e547
--- /dev/null
+++ b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/GraphDbFtsQuery.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.inception.kb.querybuilder;
+
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.RdfCollection.collectionOf;
+import static org.eclipse.rdf4j.sparqlbuilder.core.SparqlBuilder.prefix;
+import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.iri;
+import static org.eclipse.rdf4j.sparqlbuilder.rdf.Rdf.literalOf;
+
+import java.util.ArrayList;
+
+import org.eclipse.rdf4j.sparqlbuilder.core.Prefix;
+import org.eclipse.rdf4j.sparqlbuilder.core.QueryElement;
+import org.eclipse.rdf4j.sparqlbuilder.core.Variable;
+import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPattern;
+import org.eclipse.rdf4j.sparqlbuilder.graphpattern.GraphPatterns;
+import org.eclipse.rdf4j.sparqlbuilder.rdf.Iri;
+
+import de.tudarmstadt.ukp.inception.kb.IriConstants;
+
+/**
+ * Graph pattern which locates match terms using the GraphDB full text search (FTS) predicate
+ * {@link #GRAPHDB_FTS} and relates them to their subjects.
+ * <p>
+ * The pattern consists of two parts: the match term is linked via the FTS predicate to a
+ * collection holding the query string (and optionally a result limit), and the subject is
+ * linked to the match term via the match term property.
+ */
+public class GraphDbFtsQuery
+    implements GraphPattern
+{
+    public static final Prefix PREFIX_GRAPHDB_FTS = prefix("onto",
+            iri(IriConstants.PREFIX_GRAPHDB));
+    public static final Iri GRAPHDB_FTS = PREFIX_GRAPHDB_FTS.iri("fts");
+
+    // The FTS is asked for this many times the limit. NOTE(review): presumably because the
+    // filters applied on top of the FTS results discard some of them - confirm.
+    private static final long LIMIT_FACTOR = 2;
+
+    private final Variable subject;
+    // NOTE(review): the score variable is kept but not bound by the generated pattern
+    private final Variable score;
+    private final Variable matchTerm;
+    private final Variable matchTermProperty;
+    private final String query;
+    private int limit = 0;
+
+    /**
+     * @param aSubject
+     *            variable for the subject carrying the match term.
+     * @param aScore
+     *            variable for the match score.
+     * @param aMatchTerm
+     *            variable for the term located by the FTS.
+     * @param aMatchTermProperty
+     *            variable for the property linking the subject to the match term.
+     * @param aQuery
+     *            the FTS query string.
+     */
+    public GraphDbFtsQuery(Variable aSubject, Variable aScore, Variable aMatchTerm,
+            Variable aMatchTermProperty, String aQuery)
+    {
+        subject = aSubject;
+        score = aScore;
+        matchTerm = aMatchTerm;
+        matchTermProperty = aMatchTermProperty;
+        query = aQuery;
+    }
+
+    /**
+     * Sets the result limit on which the limit passed to the FTS is based.
+     *
+     * @param aLimit
+     *            the limit; values of zero or less mean that no limit is passed to the FTS.
+     * @return this query for chaining.
+     */
+    public GraphDbFtsQuery withLimit(int aLimit)
+    {
+        limit = aLimit;
+        return this;
+    }
+
+    @Override
+    public String getQueryString()
+    {
+        var queryElements = new ArrayList<QueryElement>();
+        queryElements.add(literalOf(query));
+        if (limit > 0) {
+            // Multiply as long so that large limits do not overflow into negative numbers
+            queryElements.add(literalOf(LIMIT_FACTOR * limit));
+        }
+
+        return GraphPatterns.and( //
+                matchTerm.has(GRAPHDB_FTS, collectionOf(queryElements)), //
+                subject.has(matchTermProperty, matchTerm)) //
+                .getQueryString();
+    }
+
+    @Override
+    public boolean isEmpty()
+    {
+        return false;
+    }
+}
diff --git a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java
index 8450dc32f05..190fc710886 100644
--- a/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java
+++ b/inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilder.java
@@ -20,6 +20,7 @@
 import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_ALLEGRO_GRAPH;
 import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_BLAZEGRAPH;
 import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_FUSEKI;
+import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_GRAPHDB;
 import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_NONE;
 import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_RDF4J_LUCENE;
 import static de.tudarmstadt.ukp.inception.kb.IriConstants.FTS_STARDOG;
@@ -799,6 +800,10 @@ private FtsAdapter getAdapter()
             return new FtsAdapterBlazegraph(this);
         }
 
+        if (FTS_GRAPHDB.equals(ftsMode)) {
+            return new FtsAdapterGraphDb(this);
+        }
+
         if (FTS_FUSEKI.equals(ftsMode)) {
             return new FtsAdapterFuseki(this);
         }
@@ -995,6 +1000,9 @@ private Expression matchString(SparqlFunction aFunction, Variable aVariable,
             value = Stream.of(TOKENKIZER_PATTERN.split(aValue)) //
                     .map(t -> "(?=.*" + asRegexp(t) + ")") //
                     .collect(joining());
+            // value = Stream.of(TOKENKIZER_PATTERN.split(aValue)) //
+            // .map(t -> asRegexp(t)) //
+            // .collect(joining("|"));
             break;
         default:
             throw new IllegalArgumentException(
diff --git a/inception/inception-kb/src/test/java/de/tudarmstadt/ukp/inception/kb/querybuilder/GraphDbRepositoryTest.java b/inception/inception-kb/src/test/java/de/tudarmstadt/ukp/inception/kb/querybuilder/GraphDbRepositoryTest.java
new file mode 100644
index 00000000000..2e6400f3291
--- /dev/null
+++ 
b/inception/inception-kb/src/test/java/de/tudarmstadt/ukp/inception/kb/querybuilder/GraphDbRepositoryTest.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Technische Universität Darmstadt under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The Technische Universität Darmstadt
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package de.tudarmstadt.ukp.inception.kb.querybuilder;
+
+import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilderLocalTestScenarios.buildSparqlRepository;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.time.Duration.ofMinutes;
+import static java.util.Arrays.asList;
+import static org.apache.http.entity.ContentType.APPLICATION_OCTET_STREAM;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.http.client.methods.RequestBuilder;
+import org.apache.http.entity.mime.HttpMultipartMode;
+import org.apache.http.entity.mime.MultipartEntityBuilder;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.eclipse.rdf4j.repository.Repository;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import de.tudarmstadt.ukp.inception.kb.IriConstants;
+import de.tudarmstadt.ukp.inception.kb.RepositoryType;
+import de.tudarmstadt.ukp.inception.kb.model.KnowledgeBase;
+import de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilderLocalTestScenarios.Scenario;
+
+/**
+ * Runs the scenarios from {@link SPARQLQueryBuilderLocalTestScenarios} against a GraphDB
+ * instance started through Testcontainers. Disabled if Docker is not available.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class GraphDbRepositoryTest
+{
+    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    private static final String GRAPHDB_IMAGE = "ontotext/graphdb:10.7.0";
+    private static final int GRAPHDB_PORT = 7200;
+    private static final String REPOSITORY_ID = "test";
+
+    @Container
+    private static final GenericContainer<?> GRAPH_DB = new GenericContainer<>(GRAPHDB_IMAGE) //
+            .withExposedPorts(GRAPHDB_PORT) //
+            .withLogConsumer(new Slf4jLogConsumer(LOG)) //
+            .waitingFor(Wait.forHttp("/rest/repositories").forPort(GRAPHDB_PORT)
+                    .withStartupTimeout(ofMinutes(2)));
+
+    // The repository is created by the first test and then reused (cleared) by all others
+    private static boolean repoCreated = false;
+
+    private Repository repository;
+    private KnowledgeBase kb;
+
+    @BeforeEach
+    public void setUp() throws Exception
+    {
+        assertThat(GRAPH_DB.isRunning()).isTrue();
+
+        var baseUrl = "http://" + GRAPH_DB.getHost() + ":" + GRAPH_DB.getMappedPort(GRAPHDB_PORT);
+
+        if (!repoCreated) {
+            createRepository(baseUrl, REPOSITORY_ID);
+            repoCreated = true;
+        }
+
+        kb = new KnowledgeBase();
+        kb.setDefaultLanguage("en");
+        kb.setType(RepositoryType.REMOTE);
+        // TODO: The scenarios currently run with the FTS disabled. Set this to
+        // IriConstants.FTS_GRAPHDB in order to exercise the GraphDB FTS adapter.
+        kb.setFullTextSearchIri(IriConstants.FTS_NONE.stringValue());
+        kb.setMaxResults(100);
+
+        SPARQLQueryBuilderLocalTestScenarios.initRdfsMapping(kb);
+
+        var repositoryUrl = baseUrl + "/repositories/" + REPOSITORY_ID;
+        repository = buildSparqlRepository(repositoryUrl, repositoryUrl + "/statements");
+
+        // Start every test from an empty repository
+        try (var conn = repository.getConnection()) {
+            conn.clear();
+        }
+    }
+
+    private static List<Arguments> tests() throws Exception
+    {
+        var exclusions = asList();
+
+        return SPARQLQueryBuilderLocalTestScenarios.tests().stream() //
+                .filter(scenario -> !exclusions.contains(scenario.name)) //
+                .map(scenario -> Arguments.of(scenario.name, scenario)) //
+                .collect(Collectors.toList());
+    }
+
+    @ParameterizedTest(name = "{index}: test {0}")
+    @MethodSource("tests")
+    public void runTests(String aScenarioName, Scenario aScenario) throws Exception
+    {
+        aScenario.implementation.accept(repository, kb);
+    }
+
+    /**
+     * Creates a repository by posting the configuration {@link #TEMPLATE} to the GraphDB REST
+     * API. A failure is logged but does not abort the test.
+     *
+     * @param baseUrl
+     *            the base URL of the GraphDB instance.
+     * @param repositoryId
+     *            the ID of the repository to create.
+     */
+    private static void createRepository(String baseUrl, String repositoryId)
+        throws IOException, URISyntaxException
+    {
+        var config = TEMPLATE.replace("${REPOSITORY_ID}", repositoryId);
+
+        var entity = MultipartEntityBuilder.create() //
+                .setMode(HttpMultipartMode.BROWSER_COMPATIBLE) //
+                .addBinaryBody("config", config.getBytes(UTF_8), APPLICATION_OCTET_STREAM,
+                        "config.ttl") //
+                .build();
+
+        var request = RequestBuilder.post(new URI(baseUrl + "/rest/repositories")) //
+                .setEntity(entity) //
+                .build();
+
+        // Make sure that the client and the response are closed again
+        try (var client = HttpClientBuilder.create().build();
+                var response = client.execute(request)) {
+            var statusCode = response.getStatusLine().getStatusCode();
+            if (statusCode < 200 || statusCode >= 300) {
+                LOG.error("Response status code: {}", statusCode);
+                LOG.error("Response body: {}",
+                        new String(response.getEntity().getContent().readAllBytes(), UTF_8));
+            }
+            else {
+                LOG.info("Repository created: [{}]", repositoryId);
+            }
+        }
+    }
+
+    private static final String TEMPLATE = //
+            """
+            # Example RDF4J configuration template for a GraphDB repository named "wines"
+
+            @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+            @prefix rep: <http://www.openrdf.org/config/repository#> .
+            @prefix sail: <http://www.openrdf.org/config/sail#> .
+            @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+            <#wines> a rep:Repository;
+              rep:repositoryID "${REPOSITORY_ID}";
+              rep:repositoryImpl [
+                  rep:repositoryType "graphdb:SailRepository";
+                  <http://www.openrdf.org/config/repository/sail#sailImpl> [
+                      <http://www.ontotext.com/config/graphdb#base-URL> "http://example.org/owlim#";
+                      <http://www.ontotext.com/config/graphdb#check-for-inconsistencies> "false";
+                      <http://www.ontotext.com/config/graphdb#defaultNS> "";
+                      <http://www.ontotext.com/config/graphdb#disable-sameAs> "true";
+                      <http://www.ontotext.com/config/graphdb#enable-context-index> "false";
+                      <http://www.ontotext.com/config/graphdb#enable-fts-index> "true";
+                      <http://www.ontotext.com/config/graphdb#enable-literal-index> "true";
+                      <http://www.ontotext.com/config/graphdb#enablePredicateList> "true";
+                      <http://www.ontotext.com/config/graphdb#entity-id-size> "32";
+                      <http://www.ontotext.com/config/graphdb#entity-index-size> "10000000";
+                      <http://www.ontotext.com/config/graphdb#fts-indexes> ("default" "en" "de" "fr" "iri");
+                      <http://www.ontotext.com/config/graphdb#fts-iris-index> "none";
+                      <http://www.ontotext.com/config/graphdb#fts-string-literals-index> "default";
+                      <http://www.ontotext.com/config/graphdb#imports> "";
+                      <http://www.ontotext.com/config/graphdb#in-memory-literal-properties> "true";
+                      <http://www.ontotext.com/config/graphdb#query-limit-results> "0";
+                      <http://www.ontotext.com/config/graphdb#query-timeout> "0";
+                      <http://www.ontotext.com/config/graphdb#read-only> "false";
+                      <http://www.ontotext.com/config/graphdb#repository-type> "file-repository";
+                      <http://www.ontotext.com/config/graphdb#ruleset> "rdfsplus-optimized";
+                      <http://www.ontotext.com/config/graphdb#storage-folder> "storage";
+                      <http://www.ontotext.com/config/graphdb#throw-QueryEvaluationException-on-timeout> "false";
+                      sail:sailType "graphdb:Sail"
+                    ]
+                ];
+              rdfs:label "" .
+            """;
+
+}