From e03bf3f31403df581ce4ff55a340b7e2e2e76aa7 Mon Sep 17 00:00:00 2001 From: agazzarini Date: Thu, 28 Aug 2014 19:37:22 +0200 Subject: [PATCH] [ issue #14 ] SPARQL Integration suite works on SOLR too! --- .../jena-nosql-binding-solr/pom.xml | 1 - .../gazzax/labs/jena/nosql/solr/Field.java | 1 + .../labs/jena/nosql/solr/NoOpDictionary.java | 137 ++++++++++++++++++ .../nosql/solr/SolrStorageLayerFactory.java | 3 +- .../solr/dao/SolrDeepPagingIterator.java | 136 +++++++++++++++++ .../nosql/solr/dao/SolrTripleIndexDAO.java | 114 +++++---------- .../main/resources/jena-nosql-default.yaml | 8 +- .../solr-home/plain-store/conf/solrconfig.xml | 2 + .../fwk/factory/StorageLayerFactory.java | 1 + .../jena/nosql/fwk/log/MessageCatalog.java | 1 + jena-nosql-integration-tests/pom.xml | 5 + .../src/test/resources/log4j.xml | 4 +- .../eclipse/install-with-cassandra-2x.launch | 2 +- .../dev/eclipse/install-with-solr-4x.launch | 2 +- 14 files changed, 330 insertions(+), 87 deletions(-) create mode 100644 jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/NoOpDictionary.java create mode 100644 jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrDeepPagingIterator.java diff --git a/jena-nosql-binding/jena-nosql-binding-solr/pom.xml b/jena-nosql-binding/jena-nosql-binding-solr/pom.xml index 7b64c76..facb400 100644 --- a/jena-nosql-binding/jena-nosql-binding-solr/pom.xml +++ b/jena-nosql-binding/jena-nosql-binding-solr/pom.xml @@ -15,7 +15,6 @@ org.apache.solr solr-solrj ${solr.version} - provided \ No newline at end of file diff --git a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/Field.java b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/Field.java index f15eb35..4903c63 100644 --- a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/Field.java +++ 
b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/Field.java @@ -1,6 +1,7 @@ package org.gazzax.labs.jena.nosql.solr; public interface Field { + String ID = "id"; String S = "s"; String P = "p"; String O = "o"; diff --git a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/NoOpDictionary.java b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/NoOpDictionary.java new file mode 100644 index 0000000..e932d83 --- /dev/null +++ b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/NoOpDictionary.java @@ -0,0 +1,137 @@ +package org.gazzax.labs.jena.nosql.solr; + +import java.util.Iterator; + +import org.gazzax.labs.jena.nosql.fwk.StorageLayerException; +import org.gazzax.labs.jena.nosql.fwk.dictionary.TopLevelDictionary; +import org.gazzax.labs.jena.nosql.fwk.factory.StorageLayerFactory; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A NullObject dictionary that, as the name suggests, does nothing. + * + * @author Andrea Gazzarini + * @since 1.0 + */ +class NoOpDictionary implements TopLevelDictionary { + + @Override + public void close() { + // Nothing to be done here... + } + + @Override + public byte[] getID(Node node, boolean p) throws StorageLayerException { + // Nothing to be done here... + return null; + } + + @Override + public Node getValue(byte[] id, boolean p) throws StorageLayerException { + // Nothing to be done here... + return null; + } + + @Override + public void removeValue(Node value, boolean p) throws StorageLayerException { + // Nothing to be done here... + } + + @Override + public String getName() { + // Nothing to be done here... + return null; + } + + @Override + public void initialise(final StorageLayerFactory factory) { + // Nothing to be done here... 
+ } + + @Override + public byte[][] asIdentifiers(final Node s, final Node p, final Node o) { + // Nothing to be done here... + return null; + } + + @Override + public byte[][] asIdentifiers(final Node s, final Node p, final Node o, final Node c) { + // Nothing to be done here... + return null; + } + + @Override + public Triple asTriple(final byte[] s, final byte[] p, final byte[] o) { + // Nothing to be done here... + return null; + } + + @Override + public Quad asQuad(final byte[] s, final byte[] p, final byte[] o, final byte[] c) { + // Nothing to be done here... + return null; + } + + @Override + public boolean isBNode(final byte[] id) { + // Nothing to be done here... + return false; + } + + @Override + public boolean isLiteral(final byte[] id) { + // Nothing to be done here... + return false; + } + + @Override + public boolean isResource(final byte[] id) { + // Nothing to be done here... + return false; + } + + @Override + public Iterator asTripleIdentifiersIterator(final Iterator triples) { + // Nothing to be done here... + return null; + } + + @Override + public Iterator asQuadIdentifiersIterator(final Iterator quads) { + // Nothing to be done here... + return null; + } + + @Override + public Iterator asTripleIterator(final Iterator identifiers) { + // Nothing to be done here... + return null; + } + + @Override + public Iterator asQuadIterator(final Iterator quads) { + // Nothing to be done here... + return null; + } + + @Override + public byte[] compose(final byte[] id1, final byte[] id2) { + // Nothing to be done here... + return null; + } + + @Override + public byte[] compose(final byte[] id1, final byte[] id2, final byte[] id3) { + // Nothing to be done here... + return null; + } + + @Override + public byte[][] decompose(final byte[] compositeId) { + // Nothing to be done here... 
+ return null; + } +} diff --git a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/SolrStorageLayerFactory.java b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/SolrStorageLayerFactory.java index 536e2c6..e0765a4 100644 --- a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/SolrStorageLayerFactory.java +++ b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/SolrStorageLayerFactory.java @@ -25,6 +25,7 @@ * @since 1.0 */ public class SolrStorageLayerFactory extends StorageLayerFactory { + private final TopLevelDictionary dictionary = new NoOpDictionary(); private SolrServer solr; @Override @@ -65,7 +66,7 @@ public TripleIndexDAO getTripleIndexDAO() { @Override public TopLevelDictionary getDictionary() { - throw new UnsupportedOperationException(); + return dictionary; } @Override diff --git a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrDeepPagingIterator.java b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrDeepPagingIterator.java new file mode 100644 index 0000000..85a7599 --- /dev/null +++ b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrDeepPagingIterator.java @@ -0,0 +1,136 @@ +package org.gazzax.labs.jena.nosql.solr.dao; + +import java.util.Iterator; + +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.params.CursorMarkParams; +import org.gazzax.labs.jena.nosql.fwk.util.NTriples; +import org.gazzax.labs.jena.nosql.solr.Field; + +import com.google.common.collect.UnmodifiableIterator; +import com.hp.hpl.jena.graph.Triple; + 
+/** + * An iterator over SOLR results that uses the built-in Deep Paging strategy. + * Internally it uses other iterators to represent each iteration state. + * + * @see http://solr.pl/en/2014/03/10/solr-4-7-efficient-deep-paging + * @see http://heliosearch.org/solr/paging-and-deep-paging + * @see http://en.wikipedia.org/wiki/Finite-state_machine + * @author Andrea Gazzarini + * @since 1.0 + */ +public class SolrDeepPagingIterator extends UnmodifiableIterator { + + private final SolrServer solr; + private final SolrQuery query; + private SolrDocumentList page; + + private String nextCursorMark; + private String sentCursorMark; + + /** + * Iteration state: we need to (re)execute a query. + * This could be needed the very first time we start iteration and each time the current result + * page has been consumed. + */ + private final Iterator executeQuery = new UnmodifiableIterator() { + @Override + public boolean hasNext() { + try { + final QueryResponse response = solr.query(query); + nextCursorMark = response.getNextCursorMark(); + page = response.getResults(); + return page.getNumFound() > 0; + } catch (final Exception exception) { + throw new RuntimeException(exception); + } + } + + @Override + public Triple next() { + currentState = iterateOverCurrentPage; + return currentState.next(); + } + }; + + /** + * Iteration state: query has been executed and now it's time to iterate over results. 
+ */ + private final Iterator iterateOverCurrentPage = new UnmodifiableIterator() { + Iterator iterator; + + @Override + public boolean hasNext() { + if (iterator().hasNext()) { + return true; + } else { + currentState = checkForIterationCompleteness; + return currentState.hasNext(); + } + } + + @Override + public Triple next() { + final SolrDocument document = iterator().next(); + return Triple.create( + NTriples.asURIorBlankNode((String) document.getFieldValue(Field.S)), + NTriples.asURI((String) document.getFieldValue(Field.P)), + NTriples.asNode((String) document.getFieldValue(Field.O))); + } + + Iterator iterator() { + if (iterator == null) { + iterator = page.iterator(); + } + return iterator; + + } + }; + + /** + * Iteration state: once a page has been consumed we need to determine if another query should be issued or not. + */ + private final Iterator checkForIterationCompleteness = new UnmodifiableIterator() { + @Override + public boolean hasNext() { + return !(page.size() < query.getRows() || sentCursorMark.equals(nextCursorMark)); + } + + @Override + public Triple next() { + query.set(CursorMarkParams.CURSOR_MARK_PARAM, nextCursorMark); + currentState = executeQuery; + return currentState.next(); + } + }; + + private Iterator currentState = executeQuery; + + /** + * Builds a new iterator with the given data. + * + * @param solr the SOLR facade. + * @param query the query that will be submitted. 
+ */ + public SolrDeepPagingIterator(final SolrServer solr, final SolrQuery query) { + this.solr = solr; + this.query = query; + this.sentCursorMark = CursorMarkParams.CURSOR_MARK_START; + this.query.set(CursorMarkParams.CURSOR_MARK_PARAM, CursorMarkParams.CURSOR_MARK_START); + } + + @Override + public boolean hasNext() { + return currentState.hasNext(); + } + + @Override + public Triple next() { + return currentState.next(); + } +} \ No newline at end of file diff --git a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrTripleIndexDAO.java b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrTripleIndexDAO.java index 71bddb0..2fe33b6 100644 --- a/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrTripleIndexDAO.java +++ b/jena-nosql-binding/jena-nosql-binding-solr/src/main/java/org/gazzax/labs/jena/nosql/solr/dao/SolrTripleIndexDAO.java @@ -8,26 +8,24 @@ import java.util.List; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.SolrServer; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.UpdateResponse; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrInputDocument; import org.gazzax.labs.jena.nosql.fwk.StorageLayerException; import org.gazzax.labs.jena.nosql.fwk.ds.TripleIndexDAO; -import org.gazzax.labs.jena.nosql.fwk.util.NTriples; +import org.gazzax.labs.jena.nosql.fwk.log.Log; +import org.gazzax.labs.jena.nosql.fwk.log.MessageCatalog; import org.gazzax.labs.jena.nosql.solr.Field; +import org.slf4j.LoggerFactory; -import com.google.common.collect.AbstractIterator; import 
com.hp.hpl.jena.graph.Node; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.graph.TripleMatch; public class SolrTripleIndexDAO implements TripleIndexDAO { - - private final static Iterator EMPTY_TRIPLES_ITERATOR = new ArrayList(0).iterator(); + protected final Log logger = new Log(LoggerFactory.getLogger(SolrTripleIndexDAO.class)); private final SolrServer solr; @@ -45,7 +43,7 @@ public void insertTriple(final Triple triple) throws StorageLayerException { final SolrInputDocument document = new SolrInputDocument(); document.setField(Field.S, asNt(triple.getSubject())); document.setField(Field.P, asNtURI(triple.getPredicate())); - document.setField(Field.O, asNt(triple.getSubject())); + document.setField(Field.O, asNt(triple.getObject())); try { solr.add(document); @@ -70,10 +68,11 @@ public void deleteTriple(final Triple triple) throws StorageLayerException { * @return a delete query starting from a given triple. */ private String deleteQuery(final Triple triple) { + return new StringBuilder() - .append(Field.S).append(":\"").append(asNt(triple.getSubject())).append("\" AND ") - .append(Field.P).append(":\"").append(asNt(triple.getPredicate())).append("\" AND ") - .append(Field.O).append(":\"").append(asNt(triple.getObject())).append("\"") + .append(Field.S).append(":\"").append(ClientUtils.escapeQueryChars(asNt(triple.getSubject()))).append("\" AND ") + .append(Field.P).append(":\"").append(ClientUtils.escapeQueryChars(asNt(triple.getPredicate()))).append("\" AND ") + .append(Field.O).append(":\"").append(ClientUtils.escapeQueryChars(asNt(triple.getObject()))).append("\"") .toString(); } @@ -111,89 +110,50 @@ public void executePendingMutations() throws StorageLayerException { @Override public void clear() { try { - solr.deleteByQuery("*;*"); + solr.deleteByQuery("*:*"); } catch (final Exception exception) { - // TODO: log - exception.printStackTrace(); + logger.error(MessageCatalog._00170_UNABLE_TO_CLEAR, exception); } } @Override public Iterator 
query(final TripleMatch query) throws StorageLayerException { - final SolrQuery q = new SolrQuery("*:*"); - q.setStart(0); + final SolrQuery q = new SolrQuery(); + q.addSort(Field.ID, ORDER.asc); + q.setRows(10); final Node s = query.getMatchSubject(); final Node p = query.getMatchPredicate(); final Node o = query.getMatchObject(); if (s != null) { - q.addFilterQuery( - new StringBuilder() - .append(Field.S) - .append(":\"") - .append(asNt(s)) - .append("\"") - .toString()); + q.addFilterQuery(newFilterQuery(Field.S, asNt(s))); } if (p != null) { - q.addFilterQuery( - new StringBuilder() - .append(Field.P) - .append(":\"") - .append(asNtURI(p)) - .append("\"") - .toString()); + q.addFilterQuery(newFilterQuery(Field.P, asNtURI(p))); } if (o != null) { - q.addFilterQuery( - new StringBuilder() - .append(Field.O) - .append(":\"") - .append(asNt(o)) - .append("\"") - .toString()); + q.addFilterQuery(newFilterQuery(Field.O, asNt(o))); } - try { - final QueryResponse response = solr.query(q); - - if (response.getResults().getNumFound() == 0) { - return EMPTY_TRIPLES_ITERATOR; - } - - return new AbstractIterator() { - - int rowId; - SolrDocumentList page = response.getResults(); - - @Override - protected Triple computeNext() { - - if (page.getStart() + page.size() == page.getNumFound()) { - return endOfData(); - } - - if (rowId == page.size() - 1) { - rowId = 0; - q.setStart(q.getStart() + page.size()); - try { - page = solr.query(q).getResults(); - } catch (final SolrServerException exception) { - throw new RuntimeException(exception); - } - } - - final SolrDocument document = page.get(rowId); - return Triple.create( - NTriples.asURIorBlankNode((String) document.getFieldValue(Field.S)), - NTriples.asURI((String) document.getFieldValue(Field.P)), - NTriples.asNode((String) document.getFieldValue(Field.P))); - } - }; - } catch (final Exception exception) { - throw new 
StorageLayerException(exception); - } + return new SolrDeepPagingIterator(solr, q); } + + /** + * Builds a filter query with the given data. + * + * @param fieldName the field name. + * @param value the field value. + * @return a filter query with the given data. + */ + String newFilterQuery(final String fieldName, final String value) { + return new StringBuilder() + .append(fieldName) + .append(":\"") + .append(ClientUtils.escapeQueryChars(value)) + .append("\"") + .toString(); + } + } \ No newline at end of file diff --git a/jena-nosql-binding/jena-nosql-binding-solr/src/main/resources/jena-nosql-default.yaml b/jena-nosql-binding/jena-nosql-binding-solr/src/main/resources/jena-nosql-default.yaml index 3eadad5..2daff6d 100644 --- a/jena-nosql-binding/jena-nosql-binding-solr/src/main/resources/jena-nosql-default.yaml +++ b/jena-nosql-binding/jena-nosql-binding-solr/src/main/resources/jena-nosql-default.yaml @@ -1,8 +1,8 @@ --- -########################################## -## JENA-NOSQL default configuration ## -########################################## +########################################### +## JENA-NOSQL default configuration ## +########################################### # SorlServer implementation solr-server-class: "org.apache.solr.client.solrj.impl.HttpSolrServer" -solr-address: "http://127.0.0.1:8080/solr/store" \ No newline at end of file +solr-address: "http://127.0.0.1:8080/solr/store" diff --git a/jena-nosql-binding/jena-nosql-binding-solr/src/solr-home/plain-store/conf/solrconfig.xml b/jena-nosql-binding/jena-nosql-binding-solr/src/solr-home/plain-store/conf/solrconfig.xml index 7bc36e2..82893e2 100644 --- a/jena-nosql-binding/jena-nosql-binding-solr/src/solr-home/plain-store/conf/solrconfig.xml +++ b/jena-nosql-binding/jena-nosql-binding-solr/src/solr-home/plain-store/conf/solrconfig.xml @@ -50,6 +50,8 @@ explicit 10 + *:* + false diff --git a/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/factory/StorageLayerFactory.java 
b/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/factory/StorageLayerFactory.java index dd9231c..4f045e9 100644 --- a/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/factory/StorageLayerFactory.java +++ b/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/factory/StorageLayerFactory.java @@ -65,6 +65,7 @@ public abstract MapDAO getMapDAO( * * @return the Data Access Object for interacting with the triple index. */ + @SuppressWarnings("rawtypes") public abstract TripleIndexDAO getTripleIndexDAO(); /** diff --git a/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/log/MessageCatalog.java b/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/log/MessageCatalog.java index c964956..639b3d0 100644 --- a/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/log/MessageCatalog.java +++ b/jena-nosql-framework/src/main/java/org/gazzax/labs/jena/nosql/fwk/log/MessageCatalog.java @@ -39,4 +39,5 @@ public interface MessageCatalog { String _00166_MBEAN_ALREADY_REGISTERED = PREFIX + "-00166> : A Management Interface with ID #%s already exists on Management Server."; String _00167_MBEAN_UNREGISTERED = PREFIX + "-00167> : Management Interface with ID #%s has been unregistered from Management Server."; String _00168_UNABLE_TO_UNREGISTER_MBEAN = PREFIX + "-00168> : Unable to unregister the management interface with name #%s."; + String _00170_UNABLE_TO_CLEAR = PREFIX + "-00170> : Unable to run a clear command against a graph. 
See below for further details."; } \ No newline at end of file diff --git a/jena-nosql-integration-tests/pom.xml b/jena-nosql-integration-tests/pom.xml index f1afb65..4414640 100644 --- a/jena-nosql-integration-tests/pom.xml +++ b/jena-nosql-integration-tests/pom.xml @@ -54,6 +54,11 @@ commons-logging 1.2 + + org.gazzax.labs + jena-nosql-binding-solr + ${project.version} + diff --git a/jena-nosql-integration-tests/src/test/resources/log4j.xml b/jena-nosql-integration-tests/src/test/resources/log4j.xml index c652d32..8f64c20 100644 --- a/jena-nosql-integration-tests/src/test/resources/log4j.xml +++ b/jena-nosql-integration-tests/src/test/resources/log4j.xml @@ -9,8 +9,8 @@ - - + + diff --git a/src/site/dev/eclipse/install-with-cassandra-2x.launch b/src/site/dev/eclipse/install-with-cassandra-2x.launch index 2753508..0dea75f 100644 --- a/src/site/dev/eclipse/install-with-cassandra-2x.launch +++ b/src/site/dev/eclipse/install-with-cassandra-2x.launch @@ -1,7 +1,7 @@ - + diff --git a/src/site/dev/eclipse/install-with-solr-4x.launch b/src/site/dev/eclipse/install-with-solr-4x.launch index 618d610..5d3ca78 100644 --- a/src/site/dev/eclipse/install-with-solr-4x.launch +++ b/src/site/dev/eclipse/install-with-solr-4x.launch @@ -1,7 +1,7 @@ - +