From ef53b5e87c2e1382203def6cdd44dc5e1919fad6 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Wed, 17 Jul 2024 14:59:15 +0200 Subject: [PATCH 1/6] scaffold collection records endpoint --- backend/collect/api.py | 25 +++++++++++++++++++++++-- backend/collect/api_test.py | 10 ++++++++++ backend/collect/graphs.py | 20 ++++++++++++++++++++ backend/collect/urls.py | 7 +++++++ backend/edpop/urls.py | 1 + 5 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 backend/collect/graphs.py create mode 100644 backend/collect/urls.py diff --git a/backend/collect/api.py b/backend/collect/api.py index ed105dd0..71fa0d83 100644 --- a/backend/collect/api.py +++ b/backend/collect/api.py @@ -1,6 +1,8 @@ from rest_framework.viewsets import ModelViewSet +from rest_framework.views import Request from rest_framework.exceptions import NotFound -from rdflib import URIRef, RDF, Graph +from rdf.views import RDFView +from rdflib import URIRef, RDF, Graph, RDFS from django.conf import settings from projects.api import user_projects @@ -9,10 +11,11 @@ from triplestore.constants import EDPOPCOL from collect.serializers import CollectionSerializer from collect.permissions import CollectionPermission +from collect.graphs import as_collection_from_records class CollectionViewSet(ModelViewSet): ''' - Viewset for listing or retrieving collections + Viewset for listing or retrieving collection metadata ''' lookup_value_regex = '.+' @@ -41,3 +44,21 @@ def get_object(self): self.check_object_permissions(self.request, collection) return collection + +class CollectionRecordsView(RDFView): + ''' + View the records inside a collection + ''' + + def get_graph(self, request: Request, collection: str, **kwargs): + collection_uri = URIRef(collection) + + if not collection_exists(collection_uri): + raise NotFound('Collection does not exist') + + collection_obj = EDPOPCollection(collection_graph(collection_uri), collection_uri) + + g = Graph() + g += as_collection_from_records(collection_uri, 
collection_obj.records) + + return g \ No newline at end of file diff --git a/backend/collect/api_test.py b/backend/collect/api_test.py index afe24524..e90bfd1e 100644 --- a/backend/collect/api_test.py +++ b/backend/collect/api_test.py @@ -127,3 +127,13 @@ def test_project_validation(db, user, client: Client): }, content_type='application/json') assert is_client_error(response.status_code) + +def test_collection_records(db, user, project, client: Client): + client.force_login(user) + create_response = post_collection(client, project.name) + collection_uri = create_response.data['uri'] + + records_url = '/api/collection-records/' + collection_uri + '/' + + response = client.get(records_url) + assert is_success(response.status_code) \ No newline at end of file diff --git a/backend/collect/graphs.py b/backend/collect/graphs.py new file mode 100644 index 00000000..9919ade0 --- /dev/null +++ b/backend/collect/graphs.py @@ -0,0 +1,20 @@ +from typing import List +from rdflib import URIRef, BNode, Graph, RDF, Literal + +from triplestore.constants import AS + +def as_collection_from_records(collection: URIRef, records: List[URIRef]) -> Graph: + ''' + Wrap a list of records in an ActivityStreams Collection + ''' + + g = Graph() + g.add((collection, RDF.type, AS.Collection)) + g.add((collection, AS.totalItems, Literal(len(records)))) + + items_node = BNode() + items = g.collection(items_node) + for record in records: + items.append(record) + g.add((collection, AS.items, items_node)) + return g diff --git a/backend/collect/urls.py b/backend/collect/urls.py new file mode 100644 index 00000000..da188e7c --- /dev/null +++ b/backend/collect/urls.py @@ -0,0 +1,7 @@ +from django.urls import re_path + +from . 
import api + +urlpatterns = [ + re_path('collection-records/(?P<collection>.+)/', api.CollectionRecordsView.as_view()), +] diff --git a/backend/edpop/urls.py b/backend/edpop/urls.py index aad6dabc..ed35b259 100644 --- a/backend/edpop/urls.py +++ b/backend/edpop/urls.py @@ -35,6 +35,7 @@ path('api-auth/', include('rest_framework.urls', namespace='rest_framework')), path('api/', include(api_router.urls)), + path('api/', include('collect.urls')), path('', include('catalogs.urls')), path('', include('accounts.urls')), path('', include('projects.urls')), From 0a3d961241c34b8127786df85d67e243b1be79cc Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Thu, 18 Jul 2024 13:10:32 +0200 Subject: [PATCH 2/6] save collection records as activityStreams collection --- backend/collect/api.py | 1 - backend/collect/graphs.py | 42 ++++++++++++++++++++-------- backend/collect/rdf_models.py | 45 ++++++++++++++++++++++-------- backend/collect/rdf_models_test.py | 21 ++++++++++---- 4 files changed, 78 insertions(+), 31 deletions(-) diff --git a/backend/collect/api.py b/backend/collect/api.py index 71fa0d83..524e19ae 100644 --- a/backend/collect/api.py +++ b/backend/collect/api.py @@ -59,6 +59,5 @@ def get_graph(self, request: Request, collection: str, **kwargs): collection_obj = EDPOPCollection(collection_graph(collection_uri), collection_uri) g = Graph() - g += as_collection_from_records(collection_uri, collection_obj.records) return g \ No newline at end of file diff --git a/backend/collect/graphs.py b/backend/collect/graphs.py index 9919ade0..2f7ca5db 100644 --- a/backend/collect/graphs.py +++ b/backend/collect/graphs.py @@ -1,20 +1,38 @@ from typing import List -from rdflib import URIRef, BNode, Graph, RDF, Literal +from rdflib import Graph, RDF, IdentifiedNode +from rdflib.term import Node -from triplestore.constants import AS +from triplestore.utils import Triples -def as_collection_from_records(collection: URIRef, records: List[URIRef]) -> Graph: +def list_from_graph_collection(graph: 
Graph, list_node: IdentifiedNode) -> List[Node]: ''' - Wrap a list of records in an ActivityStreams Collection + Extract a list of nodes from an RDF collection in a graph ''' + items = list(graph.objects(list_node, RDF.first)) + rest_nodes = graph.objects(list_node, RDF.rest) + for rest in rest_nodes: + items += list_from_graph_collection(graph, rest) + return items + + +def list_to_graph_collection(items: List[Node], items_node: IdentifiedNode) -> Graph: + ''' + Return a list of items as an RDF collection + ''' + g = Graph() - g.add((collection, RDF.type, AS.Collection)) - g.add((collection, AS.totalItems, Literal(len(records)))) - - items_node = BNode() - items = g.collection(items_node) - for record in records: - items.append(record) - g.add((collection, AS.items, items_node)) + collection = g.collection(items_node) + for item in items: + collection.append(item) return g + + +def collection_triples(graph: Graph, list_node: IdentifiedNode) -> Triples: + triples = list(graph.triples((list_node, RDF.first, None))) + triples += list(graph.triples((list_node, RDF.rest, None))) + + for rest in graph.objects(list_node, RDF.rest): + triples += collection_triples(graph, rest) + + return triples diff --git a/backend/collect/rdf_models.py b/backend/collect/rdf_models.py index 05b11aed..5db692dd 100644 --- a/backend/collect/rdf_models.py +++ b/backend/collect/rdf_models.py @@ -1,30 +1,51 @@ -from rdflib import RDFS, IdentifiedNode, URIRef +from rdflib import RDFS, IdentifiedNode, URIRef, Graph, RDF, Literal from typing import Iterable -from triplestore.utils import Triples +from triplestore.utils import Triples, replace_blank_nodes_in_triples from triplestore.constants import EDPOPCOL, AS from triplestore.rdf_model import RDFModel from triplestore.rdf_field import RDFField, RDFUniquePropertyField - +from collect.graphs import ( + list_from_graph_collection, list_to_graph_collection, collection_triples +) class CollectionMembersField(RDFField): def get(self, instance: 
RDFModel): - return [ - s - for (s, p, o) in self._stored_triples(instance) - ] + g = self.get_graph(instance) + items = next(g.objects(instance.uri, AS.items), None) + if items: + return list_from_graph_collection(g, items) + return [] def _stored_triples(self,instance: RDFModel) -> Triples: g = self.get_graph(instance) - return g.triples((None, RDFS.member, instance.uri)) + subgraph = Graph() + subgraph += g.triples((instance.uri, RDF.type, AS.Collection)) + subgraph += g.triples((instance.uri, AS.totalItems, None)) + subgraph += g.triples((instance.uri, AS.items, None)) + + item_collections = g.objects(instance.uri, AS.items) + for collection in item_collections: + subgraph += collection_triples(g, collection) + + return list(subgraph.triples((None, None, None))) def _triples_to_store(self, instance: RDFModel, value: Iterable[IdentifiedNode]) -> Triples: - return [ - (uri, RDFS.member, instance.uri) - for uri in value - ] + g = Graph() + g.add((instance.uri, RDF.type, AS.Collection)) + g.add((instance.uri, AS.totalItems, Literal(len(value)))) + + items_node = self._items_uri(instance) + g += list_to_graph_collection(value, items_node) + g.add((instance.uri, AS.items, items_node)) + + return list(replace_blank_nodes_in_triples(g.triples((None, None, None)))) + + + def _items_uri(self, instance: RDFModel): + return URIRef(str(instance.uri) + '/items') class EDPOPCollection(RDFModel): diff --git a/backend/collect/rdf_models_test.py b/backend/collect/rdf_models_test.py index b758937c..d708feb3 100644 --- a/backend/collect/rdf_models_test.py +++ b/backend/collect/rdf_models_test.py @@ -1,11 +1,12 @@ import pytest -from rdflib import URIRef, RDF, RDFS +from rdflib import URIRef, RDF from django.conf import settings from triplestore.constants import AS, EDPOPCOL from projects.models import Project from projects.rdf_models import RDFProject from collect.rdf_models import EDPOPCollection +from collect.utils import collection_graph, collection_uri @pytest.fixture() def 
project(db): @@ -14,9 +15,8 @@ def project(db): return rdf_project def test_collection_model(project): - uri = URIRef('test-collection', base='https://test.org/collections/') - - collection = EDPOPCollection(project.graph, uri) + uri = collection_uri('Test collection') + collection = EDPOPCollection(collection_graph(uri), uri) collection.name = 'Test collection' collection.project = project.uri collection.records = [ @@ -27,12 +27,21 @@ def test_collection_model(project): store = settings.RDFLIB_STORE + for triple, _ in store.triples((None, None, None)): + print(*triple) + assert any(store.triples((collection.uri, RDF.type, EDPOPCOL.Collection))) assert any(store.triples((collection.uri, AS.context, project.uri))) - assert any(store.triples((None, RDFS.member, collection.uri))) + assert any(store.triples((collection.uri, AS.items, None))) + + collection.refresh_from_store() + assert collection.records == [ + URIRef('https://example.org/example1'), + URIRef('https://example.org/example2') + ] collection.delete() assert not any(store.triples((collection.uri, RDF.type, EDPOPCOL.Collection))) assert not any(store.triples((collection.uri, AS.context, project.uri))) - assert not any(store.triples((None, RDFS.member, collection.uri))) + assert not any(store.triples((collection.uri, AS.items, None))) From 9618451864512434d066059d8ec01bc58b2d3339 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Tue, 23 Jul 2024 14:28:35 +0200 Subject: [PATCH 3/6] collection records endpoint --- backend/collect/api.py | 5 ++-- backend/collect/api_test.py | 55 +++++++++++++++++++++++++++++++++---- backend/collect/graphs.py | 3 +- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/backend/collect/api.py b/backend/collect/api.py index 524e19ae..c865765a 100644 --- a/backend/collect/api.py +++ b/backend/collect/api.py @@ -11,7 +11,6 @@ from triplestore.constants import EDPOPCOL from collect.serializers import CollectionSerializer from collect.permissions import 
CollectionPermission -from collect.graphs import as_collection_from_records class CollectionViewSet(ModelViewSet): ''' @@ -50,7 +49,7 @@ class CollectionRecordsView(RDFView): View the records inside a collection ''' - def get_graph(self, request: Request, collection: str, **kwargs): + def get_graph(self, request: Request, collection: str, **kwargs) -> Graph: collection_uri = URIRef(collection) if not collection_exists(collection_uri): @@ -59,5 +58,7 @@ def get_graph(self, request: Request, collection: str, **kwargs): collection_obj = EDPOPCollection(collection_graph(collection_uri), collection_uri) g = Graph() + g += collection_obj._class_triples() + g += EDPOPCollection.records._stored_triples(collection_obj) return g \ No newline at end of file diff --git a/backend/collect/api_test.py b/backend/collect/api_test.py index e90bfd1e..ff4490f3 100644 --- a/backend/collect/api_test.py +++ b/backend/collect/api_test.py @@ -1,6 +1,6 @@ from django.test import Client from rest_framework.status import is_success, is_client_error -from rdflib import URIRef, RDF, Literal +from rdflib import URIRef, RDF, Graph, Literal from django.conf import settings from urllib.parse import quote from typing import Dict @@ -8,6 +8,8 @@ from triplestore.constants import EDPOPCOL, AS from collect.utils import collection_uri from projects.models import Project +from collect.rdf_models import EDPOPCollection +from collect.utils import collection_graph def example_collection_data(project_name) -> Dict: return { @@ -131,9 +133,50 @@ def test_project_validation(db, user, client: Client): def test_collection_records(db, user, project, client: Client): client.force_login(user) create_response = post_collection(client, project.name) - collection_uri = create_response.data['uri'] - - records_url = '/api/collection-records/' + collection_uri + '/' - + collection_uri = URIRef(create_response.data['uri']) + + records_url = '/api/collection-records/' + str(collection_uri) + '/' + + # check response with 
empty data + empty_response = client.get(records_url) + assert is_success(empty_response.status_code) + g = Graph().parse(empty_response.content) + result = g.query(f''' + ASK {{ + <{collection_uri}> a edpopcol:Collection ; + a as:Collection ; + as:items ?items ; + as:totalItems 0 . + ?items rdf:rest rdf:nil . + }} + ''', + initNs={'as': AS, 'rdf': RDF, 'edpopcol': EDPOPCOL} + ) + assert result.askAnswer + + # add some records to the collection + collection_obj = EDPOPCollection(collection_graph(collection_uri), collection_uri) + collection_obj.records = [ + URIRef('https://example.com/example1'), URIRef('https://example.com/example2') + ] + collection_obj.save() + + # check response contains records response = client.get(records_url) - assert is_success(response.status_code) \ No newline at end of file + assert is_success(response.status_code) + g = Graph().parse(response.content) + result = g.query(f''' + ASK {{ + <{collection_uri}> a edpopcol:Collection ; + a as:Collection ; + as:items ?items ; + as:totalItems 2 . + ?items rdf:first <https://example.com/example1> ; + rdf:rest ?rest . + ?rest rdf:first <https://example.com/example2> ; + rdf:rest rdf:nil . 
+ }} + ''', + initNs={'as': AS, 'rdf': RDF, 'edpopcol': EDPOPCOL} + ) + assert result.askAnswer diff --git a/backend/collect/graphs.py b/backend/collect/graphs.py index 2f7ca5db..c4b39c69 100644 --- a/backend/collect/graphs.py +++ b/backend/collect/graphs.py @@ -23,8 +23,7 @@ def list_to_graph_collection(items: List[Node], items_node: IdentifiedNode) -> G g = Graph() collection = g.collection(items_node) - for item in items: - collection.append(item) + collection += items return g From 860349cf5867192f8e3772d04b039ea4460638d5 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Tue, 23 Jul 2024 14:49:04 +0200 Subject: [PATCH 4/6] docstrings + formatting --- backend/collect/api.py | 2 +- backend/collect/graphs.py | 7 +++++++ backend/collect/rdf_models.py | 4 ++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/backend/collect/api.py b/backend/collect/api.py index c865765a..d2696b8d 100644 --- a/backend/collect/api.py +++ b/backend/collect/api.py @@ -61,4 +61,4 @@ def get_graph(self, request: Request, collection: str, **kwargs) -> Graph: g += collection_obj._class_triples() g += EDPOPCollection.records._stored_triples(collection_obj) - return g \ No newline at end of file + return g diff --git a/backend/collect/graphs.py b/backend/collect/graphs.py index c4b39c69..e1ebcf3a 100644 --- a/backend/collect/graphs.py +++ b/backend/collect/graphs.py @@ -28,6 +28,13 @@ def list_to_graph_collection(items: List[Node], items_node: IdentifiedNode) -> G def collection_triples(graph: Graph, list_node: IdentifiedNode) -> Triples: + ''' + Select all triples that make up an RDF collection in a graph. + + This collects the chain of `rdf:first` / `rdf:rest` relations that make up the + collection. 
+ ''' + triples = list(graph.triples((list_node, RDF.first, None))) triples += list(graph.triples((list_node, RDF.rest, None))) diff --git a/backend/collect/rdf_models.py b/backend/collect/rdf_models.py index 5db692dd..c9ff9a9f 100644 --- a/backend/collect/rdf_models.py +++ b/backend/collect/rdf_models.py @@ -10,6 +10,10 @@ ) class CollectionMembersField(RDFField): + ''' + Field for the records that are contained in an EDPOP collection. + ''' + def get(self, instance: RDFModel): g = self.get_graph(instance) items = next(g.objects(instance.uri, AS.items), None) From 55a207932b4cd17695284d3efa16634d259ff0e5 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Wed, 11 Dec 2024 17:15:01 +0100 Subject: [PATCH 5/6] clarity --- backend/collect/graphs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/collect/graphs.py b/backend/collect/graphs.py index e1ebcf3a..0cd435b4 100644 --- a/backend/collect/graphs.py +++ b/backend/collect/graphs.py @@ -23,7 +23,7 @@ def list_to_graph_collection(items: List[Node], items_node: IdentifiedNode) -> G g = Graph() collection = g.collection(items_node) - collection += items + collection += items # indirectly modifies g return g @@ -32,7 +32,9 @@ def collection_triples(graph: Graph, list_node: IdentifiedNode) -> Triples: Select all triples that make up an RDF collection in a graph. This collects the chain of `rdf:first` / `rdf:rest` relations that make up the - collection. + collection. It collects what is actually stored in the graph, rather than a + normalised version, so this method should be used to select the current triples in + a delete or update operation. 
''' triples = list(graph.triples((list_node, RDF.first, None))) From fbd22f79a60b9bbcda731261f8838979b6a8fdc2 Mon Sep 17 00:00:00 2001 From: Luka van der Plas Date: Wed, 11 Dec 2024 17:34:51 +0100 Subject: [PATCH 6/6] don't use private method in view --- backend/collect/api.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/backend/collect/api.py b/backend/collect/api.py index d2696b8d..9a4da8ef 100644 --- a/backend/collect/api.py +++ b/backend/collect/api.py @@ -2,15 +2,16 @@ from rest_framework.views import Request from rest_framework.exceptions import NotFound from rdf.views import RDFView -from rdflib import URIRef, RDF, Graph, RDFS +from rdflib import URIRef, RDF, Graph, BNode, Literal from django.conf import settings from projects.api import user_projects from collect.rdf_models import EDPOPCollection from collect.utils import collection_exists, collection_graph -from triplestore.constants import EDPOPCOL +from triplestore.constants import EDPOPCOL, AS from collect.serializers import CollectionSerializer from collect.permissions import CollectionPermission +from collect.graphs import list_to_graph_collection class CollectionViewSet(ModelViewSet): ''' @@ -58,7 +59,12 @@ def get_graph(self, request: Request, collection: str, **kwargs) -> Graph: collection_obj = EDPOPCollection(collection_graph(collection_uri), collection_uri) g = Graph() - g += collection_obj._class_triples() - g += EDPOPCollection.records._stored_triples(collection_obj) + g.add((collection_obj.uri, RDF.type, EDPOPCOL.Collection)) + g.add((collection_obj.uri, RDF.type, AS.Collection)) + + items_node = BNode() + g.add((collection_obj.uri, AS.items, items_node)) + g.add((collection_obj.uri, AS.totalItems, Literal(len(collection_obj.records)))) + g += list_to_graph_collection(collection_obj.records, items_node) return g