From f0499ec14864119bde61d631ebb320f29af39dae Mon Sep 17 00:00:00 2001 From: scott Date: Wed, 18 Oct 2023 13:56:22 -0600 Subject: [PATCH 1/2] Add wiki identifier for provider values in the wiki markup --- .../enrichments/EnrichmentDriver.scala | 2 -- .../scala/dpla/ingestion3/model/package.scala | 33 ++++++++++++++----- .../data/EnrichedRecordFixture.scala | 5 +-- .../model/WikiMarkupStringTest.scala | 4 +-- 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/main/scala/dpla/ingestion3/enrichments/EnrichmentDriver.scala b/src/main/scala/dpla/ingestion3/enrichments/EnrichmentDriver.scala index f6286b281..4c4b115d8 100644 --- a/src/main/scala/dpla/ingestion3/enrichments/EnrichmentDriver.scala +++ b/src/main/scala/dpla/ingestion3/enrichments/EnrichmentDriver.scala @@ -72,9 +72,7 @@ class EnrichmentDriver(conf: i3Conf) extends Serializable { enriched.copy( provider = wikiEntityEnrichment.enrichEntity(enriched.provider), - dataProvider = enrichDataProvider(enriched), - sourceResource = enriched.sourceResource.copy( date = enriched.sourceResource.date.map(date => dateEnrichment.generateBeginEnd(date.originalSourceDate)).distinct, language = enriched.sourceResource.language.map(languageEnrichment.enrichLanguage).distinct, diff --git a/src/main/scala/dpla/ingestion3/model/package.scala b/src/main/scala/dpla/ingestion3/model/package.scala index 99bb09684..7fff1cbdb 100644 --- a/src/main/scala/dpla/ingestion3/model/package.scala +++ b/src/main/scala/dpla/ingestion3/model/package.scala @@ -232,6 +232,7 @@ package object model { */ def buildWikiMarkup(record: OreAggregation): String = { val dataProviderWikiUri = getDataProviderWikiId(record) + val providerWikiUri = getProviderWikiId(record) val dplaId = getDplaId(record) val permissionsTemplate = getWikiPermissionTemplate(record.edmRights) val permissions = record.edmRights.toString match { @@ -248,7 +249,7 @@ package object model { | | permission = {{${permissions}}} | | source = {{ DPLA | | ${escapeWikiChars(dataProviderWikiUri)} - | | hub = ${escapeWikiChars(record.provider.name.getOrElse(""))} + | | ${escapeWikiChars(providerWikiUri)} | | url = ${escapeWikiChars(record.isShownAt.uri.toString)} | | dpla_id = $dplaId | | local_id = ${record.sourceResource.identifier.map(escapeWikiChars).mkString("; ")} @@ -313,18 +314,32 @@ package object model { * @param record * @return */ - def getDataProviderWikiId(record: OreAggregation): String = - record - .dataProvider + private def getDataProviderWikiId(record: OreAggregation): String = { + getWikiId(record.dataProvider) match { + case Some(uri) => uri + case None => + throw new RuntimeException(s"dataProvider ${record.dataProvider.name.getOrElse("__MISSING__")} " + + s"in ${getDplaId(record)} does not have wiki identifier ") + } + } + + private def getProviderWikiId(record: OreAggregation): String = { + getWikiId(record.provider) match { + case Some(uri) => uri + case None => + throw new RuntimeException(s"provider ${record.provider.name.getOrElse("__MISSING__")} " + + s"in ${getDplaId(record)} does not have wiki identifier ") + } + } + private def getWikiId(agent: EdmAgent): Option[String] = { + agent .exactMatch .map(_.toString) .find(_.startsWith(WikiUri.baseWikiUri)) match { - case Some(uri) => uri.replace(WikiUri.baseWikiUri, "") - case None => - throw new RuntimeException(s"dataProvider ${record.dataProvider.name.getOrElse("__MISSING__")} " + - s"in ${getDplaId(record)} does not have wiki identifier ") + case Some(uri) => Some(uri.replace(WikiUri.baseWikiUri, "")) + case None => None } - + } // Taken from // https://stackoverflow.com/questions/40128816/remove-json-field-when-empty-value-in-serialize-with-json4s diff --git a/src/test/scala/dpla/ingestion3/data/EnrichedRecordFixture.scala b/src/test/scala/dpla/ingestion3/data/EnrichedRecordFixture.scala index 7a6c8cc7f..b2cd52824 100644 --- a/src/test/scala/dpla/ingestion3/data/EnrichedRecordFixture.scala +++ b/src/test/scala/dpla/ingestion3/data/EnrichedRecordFixture.scala @@ -14,8 +14,9 @@ object EnrichedRecordFixture { dplaUri = URI("https://dp.la/item/123"), originalRecord = "The Original Record", provider = EdmAgent( - uri = Some(URI("http://dp.la/api/contributor/thedataprovider")), - name = Some("The Provider") + uri = Some(URI("http://dp.la/api/contributor/bscdn")), + name = Some("Big Sky Digital Network"), + exactMatch = Seq(URI(s"${WikiUri.baseWikiUri}Q83878447")) ), intermediateProvider = Some( EdmAgent(name = Some("The Intermediate Provider")) diff --git a/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala b/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala index c6b8af9a1..ca6050a81 100644 --- a/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala +++ b/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala @@ -16,7 +16,7 @@ class WikiMarkupStringTest extends FlatSpec { | | permission = {{NoC-US | Q83878447}} | | source = {{ DPLA | | Q83878447 - | | hub = The Provider + | | Q83878447 | | url = https://example.org/record/123 | | dpla_id = 4b1bd605bd1d75ee23baadb0e1f24457 | | local_id = us-history-13243; j-doe-archives-2343 @@ -40,7 +40,7 @@ class WikiMarkupStringTest extends FlatSpec { | | permission = {{Cc-by-sa-1.0}} | | source = {{ DPLA | | Q83878447 - | | hub = The Provider + | | Q83878447 | | url = https://example.org/record/123 | | dpla_id = 4b1bd605bd1d75ee23baadb0e1f24457 | | local_id = us-history-13243; j-doe-archives-2343 From 77e1aadb1a8821b776b1b66ec0b1beba1289a2e9 Mon Sep 17 00:00:00 2001 From: scott Date: Thu, 19 Oct 2023 09:40:06 -0600 Subject: [PATCH 2/2] Add 'hub = ' to markup --- src/main/scala/dpla/ingestion3/model/package.scala | 2 +- .../scala/dpla/ingestion3/model/WikiMarkupStringTest.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/dpla/ingestion3/model/package.scala b/src/main/scala/dpla/ingestion3/model/package.scala index 7fff1cbdb..4a197dc25 100644 --- a/src/main/scala/dpla/ingestion3/model/package.scala +++ b/src/main/scala/dpla/ingestion3/model/package.scala @@ -249,7 +249,7 @@ package object model { | | permission = {{${permissions}}} | | source = {{ DPLA | | ${escapeWikiChars(dataProviderWikiUri)} - | | ${escapeWikiChars(providerWikiUri)} + | | hub = ${escapeWikiChars(providerWikiUri)} | | url = ${escapeWikiChars(record.isShownAt.uri.toString)} | | dpla_id = $dplaId | | local_id = ${record.sourceResource.identifier.map(escapeWikiChars).mkString("; ")} diff --git a/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala b/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala index ca6050a81..b73ae5ab9 100644 --- a/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala +++ b/src/test/scala/dpla/ingestion3/model/WikiMarkupStringTest.scala @@ -16,7 +16,7 @@ class WikiMarkupStringTest extends FlatSpec { | | permission = {{NoC-US | Q83878447}} | | source = {{ DPLA | | Q83878447 - | | Q83878447 + | | hub = Q83878447 | | url = https://example.org/record/123 | | dpla_id = 4b1bd605bd1d75ee23baadb0e1f24457 | | local_id = us-history-13243; j-doe-archives-2343 @@ -40,7 +40,7 @@ class WikiMarkupStringTest extends FlatSpec { | | permission = {{Cc-by-sa-1.0}} | | source = {{ DPLA | | Q83878447 - | | Q83878447 + | | hub = Q83878447 | | url = https://example.org/record/123 | | dpla_id = 4b1bd605bd1d75ee23baadb0e1f24457 | | local_id = us-history-13243; j-doe-archives-2343