Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added handling of structured content for edition values. #54

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 64 additions & 10 deletions csv2cmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from csv import DictReader
from datetime import datetime
from os import path
from xml.etree.ElementTree import Element, SubElement, Comment, ElementTree
from xml.etree.ElementTree import (Element, SubElement, Comment, ElementTree,
fromstring, ParseError)

__license__ = "MIT"
__version__ = '2.0.1'
Expand Down Expand Up @@ -341,8 +342,28 @@ def createPlaceName(placeNameText, placeNameRef):

def createEdition(biblText, biblType, biblID):
    """Create a new bibliographic entry.

    If ``biblText`` is a well-formed XML fragment whose root element is
    ``<bibl>``, the parsed element (including its child elements) is used as
    the entry. In every other case a plain ``<bibl>`` element is created and
    ``biblText`` becomes its text content.

    Args:
        biblText: Edition value; either plain text or a serialized
            ``<bibl>`` element.
        biblType: Value written to the ``type`` attribute.
        biblID: Value written to the ``xml:id`` attribute.

    Returns:
        The ``<bibl>`` element with ``type`` and ``xml:id`` set. Values for
        these attributes supplied inside structured content are overwritten.
    """
    # ElementTree stores a parsed ``xml:id`` attribute under this key.
    xmlIdKey = '{http://www.w3.org/XML/1998/namespace}id'
    bibl = None
    try:
        parsed = fromstring(biblText)
    except ParseError as e:
        # error code 2 is syntax error and this should be the case for all
        # unstructured titles. Higher error codes are XML specific like
        # completeness, well-formedness or mismatching tags etc. We assume,
        # that those codes are produced by bad but wanted structures, so we
        # leave a warning message.
        if e.code > 2:
            logging.warning('If edition value "%s" should be structured. ' +
                            'It\'s not parsable. Error: %s', biblText, e)
    except (TypeError, ValueError):
        # Input the parser cannot consume at all (e.g. a non-string value)
        # is treated like unstructured text.
        pass
    else:
        if parsed.tag == 'bibl':
            bibl = parsed
            # Drop a parsed xml:id; otherwise the literal 'xml:id' key set
            # below would be serialized as a second, duplicate attribute.
            bibl.attrib.pop(xmlIdKey, None)
        else:
            logging.warning('Structured edition "%s" has wrong root ' +
                            'element <%s> given in csv file. Only ' +
                            '<bibl>-element is allowed.',
                            biblText, parsed.tag)
    if bibl is None:
        # Fallback: keep the raw value as text of a fresh element.
        bibl = Element('bibl')
        bibl.text = biblText
    bibl.set('type', biblType)
    bibl.set('xml:id', biblID)
    return bibl
Expand Down Expand Up @@ -435,7 +456,8 @@ def processPlace(letter, correspondent):
root = Element('TEI')
root.set('xmlns', ns.get('tei'))
root.append(
Comment(' Generated from table of letters with csv2cmi ' + __version__ + ' '))
Comment(' Generated from table of letters with csv2cmi ' +
__version__ + ' '))

# teiHeader
teiHeader = SubElement(root, 'teiHeader')
Expand Down Expand Up @@ -558,7 +580,7 @@ def processPlace(letter, correspondent):
profileDesc.append(entry)

# replace short titles if configured
for bibl in sourceDesc.findall('bibl'):
for idx, bibl in enumerate(sourceDesc.findall('bibl')):
# Try to use bibliographic text as key for section in config file
editionKey = bibl.text
try:
Expand All @@ -568,16 +590,48 @@ def processPlace(letter, correspondent):
except configparser.NoOptionError:
# if type is not set, use the default one
pass
bibl.text = editionTitle
bibl.set('type', editionType)
# Remember id to add it later, in case we got structured content.
id = bibl.get('xml:id')
try:
# Try to parse title as XML to allow structured content
bibl_new = fromstring(editionTitle)
if bibl_new.tag != 'bibl':
logging.warning('Structured title for key "%s" has wrong ' +
'root element <%s> given in ini file. Only ' +
'<bibl>-element is allowed.',
editionKey, bibl_new.tag)
raise Exception
bibl_new.set('xml:id', id)
bibl_new.set('type', editionType)
# Setting bibl = bibl_new doesn't work. We have to explicitly
# replace the original bibl element by the new structured bibl
# element.
sourceDesc.remove(bibl)
sourceDesc.insert(idx, bibl_new)
except ParseError as e:
bibl.text = editionTitle
bibl.set('type', editionType)
# error code 2 is syntax error and this should be the case for all
# unstructured titles. Higher error codes are XML specific like
# completeness, well-formedness or mismatching tags etc. We assume,
# that those codes are produced by bad but wanted structures, so we
# leave a warning message.
if e.code > 2:
logging.warning('If title for key "%s" in ini file should ' +
'be structured. It\'s not parsable. Error: %s',
editionKey, e)
except Exception:
bibl.text = editionTitle
bibl.set('type', editionType)
except configparser.NoOptionError:
logging.warning(
'Incomplete section %s in ini file. Title and type option must be set.', editionKey)
'Incomplete section %s in ini file. ' +
'Title and type option must be set.', editionKey)
except configparser.NoSectionError:
# if there is no matching section, we assume that there should be no one
# if there is no matching section, we assume that there should be
# no one.
pass


# generate empty body
root.append(createTextstructure())

Expand Down
2 changes: 2 additions & 0 deletions examples/shorttitle-replacement/Example.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ sender,senderID,senderPlace,senderPlaceID,senderDate,senderDateText,addressee,ad
Georges-Louis Le Sage,116948078,Genf,,1762-01-27,27. Januar 1762,Friedrich Heinrich Jacobi,118556312,,,,,JBWI1,2,
Friedrich Heinrich Jacobi,118556312,,,1773-02-02,2. Februar 1773,Johann Georg Jacobi,118775782,,,,,JBWI4,*276.2,
Johann Wolfgang Goethe,118540238,,,"[1774-09,1774-10]",September oder Oktober 1774,Friedrich Heinrich Jacobi,118556312,,,,,JBWI10,*349.2,
Phillipp Jacob Spener,,Frankfurt a. M.,,1666-09-21,21. September 1666,Gottlieb Spizel,,Augsburg,,,,SPBW-DD-1,1,
Phillipp Jacob Spener,,Frankfurt a. M.,,1666-11-01,1. November 1666,Balthasar Scheidt,,Straßburg,,,,"<bibl>Philipp Jakob Spener, Briefe aus der Dresdner Zeit 1686 - 1691, Band 1: 1686-1687, hrsg. von Johannes Wallmann in Zusammenarbeit mit Martin Friedrich, Klaus vom Orde und Peter Blastenbrei, Tübingen : Mohr Siebeck, 2003, ISBN 3-16-147427-9, <ref target=""http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa2-77749"">urn:nbn:de:bsz:14-qucosa2-77749</ref>.</bibl>",2,
Binary file modified examples/shorttitle-replacement/Example.ods
Binary file not shown.
2 changes: 1 addition & 1 deletion examples/shorttitle-replacement/Example.xml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
<?xml version='1.0' encoding='utf-8'?>
<TEI xmlns="http://www.tei-c.org/ns/1.0"><!-- Generated from table of letters with csv2cmi 2.0.0-beta --><teiHeader><fileDesc><titleStmt><title xml:id="title_4u2r1isd_vb14qyqn">Example file to demonstrate the replacement of shorttitles</title><editor>Uwe Kretschmer</editor></titleStmt><publicationStmt><publisher>Sächsische Akademie der Wissenschaften zu Leipzig</publisher><idno type="url">https://raw.githubusercontent.com/saw-leipzig/csv2cmi/master/example/shorttitle-replacement/Example.xml</idno><date when="2019-03-15T19:24:00.409619" /><availability><licence target="https://creativecommons.org/licenses/by/4.0/">This file is licensed under the terms of the Creative-Commons-License CC-BY 4.0</licence></availability></publicationStmt><sourceDesc><bibl type="online" xml:id="edition_ismw7t4y_2ve0ob2p">Friedrich Heinrich Jacobi, Band I.1: Briefwechsel 1762–1775, 1981. frommann-holzbog</bibl><bibl type="print" xml:id="edition_95ssee8d_i0m60lgc">Friedrich Heinrich Jacobi, Band I.4: Briefwechsel 1785. Nachtrag zum Briefwechsel 1764–1784. 2003, frommann-holzbog</bibl><bibl type="print" xml:id="edition_tyin9jcs_rbceg1n4">Friedrich Heinrich Jacobi, Band I.10: Briefwechsel Juni 1792 bis September 1794. Nachtrag zum Briefwechsel 1769–1789. 2015, frommann-holzbog</bibl></sourceDesc></fileDesc><profileDesc><correspDesc key="2" source="#edition_ismw7t4y_2ve0ob2p" xml:id="letter_16nlaemq_rh3p2814"><correspAction type="sent" xml:id="sender_l3ca2e5x_tuemr26n"><persName ref="http://d-nb.info/gnd/116948078">Georges-Louis Le Sage</persName><placeName>Genf</placeName><date when="1762-01-27">27. 
Januar 1762</date></correspAction><correspAction type="received" xml:id="addressee_w004vuqi_yujtan6d"><persName ref="http://d-nb.info/gnd/118556312">Friedrich Heinrich Jacobi</persName></correspAction></correspDesc><correspDesc key="*276.2" source="#edition_95ssee8d_i0m60lgc" xml:id="letter_55vke6aa_57er6r8e"><correspAction type="sent" xml:id="sender_9sjzsc0l_sd9ypb29"><persName ref="http://d-nb.info/gnd/118556312">Friedrich Heinrich Jacobi</persName><date when="1773-02-02">2. Februar 1773</date></correspAction><correspAction type="received" xml:id="addressee_ub0ykgtr_izjvyx0p"><persName ref="http://d-nb.info/gnd/118775782">Johann Georg Jacobi</persName></correspAction></correspDesc><correspDesc key="*349.2" source="#edition_tyin9jcs_rbceg1n4" xml:id="letter_ha9wsagx_qlh6tmua"><correspAction type="sent" xml:id="sender_0k692gos_1t4ajst5"><persName ref="http://d-nb.info/gnd/118540238">Johann Wolfgang Goethe</persName><date notAfter="1774-10" notBefore="1774-09">September oder Oktober 1774</date></correspAction><correspAction type="received" xml:id="addressee_pq1jdzb4_0uig88t0"><persName ref="http://d-nb.info/gnd/118556312">Friedrich Heinrich Jacobi</persName></correspAction></correspDesc></profileDesc></teiHeader><text><body><p /></body></text></TEI>
<TEI xmlns="http://www.tei-c.org/ns/1.0"><!-- Generated from table of letters with csv2cmi 2.0.1 --><teiHeader><fileDesc><titleStmt><title xml:id="title_02cxyw59_n2ra3can">Example file to demonstrate the replacement of shorttitles</title><editor>Uwe Kretschmer</editor></titleStmt><publicationStmt><publisher>Sächsische Akademie der Wissenschaften zu Leipzig</publisher><idno type="url">https://raw.githubusercontent.com/saw-leipzig/csv2cmi/master/example/shorttitle-replacement/Example.xml</idno><date when="2019-04-11T16:39:54.501822" /><availability><licence target="https://creativecommons.org/licenses/by/4.0/">This file is licensed under the terms of the Creative-Commons-License CC-BY 4.0</licence></availability></publicationStmt><sourceDesc><bibl type="online" xml:id="edition_9i7pia7d_m8uukean">Friedrich Heinrich Jacobi, Band I.1: Briefwechsel 1762–1775, 1981. frommann-holzbog</bibl><bibl type="print" xml:id="edition_i4cf54xv_74qrd6bw">Friedrich Heinrich Jacobi, Band I.4: Briefwechsel 1785. Nachtrag zum Briefwechsel 1764–1784. 2003, frommann-holzbog</bibl><bibl type="print" xml:id="edition_6da1ctpa_jumqixmn">Friedrich Heinrich Jacobi, Band I.10: Briefwechsel Juni 1792 bis September 1794. Nachtrag zum Briefwechsel 1769–1789. 2015, frommann-holzbog</bibl><bibl type="hybrid" xml:id="edition_jxdl1u8t_kczysdhi">Philipp Jakob Spener, Briefe aus der Dresdner Zeit 1686 - 1691, Band 1: 1686-1687, hrsg. von Johannes Wallmann in Zusammenarbeit mit Martin Friedrich, Klaus vom Orde und Peter Blastenbrei, Tübingen : Mohr Siebeck, 2003, ISBN 3-16-147427-9, <ref target="http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa2-77749">urn:nbn:de:bsz:14-qucosa2-77749</ref>.</bibl><bibl type="print" xml:id="edition_9jqqyyqd_w3phlhcm">Philipp Jakob Spener, Briefe aus der Dresdner Zeit 1686 - 1691, Band 1: 1686-1687, hrsg. 
von Johannes Wallmann in Zusammenarbeit mit Martin Friedrich, Klaus vom Orde und Peter Blastenbrei, Tübingen : Mohr Siebeck, 2003, ISBN 3-16-147427-9, <ref target="http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa2-77749">urn:nbn:de:bsz:14-qucosa2-77749</ref>.</bibl></sourceDesc></fileDesc><profileDesc><correspDesc key="2" source="#edition_9i7pia7d_m8uukean" xml:id="letter_xnhu3z1z_fb6ng8d1"><correspAction type="sent" xml:id="sender_bkhihqej_eywknir6"><persName ref="http://d-nb.info/gnd/116948078">Georges-Louis Le Sage</persName><placeName>Genf</placeName><date when="1762-01-27">27. Januar 1762</date></correspAction><correspAction type="received" xml:id="addressee_13avrqxg_fs9biza6"><persName ref="http://d-nb.info/gnd/118556312">Friedrich Heinrich Jacobi</persName></correspAction></correspDesc><correspDesc key="*276.2" source="#edition_i4cf54xv_74qrd6bw" xml:id="letter_4vfa4nbw_pkcqnx7m"><correspAction type="sent" xml:id="sender_f6ds38fk_a1avpk6e"><persName ref="http://d-nb.info/gnd/118556312">Friedrich Heinrich Jacobi</persName><date when="1773-02-02">2. 
Februar 1773</date></correspAction><correspAction type="received" xml:id="addressee_ah7z45b0_w9z7v6pk"><persName ref="http://d-nb.info/gnd/118775782">Johann Georg Jacobi</persName></correspAction></correspDesc><correspDesc key="*349.2" source="#edition_6da1ctpa_jumqixmn" xml:id="letter_llazj6gf_2y88yv5y"><correspAction type="sent" xml:id="sender_lwf2e2nm_7rurrj3o"><persName ref="http://d-nb.info/gnd/118540238">Johann Wolfgang Goethe</persName><date notAfter="1774-10" notBefore="1774-09">September oder Oktober 1774</date></correspAction><correspAction type="received" xml:id="addressee_s88cxp7b_ipe9l8jk"><persName ref="http://d-nb.info/gnd/118556312">Friedrich Heinrich Jacobi</persName></correspAction></correspDesc><correspDesc key="1" source="#edition_jxdl1u8t_kczysdhi" xml:id="letter_36n7gf1v_l6qpulsy"><correspAction type="sent" xml:id="sender_umt4wyt0_qjowcmnk"><persName>Phillipp Jacob Spener</persName><placeName>Frankfurt a. M.</placeName><date when="1666-09-21">21. September 1666</date></correspAction><correspAction type="received" xml:id="addressee_5m3b4l70_29nc00mi"><persName>Gottlieb Spizel</persName><placeName>Augsburg</placeName></correspAction></correspDesc><correspDesc key="2" source="#edition_9jqqyyqd_w3phlhcm" xml:id="letter_ou00b2hv_pl3fnxht"><correspAction type="sent" xml:id="sender_z9f5h1kd_7gdyc0jy"><persName>Phillipp Jacob Spener</persName><placeName>Frankfurt a. M.</placeName><date when="1666-11-01">1. November 1666</date></correspAction><correspAction type="received" xml:id="addressee_773twqht_qjk6jyks"><persName>Balthasar Scheidt</persName><placeName>Straßburg</placeName></correspAction></correspDesc></profileDesc></teiHeader><text><body><p /></body></text></TEI>
7 changes: 7 additions & 0 deletions examples/shorttitle-replacement/csv2cmi.ini
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ type = print
[JBWI10]
title = Friedrich Heinrich Jacobi, Band I.10: Briefwechsel Juni 1792 bis September 1794. Nachtrag zum Briefwechsel 1769–1789. 2015, frommann-holzbog
# type is optional, defaults to print

# Especially if you have an online or hybrid edition, you may want to link to an online resource within the TEI bibl statement.
# To do so, just insert the complete, well-formed bibl XML structure as shown below. The attributes xml:id and type will be overwritten
# by the program, so you don't need to add them here. A wrong example would be <bibl xml:id="myID123" type="online">Text</bibl>.
[SPBW-DD-1]
title = <bibl>Philipp Jakob Spener, Briefe aus der Dresdner Zeit 1686 - 1691, Band 1: 1686-1687, hrsg. von Johannes Wallmann in Zusammenarbeit mit Martin Friedrich, Klaus vom Orde und Peter Blastenbrei, Tübingen : Mohr Siebeck, 2003, ISBN 3-16-147427-9, <ref target="http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa2-77749">urn:nbn:de:bsz:14-qucosa2-77749</ref>.</bibl>
type = hybrid
10 changes: 6 additions & 4 deletions tests/tests.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
sender,senderID,senderPlace,senderPlaceID,senderDate,addressee,addresseeID,addresseePlace,addresseePlaceID,edition,key,note
Donald Trump,http://id.loc.gov/authorities/names/n85387872,Washington,http://www.geonames.org/4140963,2016-04-01,Donald Duck,http://id.loc.gov/authorities/names/no2016036349,Entenhausen,http://d-nb.info/gnd/4203723-2,Letters of Love an Ludicrousness,,LOC with wrong addresseePlaceID
Πλάτων,http://d-nb.info/gnd/118594893,Athens,http://www.geonames.org/264371,[..-0350],Imperium Romanum,http://d-nb.info/gnd/1115828762,Rom,http://www.geonames.org/3169070/rome.html,Complete edition of never written letters,89274,EDTF one of a set with negative year; wrong rdf:type in addressee
Πλάτων,http://viaf.org/viaf/108159964,Athens,http://www.geonames.org/264371,"{-400,-390,-370}",𐎧𐏁𐏂,http://viaf.org/viaf/293793834,Hañgmatana,,Complete edition of never written letters,10342,missing entry in GeoNames; countries can‘t be addressed with VIAF
"sender","senderID","senderPlace","senderPlaceID","senderDate","addressee","addresseeID","addresseePlace","addresseePlaceID","edition","key","note"
"Donald Trump","http://id.loc.gov/authorities/names/n85387872","Washington","http://www.geonames.org/4140963","2016-04-01","Donald Duck","http://id.loc.gov/authorities/names/no2016036349","Entenhausen","http://d-nb.info/gnd/4203723-2","Letters of Love an Ludicrousness",,"LOC with wrong addresseePlaceID"
"Πλάτων","http://d-nb.info/gnd/118594893","Athens","http://www.geonames.org/264371","[..-0350]","Imperium Romanum","http://d-nb.info/gnd/1115828762","Rom","http://www.geonames.org/3169070/rome.html","Complete edition of never written letters",89274,"EDTF one of a set with negative year; wrong rdf:type in addressee"
"Πλάτων","http://viaf.org/viaf/108159964","Athens","http://www.geonames.org/264371","{-400,-390,-370}","𐎧𐏁𐏂","http://viaf.org/viaf/293793834","Hañgmatana",,"Complete edition of never written letters",10342,"missing entry in GeoNames; countries can‘t be addressed with VIAF"
"sender",,,,"2019-04-11","addressee",,,,"<wrongRoot>Structured <ref target=""URI"">edition</ref> title.</wrongRoot>",10000,"Only <bibl>-element is allowed as root for structured content of edition."
"sender",,,,"2019-04-11","addressee",,,,"<some>bad</xml>",10001,"No well-formed XML."