-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MAINTENANCE] Complete tests for MetsDocument (#919)
- Loading branch information
1 parent
8b7880e
commit fe70610
Showing
4 changed files
with
259 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<alto:alto xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xmlns:alto="http://www.loc.gov/standards/alto/ns-v2#"> | ||
<alto:Description> | ||
<alto:MeasurementUnit>pixel</alto:MeasurementUnit> | ||
<alto:OCRProcessing ID="IdOcr"> | ||
<alto:ocrProcessingStep> | ||
<alto:processingDateTime>2020-05-14</alto:processingDateTime> | ||
<alto:processingSoftware> | ||
<alto:softwareCreator>ABBYY</alto:softwareCreator> | ||
<alto:softwareName>ABBYY FineReader Engine</alto:softwareName> | ||
<alto:softwareVersion>12</alto:softwareVersion> | ||
</alto:processingSoftware> | ||
</alto:ocrProcessingStep> | ||
</alto:OCRProcessing> | ||
</alto:Description> | ||
<alto:Styles> </alto:Styles> | ||
<alto:Layout> | ||
<alto:Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="2546" WIDTH="1801"> | ||
<alto:PrintSpace HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"> | ||
<alto:Illustration ID="Page1_Block1" HEIGHT="2546" WIDTH="1801" VPOS="0" HPOS="0"/> | ||
<alto:TextBlock ID="Page1_Block2" HEIGHT="241" WIDTH="1064" VPOS="2068" HPOS="470" language="de"> | ||
<alto:Shape> | ||
<alto:Polygon POINTS="1506,2068 1533,2068 1533,2283 1534,2283 1534,2306 1509,2306 1509,2307 1104,2307 1104,2308 700,2308 700,2309 471,2309 471,2286 470,2286 470,2071 697,2071 697,2070 1101,2070 1101,2069 1506,2069 1506,2068"/> | ||
</alto:Shape> | ||
<alto:TextLine HEIGHT="102" WIDTH="628" VPOS="2076" HPOS="477"> | ||
<alto:String WC="0.79777777194976807" CONTENT="Bürgertum" HEIGHT="95" WIDTH="437" VPOS="2083" HPOS="477"/> | ||
<alto:SP WIDTH="34" VPOS="2107" HPOS="915"/> | ||
<alto:String WC="0.66333335638046265" CONTENT="und" HEIGHT="76" WIDTH="155" VPOS="2076" HPOS="950"/> | ||
</alto:TextLine> | ||
<alto:TextLine HEIGHT="104" WIDTH="1051" VPOS="2199" HPOS="477"> | ||
<alto:String WC="0.83142858743667603" CONTENT="Bürgerlichkeit" HEIGHT="102" WIDTH="574" VPOS="2201" HPOS="477"/> | ||
<alto:SP WIDTH="32" VPOS="2206" HPOS="1051"/> | ||
<alto:String WC="1." CONTENT="in" HEIGHT="68" WIDTH="74" VPOS="2205" HPOS="1084"/> | ||
<alto:SP WIDTH="34" VPOS="2204" HPOS="1159"/> | ||
<alto:String WC="0.8028571605682373" CONTENT="Dresden" HEIGHT="75" WIDTH="333" VPOS="2199" HPOS="1194"/> | ||
</alto:TextLine> | ||
</alto:TextBlock> | ||
<alto:TextBlock ID="Page1_Block3" HEIGHT="290" WIDTH="775" VPOS="307" HPOS="466" language="de"> | ||
<alto:Shape> | ||
<alto:Polygon POINTS="1101,307 1241,307 1241,595 1104,595 1104,596 700,596 700,597 466,597 466,309 697,309 697,308 1101,308 1101,307"/> | ||
</alto:Shape> | ||
<alto:TextLine HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473"> | ||
<alto:String WC="0.75625002384185791" CONTENT="DRESDNER" HEIGHT="98" WIDTH="752" VPOS="315" HPOS="473"/> | ||
</alto:TextLine> | ||
<alto:TextLine HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473"> | ||
<alto:String WC="0.70399999618530273" CONTENT="HEFTE" HEIGHT="97" WIDTH="448" VPOS="492" HPOS="473"/> | ||
</alto:TextLine> | ||
</alto:TextBlock> | ||
<alto:GraphicalElement ID="Page1_Block4" HEIGHT="14" WIDTH="1674" VPOS="266" HPOS="55"/> | ||
<alto:GraphicalElement ID="Page1_Block5" HEIGHT="15" WIDTH="1674" VPOS="442" HPOS="55"/> | ||
<alto:GraphicalElement ID="Page1_Block6" HEIGHT="30" WIDTH="629" VPOS="680" HPOS="477"/> | ||
<alto:GraphicalElement ID="Page1_Block7" HEIGHT="8" WIDTH="170" VPOS="1963" HPOS="635"/> | ||
<alto:GraphicalElement ID="Page1_Block8" HEIGHT="141" WIDTH="11" VPOS="1019" HPOS="1197"/> | ||
<alto:GraphicalElement ID="Page1_Block9" HEIGHT="168" WIDTH="12" VPOS="948" HPOS="1411"/> | ||
<alto:Illustration ID="Page1_Block10" HEIGHT="175" WIDTH="88" VPOS="1469" HPOS="544"> | ||
<alto:Shape> | ||
<alto:Polygon POINTS="544,1469 631,1469 631,1474 632,1474 632,1644 545,1644 545,1477 544,1477 544,1469"/> | ||
</alto:Shape> | ||
</alto:Illustration> | ||
<alto:Illustration ID="Page1_Block11" HEIGHT="207" WIDTH="61" VPOS="1657" HPOS="790"/> | ||
</alto:PrintSpace> | ||
</alto:Page> | ||
</alto:Layout> | ||
</alto:alto> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<mets:mets xmlns:mets="http://www.loc.gov/METS/" | ||
xmlns:xlink="http://www.w3.org/1999/xlink" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://www.loc.gov/standards/mets/mets.xsd"> | ||
<mets:fileSec> | ||
<mets:fileGrp USE="FULLTEXT"> | ||
<mets:file ID="FULLTEXT_0001" MIMETYPE="application/pdf"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/fulltext_0001.xml"/> | ||
</mets:file> | ||
<mets:file ID="FULLTEXT_0002" MIMETYPE="application/pdf"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/fulltext_0002.xml"/> | ||
</mets:file> | ||
<mets:file ID="FULLTEXT_0003" MIMETYPE="application/pdf"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="http://web:8001/Tests/Fixtures/MetsDocument/fulltext_0003.xml"/> | ||
</mets:file> | ||
<mets:file ID="FULLTEXT_0004" MIMETYPE="application/pdf"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/fulltext_0004.xml"/> | ||
</mets:file> | ||
</mets:fileGrp> | ||
<mets:fileGrp USE="DEFAULT"> | ||
<mets:file ID="FILE_0001" MIMETYPE="image/png"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0001.png"/> | ||
</mets:file> | ||
<mets:file ID="FILE_0002" MIMETYPE="image/png"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0002.png"/> | ||
</mets:file> | ||
<mets:file ID="FILE_0003" MIMETYPE="image/png"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0003.png"/> | ||
</mets:file> | ||
<mets:file ID="FILE_0004" MIMETYPE="image/png"> | ||
<mets:FLocat LOCTYPE="URL" xlink:href="https://example.com/image/0004.png"/> | ||
</mets:file> | ||
</mets:fileGrp> | ||
</mets:fileSec> | ||
<mets:structMap TYPE="LOGICAL"> | ||
<mets:div ID="LOG_0000" LABEL="Example Book" TYPE="manuscript"> | ||
<mets:div ID="LOG_0001" LABEL="First Chapter" TYPE="chapter"/> | ||
<mets:div ID="LOG_0002" LABEL="Second Chapter" TYPE="chapter"/> | ||
<mets:div ID="LOG_0003" LABEL="Third Chapter" TYPE="chapter"/> | ||
</mets:div> | ||
</mets:structMap> | ||
<mets:structMap TYPE="PHYSICAL"> | ||
<mets:div ID="PHYS_0000" TYPE="physSequence"> | ||
<mets:div ID="PHYS_0001" ORDER="1" TYPE="page"> | ||
<mets:fptr FILEID="FILE_0001"/> | ||
<mets:fptr FILEID="FULLTEXT_0001"/> | ||
</mets:div> | ||
<mets:div ID="PHYS_0002" ORDER="2" TYPE="page"> | ||
<mets:fptr FILEID="FILE_0002"/> | ||
<mets:fptr FILEID="FULLTEXT_0002"/> | ||
</mets:div> | ||
<mets:div ID="PHYS_0003" ORDER="3" TYPE="page"> | ||
<mets:fptr FILEID="FILE_0003"/> | ||
<mets:fptr FILEID="FULLTEXT_0003"/> | ||
</mets:div> | ||
<mets:div ID="PHYS_0004" ORDER="4" TYPE="page"> | ||
<mets:fptr FILEID="FILE_0004"/> | ||
<mets:fptr FILEID="FULLTEXT_0004"/> | ||
</mets:div> | ||
</mets:div> | ||
</mets:structMap> | ||
<mets:structLink> | ||
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0001"/> | ||
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0002"/> | ||
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0003"/> | ||
<mets:smLink xlink:from="LOG_0000" xlink:to="PHYS_0004"/> | ||
<mets:smLink xlink:from="LOG_0001" xlink:to="PHYS_0001"/> | ||
<mets:smLink xlink:from="LOG_0002" xlink:to="PHYS_0002"/> | ||
<mets:smLink xlink:from="LOG_0003" xlink:to="PHYS_0003"/> | ||
<mets:smLink xlink:from="LOG_0004" xlink:to="PHYS_0004"/> | ||
</mets:structLink> | ||
</mets:mets> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,13 @@ | ||
<?php | ||
/** | ||
* (c) Kitodo. Key to digital objects e.V. <[email protected]> | ||
* | ||
* This file is part of the Kitodo and TYPO3 projects. | ||
* | ||
* @license GNU General Public License version 3 or later. | ||
* For the full copyright and license information, please read the | ||
* LICENSE.txt file that was distributed with this source code. | ||
*/ | ||
|
||
namespace Kitodo\Dlf\Tests\Functional\Common; | ||
|
||
|
@@ -11,6 +20,7 @@ public function setUp(): void | |
{ | ||
parent::setUp(); | ||
|
||
$this->importDataSet(__DIR__ . '/../../Fixtures/Common/documents_1.xml'); | ||
$this->importDataSet(__DIR__ . '/../../Fixtures/Common/metadata.xml'); | ||
$this->importDataSet(__DIR__ . '/../../Fixtures/MetsDocument/metadata_mets.xml'); | ||
} | ||
|
@@ -128,4 +138,109 @@ public function returnsEmptyMetadataWhenNoDmdSec() | |
$metadata = $doc->getMetadata('LOG_0002', 20000); | ||
$this->assertEquals([], $metadata); | ||
} | ||
|
||
/** | ||
* @test | ||
*/ | ||
public function canGetDownloadLocation()
{
    // Obtain the document under test via the `doc()` helper for `two_dmdsec.xml`.
    $metsDocument = $this->doc('two_dmdsec.xml');

    // A known download file ID must resolve to the expected URL.
    self::assertEquals(
        'https://example.com/download?&CVT=jpeg',
        $metsDocument->getDownloadLocation('FILE_0000_DOWNLOAD')
    );

    // `getDownloadLocation` should return a string, but returns null in some
    // cases. Therefore, a TypeError must be expected for the unknown ID below.
    $this->expectException('TypeError');
    $metsDocument->getDownloadLocation('ID_DOES_NOT_EXIST');
}
|
||
|
||
/** | ||
* @test | ||
*/ | ||
public function canGetFileLocation()
{
    // Obtain the document under test via the `doc()` helper for `two_dmdsec.xml`.
    $metsDocument = $this->doc('two_dmdsec.xml');

    // Known file ID: the location recorded for that file is returned.
    self::assertEquals(
        'https://digital.slub-dresden.de/data/kitodo/1703800435/video.mov',
        $metsDocument->getFileLocation('FILE_0000_DEFAULT')
    );

    // Unknown file ID: the result must compare equal to an empty string.
    self::assertEquals('', $metsDocument->getFileLocation('ID_DOES_NOT_EXIST'));
}
|
||
/** | ||
* @test | ||
*/ | ||
public function canGetFileMimeType()
{
    // Obtain the document under test via the `doc()` helper for `two_dmdsec.xml`.
    $metsDocument = $this->doc('two_dmdsec.xml');

    // Known file ID: the MIME type recorded for that file is returned.
    self::assertEquals(
        'video/quicktime',
        $metsDocument->getFileMimeType('FILE_0000_DEFAULT')
    );

    // Unknown file ID: the result must compare equal to an empty string.
    self::assertEquals('', $metsDocument->getFileMimeType('ID_DOES_NOT_EXIST'));
}
|
||
// FIXME: Method getPhysicalPage does not work as expected | ||
/** | ||
* @test | ||
*/ | ||
public function canGetPhysicalPage()
{
    // Obtain the document under test via the `doc()` helper for `mets_with_pages.xml`.
    $metsDocument = $this->doc('mets_with_pages.xml');

    // Pass the order label '1' and expect the page order 1 in return.
    self::assertEquals(1, $metsDocument->getPhysicalPage('1'));
}
|
||
/** | ||
* @test | ||
*/ | ||
public function canGetTitle()
{
    // Obtain the document under test via the `doc()` helper for `mets_with_pages.xml`.
    $metsDocument = $this->doc('mets_with_pages.xml');

    // Identifier 1001 is expected to yield the title asserted below.
    self::assertEquals(
        '10 Keyboard pieces - Go. S. 658',
        $metsDocument->getTitle(1001)
    );

    // Identifier 1234 is expected to yield a value that compares equal to ''.
    self::assertEquals('', $metsDocument->getTitle(1234));
}
|
||
/** | ||
* @test | ||
*/ | ||
public function canGetFullText() | ||
{ | ||
// Obtain the document under test via the `doc()` helper for `mets_with_pages.xml`.
$doc = $this->doc('mets_with_pages.xml'); | ||
|
||
// Request the full text for the physical structure ID `PHYS_0003` and compare
// it with the expected XML string (a multi-line literal; keep it untouched).
$fulltext = $doc->getFullText('PHYS_0003'); | ||
$expected = '<?xml version="1.0"?> | ||
<ocr><b/><b/></ocr> | ||
'; | ||
$this->assertEquals($expected, $fulltext); | ||
|
||
// An unknown ID is expected to give a result that compares equal to ''.
$incorrect = $doc->getFullText('ID_DOES_NOT_EXIST'); | ||
$this->assertEquals('', $incorrect); | ||
} | ||
|
||
/** | ||
* @test | ||
*/ | ||
public function canGetStructureDepth()
{
    // Obtain the document under test via the `doc()` helper for `mets_with_pages.xml`.
    $metsDocument = $this->doc('mets_with_pages.xml');

    // The logical structure `LOG_0001` is expected at depth 3.
    self::assertEquals(3, $metsDocument->getStructureDepth('LOG_0001'));

    // An unknown structure ID is expected to report depth 0.
    self::assertEquals(0, $metsDocument->getStructureDepth('ID_DOES_NOT_EXIST'));
}
} |