Skip to content

Commit

Permalink
Parse mediabox and cropbox rectangle explicitly to floats (#987)
Browse files Browse the repository at this point in the history
  • Loading branch information
pietermarsman authored Jul 9, 2024
1 parent 3a789a4 commit 801a1cf
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- `TypeError` when corrupt PDF object reference cannot be parsed as int ([#972](https://github.com/pdfminer/pdfminer.six/pull/972))
- `TypeError` when corrupt PDF literal cannot be converted to str ([#978](https://github.com/pdfminer/pdfminer.six/pull/978))
- `ValueError` when corrupt PDF specifies a negative xref location ([#980](https://github.com/pdfminer/pdfminer.six/pull/980))
- `ValueError` when corrupt PDF specifies an invalid mediabox ([#987](https://github.com/pdfminer/pdfminer.six/pull/987))

### Removed

Expand Down
15 changes: 9 additions & 6 deletions pdfminer/pdfpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
import logging
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple, Any

from pdfminer.utils import Rect
from pdfminer.utils import parse_rect
from . import settings
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed, PDFNoPageLabels
from .pdfparser import PDFParser
from .pdftypes import dict_value
from .pdfexceptions import PDFObjectNotFound
from .pdfexceptions import PDFObjectNotFound, PDFValueError
from .pdftypes import int_value
from .pdftypes import list_value
from .pdftypes import resolve1
Expand Down Expand Up @@ -63,11 +63,14 @@ def __init__(
mediabox_params: List[Any] = [
resolve1(mediabox_param) for mediabox_param in self.attrs["MediaBox"]
]
self.mediabox: Rect = resolve1(mediabox_params)
self.mediabox = parse_rect(resolve1(mediabox_params))
self.cropbox = self.mediabox
if "CropBox" in self.attrs:
self.cropbox: Rect = resolve1(self.attrs["CropBox"])
else:
self.cropbox = self.mediabox
try:
self.cropbox = parse_rect(resolve1(self.attrs["CropBox"]))
except PDFValueError:
pass

self.rotate = (int_value(self.attrs.get("Rotate", 0)) + 360) % 360
self.annots = self.attrs.get("Annots")
self.beads = self.attrs.get("B")
Expand Down
8 changes: 8 additions & 0 deletions pdfminer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,14 @@ def apply_png_predictor(
MATRIX_IDENTITY: Matrix = (1, 0, 0, 1, 0, 0)


def parse_rect(o: Any) -> Rect:
    """Convert a four-element sequence into a rectangle of floats.

    :param o: an iterable expected to hold exactly four values
        ``(x0, y0, x1, y1)``, each accepted by ``float()``.
    :return: the tuple ``(x0, y0, x1, y1)`` with every member as a float.
    :raises PDFValueError: if ``o`` cannot be unpacked into exactly four
        values, or any of the values cannot be converted to a float.
    """
    try:
        (x0, y0, x1, y1) = o
        return float(x0), float(y0), float(x1), float(y1)
    except (TypeError, ValueError) as err:
        # Unpacking a non-iterable and float() on a non-numeric object
        # (e.g. None) raise TypeError, not ValueError. Both are reported
        # uniformly so callers only need to handle PDFValueError.
        raise PDFValueError("Could not parse rectangle") from err


def mult_matrix(m1: Matrix, m0: Matrix) -> Matrix:
(a1, b1, c1, d1, e1, f1) = m1
(a0, b0, c0, d0, e0, f0) = m0
Expand Down

0 comments on commit 801a1cf

Please sign in to comment.