Skip to content
This repository has been archived by the owner on Jan 12, 2024. It is now read-only.

Commit

Permalink
✨(marion) add pdf_options argument to AbstractDocument.create method
Browse files Browse the repository at this point in the history
We want Marion consumers to be able to use the new PDF options offered by
WeasyPrint 59.0. To do that, we add a new `pdf_options` argument to the
`AbstractDocument.create` method.
Furthermore, WeasyPrint 59.0 with PDF compression enabled is no longer compatible
with pdfminer's text extraction. So until this issue is fixed, we disable PDF
compression by default. Consumers can still enable this option at their own
risk through the `pdf_options` argument.
  • Loading branch information
jbpenrath committed May 23, 2023
1 parent b6e9fc4 commit f71b888
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
### Added

- Add Django 4.2 compatibility
- Add `pdf_options` argument to `AbstractDocument.create` method

## [0.4.0] - 2022-08-05

Expand Down
35 changes: 31 additions & 4 deletions src/marion/marion/issuers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from pydantic import BaseModel
from pydantic.error_wrappers import ValidationError
from weasyprint import CSS, HTML
from weasyprint import CSS, DEFAULT_OPTIONS, HTML
from weasyprint.document import DocumentMetadata
from weasyprint.text.fonts import FontConfiguration

Expand Down Expand Up @@ -323,7 +323,16 @@ def get_django_context(self) -> Context:
"""Get the Django Context instance from the context model instance."""
return Context(self.context.dict())

def create(self, persist=True):
@staticmethod
def _clean_pdf_options(options: dict) -> dict:
    """Filter out unsupported pdf options.

    Only the entries of `options` whose key is present in Weasyprint's
    DEFAULT_OPTIONS are kept; every other entry is dropped. The given
    dict is not modified, a new dict is returned.
    """
    supported = {}
    for name, setting in options.items():
        if name in DEFAULT_OPTIONS:
            supported[name] = setting
    return supported

def create(self, persist=True, pdf_options: DEFAULT_OPTIONS = None):
"""Create document.
Given an HTML template, a CSS template and the required context to
Expand All @@ -342,6 +351,12 @@ def create(self, persist=True):
When persist is False, document is created without persisting. In
this case create returns the PDF document as bytes.
- pdf_options<dict>
Additional options to pass to Weasyprint's write_pdf method.
Check to see all available options:
https://doc.courtbouillon.org/weasyprint/stable/api_reference.html#weasyprint.DEFAULT_OPTIONS
"""

if self.context is None:
Expand All @@ -358,10 +373,22 @@ def create(self, persist=True):
document = html.render(stylesheets=[css], font_config=font_config)
document.metadata = self.metadata

common_options = {"zoom": 1}
cleaned_pdf_options = (
self._clean_pdf_options(pdf_options) if pdf_options else {}
)

if "uncompressed_pdf" not in cleaned_pdf_options:
# MARK: Disable PDF compression by default until this issue is fixed:
# https://github.com/Kozea/WeasyPrint/issues/1885
cleaned_pdf_options["uncompressed_pdf"] = True

if persist is False:
return document.write_pdf(zoom=1)
return document.write_pdf(**common_options, **cleaned_pdf_options)

document_path = self.get_document_path()
document.write_pdf(target=document_path, zoom=1)
document.write_pdf(
target=document_path, **common_options, **cleaned_pdf_options
)

return document_path
105 changes: 104 additions & 1 deletion src/marion/marion/tests/issuers/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import pytest
from pdfminer.high_level import extract_text as pdf_extract_text
from pydantic import BaseModel
from weasyprint.document import DocumentMetadata
from weasyprint.document import Document, DocumentMetadata

from marion.defaults import DOCUMENTS_ROOT
from marion.exceptions import (
Expand Down Expand Up @@ -469,6 +469,109 @@ def fetch_context(self):
)


def test_abstract_document_create_with_pdf_options():
    """Valid pdf_options given to create should be forwarded to write_pdf."""

    # pylint: disable=missing-class-docstring
    class ContextModel(BaseModel):
        pass

    # pylint: disable=missing-class-docstring
    class ContextQueryModel(BaseModel):
        pass

    class TestDocument(AbstractDocument):
        context_model = ContextModel
        context_query_model = ContextQueryModel

        def get_html(self):
            return Template("<body>An empty document</body>")

        def get_css(self):
            return Template("")

        def fetch_context(self):
            return {}

    test_document = TestDocument()
    requested_options = {
        "presentational_hints": True,
        "pdf_version": "1.5",
        "jpeg_quality": 50,
        "optimize_images": True,
    }

    # Replace the PDF writer by a mock so only its call arguments are inspected
    with patch.object(Document, "write_pdf") as mocked_write_pdf:
        test_document.create(pdf_options=requested_options)

    mocked_write_pdf.assert_called_once()
    _, forwarded = mocked_write_pdf.call_args

    # "uncompressed_pdf" was not requested: compression must be disabled
    # by default
    assert forwarded["uncompressed_pdf"] is True

    # Every requested option must reach write_pdf unchanged
    assert forwarded["presentational_hints"] is True
    assert forwarded["optimize_images"] is True
    assert forwarded["pdf_version"] == "1.5"
    assert forwarded["jpeg_quality"] == 50


def test_abstract_document_clean_pdf_options():
    """
    When pdf_options are passed to the create method, they should be cleaned.

    Supported options must be forwarded to write_pdf as given (including an
    explicit "uncompressed_pdf" value), unknown options must be dropped and
    the "zoom" and "target" values chosen by create must not be overridden.
    """

    # pylint: disable=missing-class-docstring
    class ContextModel(BaseModel):
        pass

    # pylint: disable=missing-class-docstring
    class ContextQueryModel(BaseModel):
        pass

    class TestDocument(AbstractDocument):
        context_model = ContextModel
        context_query_model = ContextQueryModel

        def get_html(self):
            return Template("<body>An empty document</body>")

        def get_css(self):
            return Template("")

        def fetch_context(self):
            return {}

    test_document = TestDocument()

    # Replace the PDF writer by a mock so only its call arguments are inspected
    with patch.object(Document, "write_pdf") as mocked_write_pdf:
        test_document_file_path = test_document.create(
            pdf_options={
                "target": "unknown.pdf",
                "zoom": 2,
                "unknown_option": "unknown",
                "jpeg_quality": 50,
                "presentational_hints": True,
                "uncompressed_pdf": False,
            }
        )

    mocked_write_pdf.assert_called_once()
    kwargs = mocked_write_pdf.call_args[1]

    # Valid options should have been kept
    assert kwargs["presentational_hints"] is True
    assert kwargs["uncompressed_pdf"] is False
    assert kwargs["jpeg_quality"] == 50

    # Invalid options should have been removed. Check key membership rather
    # than `.get(...) is None`, which would also pass if the key were
    # forwarded with a None value.
    assert "unknown_option" not in kwargs
    assert kwargs["zoom"] == 1
    assert kwargs["target"] != "unknown.pdf"
    assert kwargs["target"] == test_document_file_path


def test_abstract_document_jinja_template_engine(settings):
"""Test document rendering with jinja templates"""

Expand Down

0 comments on commit f71b888

Please sign in to comment.