From cf27d2ca1fa5bda051d9ed79a45461a2c80e127e Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:51:20 -0700 Subject: [PATCH 1/6] Fixes an issue with merging PDFs and the ordering --- .docker/docker-compose.ci-test.yml | 6 +-- .github/workflows/ci.yml | 9 +++- CHANGELOG.md | 10 ++++ src/gotenberg_client/_base.py | 7 ++- src/gotenberg_client/_merge.py | 11 +++- src/gotenberg_client/_types_compat.py | 4 +- src/gotenberg_client/_utils.py | 2 +- tests/samples/a_merge_second.pdf | Bin 0 -> 6723 bytes tests/samples/sample1.pdf | Bin tests/samples/z_first_merge.pdf | Bin 0 -> 6714 bytes tests/test_merge.py | 69 ++++++++++++++++++-------- tests/utils.py | 12 ++--- 12 files changed, 91 insertions(+), 39 deletions(-) create mode 100644 tests/samples/a_merge_second.pdf mode change 100755 => 100644 tests/samples/sample1.pdf create mode 100644 tests/samples/z_first_merge.pdf diff --git a/.docker/docker-compose.ci-test.yml b/.docker/docker-compose.ci-test.yml index e47ea4a..c01b639 100644 --- a/.docker/docker-compose.ci-test.yml +++ b/.docker/docker-compose.ci-test.yml @@ -4,9 +4,9 @@ version: "3" services: - gotenberg: + gotenberg-client-test-server: image: docker.io/gotenberg/gotenberg:7.9.2 - hostname: gotenberg - container_name: gotenberg + hostname: gotenberg-client-test-server + container_name: gotenberg-client-test-server network_mode: host restart: unless-stopped diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d166e59..4f5e6da 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,6 +64,11 @@ jobs: docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml up --detach echo "Wait for container to be started" sleep 5 + - + name: Install poppler-utils + run: | + sudo apt-get update + sudo apt-get install --yes --no-install-recommends poppler-utils - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 @@ -72,7 +77,9 @@ jobs: cache: 'pip' - name: Install Hatch - run: pip install --upgrade hatch + run: | + python3 -m pip install --upgrade pip + pip install --upgrade hatch - name: Run tests run: hatch run cov diff --git a/CHANGELOG.md b/CHANGELOG.md index de445a1..9130e4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- An issue with the sorting of merging PDFs. Expanded testing to cover the merged ordering + +### Changed + +- Multiple merge calls on the same route will maintain the ordering of all files, rather than just per merge call + ## [0.2.0] - 2023-10-16 ### Added diff --git a/src/gotenberg_client/_base.py b/src/gotenberg_client/_base.py index 41a69e8..4beb317 100644 --- a/src/gotenberg_client/_base.py +++ b/src/gotenberg_client/_base.py @@ -79,23 +79,22 @@ def get_files(self) -> RequestFiles: files = {} for filename in self._file_map: file_path = self._file_map[filename] - # Gotenberg requires these to have the specific name - filepath_name = filename if filename in {"index.html", "header.html", "footer.html"} else file_path.name # Helpful but not necessary to provide the mime type when possible mime_type = guess_mime_type(file_path) if mime_type is not None: files.update( - {filepath_name: (filepath_name, self._stack.enter_context(file_path.open("rb")), mime_type)}, + {filename: (filename, self._stack.enter_context(file_path.open("rb")), mime_type)}, ) else: # pragma: no cover - files.update({filepath_name: (filepath_name, self._stack.enter_context(file_path.open("rb")))}) # type: ignore + files.update({filename: (filename, self._stack.enter_context(file_path.open("rb")))}) # type: ignore return files def _add_file_map(self, filepath: Path, name: Optional[str] = None) -> None: """ Small helper to handle bookkeeping of files for later opening. The name is optional to support those things which are required to have a certain name + generally for ordering or just to be found at all """ if name is None: name = filepath.name diff --git a/src/gotenberg_client/_merge.py b/src/gotenberg_client/_merge.py index 52848a1..70ff94e 100644 --- a/src/gotenberg_client/_merge.py +++ b/src/gotenberg_client/_merge.py @@ -4,6 +4,8 @@ from pathlib import Path from typing import List +from httpx import Client + from gotenberg_client._base import BaseApi from gotenberg_client._base import BaseRoute @@ -13,15 +15,20 @@ class MergeRoute(BaseRoute): Handles the merging of a given set of files """ + def __init__(self, client: Client, api_route: str) -> None: + super().__init__(client, api_route) + self._next = 1 + def merge(self, files: List[Path]) -> "MergeRoute": """ Adds the given files into the file mapping. This method will maintain the ordering of the list. Calling this method multiple times may not merge in the expected ordering """ - for idx, filepath in enumerate(files): + for filepath in files: # Include index to enforce ordering - self._add_file_map(filepath, f"{idx}_{filepath.name}") + self._add_file_map(filepath, f"{self._next}_{filepath.name}") + self._next += 1 return self diff --git a/src/gotenberg_client/_types_compat.py b/src/gotenberg_client/_types_compat.py index 91bfbc1..8ccf339 100644 --- a/src/gotenberg_client/_types_compat.py +++ b/src/gotenberg_client/_types_compat.py @@ -4,7 +4,7 @@ import sys -if sys.version_info >= (3, 11): +if sys.version_info >= (3, 11): # pragma: no cover from typing import Self -else: +else: # pragma: no cover from typing_extensions import Self # noqa: F401 diff --git a/src/gotenberg_client/_utils.py b/src/gotenberg_client/_utils.py index eec25ff..5c80146 100644 --- a/src/gotenberg_client/_utils.py +++ b/src/gotenberg_client/_utils.py @@ -19,7 +19,7 @@ def optional_to_form(value: Optional[Union[bool, int, float, str]], name: str) - return {name: str(value).lower()} -def guess_mime_type_stdlib(url: Path) -> Optional[str]: +def guess_mime_type_stdlib(url: Path) -> Optional[str]: # pragma: no cover """ Uses the standard library to guess a mimetype """ diff --git a/tests/samples/a_merge_second.pdf b/tests/samples/a_merge_second.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2955c8d5d133267d97e5279dc7a30886fdd95fb9 GIT binary patch literal 6723 zcmb7}2{@GB+s74!K`47fcWsN+zW7!)dY6r!XA z!4n;5tHXbf!q)>2NUPzfI5!djTntCRQvd{LVEo$znZG7z_)xV>sNjMSX%iKIO7^1N z=mStGC>$bfj{{&*P%sOX%~l0#0yyH_D0CYf#TAe?AQADu;z4e?w+5G|Qt@OWAgzw~ zcDBbGYpL4)2paWQ476Bj5tH#m>Q>0oY9t~R>{0+(dJ59UcnZmjY!BuHgKmzf5Q!x4 zIgGXy#t>{(p*aaEFyinmI)4^6`Ef7pOpX|(@ZtNyy#Y%XaN1w z$kOnv3*Mgo05tklP*wM42w=A{aC%x)>%3L#8F5JWOY2np6u)a9=^?@8{pi*^KnvN1-y=H!KEWu@S7I0~E!0d=%FC<#Y^&Jh?{83Z`0hV#(II}@D#!`Ot1cQ*%Q z5Kg$9y zo|k5_FzRNWxLG9^DG`Q{E0Ab?FnRx5Y4()dRHLkX?EAaXlOxdg(XFwQCu-CEXVwK- zohKO&rVdIk`L=#mn5p~}Bz%<%9y2A(f1zyUZlbQTW-xTA8>l@5eCR)5#XT5R&=N)onW~9lqKOLjpo=iA3)A)@G~+E9)fQ+w64nWbGI} zbWq=BrE?FDY+Y<{nd?;J#h3$jpFdCc`rHX_-!sQ%Q3RRqPvuQ?;1diAZ&i5sITYzW zKb-ykU;fkMLgL3tzGumN79H>GADEfFeO0Q^!ygqQ70a+Y=kkk?+Pg0~GjzBFsP}uC zm&qC(WbHTO7XN;>@trJG9;$r5bmDG#W!{Rab(QbCXBS=j1jlCm_6r`RRxJ*0%r^Od zrc^l*gL=o!*mye%zmWPK4-8rVd!D!T^t!p&*cl;WW0PW_X~qQX3N)Ud_?G+L<_Dy@!S|--SDI7wUl3wjS+$Q3`qRH&kM*W zSi%NH(iA?p-K5q|*Bl6Du(>+LR85J-9FFU0BPE;hcI&)#S;;wO;(4j~*6zgmM_5EE zY$!7Qs#s!7`Pwy$kE14Xabr(*j7yJ*nuc1CGbe(R(KWOtJuq9kfAaj zJ$`XV#f0i9lY!34BV`u{J(i)8l9TaMr%%gyeESk_yykMRAGh@2fO%4)@jk18;l-W< z1B0E5Z{Xp}OxlmQ7hmcbGm!a&w|_K#@gnE!sl4F}U978p7E8#N==K5sqJTwIV2gXH zq?`MK;5Ugmg_xqTG4{ZH19CxnIv<)PcQ#c#s2kcbu@>qlZ}duZCHi6LfudX|ZliSh zR4nJ!C6nXR0o%UsuHW-%>`lQ_1`gAKimy^V59b#WE-7B}w^R0==R0>2?cQ6iW9Q*} z)oN12Xl7?)JO6-9PrXr;w4-_*TIcE4(=`dc-+gYbE2;rG7rKoXQ`z@~A&A+?t_4imFpJb_13Cp9k?<4B3St!*}z4qVj z=jck)n)t`u9A;_S%c4iZyp1SGLMSDOy1JOf&z6Jzr08eol6Qaz{;N@Z7`Ji4?-qMh@b+?12KS z%E9S!`L;&gu8m~yFd#Vd3FX4%5$ zqiLkZ6wNri{&7klty3l?mbb;5HlgCmZf19oOXjYDEa#Kp>Lk+4b>zr$W-kHQ`g_iv~Wn-tR zy1FN3<_m>vD$fq;^BTHtEH1{5c5*e%hfow0e?`MpIL9;C!Rfkn2$OLeuK1MXt~EH@n5$K2=ZL4ZRp-Q_<=qQb zyLHEt&^C`LU5y(hwQGIKZ6-|BS9fRk?Qku5c1iXAliSDjEtd9+Xk#}B@EeC1jatvKWO*!nUiHJINJNA3xWvO7N-P=Zv6^u_ z_L?`AgcZW~AillqRp0;Q=JsA>Fgl)ZI67x%PyYq{_G$x_p3<9AD%F059SH3sE0!G` zA4F3QOt+N3d{O^i>`2;6^NdLeR^YAiKyWCEcbM%C*e0fOU*lQqR{@whz;*B$N% zgb0YL-gIE*7ZrYExKmqT_OkICw#xJDme;O~e&EerIn%|`JbpyQ35j!!WXdEkmrZ8v zjVR}Ks?T)42;Vz^zXfqg^$28azEs;N+q`SH^3)gdnAf%lyB^`_??8aVo1!;SxZ^BC zDwd@j>D!Wf*9p^}^Z&4W6Y(SMSOdcb%Zl9t$KxZX0~yW#r+`6b@tJ6>fl( zY)fx?XQID4Y=w0h?x;>P%~TH)-&^}|yZMWE?ab~ur7YTewx6<*?>}?%U*~w{=+axOfepHXr7C_L}>PFq=_ih`RZ{PI374 z1)dt^9R#$@AL=Vg?7ge>y**8BFwt-zp zuk%~Z2Ug?GH?ytfGd@#CSJh@gK zXKdjdv|A;xsi#R1xKMTZy*QtI37=CegO>(pUcq&-x%E3qD({L`7)!;qGPr7z05 zkPo(li=aJRpc_>il)3MzPgbu+O{&$@WwyeJ^2{j){XMIXQmT)I`~O3*NWJhjCBoCU za%olUe(U{Xf$WMzmb|qqXL;(8D>)@O8ZU?R1Z4Yma_2FY#P~&K?27JF@+k^6Kk!_I z?}qlSW=b=(R;;eYDNZW-atoAnuujV$`(S!un;oZObVam#7rFMEdFG2QIpB^}m_VR* zImbjVbmpQRXMp^3g^Fjp6elxVCJYpWWNXYVU%W5TeeM)n#<+?%d*)y&^&uCSQ!O|+ zhQ9pTFLPSek3ARzxIFVKh$>~h0>7RdyAQ9~zAI9p=j@}&VSkS1IpGbztJkyLHTYA` z+-Mp(!4zGp@v`v~Y@Hg#?RY5s6{Ct%v&&c>d)y(jnQm^p@N=K5#Jnhlh{Pq4>%o8*pr)yur9OSFm#SQv5EYAMHE zwyupW+*P*sfFxgqn-<2n(mP8`&cj?$XUatJ7VD$#%eE!tcH%yDwPs+`A^$ zd+0_K{IP+h+}^C{Q}?vepf*a0rDtE5HtG=TxR#Cf?CN+FJ=hj&_emw#{_#7t{U!uq z<>!(f`4W#|`_W0~vZf22Dn?cM6N{Us} z-H-XOW{;pqJ)Nd-u~&JPoHf2oA!@S?Nm3ha^H5mCWQN62C$YwY$JT<0N~hz+T9QPQ zR{b0d_E3zxYYj0GZ)7~&54RC?V6*mk%kk+3@i`xuo{yu_-OEBl&v`xnx$$ngR{V*R zb-WC+_f$^8!DB(wc3j4X_SXvXos3)ZxTSYzTUyJ4UH$D?5xyen(X=a~CmF85{m19( zgOclS@wbg;>aZ_lFEPESy7D14iMKu{9xL>IJ65m`(NBs6t{dhny}=#lTNHfmm4&^z zaz9c*<@sKNn+5aFP2)@#btlHTc|4_7^qGU%c#`J})elccefXCrCF!eT!fLF;o|&b{ zU6YYL%uzcko;r!g)SX`@!o{n*IJ+XF+MD9Ik-0@}{b}~;0e9HXTuAyVFAClHIze=C zkCg4h9>v#Hx2UHQy;9oyzf{e1?$gvCz1!uTT9RP-5t{o^u=enPe?qQL(s(_lvz0Tuil$7^f!q8tK`~Iax!qx zs_1h0s$Pf|POO#VPIJ58LJgDCxr2c-s9w@Bg{1BhyCZAE#s2E+ff@~djL8{u0?-`C zBj3J7=3Z|Qk98r))$9oO4F8_ar}o&Ugu7TI@!BFWopSm1>yHj%rV}mDs85QY;86sr8|S(9p3CEMNghq$RXLX|z6947(_;*j+3y7WkDxUU@YJa*k&*^4o9KEMKX>Ef=&e*mQ(^1G{K8r zg#UOB-<);U@lW%uS@eeP0|xp4$uQ{38f?)BdKZP~%qMuy%%G4uvWivbFi z*&LKX(R=949tuwHq0~1Aw|Z!L4-Pt`v&bI-g4;+h^w09xdy#1*m%bbHqS1=sOvL}( zFb|RkZS(0L+Si|mk4B|7$<_a&Z(4;xDE+?)TZ0G|lqTArqLbAAf!-W&L@MntPylNf z9npf|&XiWYh0p93Z*hKLVXOCJpEd|KZ$v`(R8WQ2`o=rbAc?6-TDhw+RK81rhI&BN~R9 GkpBUk(v literal 0 HcmV?d00001 diff --git a/tests/samples/sample1.pdf b/tests/samples/sample1.pdf old mode 100755 new mode 100644 diff --git a/tests/samples/z_first_merge.pdf b/tests/samples/z_first_merge.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4f74613f97a8cf297ce4c453e346b34c54a5364c GIT binary patch literal 6714 zcmb7}2{@Ep`@k&~O`*t69$C`DEM|tW4`YzXE<_E+G?p2Sv4&KNl9VOc${Jr7E2Zi3!Q}wzT8=eEYABNP#qePK&R3GIq=?uMs@afB7zT~ zs3|A}u%K8JSm4Ze^bcV&m4YMCL3=nA1Pp~UVt()1gII&i|S2t0%L;1)@QI33Kd+3GfH6w zff^B=$pmexA7IB=M`2J3Fy;j)BM}?7_P=FnCNgY!(ZMG4 zfcPb+L=Qvifn8y9PtB7BTk7P+ah*LqOPzw@y5~<^f4?fJRKfXrAZG-#@^FqX_V^%I z*jE3Up_heaKETa=I1%NigLByBnJ(9+=fw|$Lqk|q-o0Kq&hCCSR(T0J(#jpjF(eWz zvdT{6z4Kb{=2y;HzbgX%+H0E{@{Z*@N?n#tls+B3OB!>n@mgba=Hm~YzN(~5OX<6c z&lH5oi(iq#WO?1pqxx^+p?wcI`2Cit%-06;C6M}U!G z9MQ{(M)m|x-g?H32_E3#Fg3!0*8zQklbgI5k>u@8U@W^6NM2yQ)&;eh9VCxLqZE`8 zFh$V3JOXSbBpe1up#d}+u7F0sVHiaKql{5dLZTVB_7JE6cyWz!@j%_fyVX|FcEl;IUCcGF;ne8TfkhLI1Cwd17R$>dzfi}Fppz#)BUtOdOcYF)L|HB(-0-%nAs z5`Wb3zN5TAwWN@;b~3wcx--99QzfNiimsy4LC;o9DYpwhJzM$twDgvu{2BcR$7dJR zf+q&No~~-kNjh)o(=+2F&)?jCNb|t*mVn)1&t51MmH58BDp-{koU&u^QgB^w@4~f` zlxw9KouuiotHKc|DE6zz$>DB^E3-$}ZeOWLIPhMIn&%&gIU@kt1Ir zT|pXQ$!j-E&uXjXYtiK;?~DHP_^51lO`+bjx&zR^@eCxiWNonoUHl1=s0!QO5UOgVq zORq=TTUlYP&en$4N>t$t$*21zGX7D^RI*x6`%k<`aG!n*`kjNJPBFR7Bq90DEJfER12y_1kBVnrjk;162vZ2FJ7R ze2kR1b7Q2%x(y|2&+M^1Ze12Y?cUbW{BYz#3ADy@JR&d8 z(y|Mms9O;=*0oaSkyYwgnO#tg^zGPV-B3I1?eo04{8Fu>opVcD>#drPx#pjJ&u-!w z7V7O_nP#cK8*;&t2R40RY5TE}ttYZ~t-0x_vM;LzVSMUZAFQUA=P%hu&T-mTD}2h8 z;(MfLZK1YRD5fN&$t4w;L{KXXX-!JWGVJE(XWhjghb|Bv%K)nDQ~W5@M;47Yq<#r{PO9}?pm=JotW>_NeW_D;TXi{N@VTz& z=REk>8P$4sPkuRHwzmma6mlYiy-_mA`!;WGf0?Pe8{g4*kR*EyXkaa}Vm~@fDGtv- z9btF1^fbMEVX_m$s`BK9TgGX*LB)Wj67`IX3~QbT$;Q2BA0SsQ9$oWAuJRzjm63;G_C}p+qW6q49^8n=>K)S^Sev0z^q+E$5_)j==XEsJ<9P zuh!IiG(mP)O?8`XU1(mKUX{AlbNMaCrVrD8z*hZ{s>hA$&BBwD!rML`EblOm@E|w@ zY{?Ry^xsApdn{=`-gsQ~Mx04WS$+nzvm%Sye$gjlX*o;glSq7S&(PRCn{64T&V8?3 ztz!m9U2WZ4X1q8Dv#Lryq(C{CLpSFEBx>t7g#cFL~ zUFv@pv!mZHCu$B%(Bk6EY;9+0RJQgl4~r$QZ5q4DQN{O(_ds7j(DmIy8CjkgjuD2e z`6Kr+pH{=pxSaVIlK-)kg1=uvPSfz%qMm|wN+^L_7bankkgNW|^N3GzHEP@v>Qei% zEiA*!`+AK}Z==JL^QzO+Xfs<&3p-83eZfv;f!tmZ>=voJF4s9q%h+~|yO$=!c|>Vb zlyfhLMjPBTC-?=fS)mD3`!YiHbOK`95{Gir>P&wbFDiKv`V=KRsKA$-c#M*9sc1o{Epd8cC}3pv#qr{X<#Ej- zH_i{KxV^p)6H8AVeYL~PY;Cg4DN~|tpE8kzg=??!#)hTskRzpuh0-ho z>sX0Ou|TYo37+sINo%iYx;C`$$k4gG5tS!dT#Fhj(yN8d_k_CwV;v31x*Y*#>^JzL z2N!(>{e?SD+`I5V@1?=h>0Plkz?4Y%TZ!-h$Xy^_*WPfv`>^Se3;I^FQ!0q#-N2M$ zi?&gG&Qob(;$ZqMtSd5GH%Q==TfwCjd;jKLNjXnttF~*jm}*NO_KB9{1%fhf<0Q|> zNveBViD=z;>%7@!D~ji_$VH5LBTv4HI~T9S;`>k^RB3tXxgxBVp3W3h#N%8UQG>axf&;soR)@;S@DeV7SD^^Z=TPv9yl4|8w2u5pdb z@=E7a@gqZr%@&y`%l$!F8aixQK4Su``?R$hSVV*_$mX`l`W1vkZB}#HY*Jj$T^kjf zfoIR*kMZWuyIh|w_-cE}j+1;WwqhwWMbGs2@64b*e|fP?($DrRPi}Afm-{{3eg!+0 zw&WFt*4vhSa5mF3eMTD*8s@GqG3hB)!* zA8R9O{|Y}3q`@!T{p`?g+`*9ABzNDy;lc4HxXU0*fe`M9lX_sB#HwC&-quf+qJdR! zT(TQTDk-j`m75=wu@5FKiC}q@IK|F~dswtQ{B|sj58qf>x!rmYmSzHN z1CX7$t@-|6->78#!(GYI6%nW067-M6qi=ied`^o!U(az=ZLZ0*d2eV}4u@3{XBS5h zu{gd6rP^iam-kjoTMMsC24yHDB4mn($+u~YkO~T*Z_20$z&t+MS(qWbsfx0MO>wUqz+SOby+tq=P zs)%+|Ss!PfPO&Xhyxf;^6ZS5wX7^xU7kptak&CAb)!aFJVQ(%+TE~OMHjlbC+cpK4 z`<2_5xLVA7GY$s2;L3(&c;sVd2-jJ~0pE^&T6Vx5U zNQGN@9!lD@`%QrUC%1N3*~D)>u)Ui_(S--cBvjm zbJLca6|8Thvb!KpT~1O%cjNm*JbGvkzqzC~^{4>ZHeq7w`eooBbJ)`|9JBdl)` zw&Y^dp%NQA*$8ut6*f}UA8Qtv9%LDwK5>g<**c@+*ejRkY23|~3f!?Fr_d1lWwnb( zs)Pt=$B&hxQsg-987{W3R>GP-_zISZNd3Cq_&!(kqe!bUMK_cEcci*|BIR`tQ1+?1 zHkc66ITr%>VgrP$tO98^HwUF%UEd3RbU&6TGifb<%LiAm*=|6@$Gt2q|K-VqBI1Fm z>n&yM>Ro3`?wLA8y_fWUS7US%+p3C*5{Q#dmMF=$6===nGJn6*SXX2iIG7@r#%fxK zKYa7p?t~KOCS02trCIs|z{}k@6f)F#*bfCY9;$#J!G2A9Je63)Bl^CHSB7^bH3~9X0M=^ zvcrxyR(??u|0aKp+E&4vmU-ayu$i1g^XJ2_Rih~bbCPj+4UQ7}0x!8U&Ps(W1mK3f~dd{zIvwE|)v%uc*v&vQ(?KKtIzDnI$z*srCMqZ%pSxF7d4y*V%=MIOp; zyxy>7%u`k=npOSNsB6gyS4MJZcNXkU>>%aL z`R^QSM~Mm?;3e;y8^=`W^CGiV(s-#C32Jkg_?>xrx-RvL=D|8!ui0;{7)=Z9e@;hV zqbG2m`Myp2=s*@*IIMkdL-E5P#PXr;BmsXi0U%i;kwMPl56EFb zs7TxgK#$Ozi8L~W1jrdUgBXKM_Xj7`4G;)} zrtH8Gjb=WgG0Z^;u|6oS4@l+!TVF#lHiyBG>yyY0Jv?&~y{?Bwtv}(`2c`94T>+u1 zv5p{rI0%-JndqPCaq^}y04}o{%%m}rL8cIY7R;0C$tXVa#d!M@?lE}OIR<&Rzh-4zdpEz>RgN-ryO6Gk*_Yb~Q-Lad`*WMU)ekay`G_bc|09+ZuXomF*K%-EKD8L2yImZZwaRHQ{b8sXGPXC-k zg9!ECb1*mtM9Y85!4M$d{P&!qB1qW(ltW>Z|K1mk`b!Rm0X_SdE)0nVYx1W(82ax$ zU>Nvcalql2zv4h4kblL2LZZO_`g0F79Pw9dCUx2M;>!>0N5$YAV6h) kaBlqQU`D0V0S1C*;#V&^fktPR35`$!iL#80?h!oXznpN;>Hq)$ literal 0 HcmV?d00001 diff --git a/tests/test_merge.py b/tests/test_merge.py index fc869fc..3239dba 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -1,7 +1,7 @@ import shutil +import subprocess import tempfile from pathlib import Path -from typing import List import pikepdf import pytest @@ -15,18 +15,27 @@ from tests.utils import call_run_with_server_error_handling -@pytest.fixture() -def create_files(): +def extract_text(pdf_path: Path) -> str: """ - Creates 2 files in a temporary directory and cleans them up - after their use + Using pdftotext from poppler, extracts the text of a PDF into a file, + then reads the file contents and returns it """ - temp_dir = Path(tempfile.mkdtemp()) - test_file = SAMPLE_DIR / "sample1.pdf" - other_test_file = temp_dir / "sample2.pdf" - other_test_file.write_bytes(test_file.read_bytes()) - yield [test_file, other_test_file] - shutil.rmtree(temp_dir, ignore_errors=True) + with tempfile.NamedTemporaryFile( + mode="w+", + ) as tmp: + subprocess.run( + [ # noqa: S603 + shutil.which("pdftotext"), + "-q", + "-layout", + "-enc", + "UTF-8", + str(pdf_path), + tmp.name, + ], + check=True, + ) + return tmp.read() class TestMergePdfs: @@ -37,12 +46,15 @@ class TestMergePdfs: def test_merge_files_pdf_a( self, client: GotenbergClient, - create_files: List[Path], gt_format: PdfAFormat, pike_format: str, ): with client.merge.merge() as route: - resp = call_run_with_server_error_handling(route.merge(create_files).pdf_format(gt_format)) + resp = call_run_with_server_error_handling( + route.merge([SAMPLE_DIR / "z_first_merge.pdf", SAMPLE_DIR / "a_merge_second.pdf"]).pdf_format( + gt_format, + ), + ) assert resp.status_code == codes.OK assert "Content-Type" in resp.headers @@ -58,14 +70,31 @@ def test_merge_files_pdf_a( if SAVE_OUTPUTS: (SAVE_DIR / f"test_libre_office_convert_xlsx_format_{pike_format}.pdf").write_bytes(resp.content) - def test_pdf_a_multiple_file( + def test_merge_multiple_file( self, client: GotenbergClient, - create_files: List[Path], ): - with client.merge.merge() as route: - resp = call_run_with_server_error_handling(route.merge(create_files)) + if shutil.which("pdftotext") is None: + pytest.skip("No pdftotext executable found") + else: + with client.merge.merge() as route: + # By default, these would not merge correctly + route.merge([SAMPLE_DIR / "z_first_merge.pdf", SAMPLE_DIR / "a_merge_second.pdf"]) + resp = call_run_with_server_error_handling(route) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + with tempfile.NamedTemporaryFile(mode="wb") as tmp: + tmp.write(resp.content) + + text = extract_text(tmp.name) + lines = text.split("\n") + # Extra is empty line + assert len(lines) == 3 + assert "first PDF to be merged." in lines[0] + assert "second PDF to be merged." in lines[1] - assert resp.status_code == codes.OK - assert "Content-Type" in resp.headers - assert resp.headers["Content-Type"] == "application/pdf" + if SAVE_OUTPUTS: + (SAVE_DIR / "test_pdf_a_multiple_file.pdf").write_bytes(resp.content) diff --git a/tests/utils.py b/tests/utils.py index 0db432b..29f97d7 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -33,16 +33,16 @@ def call_run_with_server_error_handling(route: BaseRoute) -> Response: one attempt to parse. This will wait the following: - - Attempt 1 - 20s following failure - - Attempt 2 - 40s following failure - - Attempt 3 - 80s following failure - - Attempt 4 - 160s - - Attempt 5 - 320s + - Attempt 1 - 5s following failure + - Attempt 2 - 10s following failure + - Attempt 3 - 20s following failure + - Attempt 4 - 40s following failure + - Attempt 5 - 80s following failure """ result = None succeeded = False - retry_time = 20.0 + retry_time = 5.0 retry_count = 0 max_retry_count = 5 From e4483336483232d123e28ea3ca46b349e1ff6f86 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 17 Oct 2023 09:45:18 -0700 Subject: [PATCH 2/6] Enables additional Ruff linting rules --- .pre-commit-config.yaml | 4 ++-- pyproject.toml | 23 ++++++++++++++++++++--- src/gotenberg_client/_base.py | 2 +- src/gotenberg_client/_client.py | 2 -- src/gotenberg_client/_health.py | 6 +++--- src/gotenberg_client/_utils.py | 4 ++-- 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3680700..a50d88d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,10 +37,10 @@ repos: exclude: "(^Pipfile\\.lock$)" # Python hooks - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.0.292' + rev: 'v0.1.0' hooks: - id: ruff - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 23.10.0 hooks: - id: black diff --git a/pyproject.toml b/pyproject.toml index b11e949..14cc037 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,7 +90,7 @@ python = ["3.8", "3.9", "3.10", "3.11", "3.12"] [tool.hatch.envs.pre-commit] dependencies = [ - "pre-commit>=3.4.0", + "pre-commit>=3.5.0", ] [tool.hatch.envs.pre-commit.scripts] @@ -101,8 +101,8 @@ update = ["pre-commit autoupdate"] detached = true dependencies = [ "black>=23.9.1", - "mypy>=1.0.0", - "ruff>=0.0.292", + "mypy>=1.6.0", + "ruff>=0.1.0", "httpx", ] @@ -112,6 +112,7 @@ typing = [ "mypy --install-types --non-interactive {args:src/gotenberg_client}" ] style = [ + "ruff --version", "ruff {args:.}", "black --check --diff {args:.}", ] @@ -135,25 +136,33 @@ fix = true output-format = "grouped" target-version = "py38" line-length = 120 +# https://docs.astral.sh/ruff/rules/ extend-select = [ "A", "ARG", "B", "C", + "C4", "COM", "DTZ", "E", "EM", + "ERA", "EXE", "F", "FBT", + "FLY", "I", "ICN", "INP", + "INT", "ISC", "N", + "PERF", "PIE", + "PGH", "PTH", + "PL", "PLC", "PLE", "PLR", @@ -164,8 +173,14 @@ extend-select = [ "RUF", "S", "SIM", + "SLF", "T", + "T10", + "T20", + "TCH", + "TD", "TID", + "TRY", "UP", "W", "YTT", @@ -179,6 +194,8 @@ ignore = [ "S105", "S106", "S107", # Ignore complexity "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", + # Ignore no author and missing issue link in TODO tags + "TD002", "TD003" ] [tool.ruff.isort] diff --git a/src/gotenberg_client/_base.py b/src/gotenberg_client/_base.py index 4beb317..ed9b333 100644 --- a/src/gotenberg_client/_base.py +++ b/src/gotenberg_client/_base.py @@ -87,7 +87,7 @@ def get_files(self) -> RequestFiles: {filename: (filename, self._stack.enter_context(file_path.open("rb")), mime_type)}, ) else: # pragma: no cover - files.update({filename: (filename, self._stack.enter_context(file_path.open("rb")))}) # type: ignore + files.update({filename: (filename, self._stack.enter_context(file_path.open("rb")))}) # type: ignore [dict-item] return files def _add_file_map(self, filepath: Path, name: Optional[str] = None) -> None: diff --git a/src/gotenberg_client/_client.py b/src/gotenberg_client/_client.py index 6a8ce84..dc6bf4c 100644 --- a/src/gotenberg_client/_client.py +++ b/src/gotenberg_client/_client.py @@ -37,7 +37,6 @@ def __init__( logging.getLogger("httpx").setLevel(log_level) logging.getLogger("httpcore").setLevel(log_level) - # TODO Brotli? if find_spec("brotli") is not None: self._client.headers.update({"Accept-Encoding": "gzip,deflate,br"}) else: @@ -49,7 +48,6 @@ def __init__( self.pdf_a = PdfAApi(self._client) self.merge = MergeApi(self._client) self.health = HealthCheckApi(self._client) - # TODO def add_headers(self, header: Dict[str, str]) -> None: # pragma: no cover """ diff --git a/src/gotenberg_client/_health.py b/src/gotenberg_client/_health.py index 3f717bc..479ee74 100644 --- a/src/gotenberg_client/_health.py +++ b/src/gotenberg_client/_health.py @@ -78,9 +78,9 @@ def _extract_status(self, module: ModuleOptions) -> ModuleStatus: status = StatusOptions(self.data["details"][module.value]["status"]) # mypy is quite wrong here, it's clearly marked as a datetime.datetime, not Any - timestamp = self._extract_datetime(self.data["details"][module.value]["timestamp"]) # type: ignore - # Also wrong here - return ModuleStatus(status, timestamp) # type: ignore + # but ... + timestamp: datetime.datetime = self._extract_datetime(self.data["details"][module.value]["timestamp"]) + return ModuleStatus(status, timestamp) @staticmethod @no_type_check diff --git a/src/gotenberg_client/_utils.py b/src/gotenberg_client/_utils.py index 5c80146..72977aa 100644 --- a/src/gotenberg_client/_utils.py +++ b/src/gotenberg_client/_utils.py @@ -33,9 +33,9 @@ def guess_mime_type_magic(url: Path) -> Optional[str]: """ Uses libmagic to guess the mimetype """ - import magic # type: ignore + import magic # type: ignore [import-not-found] - return magic.from_file(url, mime=True) # type: ignore + return magic.from_file(url, mime=True) # type: ignore [misc] # Use the best option From 82e6cb7bda0bedf165dc3292e27af90aec93191c Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 17 Oct 2023 09:56:23 -0700 Subject: [PATCH 3/6] Adds additional project classifiers --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 14cc037..6436f7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ description = 'A Python client for interfacing with the Gotenberg API' readme = "README.md" requires-python = ">=3.8" license = "MPL-2.0" -keywords = [] +keywords = ["api", "pdf", "html", "client"] authors = [ { name = "Trenton H", email = "rda0128ou@mozmail.com" }, ] @@ -18,7 +18,7 @@ classifiers = [ "Operating System :: OS Independent", "Intended Audience :: Developers", "Environment :: Web Environment", - "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", From 9f2440580b796e5857cabb04fe292f29ee50e962 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 17 Oct 2023 10:51:09 -0700 Subject: [PATCH 4/6] Adds support for output filename and request tracing --- CHANGELOG.md | 4 ++ src/gotenberg_client/_base.py | 13 ++++++- src/gotenberg_client/_convert/chromium.py | 2 +- tests/test_merge.py | 25 +------------ tests/test_misc_stuff.py | 45 +++++++++++++++++++++++ tests/utils.py | 27 ++++++++++++++ 6 files changed, 89 insertions(+), 27 deletions(-) create mode 100644 tests/test_misc_stuff.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9130e4e..06925db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Support for the output filename and request tracing for all routes + ### Fixed - An issue with the sorting of merging PDFs. Expanded testing to cover the merged ordering diff --git a/src/gotenberg_client/_base.py b/src/gotenberg_client/_base.py index ed9b333..1701039 100644 --- a/src/gotenberg_client/_base.py +++ b/src/gotenberg_client/_base.py @@ -32,6 +32,7 @@ def __init__(self, client: Client, api_route: str) -> None: self._stack = ExitStack() self._form_data: Dict[str, str] = {} self._file_map: Dict[str, Path] = {} + self._headers: Dict[str, str] = {} def __enter__(self) -> Self: self.reset() @@ -66,11 +67,11 @@ def run(self) -> Response: Response. TODO: It would be nice to return a simpler response to the user """ - resp = self._client.post(url=self._route, data=self._form_data, files=self.get_files()) + resp = self._client.post(url=self._route, headers=self._headers, data=self._form_data, files=self._get_files()) resp.raise_for_status() return resp - def get_files(self) -> RequestFiles: + def _get_files(self) -> RequestFiles: """ Deals with opening all provided files for multi-part uploads, including pushing their new contexts onto the stack to ensure resources like file @@ -110,6 +111,14 @@ def pdf_format(self, pdf_format: PdfAFormat) -> "BaseRoute": self._form_data.update(pdf_format.to_form()) return self + def trace(self, trace_id: str) -> "BaseRoute": + self._headers["Gotenberg-Trace"] = trace_id + return self + + def output_name(self, filename: str) -> "BaseRoute": + self._headers["Gotenberg-Output-Filename"] = filename + return self + class BaseApi: """ diff --git a/src/gotenberg_client/_convert/chromium.py b/src/gotenberg_client/_convert/chromium.py index 5ffc18f..dfbc461 100644 --- a/src/gotenberg_client/_convert/chromium.py +++ b/src/gotenberg_client/_convert/chromium.py @@ -140,7 +140,7 @@ def url(self, url: str) -> Self: self._form_data["url"] = url return self - def get_files(self) -> ForceMultipartDict: + def _get_files(self) -> ForceMultipartDict: return FORCE_MULTIPART diff --git a/tests/test_merge.py b/tests/test_merge.py index 3239dba..49be37a 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -1,5 +1,4 @@ import shutil -import subprocess import tempfile from pathlib import Path @@ -13,29 +12,7 @@ from tests.conftest import SAVE_DIR from tests.conftest import SAVE_OUTPUTS from tests.utils import call_run_with_server_error_handling - - -def extract_text(pdf_path: Path) -> str: - """ - Using pdftotext from poppler, extracts the text of a PDF into a file, - then reads the file contents and returns it - """ - with tempfile.NamedTemporaryFile( - mode="w+", - ) as tmp: - subprocess.run( - [ # noqa: S603 - shutil.which("pdftotext"), - "-q", - "-layout", - "-enc", - "UTF-8", - str(pdf_path), - tmp.name, - ], - check=True, - ) - return tmp.read() +from tests.utils import extract_text class TestMergePdfs: diff --git a/tests/test_misc_stuff.py b/tests/test_misc_stuff.py new file mode 100644 index 0000000..f6e9cb4 --- /dev/null +++ b/tests/test_misc_stuff.py @@ -0,0 +1,45 @@ +import uuid + +from httpx import codes + +from gotenberg_client._client import GotenbergClient +from tests.conftest import SAMPLE_DIR +from tests.utils import call_run_with_server_error_handling + + +class TestMiscFunctionality: + def test_trace_id_header( + self, + client: GotenbergClient, + ): + trace_id = str(uuid.uuid4()) + with client.merge.merge() as route: + resp = call_run_with_server_error_handling( + route.merge([SAMPLE_DIR / "z_first_merge.pdf", SAMPLE_DIR / "a_merge_second.pdf"]).trace( + trace_id, + ), + ) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + assert "Gotenberg-Trace" in resp.headers + assert resp.headers["Gotenberg-Trace"] == trace_id + + def test_output_filename( + self, + client: GotenbergClient, + ): + filename = "my-cool-file" + with client.merge.merge() as route: + resp = call_run_with_server_error_handling( + route.merge([SAMPLE_DIR / "z_first_merge.pdf", SAMPLE_DIR / "a_merge_second.pdf"]).output_name( + filename, + ), + ) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + assert "Content-Disposition" in resp.headers + assert f"{filename}.pdf" in resp.headers["Content-Disposition"] diff --git a/tests/utils.py b/tests/utils.py index 29f97d7..1578004 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,9 @@ +import shutil +import subprocess +import tempfile import time import warnings +from pathlib import Path from httpx import HTTPStatusError from httpx import Response @@ -60,3 +64,26 @@ def call_run_with_server_error_handling(route: BaseRoute) -> Response: retry_time = retry_time * 2.0 return result + + +def extract_text(pdf_path: Path) -> str: + """ + Using pdftotext from poppler, extracts the text of a PDF into a file, + then reads the file contents and returns it + """ + with tempfile.NamedTemporaryFile( + mode="w+", + ) as tmp: + subprocess.run( + [ # noqa: S603 + shutil.which("pdftotext"), + "-q", + "-layout", + "-enc", + "UTF-8", + str(pdf_path), + tmp.name, + ], + check=True, + ) + return tmp.read() From 49cd25d04d7e2bdfef3eed27342683d4bd076932 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 17 Oct 2023 10:56:31 -0700 Subject: [PATCH 5/6] Removes Brotli and compression related things, Gotenberg doesn't ever seem to --- CHANGELOG.md | 4 ++++ pyproject.toml | 2 -- src/gotenberg_client/_client.py | 6 ------ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06925db..e17b97a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for the output filename and request tracing for all routes +### Removed + +- References to compression and Brotli. Gotenberg doesn't seem to ever compress response data + ### Fixed - An issue with the sorting of merging PDFs. Expanded testing to cover the merged ordering diff --git a/pyproject.toml b/pyproject.toml index 6436f7c..5434bf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,6 @@ Source = "https://github.com/stumpylog/gotenberg-client/" Changelog = "https://github.com/stumpylog/gotenberg-client/blob/main/CHANGELOG.md" [project.optional-dependencies] -compression = ["httpx[http2,brotli] ~= 0.24"] magic = ["python-magic"] [tool.hatch.version] @@ -60,7 +59,6 @@ dependencies = [ "pytest-httpx ~= 0.22; python_version < '3.9'", "pikepdf", "python-magic", - "brotli", ] [tool.hatch.envs.default.scripts] diff --git a/src/gotenberg_client/_client.py b/src/gotenberg_client/_client.py index dc6bf4c..fc54e2e 100644 --- a/src/gotenberg_client/_client.py +++ b/src/gotenberg_client/_client.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: MPL-2.0 import logging -from importlib.util import find_spec from types import TracebackType from typing import Dict from typing import Optional @@ -37,11 +36,6 @@ def __init__( logging.getLogger("httpx").setLevel(log_level) logging.getLogger("httpcore").setLevel(log_level) - if find_spec("brotli") is not None: - self._client.headers.update({"Accept-Encoding": "gzip,deflate,br"}) - else: - self._client.headers.update({"Accept-Encoding": "gzip,deflate"}) - # Add the resources self.chromium = ChromiumApi(self._client) self.libre_office = LibreOfficeApi(self._client) From 19449b4630724063d307525a5ef6da13d19dec43 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue, 17 Oct 2023 11:11:05 -0700 Subject: [PATCH 6/6] Bumps version to 0.3.0 --- CHANGELOG.md | 2 +- src/gotenberg_client/__about__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e17b97a..016a814 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.3.0] - 2023-10-17 ### Added diff --git a/src/gotenberg_client/__about__.py b/src/gotenberg_client/__about__.py index 69bbe97..1812219 100644 --- a/src/gotenberg_client/__about__.py +++ b/src/gotenberg_client/__about__.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: 2023-present Trenton H # # SPDX-License-Identifier: MPL-2.0 -__version__ = "0.2.0" +__version__ = "0.3.0"