Skip to content

Commit

Permalink
embedded file checking, WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
u-fischer committed Dec 19, 2024
1 parent 69c414c commit d4c630a
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 36 deletions.
116 changes: 87 additions & 29 deletions l3pdffile.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,10 @@
%
% \begin{verbatim}
% \pdf_object_new:n {module/filespec/A} % a new filespec object
% \pdf_object_ref:n {module/filespec/A} % a reference
% \pdf_object_unnamed_write:nn { stream }{ {...}{content} } %writing the stream
% % filling and writing the filespec dictionary:
% \pdf_object_ref:n {module/filespec/A} % reference it somewhere, e.g. in AF
% % now write the stream
% \pdf_object_unnamed_write:nn { stream }{ {...}{content} }
% % and fill and write the filespec dictionary:
% \pdffile_filespec:nnn {module/filespec/A}{A.xml}{\pdf_object_ref_last:}
% \end{verbatim}
%
Expand All @@ -362,6 +363,24 @@
% are not supported.
% \end{variable}
%
% \begin{variable} {\g_pdffile_embed_pdfa_int,\g_pdffile_embed_nonpdfa_int}
% These two integers hold the number of embedded files in PDF/A format
% and non-PDF/A format and can be used for a rough test for the requirements
% in l3pdfmeta |no_embed_content| (both should be zero)
% and |only_pdfa_embed_content| (the second should be zero).
% The commands |\pdffile_embed_stream:...| and |\pdffile_embed_file:...|
% increase the integers. As the code can currently not detect if an embedded
% file follows a PDF/A standard it simply goes by the extension: files embedded
% as |.pdf| increase the first integer.
%
% |\pdffile_filespec:nnn| does \emph{not} increase the integers;
% if this command is used, it is the responsibility of the
% author to adjust the integers.
%
% The integers are public so that users
% can query and adjust the values, e.g. in tests for compliance with a standard.
% \end{variable}
%
% \begin{variable}{\l_pdffile_source_name_str}
% This variable is set at the beginning of \cs{pdffile_embed_file:nnn}. It can be
% (and is) used in the file dictionaries, see table~\ref{tab:filedict} for examples.
Expand Down Expand Up @@ -444,6 +463,8 @@
#1
}
% \end{macrocode}
%
% \subsection{Variables}
% \begin{variable}
% {
% \l_@@_tmpa_tl,
Expand Down Expand Up @@ -490,6 +511,28 @@
,.xml = application/xml
}
% \end{macrocode}
%
% \begin{variable} {\g_pdffile_embed_pdfa_int,\g_pdffile_embed_nonpdfa_int}
% These two integers hold the number of embedded files in PDF/A format
% and non-PDF/A format and can be used for a rough test for the requirements
% in l3pdfmeta |no_embed_content| (both should be zero)
% and |only_pdfa_embed_content| (the second should be zero).
% The commands |\pdffile_embed_stream:...| and |\pdffile_embed_file:...|
% increase the integers. As the code can currently not detect if an embedded
% file follows a PDF/A standard it simply goes by the extension: files embedded
% as |.pdf| increase the first integer.
%
% |\pdffile_filespec:nnn| does \emph{not} increase the integers;
% if this command is used, it is the responsibility of the
% author to adjust the integers.
%
% The integers are public so that users
% can query and adjust the values, e.g. in tests for compliance with a standard.
% \begin{macrocode}
\int_new:N\g_pdffile_embed_pdfa_int
\int_new:N\g_pdffile_embed_nonpdfa_int
% \end{macrocode}
% \end{variable}
% \begin{variable}
% {
% \l_pdffile_source_name_str
Expand Down Expand Up @@ -556,43 +599,54 @@
% \begin{macro}{\pdffile_embed_file:nnn, \pdffile_embed_stream:nnn, \pdffile_embed_stream:nnN}
% At first a command to set the mimetype. It either uses the current value
% in the file dictionary, or tries to guess it from the extension.
% \begin{macro}{\@@_mimetype_set:nN,\@@_mimetype_set:VN}
%
% \begin{macro}{\@@_mimetype_set:nNN,\@@_mimetype_set:VNN}
% \begin{macro}{\@@_fstream_write:nN, \@@_fstream_write:VN}
% \begin{macro}{\@@_stream_write:nN, \@@_stream_write:VN}
% \begin{macrocode}
%#1 file name,
%#2 tl to return the (printed) value for the guessed mimetype
\cs_new_protected:Npn \@@_mimetype_set:nN #1 #2
%#3 tl to return the file extension (that is a string)
\cs_new_protected:Npn \@@_mimetype_set:nNN #1 #2 #3
{
\file_parse_full_name:nNNN
{#1}
\l_@@_tmpa_str %unused
\l_@@_tmpb_str %unused
\l_@@_ext_str
%check if Subtype has been set
\pdfdict_get:nnN { l_pdffile}{Subtype}\l_@@_tmpa_tl
%if not look up in the prop:
\quark_if_no_value:NT \l_@@_tmpa_tl
{
\prop_get:NVNTF
\g_pdffile_mimetypes_prop
\l_@@_ext_str
%check if Subtype has been set
\pdfdict_get:nnN { l_pdffile}{Subtype}\l_@@_tmpa_tl
%if not look up in the prop:
\quark_if_no_value:NT \l_@@_tmpa_tl
\l_@@_tmpb_tl
{
\prop_get:NVNTF
\g_pdffile_mimetypes_prop
\l_@@_ext_str
\l_@@_tmpb_tl
{
\tl_set:Ne #2 {/Subtype~\pdf_name_from_unicode_e:V \l_@@_tmpb_tl}
}
{
\msg_warning:nne { pdffile }{ mimetype-missing} {#1}
\tl_clear:N #2
}
\tl_set:Ne #2 {/Subtype~\pdf_name_from_unicode_e:V \l_@@_tmpb_tl}
}
{
\msg_warning:nne { pdffile }{ mimetype-missing} {#1}
\tl_clear:N #2
}
}
}
\tl_set_eq:NN #3 \l_@@_ext_str
}

\cs_generate_variant:Nn \@@_mimetype_set:nNN {VNN}

\cs_generate_variant:Nn \@@_mimetype_set:nN {VN}
% Counts one embedded file in the public counters, going only by its extension:
% the extension .pdf increments \g_pdffile_embed_pdfa_int, every other
% extension increments \g_pdffile_embed_nonpdfa_int.
% The comparison is a string comparison against ".pdf", so the leading dot
% has to be part of the stored extension.
% #1 tl containing a file extension
\cs_new_protected:Npn \@@_count_embed:N #1
{
\str_if_eq:VnTF #1 {.pdf}
{\int_gincr:N \g_pdffile_embed_pdfa_int }
{\int_gincr:N \g_pdffile_embed_nonpdfa_int }
}

%#1 file name,
%#2 tl, should be empty or contain /Subtype /mimetype
% e.g. result from \@@_mimetype_set:NN
% e.g. result from \@@_mimetype_set:nNN
\cs_new_protected:Npn \@@_fstream_write:nN #1 #2
{
\pdf_object_unnamed_write:ne { fstream }
Expand All @@ -617,7 +671,7 @@

%#1 file content
%#2 tl, should be empty or contain /Subtype /mimetype
% e.g. result from \@@_mimetype_set:NN
% e.g. result from \@@_mimetype_set:nNN
\cs_new_protected:Npn \@@_stream_write:nN #1 #2
{
\pdf_object_unnamed_write:ne { stream }
Expand Down Expand Up @@ -714,9 +768,11 @@
{
\file_get_full_name:nNTF {#1} \l_pdffile_source_name_str
{
\@@_mimetype_set:VN
\@@_mimetype_set:VNN
\l_pdffile_source_name_str
\l_@@_automimetype_tl
\l_@@_tmpa_tl
\@@_count_embed:N \l_@@_tmpa_tl
\@@_fstream_write:VN
\l_pdffile_source_name_str
\l_@@_automimetype_tl
Expand Down Expand Up @@ -778,8 +834,9 @@
{ #3 }
{{stream}{}{\tl_if_blank:nTF {#2}{stream.txt}{\exp_not:n{#2}}}}
\tl_if_blank:nTF {#2}
{ \@@_mimetype_set:nN {stream.txt}\l_@@_automimetype_tl}
{ \@@_mimetype_set:nN { #2 } \l_@@_automimetype_tl }
{ \@@_mimetype_set:nNN {stream.txt}\l_@@_automimetype_tl \l_@@_tmpa_tl}
{ \@@_mimetype_set:nNN { #2 } \l_@@_automimetype_tl \l_@@_tmpa_tl }
\@@_count_embed:N \l_@@_tmpa_tl
\@@_stream_write:nN
{ #1 }
\l_@@_automimetype_tl
Expand All @@ -797,8 +854,9 @@
\cs_new_protected:Npn \pdffile_embed_stream:nnN #1 #2 #3
{
\tl_if_blank:nTF {#2}
{ \@@_mimetype_set:nN {stream.txt}\l_@@_automimetype_tl}
{ \@@_mimetype_set:nN { #2 } \l_@@_automimetype_tl }
{ \@@_mimetype_set:nNN {stream.txt}\l_@@_automimetype_tl \l_@@_tmpa_tl}
{ \@@_mimetype_set:nNN { #2 } \l_@@_automimetype_tl \l_@@_tmpa_tl }
\@@_count_embed:N\l_@@_tmpa_tl
\@@_stream_write:nN
{ #1 }
\l_@@_automimetype_tl
Expand Down
53 changes: 46 additions & 7 deletions l3pdfmeta.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,17 @@
% \item[|no_embed_content|] no |/EF| key in filespec, no |/Type/EmbeddedFiles|.
% \emph{This will be checked in future by \pkg{l3pdffiles}
% for the files it embeds.}
% The restrictment is set for only PDF/A-1b.
% PDF/A-2b and PDF/A3-b lifted this restriction: PDF/A-2b allows
% The restriction is set only for PDF/A-1 versions.
% PDF/A-2 and PDF/A-3 lifted this restriction: PDF/A-2 allows
% to embed other PDF documents conforming to either PDF/A-1 or PDF/A-2,
% and PDF/A-3 allows any embedded files. I don't see a way to test the
% PDF/A-2b requirement so currently it will simply allow everything. Perhaps
% and PDF/A-3 and PDF/A-4F allow any embedded files.
%
% \item[|only_pdfa_embed_content|]
% This is set for PDF/A-2a, PDF/A-2b, PDF/A-2u and PDF/A-4.
% I don't see a way to test the
% PDF/A-2 requirement so currently it will simply allow everything. Perhaps
% a test for at least the PDF-format will be added in future.
%
% \item[|Catalog_no_OCProperties|] don't add |/OCProperties| to the catalog
% {\em l3pdfmeta removes this entry at the end of the document}
% \item[|Catalog_OCProperties_no_AS|]
Expand Down Expand Up @@ -971,7 +976,10 @@
% embedding files is allowed (with restrictions)
\prop_gremove:cn
{ g_@@_standard_pdf/A-2B_prop }
{ embed_content}
{ no_embed_content }
% PDF/A-2 only allows embedded files that are themselves PDF/A.
% The requirement is a flag, so it is stored with an empty value;
% \prop_gput:cnn needs all three arguments (prop, key, value).
\prop_gput:cnn
{ g_@@_standard_pdf/A-2B_prop }
{ only_pdfa_embed_content } { }
\prop_gput:cnn
{ g_@@_standard_pdf/A-2B_prop }{max_pdf_version}{1.7}
\prop_gput:cnn
Expand Down Expand Up @@ -1017,10 +1025,10 @@
{ g_@@_standard_pdf/A-3B_prop }{year}{2012}
\prop_gput:cnn
{ g_@@_standard_pdf/A-3B_prop }{level}{3}
% embedding files is allowed (with restrictions)
% embedding files is allowed
\prop_gremove:cn
{ g_@@_standard_pdf/A-3B_prop }
{ embed_content}
{ only_pdfa_embed_content }
%A-3u ==============
\prop_new:c { g_@@_standard_pdf/A-3U_prop }
\prop_gset_eq:cc
Expand Down Expand Up @@ -1062,6 +1070,9 @@
{ g_@@_standard_pdf/A-4_prop }{no_CharSet}{}
\prop_gput:cnn
{ g_@@_standard_pdf/A-4_prop }{Trailer_no_Info}{}
% PDF/A-4 only allows embedded files that are themselves PDF/A.
% The requirement is a flag, stored with an empty value like
% no_CharSet and Trailer_no_Info; \prop_gput:cnn needs prop, key and value.
\prop_gput:cnn
{ g_@@_standard_pdf/A-4_prop }
{ only_pdfa_embed_content } { }
\prop_gremove:cn
{ g_@@_standard_pdf/A-4_prop }{conformance}
\prop_gremove:cn
Expand All @@ -1078,6 +1089,10 @@
% containsEmbeddedFiles == true ISO 19005-4:2020, Clause: 6.9, Test number: 5
\prop_gput:cnn
{ g_@@_standard_pdf/A-4F_prop }{Catalog_EmbeddedFiles}{}
% PDF/A-4F can contain any file, so the restriction is lifted for A-4F.
% The key must be removed from the A-4F prop: removing it from the A-4 prop
% would drop the restriction for plain PDF/A-4 instead.
\prop_gremove:cn
{ g_@@_standard_pdf/A-4F_prop }
{ only_pdfa_embed_content }
% \end{macrocode}
% \end{variable}
%
Expand Down Expand Up @@ -1113,6 +1128,30 @@
}
}
% \end{macrocode}
% Before writing the XML we check if there are embedded files we know of. For A-4 we adjust
% the standard to A-4F if needed.
% \begin{macrocode}
\AddToHook{pdfmeta/xmp}
  {
    % Runs in the pdfmeta/xmp hook and compares the counters of embedded
    % files with the requirements of the current standard.
    % NOTE(review): this relies on the false branch of
    % \pdfmeta_standard_verify:nF being the one taken when the requirement
    % is part of the active standard -- confirm against l3pdfmeta.
    %
    % no_embed_content: no embedded file at all is allowed, so both counters
    % have to be zero. One non-zero counter is already a violation, hence
    % the test must be a lazy "or", not a lazy "and".
    \pdfmeta_standard_verify:nF { no_embed_content }
      {
        \bool_lazy_or:nnT
          { ! \int_if_zero_p:N \g_pdffile_embed_pdfa_int }
          { ! \int_if_zero_p:N \g_pdffile_embed_nonpdfa_int }
          {
            % not allowed warning
          }
      }
    % only_pdfa_embed_content: files counted as PDF/A are fine, only the
    % counter of non-PDF/A files has to be zero.
    \pdfmeta_standard_verify:nF { only_pdfa_embed_content }
      {
        \int_if_zero:NF \g_pdffile_embed_nonpdfa_int
          {
            % not allowed warning, change standard
          }
      }
  }
% \end{macrocode}
%
% \subsubsection{Colorprofiles and Outputintents}
% The following provides a minimum of interface to add a color profile
% and an outputintent needed for PDF/A for now. There will be need to extend it later,
Expand Down

0 comments on commit d4c630a

Please sign in to comment.