From a7eeaa39593a3c2f9c0f455e8bae820a92cbaac1 Mon Sep 17 00:00:00 2001 From: bruce miller Date: Mon, 25 Sep 2023 21:15:09 -0400 Subject: [PATCH] Non urls (#2213) * url.sty macros don't create hyperlinks; Safer internal macronames * hyperref macros with * don't make hyperlinks; safer internal macro names * \cref* doesn't create hyperlinks * Update url.sty test cases --- lib/LaTeXML/Package/cleveref.sty.ltxml | 36 +++++++++-------- lib/LaTeXML/Package/hyperref.sty.ltxml | 22 ++++++++--- lib/LaTeXML/Package/url.sty.ltxml | 19 ++++----- t/expansion/urls.xml | 54 +++++++++++++------------- t/tokenize/percent.xml | 10 ++--- t/tokenize/url.xml | 20 +++++----- 6 files changed, 88 insertions(+), 73 deletions(-) diff --git a/lib/LaTeXML/Package/cleveref.sty.ltxml b/lib/LaTeXML/Package/cleveref.sty.ltxml index 593044171a..3336bb1498 100644 --- a/lib/LaTeXML/Package/cleveref.sty.ltxml +++ b/lib/LaTeXML/Package/cleveref.sty.ltxml @@ -45,10 +45,11 @@ sub splitLabels { return split(/\s*,\s*/, ToString($labels)); } sub crefMulti { - my ($labels, $showtype, $capitalized) = @_; + my ($starred, $labels, $showtype, $capitalized) = @_; my @labels = splitLabels($labels); if (scalar(@labels) < 2) { return Invocation(T_CS('\lx@cref'), + $starred, T_OTHER($showtype ? ($capitalized ? "creftypecap~refnum" @@ -57,6 +58,7 @@ sub crefMulti { $labels[0]); } else { my @tokens = (Invocation(T_CS('\lx@cref'), + $starred, T_OTHER($showtype ? ($capitalized ? "creftypepluralcap~refnum" @@ -65,37 +67,37 @@ sub crefMulti { shift(@labels))); if (scalar(@labels == 1)) { push(@tokens, T_CS('\crefpairconjunction'), - Invocation(T_CS('\lx@cref'), T_OTHER('refnum'), $labels[0])); } + Invocation(T_CS('\lx@cref'), $starred, T_OTHER('refnum'), $labels[0])); } else { while (scalar(@labels) > 1) { push(@tokens, T_CS('\crefmiddleconjunction'), - Invocation(T_CS('\lx@cref'), T_OTHER('refnum'), shift(@labels))); } + Invocation(T_CS('\lx@cref'), $starred, T_OTHER('refnum'), shift(@labels))); } push(@tokens, T_CS('\creflastconjunction'), - Invocation(T_CS('\lx@cref'), T_OTHER('refnum'), shift(@labels))); } + Invocation(T_CS('\lx@cref'), $starred, T_OTHER('refnum'), shift(@labels))); } return @tokens; } } # Since we're not grouping by type, we're ignoring \crefpairgroupconjunction, etc -DefConstructor('\lx@cref {} Semiverbatim', - "", - properties => sub { (label => CleanLabel($_[2])); }); +DefConstructor('\lx@cref OptionalMatch:* {} Semiverbatim', + "", + properties => sub { (label => CleanLabel($_[3])); }); -DefMacro('\cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 0); }); -DefMacro('\Cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 1); }); +DefMacro('\cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 0); }); +DefMacro('\Cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 1); }); DefMacro('\crefrange OptionalMatch:* Semiverbatim Semiverbatim', - '\lx@cref{creftypeplural~refnum}{#2}\crefrangeconjunction\ref{#3}'); + '\lx@cref#1{creftypeplural~refnum}{#2}\crefrangeconjunction\ref{#3}'); DefMacro('\Crefrange OptionalMatch:* Semiverbatim Semiverbatim', - '\lx@cref{creftypepluralcap~refnum}{#2}\crefrangeconjunction\ref{#3}'); + '\lx@cref#1{creftypepluralcap~refnum}{#2}\crefrangeconjunction\ref{#3}'); # Make page refs same as regular? -DefMacro('\cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 0); }); -DefMacro('\Cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 1); }); +DefMacro('\cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 0); }); +DefMacro('\Cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 1); }); # More likely with page ranges that the types are different? DefMacro('\cpagerefrange OptionalMatch:* Semiverbatim Semiverbatim', - '\lx@cref{creftype~refnum}{#2}\crefrangeconjunction\lx@cref{creftype~refnum}{#3}'); + '\lx@cref#1{creftype~refnum}{#2}\crefrangeconjunction\lx@cref#1{creftype~refnum}{#3}'); DefMacro('\Cpagerefrange OptionalMatch:* Semiverbatim Semiverbatim', - '\lx@cref{creftypecap~refnum}{#2}\crefrangeconjunction\lx@ref{creftype~refnum{#3}'); + '\lx@cref#1{creftypecap~refnum}{#2}\crefrangeconjunction\lx@cref#1{creftype~refnum{#3}'); DefMacro('\namecref Semiverbatim', '\lx@cref{creftype}{#1}'); DefMacro('\nameCref Semiverbatim', '\lx@cref{creftypecap}{#1}'); @@ -104,8 +106,8 @@ DefMacro('\nameCrefs Semiverbatim', '\lx@cref{creftypepluralcap}{#1}'); DefMacro('\lcnamecref Semiverbatim', '\lx@cref{creftype}{#1}'); DefMacro('\lcnamecrefs Semiverbatim', '\lx@cref{creftypeplural}{#1}'); -DefMacro('\labelcref Semiverbatim', sub { crefMulti($_[1], 0, 0); }); -DefMacro('\labelcpageref Semiverbatim', sub { crefMulti($_[1], 0, 0); }); +DefMacro('\labelcref Semiverbatim', sub { crefMulti(undef, $_[1], 0, 0); }); +DefMacro('\labelcpageref Semiverbatim', sub { crefMulti(undef, $_[1], 0, 0); }); # No, this isn't quite the same thing... DefPrimitive('\crefalias {}{}', sub { diff --git a/lib/LaTeXML/Package/hyperref.sty.ltxml b/lib/LaTeXML/Package/hyperref.sty.ltxml index f8aba2e6f2..a5417de254 100644 --- a/lib/LaTeXML/Package/hyperref.sty.ltxml +++ b/lib/LaTeXML/Package/hyperref.sty.ltxml @@ -153,12 +153,12 @@ DefRegister('\pdfcompresslevel', Number(0)); # Additional User Macros # \href{url}{text} -DefMacro('\href HyperVerbatim {}', '\@@Url\href{}{}{#1}{#2}'); +DefMacro('\href HyperVerbatim {}', '\lx@hyper@url@\href{}{}{#1}{#2}'); -# \url{url} from url.sty... well sorta +# Redefine \url{url} from url.sty... # It's slightly different in that it expands the argument # Redefine \@url to sanitize the argument less -DefMacro('\@Url Token', sub { +DefMacro('\lx@hyper@url Token', sub { my ($gullet, $cmd) = @_; my ($open, $close, $url); $open = $gullet->readToken; @@ -174,14 +174,26 @@ DefMacro('\@Url Token', sub { my @toks = grep { $_->getCatcode != CC_SPACE; } $url->unlist; # Identical with url's \@Url except, let CS's through! @toks = map { (($_->getCatcode == CC_CS) ? $_ : T_OTHER(ToString($_))) } @toks; - (Invocation(T_CS('\@@Url'), + (Invocation(T_CS('\lx@hyper@url@'), T_OTHER(ToString($cmd)), Tokens($open), Tokens($close), Tokens(@toks), Tokens(T_CS('\UrlFont'), T_CS('\UrlLeft'), @toks, T_CS('\UrlRight')))->unlist, T_CS('\endgroup')); }); +# RE-define from url w +DefMacro('\url', '\begingroup\lx@hyper@url\url', locked => 1); + +DefConstructor('\lx@hyper@url@ Undigested {}{} Semiverbatim {}', + "?#isMath(#5)" # Allow this to work in Math! + . " (#5)", + properties => sub { (href => ComposeURL(LookupValue('BASE_URL'), $_[4]), + class => sub { my $c = ToString($_[1]); $c =~ s/^\\//; 'ltx_' . $c; }); }, + sizer => '#5', + reversion => '#1#2#4#3'); + # \nolinkurl{url} -DefConstructor('\nolinkurl Semiverbatim', '#1'); +DefConstructor('\nolinkurl Semiverbatim', + "#1"); # \hyperbaseurl{url} DefPrimitive('\hyperbaseurl Semiverbatim', sub { AssignValue(BASE_URL => ToString($_[1])); }); diff --git a/lib/LaTeXML/Package/url.sty.ltxml b/lib/LaTeXML/Package/url.sty.ltxml index 6f9c9c8bb4..e886757f01 100644 --- a/lib/LaTeXML/Package/url.sty.ltxml +++ b/lib/LaTeXML/Package/url.sty.ltxml @@ -38,13 +38,13 @@ Let('\UrlRight', '\@empty'); # \DeclareUrlCommand\cmd{settings} # Have this expand into \@Url w/ the declared cmd as arg, so it gets reflected in XML. -DefMacro('\DeclareUrlCommand{}{}', '\def#1{\begingroup #2\@Url#1}'); +DefMacro('\DeclareUrlCommand{}{}', '\def#1{\begingroup #2\lx@url@url#1}'); # This is an extended version of \Url that takes an extra token as 1st arg. # That token is the cs that invoked it, so that it can be reflected in the generated XML, # as well as used to generate the reversion. # In any case, we read the verbatim arg, and build a Whatsit for @@Url -DefMacro('\@Url Token', sub { +DefMacro('\lx@url@url Token', sub { my ($gullet, $cmd) = @_; my ($open, $close, $url); StartSemiverbatim('%'); @@ -58,7 +58,7 @@ DefMacro('\@Url Token', sub { EndSemiverbatim(); my @toks = grep { $_->getCatcode != CC_SPACE; } (ref $url ? $url->unlist : ()); @toks = map { T_OTHER(ToString($_)) } @toks; - (Invocation(T_CS('\@@Url'), + (Invocation(T_CS('\lx@url@url@nolink'), T_OTHER(ToString($cmd)), Tokens($open), Tokens($close), Tokens(@toks), Tokens(T_CS('\UrlFont'), T_CS('\UrlLeft'), @toks, T_CS('\UrlRight')))->unlist, @@ -68,20 +68,21 @@ DefMacro('\@Url Token', sub { DefMacro('\Url', sub { my ($gullet) = @_; $gullet->unread(T_OTHER('\Url')); - (T_CS('\@Url')); }); + (T_CS('\lx@url@url')); }); # \@@Url cmd {open}{close}{url}{formattedurl} -DefConstructor('\@@Url Undigested {}{} Semiverbatim {}', - "?#isMath(#5)" # Allow this to work in Math! - . " (#5)", +#DefConstructor('\@@Url Undigested {}{} Semiverbatim {}', +DefConstructor('\lx@url@url@nolink Undigested {}{} Semiverbatim {}', + "?#isMath(#5)" # Allow this to work in Math! + . " (#5)", properties => sub { (href => ComposeURL(LookupValue('BASE_URL'), $_[4]), class => sub { my $c = ToString($_[1]); $c =~ s/^\\//; 'ltx_' . $c; }); }, sizer => '#5', reversion => '#1#2#4#3'); # These are the expansions of \DeclareUrlCommand -DefMacro('\path', '\begingroup\urlstyle{tt}\@Url\path'); -DefMacro('\url', '\begingroup\@Url\url', locked => 1); +DefMacro('\path', '\begingroup\urlstyle{tt}\lx@url@url\path'); +DefMacro('\url', '\begingroup\lx@url@url\url', locked => 1); # \urldef{newcmd}\cmd{arg} # Kinda tricky, since we need to get the expansion of \cmd as the value of \newcmd diff --git a/t/expansion/urls.xml b/t/expansion/urls.xml index 61d89a5dae..24988c9c3e 100644 --- a/t/expansion/urls.xml +++ b/t/expansion/urls.xml @@ -22,7 +22,7 @@ 1st item -

https://example.com/wordword;

+

https://example.com/wordword;

@@ -31,7 +31,7 @@ 2nd item -

https://example.com/~User;

+

https://example.com/~User;

@@ -40,7 +40,7 @@ 3rd item -

https://example.com/someplace#id;

+

https://example.com/someplace#id;

@@ -49,7 +49,7 @@ 4th item -

https://example.com/user@password;

+

https://example.com/user@password;

@@ -58,7 +58,7 @@ 5th item -

https://example.com/user&param;

+

https://example.com/user&param;

@@ -67,7 +67,7 @@ 6th item -

https://example.com/with_underscore;

+

https://example.com/with_underscore;

@@ -76,7 +76,7 @@ 7th item -

https://example.com/with^caret;

+

https://example.com/with^caret;

@@ -85,7 +85,7 @@ 8th item -

https://example.com/with$dollar;

+

https://example.com/with$dollar;

@@ -94,7 +94,7 @@ 9th item -

https://example.com/encoded%code.

+

https://example.com/encoded%code.

@@ -110,7 +110,7 @@ 1st item -

https://example.com/someplace\#id;

+

https://example.com/someplace\#id;

@@ -119,7 +119,7 @@ 2nd item -

https://example.com/with.{}braces;

+

https://example.com/with.{}braces;

@@ -128,7 +128,7 @@ 3rd item -

https://example.com/withslash\&ampersand;

+

https://example.com/withslash\&ampersand;

@@ -137,7 +137,7 @@ 4th item -

https://example.com/withslash\_underscore;

+

https://example.com/withslash\_underscore;

@@ -146,7 +146,7 @@ 5th item -

https://example.com/withslash\^caret;

+

https://example.com/withslash\^caret;

@@ -155,7 +155,7 @@ 6th item -

https://example.com/withslash\$dollar;

+

https://example.com/withslash\$dollar;

@@ -164,7 +164,7 @@ 7th item -

https://example.com/withslash\%percent;

+

https://example.com/withslash\%percent;

@@ -173,7 +173,7 @@ 8th item -

https://example.com/unexpandedmacro.\macro.

+

https://example.com/unexpandedmacro.\macro.

@@ -185,42 +185,42 @@ 1 1 footnote 1 - https://example.com/wordword + https://example.com/wordword 2 2 footnote 2 - https://example.com/~User + https://example.com/~User 3 3 footnote 3 - https://example.com/someplace#id + https://example.com/someplace#id 4 4 footnote 4 - https://example.com/user@password + https://example.com/user@password 5 5 footnote 5 - https://example.com/user&param + https://example.com/user&param 6 6 footnote 6 - https://example.com/with_underscore + https://example.com/with_underscore 7 7 footnote 7 - https://example.com/with^caret + https://example.com/with^caret 8 8 footnote 8 - https://example.com/with$dollar + https://example.com/with$dollar Probably unexpected URLs in footnotes @@ -228,12 +228,12 @@ 9 9 footnote 9 - https://example.com/lostpercent + https://example.com/lostpercent 10 10 footnote 10 - https://example.com/slash\%percent + https://example.com/slash\%percent diff --git a/t/tokenize/percent.xml b/t/tokenize/percent.xml index fb788ac6fc..cdd43b1086 100644 --- a/t/tokenize/percent.xml +++ b/t/tokenize/percent.xml @@ -17,17 +17,17 @@ In section

-

http://foo.com/dont%00ignorethis +

http://foo.com/dont%00ignorethis and -http://foo.com/dont%00ignorethis +http://foo.com/dont%00ignorethis However -http://foo.com/dont ignore this either

+http://foo.com/dont ignore this either

-

Consider http://\host/page versus http://\host/page.

+

Consider http://\host/page versus http://\host/page.

-

Consider a_b versus a_b.

+

Consider a_b versus a_b.

When delimited, extra braces are allowed by verb, but not url. diff --git a/t/tokenize/url.xml b/t/tokenize/url.xml index faf967622d..9309697a40 100644 --- a/t/tokenize/url.xml +++ b/t/tokenize/url.xml @@ -13,10 +13,10 @@ <tag close=" ">1</tag>Basic URLS -

Basic url: http://example.com/~user or http://example.com/~user.

+

Basic url: http://example.com/~user or http://example.com/~user.

-

Path url: /foo/bar/baz or /foo/bar/baz

+

Path url: /foo/bar/baz or /foo/bar/baz

@@ -28,16 +28,16 @@ <tag close=" ">2</tag>Verbatimness

Special characters neutralized: -http://example.com/foo_bar, -http://example.com/foo#bar, -http://example.com/foo&bar.

+http://example.com/foo_bar, +http://example.com/foo#bar, +http://example.com/foo&bar.

And even though \baz gives index.html; -C:\foo\bar\baz

+C:\foo\bar\baz

-

OTOH, you get this: http://example.com/\~{}user

+

OTOH, you get this: http://example.com/\~{}user

@@ -48,7 +48,7 @@ <tag close=" ">3</tag>Styles -

Email: myself%node@gateway.net or myself%node@gateway.net.

+

Email: myself%node@gateway.net or myself%node@gateway.net.

@@ -59,7 +59,7 @@ <tag close=" ">4</tag>Defined urls -

Myself: myself%node@gateway.net or myself%node@gateway.net.

+

Myself: myself%node@gateway.net or myself%node@gateway.net.

@@ -70,7 +70,7 @@ <tag close=" ">5</tag>Bracketting -

Fancy url: <url: http://example.com/~user> or <url: http://example.com/~user>.

+

Fancy url: <url: http://example.com/~user> or <url: http://example.com/~user>.