From a7eeaa39593a3c2f9c0f455e8bae820a92cbaac1 Mon Sep 17 00:00:00 2001
From: bruce miller
Date: Mon, 25 Sep 2023 21:15:09 -0400
Subject: [PATCH] Non urls (#2213)
* url.sty macros don't create hyperlinks; safer internal macro names
* hyperref macros with * don't make hyperlinks; safer internal macro names
* \cref* doesn't create hyperlinks
* Update url.sty test cases
---
lib/LaTeXML/Package/cleveref.sty.ltxml | 36 +++++++++--------
lib/LaTeXML/Package/hyperref.sty.ltxml | 22 ++++++++---
lib/LaTeXML/Package/url.sty.ltxml | 19 ++++-----
t/expansion/urls.xml | 54 +++++++++++++-------------
t/tokenize/percent.xml | 10 ++---
t/tokenize/url.xml | 20 +++++-----
6 files changed, 88 insertions(+), 73 deletions(-)
diff --git a/lib/LaTeXML/Package/cleveref.sty.ltxml b/lib/LaTeXML/Package/cleveref.sty.ltxml
index 593044171a..3336bb1498 100644
--- a/lib/LaTeXML/Package/cleveref.sty.ltxml
+++ b/lib/LaTeXML/Package/cleveref.sty.ltxml
@@ -45,10 +45,11 @@ sub splitLabels {
return split(/\s*,\s*/, ToString($labels)); }
sub crefMulti {
- my ($labels, $showtype, $capitalized) = @_;
+ my ($starred, $labels, $showtype, $capitalized) = @_;
my @labels = splitLabels($labels);
if (scalar(@labels) < 2) {
return Invocation(T_CS('\lx@cref'),
+ $starred,
T_OTHER($showtype
? ($capitalized
? "creftypecap~refnum"
@@ -57,6 +58,7 @@ sub crefMulti {
$labels[0]); }
else {
my @tokens = (Invocation(T_CS('\lx@cref'),
+ $starred,
T_OTHER($showtype
? ($capitalized
? "creftypepluralcap~refnum"
@@ -65,37 +67,37 @@ sub crefMulti {
shift(@labels)));
if (scalar(@labels == 1)) {
push(@tokens, T_CS('\crefpairconjunction'),
- Invocation(T_CS('\lx@cref'), T_OTHER('refnum'), $labels[0])); }
+ Invocation(T_CS('\lx@cref'), $starred, T_OTHER('refnum'), $labels[0])); }
else {
while (scalar(@labels) > 1) {
push(@tokens, T_CS('\crefmiddleconjunction'),
- Invocation(T_CS('\lx@cref'), T_OTHER('refnum'), shift(@labels))); }
+ Invocation(T_CS('\lx@cref'), $starred, T_OTHER('refnum'), shift(@labels))); }
push(@tokens, T_CS('\creflastconjunction'),
- Invocation(T_CS('\lx@cref'), T_OTHER('refnum'), shift(@labels))); }
+ Invocation(T_CS('\lx@cref'), $starred, T_OTHER('refnum'), shift(@labels))); }
return @tokens; } }
# Since we're not grouping by type, we're ignoring \crefpairgroupconjunction, etc
-DefConstructor('\lx@cref {} Semiverbatim',
- "",
- properties => sub { (label => CleanLabel($_[2])); });
+DefConstructor('\lx@cref OptionalMatch:* {} Semiverbatim',
+ "",
+ properties => sub { (label => CleanLabel($_[3])); });
-DefMacro('\cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 0); });
-DefMacro('\Cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 1); });
+DefMacro('\cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 0); });
+DefMacro('\Cref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 1); });
DefMacro('\crefrange OptionalMatch:* Semiverbatim Semiverbatim',
- '\lx@cref{creftypeplural~refnum}{#2}\crefrangeconjunction\ref{#3}');
+ '\lx@cref#1{creftypeplural~refnum}{#2}\crefrangeconjunction\ref{#3}');
DefMacro('\Crefrange OptionalMatch:* Semiverbatim Semiverbatim',
- '\lx@cref{creftypepluralcap~refnum}{#2}\crefrangeconjunction\ref{#3}');
+ '\lx@cref#1{creftypepluralcap~refnum}{#2}\crefrangeconjunction\ref{#3}');
# Make page refs same as regular?
-DefMacro('\cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 0); });
-DefMacro('\Cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[2], 1, 1); });
+DefMacro('\cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 0); });
+DefMacro('\Cpageref OptionalMatch:* Semiverbatim', sub { crefMulti($_[1], $_[2], 1, 1); });
# More likely with page ranges that the types are different?
DefMacro('\cpagerefrange OptionalMatch:* Semiverbatim Semiverbatim',
- '\lx@cref{creftype~refnum}{#2}\crefrangeconjunction\lx@cref{creftype~refnum}{#3}');
+ '\lx@cref#1{creftype~refnum}{#2}\crefrangeconjunction\lx@cref#1{creftype~refnum}{#3}');
DefMacro('\Cpagerefrange OptionalMatch:* Semiverbatim Semiverbatim',
- '\lx@cref{creftypecap~refnum}{#2}\crefrangeconjunction\lx@ref{creftype~refnum{#3}');
+ '\lx@cref#1{creftypecap~refnum}{#2}\crefrangeconjunction\lx@cref#1{creftype~refnum}{#3}');    # the star (#1) is forwarded to both endpoints
DefMacro('\namecref Semiverbatim', '\lx@cref{creftype}{#1}');
DefMacro('\nameCref Semiverbatim', '\lx@cref{creftypecap}{#1}');
@@ -104,8 +106,8 @@ DefMacro('\nameCrefs Semiverbatim', '\lx@cref{creftypepluralcap}{#1}');
DefMacro('\lcnamecref Semiverbatim', '\lx@cref{creftype}{#1}');
DefMacro('\lcnamecrefs Semiverbatim', '\lx@cref{creftypeplural}{#1}');
-DefMacro('\labelcref Semiverbatim', sub { crefMulti($_[1], 0, 0); });
-DefMacro('\labelcpageref Semiverbatim', sub { crefMulti($_[1], 0, 0); });
+DefMacro('\labelcref Semiverbatim', sub { crefMulti(undef, $_[1], 0, 0); });
+DefMacro('\labelcpageref Semiverbatim', sub { crefMulti(undef, $_[1], 0, 0); });
# No, this isn't quite the same thing...
DefPrimitive('\crefalias {}{}', sub {
diff --git a/lib/LaTeXML/Package/hyperref.sty.ltxml b/lib/LaTeXML/Package/hyperref.sty.ltxml
index f8aba2e6f2..a5417de254 100644
--- a/lib/LaTeXML/Package/hyperref.sty.ltxml
+++ b/lib/LaTeXML/Package/hyperref.sty.ltxml
@@ -153,12 +153,12 @@ DefRegister('\pdfcompresslevel', Number(0));
# Additional User Macros
# \href{url}{text}
-DefMacro('\href HyperVerbatim {}', '\@@Url\href{}{}{#1}{#2}');
+DefMacro('\href HyperVerbatim {}', '\lx@hyper@url@\href{}{}{#1}{#2}');
-# \url{url} from url.sty... well sorta
+# Redefine \url{url} from url.sty...
# It's slightly different in that it expands the argument
# Redefine \@url to sanitize the argument less
-DefMacro('\@Url Token', sub {
+DefMacro('\lx@hyper@url Token', sub {
my ($gullet, $cmd) = @_;
my ($open, $close, $url);
$open = $gullet->readToken;
@@ -174,14 +174,26 @@ DefMacro('\@Url Token', sub {
my @toks = grep { $_->getCatcode != CC_SPACE; } $url->unlist;
# Identical with url's \@Url except, let CS's through!
@toks = map { (($_->getCatcode == CC_CS) ? $_ : T_OTHER(ToString($_))) } @toks;
- (Invocation(T_CS('\@@Url'),
+ (Invocation(T_CS('\lx@hyper@url@'),
T_OTHER(ToString($cmd)), Tokens($open), Tokens($close),
Tokens(@toks),
Tokens(T_CS('\UrlFont'), T_CS('\UrlLeft'), @toks, T_CS('\UrlRight')))->unlist,
T_CS('\endgroup')); });
+# Redefine \url (from url.sty) so that it goes through \lx@hyper@url
+DefMacro('\url', '\begingroup\lx@hyper@url\url', locked => 1);
+
+DefConstructor('\lx@hyper@url@ Undigested {}{} Semiverbatim {}',
+ "?#isMath(#5)" # Allow this to work in Math!
+ . " (#5)",
+ properties => sub { (href => ComposeURL(LookupValue('BASE_URL'), $_[4]),
+ class => sub { my $c = ToString($_[1]); $c =~ s/^\\//; 'ltx_' . $c; }); },
+ sizer => '#5',
+ reversion => '#1#2#4#3');
+
# \nolinkurl{url}
-DefConstructor('\nolinkurl Semiverbatim', '#1');
+DefConstructor('\nolinkurl Semiverbatim',
+ "#1");
# \hyperbaseurl{url}
DefPrimitive('\hyperbaseurl Semiverbatim', sub { AssignValue(BASE_URL => ToString($_[1])); });
diff --git a/lib/LaTeXML/Package/url.sty.ltxml b/lib/LaTeXML/Package/url.sty.ltxml
index 6f9c9c8bb4..e886757f01 100644
--- a/lib/LaTeXML/Package/url.sty.ltxml
+++ b/lib/LaTeXML/Package/url.sty.ltxml
@@ -38,13 +38,13 @@ Let('\UrlRight', '\@empty');
# \DeclareUrlCommand\cmd{settings}
# Have this expand into \@Url w/ the declared cmd as arg, so it gets reflected in XML.
-DefMacro('\DeclareUrlCommand{}{}', '\def#1{\begingroup #2\@Url#1}');
+DefMacro('\DeclareUrlCommand{}{}', '\def#1{\begingroup #2\lx@url@url#1}');
# This is an extended version of \Url that takes an extra token as 1st arg.
# That token is the cs that invoked it, so that it can be reflected in the generated XML,
# as well as used to generate the reversion.
# In any case, we read the verbatim arg, and build a Whatsit for @@Url
-DefMacro('\@Url Token', sub {
+DefMacro('\lx@url@url Token', sub {
my ($gullet, $cmd) = @_;
my ($open, $close, $url);
StartSemiverbatim('%');
@@ -58,7 +58,7 @@ DefMacro('\@Url Token', sub {
EndSemiverbatim();
my @toks = grep { $_->getCatcode != CC_SPACE; } (ref $url ? $url->unlist : ());
@toks = map { T_OTHER(ToString($_)) } @toks;
- (Invocation(T_CS('\@@Url'),
+ (Invocation(T_CS('\lx@url@url@nolink'),
T_OTHER(ToString($cmd)), Tokens($open), Tokens($close),
Tokens(@toks),
Tokens(T_CS('\UrlFont'), T_CS('\UrlLeft'), @toks, T_CS('\UrlRight')))->unlist,
@@ -68,20 +68,21 @@ DefMacro('\@Url Token', sub {
DefMacro('\Url', sub {
my ($gullet) = @_;
$gullet->unread(T_OTHER('\Url'));
- (T_CS('\@Url')); });
+ (T_CS('\lx@url@url')); });
# \@@Url cmd {open}{close}{url}{formattedurl}
-DefConstructor('\@@Url Undigested {}{} Semiverbatim {}',
- "?#isMath(#5)" # Allow this to work in Math!
- . " (#5)",
+# (this constructor was formerly named \@@Url; now \lx@url@url@nolink)
+DefConstructor('\lx@url@url@nolink Undigested {}{} Semiverbatim {}',
+ "?#isMath(#5)" # Allow this to work in Math!
+ . " (#5)",
properties => sub { (href => ComposeURL(LookupValue('BASE_URL'), $_[4]),
class => sub { my $c = ToString($_[1]); $c =~ s/^\\//; 'ltx_' . $c; }); },
sizer => '#5',
reversion => '#1#2#4#3');
# These are the expansions of \DeclareUrlCommand
-DefMacro('\path', '\begingroup\urlstyle{tt}\@Url\path');
-DefMacro('\url', '\begingroup\@Url\url', locked => 1);
+DefMacro('\path', '\begingroup\urlstyle{tt}\lx@url@url\path');
+DefMacro('\url', '\begingroup\lx@url@url\url', locked => 1);
# \urldef{newcmd}\cmd{arg}
# Kinda tricky, since we need to get the expansion of \cmd as the value of \newcmd
diff --git a/t/expansion/urls.xml b/t/expansion/urls.xml
index 61d89a5dae..24988c9c3e 100644
--- a/t/expansion/urls.xml
+++ b/t/expansion/urls.xml
@@ -22,7 +22,7 @@
1st item
- [https://example.com/wordword];
+ [https://example.com/wordword];
-
@@ -31,7 +31,7 @@
2nd item
-
[https://example.com/~User];
+ [https://example.com/~User];
-
@@ -40,7 +40,7 @@
3rd item
-
[https://example.com/someplace#id];
+ [https://example.com/someplace#id];
-
@@ -49,7 +49,7 @@
4th item
-
[https://example.com/user@password];
+ [https://example.com/user@password];
-
@@ -58,7 +58,7 @@
5th item
-
[https://example.com/user¶m];
+ [https://example.com/user¶m];
-
@@ -67,7 +67,7 @@
6th item
-
[https://example.com/with_underscore];
+ [https://example.com/with_underscore];
-
@@ -76,7 +76,7 @@
7th item
-
[https://example.com/with^caret];
+ [https://example.com/with^caret];
-
@@ -85,7 +85,7 @@
8th item
-
[https://example.com/with$dollar];
+ [https://example.com/with$dollar];
-
@@ -94,7 +94,7 @@
9th item
-
[https://example.com/encoded%code].
+ [https://example.com/encoded%code].
@@ -110,7 +110,7 @@
1st item
- [https://example.com/someplace\#id];
+ [https://example.com/someplace\#id];
-
@@ -119,7 +119,7 @@
2nd item
-
[https://example.com/with.{}braces];
+ [https://example.com/with.{}braces];
-
@@ -128,7 +128,7 @@
3rd item
-
[https://example.com/withslash\&ersand];
+ [https://example.com/withslash\&ersand];
-
@@ -137,7 +137,7 @@
4th item
-
[https://example.com/withslash\_underscore];
+ [https://example.com/withslash\_underscore];
-
@@ -146,7 +146,7 @@
5th item
-
[https://example.com/withslash\^caret];
+ [https://example.com/withslash\^caret];
-
@@ -155,7 +155,7 @@
6th item
-
[https://example.com/withslash\$dollar];
+ [https://example.com/withslash\$dollar];
-
@@ -164,7 +164,7 @@
7th item
-
[https://example.com/withslash\%percent];
+ [https://example.com/withslash\%percent];
-
@@ -173,7 +173,7 @@
8th item
-
[https://example.com/unexpandedmacro.\macro].
+ [https://example.com/unexpandedmacro.\macro].
@@ -185,42 +185,42 @@
1
1
footnote 1
- [https://example.com/wordword]
+ [https://example.com/wordword]
2
2
footnote 2
- [https://example.com/~User]
+ [https://example.com/~User]
3
3
footnote 3
- [https://example.com/someplace#id]
+ [https://example.com/someplace#id]
4
4
footnote 4
- [https://example.com/user@password]
+ [https://example.com/user@password]
5
5
footnote 5
- [https://example.com/user¶m]
+ [https://example.com/user¶m]
6
6
footnote 6
- [https://example.com/with_underscore]
+ [https://example.com/with_underscore]
7
7
footnote 7
- [https://example.com/with^caret]
+ [https://example.com/with^caret]
8
8
footnote 8
- [https://example.com/with$dollar]
+ [https://example.com/with$dollar]
Probably unexpected URLs in footnotes
@@ -228,12 +228,12 @@
9
9
footnote 9
- [https://example.com/lostpercent]
+ [https://example.com/lostpercent]
10
10
footnote 10
- [https://example.com/slash\%percent]
+ [https://example.com/slash\%percent]
diff --git a/t/tokenize/percent.xml b/t/tokenize/percent.xml
index fb788ac6fc..cdd43b1086 100644
--- a/t/tokenize/percent.xml
+++ b/t/tokenize/percent.xml
@@ -17,17 +17,17 @@
In section
- [http://foo.com/dont%00ignorethis]
+
[http://foo.com/dont%00ignorethis]
and
-[http://foo.com/dont%00ignorethis]
+[http://foo.com/dont%00ignorethis]
However
-[http://foo.com/dont] ignore this either
+[http://foo.com/dont] ignore this either
- Consider http://\host/page versus [http://\host/page].
+ Consider http://\host/page versus [http://\host/page].
- Consider a_b versus [a_b].
+ Consider a_b versus [a_b].
When delimited, extra braces are allowed by verb, but not url.
diff --git a/t/tokenize/url.xml b/t/tokenize/url.xml
index faf967622d..9309697a40 100644
--- a/t/tokenize/url.xml
+++ b/t/tokenize/url.xml
@@ -13,10 +13,10 @@
1Basic URLS
- Basic url: [http://example.com/~user] or [http://example.com/~user].
+ Basic url: [http://example.com/~user] or [http://example.com/~user].
- Path url: [/foo/bar/baz] or [/foo/bar/baz]
+ Path url: [/foo/bar/baz] or [/foo/bar/baz]
@@ -28,16 +28,16 @@
2Verbatimness
Special characters neutralized:
-[http://example.com/foo_bar],
-[http://example.com/foo#bar],
-[http://example.com/foo&bar].
+[http://example.com/foo_bar],
+[http://example.com/foo#bar],
+[http://example.com/foo&bar].
And even though \baz gives index.html;
-[C:\foo\bar\baz]
+[C:\foo\bar\baz]
- OTOH, you get this: [http://example.com/\~{}user]
+ OTOH, you get this: [http://example.com/\~{}user]
@@ -48,7 +48,7 @@
3Styles
- Email: [myself%node@gateway.net] or [myself%node@gateway.net].
+ Email: [myself%node@gateway.net] or [myself%node@gateway.net].
@@ -59,7 +59,7 @@
4Defined urls
- Myself: [myself%node@gateway.net] or [myself%node@gateway.net].
+ Myself: [myself%node@gateway.net] or [myself%node@gateway.net].
@@ -70,7 +70,7 @@
5Bracketting
- Fancy url: [<url: http://example.com/~user>] or [<url: http://example.com/~user>].
+ Fancy url: [<url: http://example.com/~user>] or [<url: http://example.com/~user>].