diff --git a/MANIFEST b/MANIFEST index e35f5da70..6dfd9301f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -312,19 +312,51 @@ lib/LaTeXML/resources/Profiles/math.opt lib/LaTeXML/resources/Profiles/modern.opt lib/LaTeXML/resources/Profiles/standard.opt +#================================================== +# Supported Engines/Formats +#================================================== +lib/LaTeXML/Engine/AmSTeX.pool.ltxml +lib/LaTeXML/Engine/BibTeX.pool.ltxml +lib/LaTeXML/Engine/Base_Schema.pool.ltxml +lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml +lib/LaTeXML/Engine/Base_XMath.pool.ltxml +lib/LaTeXML/Engine/Base_Utility.pool.ltxml +lib/LaTeXML/Engine/Base_Deprecated.pool.ltxml +lib/LaTeXML/Engine/TeX.pool.ltxml +lib/LaTeXML/Engine/TeX_Box.pool.ltxml +lib/LaTeXML/Engine/TeX_Character.pool.ltxml +lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml +lib/LaTeXML/Engine/TeX_FileIO.pool.ltxml +lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml +lib/LaTeXML/Engine/TeX_Glue.pool.ltxml +lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml +lib/LaTeXML/Engine/TeX_Inserts.pool.ltxml +lib/LaTeXML/Engine/TeX_Job.pool.ltxml +lib/LaTeXML/Engine/TeX_Kern.pool.ltxml +lib/LaTeXML/Engine/TeX_Logic.pool.ltxml +lib/LaTeXML/Engine/TeX_Macro.pool.ltxml +lib/LaTeXML/Engine/TeX_Marks.pool.ltxml +lib/LaTeXML/Engine/TeX_Math.pool.ltxml +lib/LaTeXML/Engine/TeX_Page.pool.ltxml +lib/LaTeXML/Engine/TeX_Paragraph.pool.ltxml +lib/LaTeXML/Engine/TeX_Penalties.pool.ltxml +lib/LaTeXML/Engine/TeX_Registers.pool.ltxml +lib/LaTeXML/Engine/TeX_Tables.pool.ltxml +lib/LaTeXML/Engine/plain.pool.ltxml +lib/LaTeXML/Engine/LaTeX.pool.ltxml +lib/LaTeXML/Engine/eTeX.pool.ltxml +lib/LaTeXML/Engine/expl3.pool.ltxml +lib/LaTeXML/Engine/pdfTeX.pool.ltxml + #================================================== # Supported Packages #================================================== lib/LaTeXML/Package.pm -lib/LaTeXML/Package/AmSTeX.pool.ltxml -lib/LaTeXML/Package/BibTeX.pool.ltxml lib/LaTeXML/Package/JHEP.cls.ltxml 
lib/LaTeXML/Package/JHEP2.cls.ltxml lib/LaTeXML/Package/JHEP3.cls.ltxml -lib/LaTeXML/Package/LaTeX.pool.ltxml lib/LaTeXML/Package/OmniBus.cls.ltxml lib/LaTeXML/Package/PoS.cls.ltxml -lib/LaTeXML/Package/TeX.pool.ltxml lib/LaTeXML/Package/a0poster.cls.ltxml lib/LaTeXML/Package/a0size.sty.ltxml lib/LaTeXML/Package/a4.sty.ltxml @@ -449,7 +481,6 @@ lib/LaTeXML/Package/dsfont.sty.ltxml lib/LaTeXML/Package/empheq.sty.ltxml lib/LaTeXML/Package/endnotes.sty.ltxml lib/LaTeXML/Package/english.sty.ltxml -lib/LaTeXML/Package/eTeX.pool.ltxml lib/LaTeXML/Package/ellipsis.sty.ltxml lib/LaTeXML/Package/elsart.cls.ltxml lib/LaTeXML/Package/elsart.sty.ltxml @@ -460,7 +491,6 @@ lib/LaTeXML/Package/ed.sty.ltxml lib/LaTeXML/Package/expl3.sty.ltxml lib/LaTeXML/Package/expl3.ltx.ltxml lib/LaTeXML/Package/expl3.lua.ltxml -lib/LaTeXML/Package/expl3.pool.ltxml lib/LaTeXML/Package/emulateapj.cls.ltxml lib/LaTeXML/Package/emulateapj.sty.ltxml lib/LaTeXML/Package/emulateapj5.sty.ltxml @@ -623,7 +653,6 @@ lib/LaTeXML/Package/overpic.sty.ltxml lib/LaTeXML/Package/palatino.sty.ltxml lib/LaTeXML/Package/paralist.sty.ltxml lib/LaTeXML/Package/parskip.sty.ltxml -lib/LaTeXML/Package/pdfTeX.pool.ltxml lib/LaTeXML/Package/pdflscape.sty.ltxml lib/LaTeXML/Package/pdfpages.sty.ltxml lib/LaTeXML/Package/pdfsync.sty.ltxml diff --git a/lib/LaTeXML/Core.pm b/lib/LaTeXML/Core.pm index 7052275ef..435949aa4 100644 --- a/lib/LaTeXML/Core.pm +++ b/lib/LaTeXML/Core.pm @@ -279,6 +279,7 @@ sub initializeState { $preload .= '.' . $type; } LaTeXML::Package::InputDefinitions($preload, type => $type, + ($type eq 'pool' ? 
(installation_subdir => 'Engine') : ()), handleoptions => $handleoptions, options => $options); } $state->assignValue('InitialPreloads' => undef, 'global'); diff --git a/lib/LaTeXML/Core/Stomach.pm b/lib/LaTeXML/Core/Stomach.pm index 3c2548a7b..24bfcf85e 100644 --- a/lib/LaTeXML/Core/Stomach.pm +++ b/lib/LaTeXML/Core/Stomach.pm @@ -213,7 +213,7 @@ INVOKE: sub makeMisdefinedError { my (@objects) = @_; - return LaTeXML::Core::Whatsit->new($STATE->lookupDefinition(T_CS('\@ERROR')), + return LaTeXML::Core::Whatsit->new($STATE->lookupDefinition(T_CS('\lx@ERROR')), ['misdefined', join('', map { ToString($_); } @objects)], font => $STATE->lookupValue('font'), ); } diff --git a/lib/LaTeXML/Package/AmSTeX.pool.ltxml b/lib/LaTeXML/Engine/AmSTeX.pool.ltxml similarity index 100% rename from lib/LaTeXML/Package/AmSTeX.pool.ltxml rename to lib/LaTeXML/Engine/AmSTeX.pool.ltxml diff --git a/lib/LaTeXML/Engine/Base_Deprecated.pool.ltxml b/lib/LaTeXML/Engine/Base_Deprecated.pool.ltxml new file mode 100644 index 000000000..50e025677 --- /dev/null +++ b/lib/LaTeXML/Engine/Base_Deprecated.pool.ltxml @@ -0,0 +1,95 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | Base_Deprecated | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Deprecate($source, $deprecated, $better)
#   Emit a 'deprecated' warning telling the user to use $better instead of
#   $deprecated.  The warning is issued at most once per deprecated name:
#   names already reported are remembered in the 'DEPRECATED' mapping.
#   $source is passed through to Warn as the location of the problem.
#   Always returns nothing, so as a macro body it expands to no tokens.
sub Deprecate {
  my ($source, $deprecated, $better) = @_;
  # Eventually, just Warn; and in fact, Warn ONCE only
  $deprecated = ToString($deprecated);
  if (!LookupMapping('DEPRECATED', $deprecated)) {
    AssignMapping('DEPRECATED', $deprecated => 1);
    Warn('deprecated', $deprecated, $source,
      "The command $deprecated has been deprecated; Please use " . ToString($better)); }
  return; }

# \lx@DEPRECATE{deprecated}{replacement}: TeX-level entry point for Deprecate.
DefMacro('\lx@DEPRECATE{}{}', \&Deprecate);

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Each alias below first reports itself through \lx@DEPRECATE and then
# expands to its replacement.  The first argument of \lx@DEPRECATE must be
# the exact name of the alias being defined: it is both the text shown to
# the user and the key used to warn only once.

DefMacro('\@ERROR',
  '\lx@DEPRECATE{\@ERROR}{\lx@ERROR}\lx@ERROR');

# (was reported as the misspelled "\normnal@par")
DefMacro('\normal@par',
  '\lx@DEPRECATE{\normal@par}{\lx@normal@par}\lx@normal@par');
DefMacro('\inner@par',
  '\lx@DEPRECATE{\inner@par}{\lx@normal@par}\lx@normal@par');    # Obsolete, but in case still used...

#----------------------------------------------------------------------
# This group should be renamed to \lx@somethings and deprecated
# NOTE: work through this systematically!
# (\FCN was reported as "\Fcn", and its expansion carried an extra,
#  unbalanced "{" before \lx@wrap.)
DefMacro('\FCN{}',
  '\lx@DEPRECATE{\FCN}{\lx@wrap}\lx@wrap[role=FUNCTION]{#1}');
DefMacro('\ROLE{}{}',
  '\lx@DEPRECATE{\ROLE}{\lx@wrap}\lx@wrap[role={#1}]{#2}');
DefMacro('\@SYMBOL{}',
  '\lx@DEPRECATE{\@SYMBOL}{\lx@wrap}\lx@wrap[role=ID]{#1}');
DefMacro('\@CSYMBOL{}',
  '\lx@DEPRECATE{\@CSYMBOL}{\lx@symbol}\lx@symbol[meaning={#1}]{}');
DefMacro('\@APPLY{}',
  '\lx@DEPRECATE{\@APPLY}{\lx@apply}\lx@apply[]{#1}{}');    # Sorta broken?
DefMacro('\@MAYBEAPPLY{}{}',
  '\lx@DEPRECATE{\@MAYBEAPPLY}{\lx@apply}\ifx.#2.#1\else\lx@apply{#1}{#2}\fi');
DefMacro('\@WRAP{}',
  '\lx@DEPRECATE{\@WRAP}{\lx@wrap}\lx@wrap[]{#1}');
DefMacro('\@TOKEN{}',
  '\lx@DEPRECATE{\@TOKEN}{\lx@symbol}\lx@symbol[name={#1}]{}');
# (the suggested replacement was misspelled "\lx@superscipt")
DefMacro('\@SUPERSCRIPT{}{}',
  '\lx@DEPRECATE{\@SUPERSCRIPT}{\lx@superscript}\ifx.#2.#1\else\lx@superscript[]{#1}{#2}\fi');
DefMacro('\@SUBSCRIPT{}{}',
  '\lx@DEPRECATE{\@SUBSCRIPT}{\lx@subscript}\ifx.#2.#1\else\lx@subscript[]{#1}{#2}\fi');
# Let('\@PADDED',         '\lx@padded');
# Let('\DUAL',            '\lx@dual');
# Let('\@XMArg',          '\lx@xmarg');
# Let('\@XMRef',          '\lx@xmref');
# Let('\@APPLYFUNCTION',  '\lx@ApplyFunction');
# Let('\@INVISIBLETIMES', '\lx@InvisibleTimes');
# Let('\@INVISIBLECOMMA', '\lx@InvisibleComma');
# Let('\@INVISIBLEPLUS',  '\lx@InvisiblePlus');

DefMacro('\@PADDED',
  '\lx@DEPRECATE{\@PADDED}{\lx@padded}\lx@padded');
DefMacro('\DUAL',
  '\lx@DEPRECATE{\DUAL}{\lx@dual}\lx@dual');
DefMacro('\@XMArg',
  '\lx@DEPRECATE{\@XMArg}{\lx@xmarg}\lx@xmarg');
DefMacro('\@XMRef',
  '\lx@DEPRECATE{\@XMRef}{\lx@xmref}\lx@xmref');
DefMacro('\@APPLYFUNCTION',
  '\lx@DEPRECATE{\@APPLYFUNCTION}{\lx@ApplyFunction}\lx@ApplyFunction');
DefMacro('\@INVISIBLETIMES',
  '\lx@DEPRECATE{\@INVISIBLETIMES}{\lx@InvisibleTimes}\lx@InvisibleTimes');
DefMacro('\@INVISIBLECOMMA',
  '\lx@DEPRECATE{\@INVISIBLECOMMA}{\lx@InvisibleComma}\lx@InvisibleComma');
DefMacro('\@INVISIBLEPLUS',
  '\lx@DEPRECATE{\@INVISIBLEPLUS}{\lx@InvisiblePlus}\lx@InvisiblePlus');

# End of stuff to be deprecated.
#----------------------------------------------------------------------

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;

diff --git a/lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml b/lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml
new file mode 100644
index 000000000..764bc8c6f
--- /dev/null
+++ b/lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml
@@ -0,0 +1,634 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# |  Base_ParameterTypes                                                | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#======================================================================
# Define parsers for standard parameter types.

# Plain: read one argument with readArg.  If an inner parameter spec was
# given, the argument is re-parsed through it.  Reverts to the argument
# wrapped in braces.
DefParameterType('Plain', sub {
    my ($gullet, $inner) = @_;
    my $value = $gullet->readArg();
    if ($inner) {
      ($value) = $inner->reparseArgument($gullet, $value); }
    $value; },
  reversion => sub {
    my ($arg, $inner) = @_;
    (T_BEGIN,
      ($inner ? $inner->revertArguments($arg) : Revert($arg)),
      T_END); });

# DefPlain: like Plain, but reads with readBalanced(0, 1, 1) instead of readArg.
# NOTE(review): the flags look like (expand, pack # parameters, require open
# brace) -- confirm against the Gullet's readBalanced.
DefParameterType('DefPlain', sub {
    my ($gullet, $inner) = @_;
    my $value = $gullet->readBalanced(0, 1, 1);
    if ($inner) {
      ($value) = $inner->reparseArgument($gullet, $value); }
    return $value; },
  reversion => sub {
    my ($arg, $inner) = @_;
    (T_BEGIN,
      ($inner ? $inner->revertArguments($arg) : Revert($arg)),
      T_END); });

# Optional: read a []-style optional argument with readOptional.
# A missing value is replaced by $default when one is given; otherwise the
# value is re-parsed through $inner when an inner spec is given.
# NOTE(review): with no value, no default and an $inner spec, reparseArgument
# is handed an undefined value -- confirm that this is intended.
# Reverts to [..], or to nothing when there is nothing to revert.
DefParameterType('Optional', sub {
    my ($gullet, $default, $inner) = @_;
    my $value = $gullet->readOptional;
    if (!$value && $default) {
      $value = $default; }
    elsif ($inner) {
      ($value) = $inner->reparseArgument($gullet, $value); }
    $value; },
  optional  => 1,
  reversion => sub {
    my ($arg, $default, $inner) = @_;
    my @rev_arg = $arg ? (
      $inner ? $inner->revertArguments($arg) : Revert($arg))
      : ();
    if (@rev_arg) {
      return (T_OTHER('['), @rev_arg, T_OTHER(']')); }
    else { return (); } });

# This is a peculiar type of argument of the form
#   <general text> = <filler>{<balanced text><right brace>
# however, <filler> does get expanded while searching for the initial {
# which IS required in contrast to a general argument; ie a single token is not correct.
DefParameterType('GeneralText', sub {
    my ($gullet) = @_;
    $gullet->unread($gullet->readXToken);    # Force expansion to skip <filler> before required {

    return $gullet->readBalanced(0, 0, 1); });

# Until: read tokens up to the delimiter given as the parameter's extra argument.
# Reverts to the argument followed by the delimiter.
DefParameterType('Until', sub {
    my ($gullet, $until) = @_;
    $gullet->readUntil($until); },
  reversion => sub {
    my ($arg, $until) = @_;
    (Revert($arg), Revert($until)); });

# Skip any spaces, but don't contribute an argument.
DefParameterType('SkipSpaces', sub { $_[0]->skipSpaces; 1; }, novalue => 1);

# Same, using the gullet's skip1Space; also contributes no argument.
DefParameterType('Skip1Space', sub { $_[0]->skip1Space; 1; }, novalue => 1);

# Read the next token
DefParameterType('Token', sub { $_[0]->readToken; });

# Read the next token, after expanding any expandable ones.
DefParameterType('XToken', sub {
    my ($gullet) = @_;
    return $gullet->readXToken; });

# Parameter type for a number.
DefParameterType('Number', sub {
    my ($gullet) = @_;
    return $gullet->readNumber; });

# Parameter type for a floating point number.
DefParameterType('Float', sub {
    my ($gullet) = @_;
    return $gullet->readFloat; });

# ReadFloat($gullet): skip leading spaces, then read a float;
# falls back to Float(0) when readFloat yields nothing true.
sub ReadFloat {
  my ($gullet) = @_;
  $gullet->skipSpaces;
  my $float = $gullet->readFloat;
  return ($float ? $float : Float(0)); }

# Parameter type for a dimension.
DefParameterType('Dimension', sub {
    my ($gullet) = @_;
    return $gullet->readDimension; });

# Parameter type for a Glue (aka skip).
DefParameterType('Glue', sub {
    my ($gullet) = @_;
    return $gullet->readGlue; });

# Parameter type for a MuDimension (math).
DefParameterType('MuDimension', sub {
    my ($gullet) = @_;
    return $gullet->readMuDimension; });

# Parameter type for a MuGlue (math).
DefParameterType('MuGlue', sub {
    my ($gullet) = @_;
    return $gullet->readMuGlue; });

# Reads up to the next (balanced) open brace {;
# this serves the last argument of a TeX-style delimited parameter list.
DefParameterType('UntilBrace', sub {
    my ($gullet) = @_;
    return $gullet->readUntilBrace; });

# Special case: insist on a { being next, but leave it in the input.
# The peeked token is pushed back before it is checked.
DefParameterType('RequireBrace', sub {
    my ($gullet) = @_;
    my $peeked = $gullet->readToken;
    return $peeked unless $peeked;
    $gullet->unread($peeked);
    Error('expected', '{', $gullet, "Expected a { here; Got " . Stringify($peeked))
      if $peeked->getCatcode != CC_BEGIN;
    return $peeked; },
  novalue => 1);

# Like Until, but expanding as it reads: braced groups are copied as a unit,
# and defined tokens are collected as an Invocation together with their arguments.
DefParameterType('XUntil', sub {
    my ($gullet, $delimiter) = @_;
    ($delimiter) = $delimiter->unlist;    # Make sure it's a single token!!!
    my @collected = ();
    while (my $tok = $gullet->readXToken(0)) {
      last if $tok->equals($delimiter);
      if ($tok->getCatcode == CC_BEGIN) {
        push(@collected, $tok, $gullet->readBalanced, T_END);
        next; }
      my $defn = LookupDefinition($tok);
      if (!$defn) {
        push(@collected, $tok);
        next; }
      my $params = $$defn{parameters};
      push(@collected, Invocation($tok,
          ($params ? $params->readArguments($gullet) : ()))); }
    return Tokens(@collected); });

# This reads a braced tokens list, expanding as it goes,
# but expanding \the-like commands only once.
DefParameterType('Expanded', sub {
    my ($gullet) = @_;
    return $gullet->readBalanced(1, 0, 1); },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

# An expanded definition body:
# a braced tokens list that is expanded while being read
# (\the-like commands being expanded only once),
# with # parameters packed as well.
DefParameterType('DefExpanded', sub {
    my ($gullet) = @_;
    return $gullet->readBalanced(1, 1, 1); },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

# Matches one of the given keywords, eg. Match:=
DefParameterType('Match', sub {
    my ($gullet, @choices) = @_;
    return $gullet->readMatch(@choices); });

# Matches a keyword regardless of catcodes; eg. Keyword:to
# (otherwise the same as Match)
DefParameterType('Keyword', sub {
    my ($gullet, @choices) = @_;
    return $gullet->readKeyword(@choices); });

# Balanced material (?)
DefParameterType('Balanced', sub {
    my ($gullet) = @_;
    return $gullet->readBalanced; });

# A Semiverbatim argument; ie. read w/ most catcodes neutralized.
DefParameterType('Semiverbatim', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  semiverbatim => 1,
  reversion    => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

# A LaTeX-style optional argument (ie. in []) whose contents are read as Semiverbatim.
# Reverts to nothing when no argument was given.
DefParameterType('OptionalSemiverbatim', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  semiverbatim => 1,
  optional     => 1,
  reversion    => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });

# Be careful here: if % appears before the initial {, it's still a comment!
# Also, note that non-typewriter fonts will mess up some chars on digestion!
# Verbatim: read up to the opening brace with normal catcodes, then switch to
# semiverbatim mode (StartSemiverbatim is additionally given '%' and '\')
# only for the balanced body.  The argument is digested inside its own group
# with the typewriter font family merged in; it reverts to {..}.
DefParameterType('Verbatim', sub {
    my ($gullet) = @_;
    $gullet->readUntil(T_BEGIN);
    StartSemiverbatim('%', '\\');
    my $arg = $gullet->readBalanced();
    EndSemiverbatim();
    return $arg; },
  beforeDigest => sub {
    $_[0]->bgroup;
    MergeFont(family => 'typewriter'); },
  afterDigest => sub {
    $_[0]->egroup; },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });

# Read Verbatim, but allows expanding command sequences
# Only '%' is passed to StartSemiverbatim here, and a handful of escapes
# (\%, \#, \&, \_, \~, \textasciitilde, \\) are redefined before the body
# is read with readBalanced(1).
# NOTE(review): the DefMacroI calls are explicitly scope => 'local' while the
# Let calls pass no scope -- presumably both are undone by EndSemiverbatim;
# confirm.
DefParameterType('HyperVerbatim', sub {
    my ($gullet) = @_;
    $gullet->readUntil(T_BEGIN);
    StartSemiverbatim('%');
    DefMacroI('\%',              undef, T_OTHER('%'), scope => 'local');
    DefMacroI('\#',              undef, T_OTHER('#'), scope => 'local');
    DefMacroI('\&',              undef, T_OTHER('&'), scope => 'local');
    DefMacroI('\textunderscore', undef, T_OTHER('_'), scope => 'local');
    Let('\_', '\textunderscore');
    DefMacroI('\hyper@tilde', undef, T_OTHER('~'), scope => 'local');
    Let('\~',              '\hyper@tilde');
    Let('\textasciitilde', '\hyper@tilde');
    Let('\\\\',            '\@backslashchar');
    my $arg = $gullet->readBalanced(1);
    EndSemiverbatim();
    return $arg; },
  beforeDigest => sub {
    $_[0]->bgroup;
    MergeFont(family => 'typewriter'); },
  afterDigest => sub {
    $_[0]->egroup; },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });

# Read an argument that will not be digested.
DefParameterType('Undigested', sub { $_[0]->readArg; }, undigested => 1,
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });

# Read a LaTeX-style optional argument (ie. in []), but it will not be digested.
# Reverts to nothing when the argument was absent.
DefParameterType('OptionalUndigested', sub { $_[0]->readOptional; },
  undigested => 1, optional => 1,
  reversion  => sub { ($_[0] ? (T_OTHER('['), Revert($_[0]), T_OTHER(']')) : ()); });

# Read a keyword value (KeyVals), that will not be digested.
DefParameterType('UndigestedKey', sub { $_[0]->readArg; }, undigested => 1);
# Same, but with # parameters packed (for keys holding definition bodies).
DefParameterType('UndigestedDefKey', sub {
    $_[0]->readArg->packParameters; }, undigested => 1);

# Read a token as used when defining it, ie. it may be enclosed in braces.
# Braces are peeled off repeatedly; within a braced group, tokens with a
# false, space or comment catcode are dropped, the first remaining token
# is the result, and the rest are pushed back onto the input.
DefParameterType('DefToken', sub {
    my ($gullet) = @_;
    my $token = $gullet->readToken;
    while ($token && ($token->getCatcode == CC_BEGIN)) {
      my $cc;
      my @toks = grep { ($cc = $$_[1]) && ($cc != CC_SPACE) && ($cc != CC_COMMENT); }
        $gullet->readBalanced->unlist;
      $token = shift(@toks);
      $gullet->unread(@toks); }
    $token; },
  undigested => 1);

# Stub register for misdefinitions, to avoid a cascade of Errors.
DefRegisterI('\lx@DUMMY@REGISTER', undef, Tokens());

# Read a variable, ie. a token (after expansion) that is a writable register.
# Returns an array ref [definition, register arguments...].
# On failure an Error is reported and, to let processing continue:
#  * a control sequence is defined on the spot as a Dimension register;
#  * anything else (including end of input) yields the dummy register.
DefParameterType('Variable', sub {
    my ($gullet) = @_;
    my $token = $gullet->readXToken;
    my $defn  = $token && LookupDefinition($token);
    if ((defined $defn) && $defn->isRegister && !$defn->isReadonly) {
      [$defn, ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ())]; }
    else {
      # Do NOT define anything before the token has been checked:
      # it may be missing, or not a control sequence at all.
      if ($token && ($token->getCatcode == CC_CS)) {
        Error('expected', '<variable>', $gullet,
          "A <variable> was supposed to be here", "Got " . Stringify($token),
          "Defining it now.");
        DefRegisterI($token, undef, Dimension(0));    # Dimension, or what?
        return [LookupDefinition($token)]; }
      else {
        Error('expected', '<variable>', $gullet,
          "A <variable> was supposed to be here", "Got " . Stringify($token),
          "But it is not even definable.");
        return [LookupDefinition(T_CS('\lx@DUMMY@REGISTER'))]; } } },
  reversion => sub {
    my ($var) = @_;
    my ($defn, @args) = @$var;
    my $params = $defn->getParameters;
    return Tokens($defn->getCS, ($params ? $params->revertArguments(@args) : ())); });

# Same, but not necessarily writable
DefParameterType('Register', sub {
    my ($gullet) = @_;
    my $token = $gullet->readXToken;
    my $defn  = $token && LookupDefinition($token);
    if ((defined $defn) && $defn->isRegister) {
      [$defn, ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ())]; }
    else {
      if ($token && ($token->getCatcode == CC_CS)) {
        if ($token->getString eq '\font') {
          # \font is a bit of a register-like exception
          return [$defn]; }
        Error('expected', '<register>', $gullet,
          "A <register> was supposed to be here", "Got " . Stringify($token),
          "Defining it now.");
        DefRegisterI($token, undef, Dimension(0));    # Dimension, or what?
        return [LookupDefinition($token)]; }
      else {
        Error('expected', '<register>', $gullet,
          "A <register> was supposed to be here", "Got " . Stringify($token),
          "But it is not even definable.");
        return [LookupDefinition(T_CS('\lx@DUMMY@REGISTER'))]; } } },
  reversion => sub {
    my ($var) = @_;
    my ($defn, @args) = @$var;
    my $params = $defn->getParameters;
    return Tokens($defn->getCS, ($params ? $params->revertArguments(@args) : ())); });

# Read a TeX-style file name: expanded tokens up to the first space,
# end of line, comment or control sequence.
DefParameterType('TeXFileName', sub {
    my ($gullet) = @_;
    my ($token, $cc, @tokens) = ();
    $gullet->skipSpaces;
    while (($token = $gullet->readXToken(0))
      && (($cc = $token->getCatcode) != CC_SPACE) && ($cc != CC_EOL) && ($cc != CC_COMMENT) && ($cc != CC_CS)) {
      push(@tokens, $token); }
    # Push back the terminating token unless it was whitespace-like.
    # At end of input there is no token, and $cc is stale or undefined,
    # so it must not be consulted (nor undef be unread) in that case.
    if ($token && ($cc != CC_SPACE) && ($cc != CC_EOL) && ($cc != CC_COMMENT)) {
      $gullet->unread($token); }
    # Strip outer "" ???
    if ((scalar(@tokens) > 1) && ($tokens[0]->equals(T_OTHER('"'))) && ($tokens[-1]->equals(T_OTHER('"')))) {
      shift(@tokens); pop(@tokens); }
    Tokens(@tokens); });

# A LaTeX style directory List
# The argument is split on commas, \par and newlines; each piece may itself
# contain one or more {..} groups, each of which becomes a directory.
DefParameterType('DirectoryList', sub {
    my ($gullet) = @_;
    my $arg_string = ToString($gullet->readArg);
    my @dirs       = ();
    for my $dir (split(/,|\\par|\n+/, $arg_string)) {
      $dir =~ s/^\s+//;
      $dir =~ s/\s+$//;
      next unless $dir;
      while ($dir =~ s/^\s*\{([^\}]*)\}//) {
        push @dirs, $1 if $1; }
      push @dirs, $dir if $dir; }
    LaTeXML::Core::Array->new(open => T_BEGIN, close => T_END, itemopen => T_BEGIN, itemclose => T_END,
      type   => LaTeXML::Package::parseParameters(ToString("Semiverbatim"), "CommaList")->[0],
      values => [@dirs]); });

# This reads a Box as needed by \raise, \lower, \moveleft, \moveright.
# Hopefully there are no issues with the box being digested
# as part of the reader???
DefParameterType('MoveableBox', sub {
    my ($gullet) = @_;
    $gullet->skipSpaces;
    my ($box, @stuff) = $STATE->getStomach->invokeToken($gullet->readXToken);
    Error('expected', '<box>', $gullet,
      "A <box> was supposed to be here", "Got " . Stringify($box))
      unless $box;
#### && $box->isa('LaTeXML::Core::Whatsit')
#### && ($box->getDefinition->getCSName =~ /^(\\hbox|\\vbox||\\vtop)$/);
    $box; });

# Read a parenthesis delimited argument.
# Note that this does NOT balance () within the argument.
# If the next (expanded) token is not '(', it is pushed back and
# the argument is undefined.
DefParameterType('BalancedParen', sub {
    my ($gullet) = @_;
    my $tok = $gullet->readXToken;
    if (ref $tok && ToString($tok) eq '(') {
      $gullet->readUntil(T_OTHER(')'));
    } else {
      $gullet->unread($tok) if ref $tok;
      undef; } },
  reversion => sub {
    (T_OTHER('('), Revert($_[0]), T_OTHER(')')); });

# Read a digested argument, digesting as it is being read.
# The usual macro parameter (generally written as {}) gets tokenized and digested
# in separate stages, w/o recognizing any special macros or catcode changes within (eg. \url).
# Rarely, you need a parameter that gets digested AS IT'S READ until ending }.
# Note that this also recognizes args as \bgroup ... \egroup
# It is useful when the content would usually need to have been \protect'd
# in order to correctly deal with catcodes.
# BEWARE: This is NOT a shorthand for a simple digested {}!
#
# Steps: skip spaces, then skip any further space tokens and \relax;
#  * at end of input, the result is an empty List;
#  * on an opening brace, digest the brace and then the body up to the end
#    of that group; the last digested item is popped off so that only the
#    content remains;
#  * otherwise, the single token is invoked and its result is the argument.
# Comment boxes are filtered out, and the List's mode records whether
# IN_MATH was set when reading started.
DefParameterType('Digested', sub {
    no warnings 'recursion';
    my ($gullet) = @_;
    $gullet->skipSpaces;
    my $ismath = $STATE->lookupValue('IN_MATH');
    my @list   = ();
    my $token;
    do { $token = $gullet->readXToken(0);
    } while (defined $token && (($token->getCatcode == CC_SPACE) || $token->equals(T_CS('\relax'))));
    if    (!defined $token) { }
    elsif ($token->getCatcode == CC_BEGIN) {
      Digest($token);
      push(@list, $STATE->getStomach->digestNextBody()); pop(@list); }    # content w/o the braces
    else {
      push(@list, $STATE->getStomach->invokeToken($token)); }
    @list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list;
    List(@list, mode => ($ismath ? 'math' : 'text')); },
  undigested => 1,    # since _already_ digested.
  reversion  => sub { (T_BEGIN, Revert($_[0]), T_END); });

# A variation: Digest until we encounter a given token!
# The delimiter arrives as the parameter's extra argument; only its first
# token is used.  As above, Comment boxes are dropped from the result.
DefParameterType('DigestUntil', sub {
    my ($gullet, $until) = @_;
    ($until) = $until->unlist;    # Make sure it's a single token!!!
    $gullet->skipSpaces;
    my $ismath = $STATE->lookupValue('IN_MATH');
    my @list   = $STATE->getStomach->digestNextBody($until);
    @list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list;
    List(@list, mode => ($ismath ? 'math' : 'text')); },
  undigested => 1,    # since _already_ digested.
  reversion  => sub { (T_BEGIN, Revert($_[0]), T_END); });

# Reads until the current group has ended.
# This is useful for environment-like constructs,
# particularly alignments (which may or may not be actual environments),
# but which need special treatment of some of their content
# as the expansion is carried out.
DefParameterType('DigestedBody', sub {
    my ($gullet) = @_;
    my $ismath   = $STATE->lookupValue('IN_MATH');
    my @list     = $STATE->getStomach->digestNextBody();
    # In most (all?) cases, we're really looking for a single Whatsit here...
    @list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list;
    List(@list, mode => ($ismath ? 'math' : 'text')); },
  undigested => 1);

# In addition to the standard TeX Dimension, there are various LaTeX constructs
# (particularly, the LaTeX picture environment, and the various pstricks packages)
# that take a different sort of length.  They differ in two ways.
#  (1) They do not accept a comma as decimal separator
#     (they generally use it to separate coordinates), and
#  (2) They accept a plain float which is scaled against a Dimension register.
# Actually, there are two subcases:
#   (a) picture accepts a float, which is scaled against \unitlength
#   (b) pstricks accepts a float, and optionally a unit,
#       If the unit is omitted, it is relative to \psxunit or \psyunit.
# How to capture these ?
## DefParameterType('Length', sub {
##   my($gullet,$unit)=@_;

# CommaList expects something like {balancedstuff,...}
# The result is always a LaTeXML::Core::Array.  Items are split at commas
# outside of braces; when an item type is given (the extra argument),
# each item is re-parsed as that type.
DefParameterType('CommaList', sub {
    my ($gullet, $type) = @_;
    my $typedef = $type && LaTeXML::Package::parseParameters(ToString($type), "CommaList")->[0];
    my @items   = ();
    if ($gullet->ifNext(T_BEGIN)) {
      $gullet->readToken;
      my @tokens = ();
      my $comma  = T_OTHER(',');
      while (my $token = $gullet->readToken) {
        my $cc = $token->getCatcode;
        if ($cc == CC_END) {
          push(@items, Tokens(@tokens));
          last; }
        elsif ($token->equals($comma)) {
          push(@items, Tokens(@tokens)); @tokens = (); }
        elsif ($cc == CC_BEGIN) {
          push(@tokens, $token, $gullet->readBalanced, T_END); }
        else {
          push(@tokens, $token); } }
      if ($typedef) {
        @items = map { [$typedef->reparseArgument($gullet, $_)]->[0] } @items; } }
    else {
      # If no brace, just read one item or token, but still make Array!
      push(@items, ($typedef ? $typedef->readArguments($gullet, "CommaList")
          : ($gullet->readToken))); }
    LaTeXML::Core::Array->new(open => T_BEGIN, close => T_END, type => $typedef,
      values => [@items]); });

### Support for Key / Value arguments.
## The very basic form is
##   RequiredKeyVals: $keyset
##   OptionalKeyVals: $keyset
## to parse Key-Value pairs from a given keyset (see the 'keyval' package
## documentation for more information). These types of KeyVal
## parameters will return a LaTeXML::Core::KeyVals object, which can then be
## used to access the values of the individual items.
## The difference between the two forms is that RequiredKeyVals expects a set of
## key-value pairs wrapped in T_BEGIN T_END, where as OptionalKeyVals optionally
## expects a set of KeyValue pairs wrapped in T_OTHER('[') T_OTHER(']')
##
## Several extensions of the keyval package exist, the most common one we support
## is the xkeyval package. This introduces further variations on the keyval
## arguments parsing, in particular it allows to read keys from more than one
## keyset at once. These can be specified by giving comma-separated values in
## the keyset argument. By default, a key will only be set in the **first**
## keyset it occurs in. By using
##   RequiredKeyVals+: $keysets
##   OptionalKeyVals+: $keysets
## the key will be set in all keysets instead.
##
## All keys to be parsed with these arguments should be declared using
## DefKeyVal in LaTeXML::Package. By default, an error is thrown if an unknown
## key is encountered. To suppress this behaviour, and instead store all
## undefined keys, use
##   RequiredKeyVals*: $keysets
##   OptionalKeyVals*: $keysets
## instead. The '*' and '+' modifiers can be combined by using:
##   RequiredKeyVals*+: $keysets
##   OptionalKeyVals*+: $keysets
##
## Furthermore, the xkeyval package supports giving prefixes to keys,
##   RequiredKeyVals[*][+]: $prefix|$keysets
##   OptionalKeyVals[*][+]: $prefix|$keysets
##
## Finally, it is possible to specify specific keys to skip when digesting the
## object. This can be achieved using comma-separated key values in
##   RequiredKeyVals[*][+]: $prefix|$keysets|$skip
##   OptionalKeyVals[*][+]: $prefix|$keysets|$skip

# KeyVals_aux($gullet, $until, $spec, %options)
#   Shared worker for all the RequiredKeyVals/OptionalKeyVals variants.
#   $spec is [$star, $plus, $prefix, $keysets, $skip], the last three coming
#   from the '|'-separated parameter specification.
#   $until is the closing delimiter; when it is undefined nothing is read
#   and an empty KeyVals object is returned.
sub KeyVals_aux {
  my ($gullet, $until, $spec, %options) = @_;
  my ($star, $plus, $prefix, $keysets, $skip) = @{$spec};

  # support both "keysets" and "prefix|keysets"
  unless (defined($keysets)) {
    $keysets = $prefix;
    $prefix  = undef;

    # to emulate old behaviour, throw no errors
    # when we have a single keyset and no prefix (or no keyset at all)
    # Note the argument order: index(STRING, SUBSTRING).  With the arguments
    # swapped the test looked for the keysets inside ',' and so treated
    # (almost) every specification as a single keyset.  ToString guards
    # against $keysets not being a plain string.
    $star = 1 if (!defined($keysets) || index(ToString($keysets), ',') == -1); }

  # create a new set of Key-Value arguments
  my $keyvals = LaTeXML::Core::KeyVals->new(
    $prefix, $keysets,
    setAll => $plus, setInternals => 1,
    skip   => $skip, skipMissing  => $star);

  # and read it from the gullet
  $keyvals->readFrom($gullet, $until) if defined($until);

  # we still want to make use of the hash
  return $keyvals; }

# RequiredKeyVals($star, $plus, $gullet, @keyspec)
#   Reader for {key=value,...}.  A missing opening brace is reported as an
#   Error, and an empty KeyVals object is returned in that case.
sub RequiredKeyVals {
  my ($star, $plus, $gullet, @keyspec) = @_;
  my $until;

  if ($gullet->ifNext(T_BEGIN)) {
    $until = T_END; }
  else {
    Error('expected', '{', $gullet, "Missing keyval arguments"); }

  return (KeyVals_aux($gullet, $until, [$star, $plus, @keyspec])); }

DefParameterType('RequiredKeyVals', sub { RequiredKeyVals(0, 0, @_); },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });
DefParameterType('RequiredKeyVals*', sub { RequiredKeyVals(1, 0, @_); },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });
DefParameterType('RequiredKeyVals+', sub { RequiredKeyVals(0, 1, @_); },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });
DefParameterType('RequiredKeyVals*+', sub { RequiredKeyVals(1, 1, @_); },
  reversion => sub { (T_BEGIN, Revert($_[0]), T_END); });

# OptionalKeyVals($star, $plus, $gullet, @keyspec)
#   Reader for [key=value,...]; returns undef when no '[' follows.
sub OptionalKeyVals {
  my ($star, $plus, $gullet, @keyspec) = @_;
  if ($gullet->ifNext(T_OTHER('['))) {
    return (KeyVals_aux($gullet, T_OTHER(']'), [$star, $plus, @keyspec])); }
  else { return (undef); } }

# Reversion shared by the OptionalKeyVals variants:
# [..] around the reverted keyvals, or nothing at all when there are none.
sub revert_optional_keyvals {
  my @r = grep { defined $_ } ($_[0] && Revert($_[0]));
  return @r ? (T_OTHER('['), @r, T_OTHER(']')) : (); }

DefParameterType('OptionalKeyVals', sub { OptionalKeyVals(0, 0, @_); },
  optional => 1, reversion => \&revert_optional_keyvals);
DefParameterType('OptionalKeyVals*', sub { OptionalKeyVals(1, 0, @_); },
  optional => 1, reversion => \&revert_optional_keyvals);
DefParameterType('OptionalKeyVals+', sub { OptionalKeyVals(0, 1, @_); },
  optional => 1, reversion => \&revert_optional_keyvals);
DefParameterType('OptionalKeyVals*+', sub { OptionalKeyVals(1, 1, @_); },
  optional => 1, reversion => \&revert_optional_keyvals);

# Not sure that this is the most elegant solution, but...
# What I'd really like are some sort of parameter modifiers, mathstyle, font... until...?
# Each of the following reads a plain argument, but digests it inside its own
# group with one font property merged in; all revert to the braced argument.
DefParameterType('DisplayStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(mathstyle => 'display'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
DefParameterType('TextStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(mathstyle => 'text'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
DefParameterType('ScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(mathstyle => 'script'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
DefParameterType('ScriptscriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(mathstyle => 'scriptscript'); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# Beware the name: this is NOT script style as such,
# but the style a script would get relative to the current style.
DefParameterType('InScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(scripted => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });
# NOTE: the various parameter features don't combine easily!!
# A ScriptStyleUntil is still needed for \root!!!
# Fractions also remain to be redone using these new types....
DefParameterType('OptionalInScriptStyle', sub {
    my ($gullet) = @_;
    return $gullet->readOptional; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(scripted => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  optional  => 1,
  reversion => sub {
    my ($arg) = @_;
    return () unless $arg;
    return (T_OTHER('['), Revert($arg), T_OTHER(']')); });
DefParameterType('InFractionStyle', sub {
    my ($gullet) = @_;
    return $gullet->readArg; },
  beforeDigest => sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    return MergeFont(fraction => 1); },
  afterDigest => sub {
    my ($stomach) = @_;
    return $stomach->egroup; },
  reversion => sub {
    my ($arg) = @_;
    return (T_BEGIN, Revert($arg), T_END); });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/Base_Schema.pool.ltxml b/lib/LaTeXML/Engine/Base_Schema.pool.ltxml
new file mode 100644
index 000000000..4ae8b545b
--- /dev/null
+++ b/lib/LaTeXML/Engine/Base_Schema.pool.ltxml
@@ -0,0 +1,110 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# |  Base_Schema                                                        | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# NOTE that these define the namespaces we'll (probably) use
# along with the prefixes to be used in "code"
# The generated XML will use the prefixes defined by RegisterDocumentNamespace(...) (if ever)
# or those prefixes defined by the Schema (typically RelaxNGSchema(..)
# Core namespaces, registered in this order as [prefix => namespace URI].
foreach my $ns (
  [ltx   => "http://dlmf.nist.gov/LaTeXML"],
  [svg   => "http://www.w3.org/2000/svg"],
  [xlink => "http://www.w3.org/1999/xlink"],    # Needed for SVG
  # Not directly used, but let's stake out the ground
  [m     => "http://www.w3.org/1998/Math/MathML"],
  [xhtml => "http://www.w3.org/1999/xhtml"],
  # Namespace for arbitrary data attributes (mapped to data-xxx in html5)
  [data => "http://dlmf.nist.gov/LaTeXML/data"]
) {
  my ($prefix, $uri) = @$ns;
  RegisterNamespace($prefix => $uri); }

# This is used for plain TeX, but needs to be undone for LaTeX (or...)!
RelaxNGSchema("LaTeXML");
Tag('ltx:section',  autoClose => 1);
Tag('ltx:document', autoClose => 1, autoOpen => 1);
# When the document root opens, copy a non-white font background onto it
# as the backgroundcolor attribute.
Tag('ltx:document', afterOpen => sub {
    my ($document, $root) = @_;
    my $font       = $document->getNodeFont($root);
    my $background = $font && $font->getBackground;
    if ($background && ($background ne 'white')) {
      $document->setAttribute($root, backgroundcolor => $background); } });
#======================================================================

DefMacroI("\\\@empty", undef, Tokens());

#======================================================================
# Core ID functionality.
#======================================================================
# DOCUMENTID is the ID of the document
# AND prefixes IDs on all other elements.
{
  my $docid = LookupValue('DOCUMENTID');
  if (!$docid) {
    # No document ID was given: \thedocument@ID is just \@empty.
    Let('\thedocument@ID', '\@empty'); }
  else {
    # Wrap in T_OTHER so funny chars don't screw up (no space!)
    DefMacroI('\thedocument@ID', undef, T_OTHER($docid)); }
}
NewCounter('@XMARG', 'document', idprefix => 'XM');

#======================================================================

Tag('ltx:document', afterOpen => \&ProcessPendingResources);
RequireResource('LaTeXML.css');
#======================================================================
# The default "initial context" for XML+RDFa specifies some default
# terms and prefixes, but no default vocabulary.
+# Ought to have a default for @vocab, but settable? +# can we detect use of simple "term"s in attributes so we know whether we need @vocab? +# Ought to have a default set of prefixes from RDFa Core, +# but allow prefixes to be added. +# Probably ought to scan rdf attributes for all uses of prefixes, +# and include them in @prefix +# The following prefixes are listed in http://www.w3.org/2011/rdfa-context/rdfa-1.1 +{ + my %rdf_prefixes = ( + "cc" => "http://creativecommons.org/ns#", + "ctag" => "http://commontag.org/ns#", + "dc" => "http://purl.org/dc/terms/", + "dcterms" => "http://purl.org/dc/terms/", + "ical" => "http://www.w3.org/2002/12/cal/icaltzd#", + "foaf" => "http://xmlns.com/foaf/0.1/", + "gr" => "http://purl.org/goodrelations/v1#", + "grddl" => "http://www.w3.org/2003/g/data-view#", + "ma" => "http://www.w3.org/ns/ma-ont#", + "og" => "http://ogp.me/ns#", + "owl" => "http://www.w3.org/2002/07/owl#", + "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfa" => "http://www.w3.org/ns/rdfa#", + "rdfs" => "http://www.w3.org/2000/01/rdf-schema#", + "rev" => "http://purl.org/stuff/rev#", + "rif" => "http://www.w3.org/2007/rif#", + "rr" => "http://www.w3.org/ns/r2rml#", + "schema" => "http://schema.org/", + "sioc" => "http://rdfs.org/sioc/ns#", + "skos" => "http://www.w3.org/2004/02/skos/core#", + "skosxl" => "http://www.w3.org/2008/05/skos-xl#", + "v" => "http://rdf.data-vocabulary.org/#", + "vcard" => "http://www.w3.org/2006/vcard/ns#", + "void" => "http://rdfs.org/ns/void#", + "xhv" => "http://www.w3.org/1999/xhtml/vocab#", + "xml" => "http://www.w3.org/XML/1998/namespace", + "xsd" => "http://www.w3.org/2001/XMLSchema#", + "wdr" => "http://www.w3.org/2007/05/powder#", + "wdrs" => "http://www.w3.org/2007/05/powder-s#", + ); + + foreach my $p (keys %rdf_prefixes) { + AssignMapping('RDFa_prefixes', $p => $rdf_prefixes{$p}); } +} + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git 
a/lib/LaTeXML/Engine/Base_Utility.pool.ltxml b/lib/LaTeXML/Engine/Base_Utility.pool.ltxml new file mode 100644 index 000000000..cc147aa30 --- /dev/null +++ b/lib/LaTeXML/Engine/Base_Utility.pool.ltxml @@ -0,0 +1,462 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | Base_Utility | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#********************************************************************** +# LaTeX has a very particular notion of "Undefined", +# so let's get that squared away at the outset; it's useful for TeX, too! +# Naturally, it uses \csname to check, which ends up DEFINING the possibly undefined macro as \relax +DefMacro('\@ifundefined{}{}{}', sub { + my ($gullet, $name, $if, $else) = @_; + my $cs = T_CS('\\' . ToString(Expand($name))); + if (IsDefined($cs)) { + return $else->unlist; } + else { + $STATE->assignMeaning($cs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssign + return $if->unlist; } }, + locked => 1); + +sub isDefinable { + my ($token) = @_; + return unless $token; + my $meaning = LookupMeaning($token); + my $name = $token->getString; $name =~ s/^\\//; + return (((!defined $meaning) || ($meaning eq LookupMeaning(T_CS('\relax'))) + || LookupValue('2.09_COMPATIBILITY')) # Let redefinitions happen in compatibility mode. 
+ && (($name ne 'relax') && ($name !~ /^end/))); } + +#====================================================================== + +DefPrimitiveI('\lx@ignorehardspaces', undef, sub { + my ($stomach) = @_; + my $gullet = $stomach->getGullet; + my ($token, @boxes); + while (($token = $gullet->readXToken) && (@boxes = $stomach->invokeToken($token))) { + while (@boxes && $boxes[0]->getProperty('isSpace')) { + shift(@boxes); } + last if @boxes; } + return @boxes; }); + +#====================================================================== + +sub aligningEnvironment { + my ($align, $class, $document, %props) = @_; + map { setAlignOrClass($document, $_, $align, $class) } + insertBlock($document, $props{body}); # Add class attribute to new nodes. + return; } + +# should be obsolete!!! +sub addClass { + my ($node, $class) = @_; + if ($node && $class && ($node->nodeType == XML_ELEMENT_NODE)) { + if ($node->hasAttribute('class')) { + $node->setAttribute(class => $node->getAttribute('class') . ' ' . $class); } + else { + $node->setAttribute(class => $class); } } + return; } + +DefConstructor('\@ADDCLASS Semiverbatim', sub { + $_[0]->addClass($_[0]->getElement, ToString($_[1])); }, + sizer => 0); + +sub setAlignOrClass { + my ($document, $node, $align, $class) = @_; + my $model = $document->getModel; + my $qname = $model->getNodeQName($node); + if ($qname eq 'ltx:tag') { } # HACK + elsif ($align && $document->canHaveAttribute($qname, 'align')) { + $node->setAttribute(align => $align); } + elsif ($class && $document->canHaveAttribute($qname, 'class')) { + $document->addClass($node, $class); } + return; } + +#====================================================================== +# A random collection of Tokens utility functions. +# [probably should be exported from Tokens.pm ?] +# [maybe need to do some reorganization?] 
# Split $tokens into sublists at any token that Equals one of @delims.
# Since this is used for textual tokens, typically to split author lists,
# we don't split within braces or math.
# Returns a list of array references (one per item, delimiters dropped);
# there is always at least one (possibly empty) item.
sub SplitTokens {
  my ($tokens, @delims) = @_;
  my @items = ();
  my @toks  = ();
  if ($tokens) {
    my @tokens = $tokens->unlist;
    my $t;
    # Test definedness (as the inner loops do), so that only running out of
    # tokens ends the scan.
    while (defined($t = shift(@tokens))) {
      if (grep { Equals($t, $_) } @delims) {
        push(@items, [@toks]); @toks = (); }
      elsif ($t->defined_as(T_BEGIN)) {
        # Copy a whole braced group, tracking nesting by catcode.
        push(@toks, $t);
        my $level = 1;
        while ($level && defined($t = shift(@tokens))) {
          my $cc = $t->getCatcode;
          $level++ if $cc == CC_BEGIN;
          $level-- if $cc == CC_END;
          push(@toks, $t); } }
      elsif ($t->defined_as(T_MATH)) {
        # Copy everything up to and including the next math-shift token.
        push(@toks, $t);
        while (defined($t = shift(@tokens))) {
          my $cc = $t->getCatcode;
          push(@toks, $t);
          last if $cc == CC_MATH; } }
      else {
        push(@toks, $t); } } }
  return (@items, [@toks]); }

# Split $tokens at \and, and return the flat token list  $cs{item} $cs{item} ...
sub andSplit {
  my ($cs, $tokens) = @_;
  return map { ($cs, T_BEGIN, @$_, T_END) } SplitTokens($tokens, T_CS('\and')); }

# Return the arguments unchanged if any of them is defined, else undef.
sub orNull {
  return (grep { defined } @_) ? @_ : undef; }

# Inverse operation: interleave $conjunction between @things and wrap in Tokens.
# Returns the empty list when there is nothing to join.
sub JoinTokens {
  my ($conjunction, @things) = @_;
  if (!@things) { return (); }
  my @result = (shift(@things));
  # Iterate over every remaining item; a truthiness-tested shift loop would
  # silently drop everything after the first false element.
  foreach my $thing (grep { defined } @things) {
    push(@result, $conjunction, $thing); }
  return Tokens(@result); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# General support for Front Matter.
# Not (yet) used by TeX (finish plain?)
# But provides support for LaTeX (and other formats?) for handling frontmatter.
#
# The idea is to accumulate any frontmatter material (title, author,...)
# rather than directly drop it into the digested stream.
# When we begin constructing the document, all accumulated material is output.
# See LaTeX.ltxml for usage.
# Note: could be circumstances where you'd want modular frontmatter?
# (i.e. frontmatter for each sectional unit)
AssignValue(frontmatter => {}, 'global');

DefConditionalI('\if@in@preamble', undef, sub { LookupValue('inPreamble'); });

# Add a new frontmatter item that will be enclosed in <$tag %attr>...
# The content is the result of digesting $tokens.
# \@add@frontmatter[keys]{tag}[attributes]{content}
# keys can have
#   replace (to replace the current entry, if any)
#   ifnew   (only add if no previous entry)
# The accumulated data lives in the 'frontmatter' value: a hash from tag to
# a list of entries [tag, attribute-hash-or-undef, digested content...].
DefPrimitive('\@add@frontmatter OptionalKeyVals {} OptionalKeyVals {}', sub {
    my ($stomach, $keys, $tag, $attr, $tokens) = @_;
    # Digest this as if we're already in the document body!
    my $frontmatter = LookupValue('frontmatter');
    my $inpreamble  = LookupValue('inPreamble');
    AssignValue(inPreamble => 0);
    # Be careful since the contents may also want to add frontmatter
    # (which should be inside or after this one!)
    # So, we append this entry before digesting
    $tag = ToString($tag);
    if ($keys && $keys->hasKey('replace') && $$frontmatter{$tag}) {    # if replace and previous entries
      $$frontmatter{$tag} = []; }                                      # Remove previous entries
    if ($keys && $keys->hasKey('ifnew') && $$frontmatter{$tag}) {      # if ifnew and previous entries
      AssignValue(inPreamble => $inpreamble);                          # restore on this exit path, too
      return; }                                                        # Skip this one.
    my $entry = [$tag, undef, 'to-be-filled-in'];
    push(@{ $$frontmatter{$tag} }, $entry);
    if ($attr) {
      $$entry[1] = { $attr->beDigested($stomach)->getHash }; }
    $$entry[2] = Digest(Tokens(T_BEGIN, $tokens, T_END));
    AssignValue(inPreamble => $inpreamble);
    return; },
  beforeDigest => sub {
    $_[0]->bgroup; },
  afterDigest => sub {
    $_[0]->egroup; });

# Append a piece of data to an existing frontmatter item that is contained in <$tag>
# If $label is given, look for an item which has label=>$label,
# otherwise, just append to the last item in $tag.

# \@add@to@frontmatter{tag}[label]{content}
# Digests content (as if in the document body) and appends it to an entry
# stored under tag; when no suitable entry exists, a new one is created.
DefPrimitive('\@add@to@frontmatter {} [] {}', sub {
    my ($stomach, $tag, $label, $tokens) = @_;
    $tag   = ToString($tag);
    $label = ToString($label) if $label;
    my $frontmatter = LookupValue('frontmatter');

    my $inpreamble = LookupValue('inPreamble');
    AssignValue(inPreamble => 0);
    my $datum = Digest(Tokens(T_BEGIN, $tokens, T_END));
    AssignValue(inPreamble => $inpreamble);
    my $list = $$frontmatter{$tag} || [];
    if ($label) {
      # Append to the first entry whose attributes carry this label.
      foreach my $item (@$list) {
        my $iattr = $$item[1];
        if ($label eq (($iattr && $$iattr{label}) || '')) {
          push(@$item, $datum);
          return; } } }
    elsif (@$list) {
      # Only index the last entry when there is one: an existing but empty
      # list would make $$list[-1] a fatal "non-creatable array value".
      push(@{ $$list[-1] }, $datum);
      return; }
    push(@{ $$frontmatter{$tag} }, [$tag, ($label ? { label => $label } : undef), $datum]);
    return; },
  beforeDigest => sub {
    $_[0]->bgroup; },
  afterDigest => sub {
    $_[0]->egroup; });

# This is called by afterOpen (by default on ltx:document) to
# output any frontmatter that was accumulated.

# The standard frontmatter elements, in the order they are output.
my @frontmatter_elements = (qw(ltx:title ltx:toctitle ltx:subtitle
    ltx:creator ltx:date
    ltx:abstract ltx:keywords ltx:classification ltx:acknowledgements));
my %frontmatter_elements = map { ($_ => 1) } @frontmatter_elements;

# Insert FrontMatter into document, if not already added.
# Does nothing once 'frontmatter_done' is set.  On the first call for a
# document whose only frontmatter is an abstract, it just sets
# 'frontmatter_deferred' and returns, so the abstract can be placed later.
sub insertFrontMatter {
  my ($document) = @_;
  return if LookupValue('frontmatter_done');
  my $frontmatter = LookupValue('frontmatter');
  my @set_keys    = $frontmatter ? (keys %$frontmatter) : ();
  # if doc ONLY has abstract as frontmatter, defer until abstract's document location
  if ((scalar(@set_keys) == 1) && ($set_keys[0] eq 'ltx:abstract') &&
    !LookupValue('frontmatter_deferred')) {
    AssignValue(frontmatter_deferred => 1, 'global');
    return; }
  AssignValue(frontmatter_done => 1, 'global');    # OK, we're placing FrontMatter here, now.
  # Standard elements first, in their fixed order; any other keys follow,
  # sorted so the output does not depend on hash ordering.
  my @other_keys = sort grep { !$frontmatter_elements{$_} } @set_keys;
  foreach my $key (@frontmatter_elements, @other_keys) {
    if (my $list = $$frontmatter{$key}) {
      # Dubious, but assures that frontmatter appears in text mode...
      local $LaTeXML::BOX = Box('', $STATE->lookupValue('font'), '', T_SPACE);
      foreach my $item (@$list) {
        my ($tag, $attr, @stuff) = @$item;
        # Work on a copy, so the stored entry is not modified below.
        my %attr = ($attr ? %$attr : ());
        # add a dedicated class for frontmatter notes,
        # in the case we want to style those uniformly.
        if ($tag eq 'ltx:note') {
          $attr{class} = ($attr{class} ? $attr{class} . ' ' : '') . 'ltx_note_frontmatter'; }
        $document->openElement($tag, %attr,
          (scalar(@stuff) && $document->canHaveAttribute($tag, 'font')
            ? (font => $stuff[0]->getFont, _force_font => 'true') : ()));
        foreach my $piece (@stuff) {
          $document->absorb($piece); }
        my $completed_node = $document->closeElement($tag);
        # At this time, the frontmatter element should really carry the actual literal values intended.
        # Thus, if we see an empty element, something went wrong -- including our bindings are too verbose,
        # as e.g. \preprint{} always generates a ltx:note element.
        #
        # To solve this in a single location: prune here!
        if (($tag ne "ltx:rdf") && !scalar($completed_node->childNodes)) {
          $document->removeNode($completed_node); } } } }
  return; }

# Add FrontMatter at document begin, unless deferred to a better position.
Tag('ltx:document', 'afterOpen:late' => sub {
    insertFrontMatter($_[0]) unless LookupValue('frontmatter_deferred'); });
# Request Frontmatter to appear HERE (if not already done),
# deferring it from document begin.
DefConstructor('\lx@frontmatterhere', sub { insertFrontMatter($_[0]); },
  afterDigest => sub { AssignValue(frontmatter_deferred => 1, 'global'); });

# Maintain a list of classes that apply to the document root.
# This might involve global style options, like leqno.
Tag('ltx:document', 'afterOpen:late' => sub {
    my ($document, $root) = @_;
    # The keys of the DOCUMENT_CLASSES mapping, space-separated (empty when there are none).
    my $classes = join(' ', LookupMappingKeys('DOCUMENT_CLASSES'));
    $document->addClass($root, $classes) if $classes; });

#======================================================================
# Tags & Titles
# The reference numbers, titles, captions etc, for various objects have
# different styling conventions, and the styling varies depending on context.
# We'll use ltx:tags as a container for the various forms of ltx:tag with different @role's.
# The role=refnum form is simply formatted by \the and used by \ref;
# An ltx:tag w/o @role are for the numbers, often formatted differently, which
# appear alongside the object; Such a tag also may be embedded within the title or caption.
# Cross-references automatically generated by LaTeXML benefit from a bit more context:
# these are the role=typerefnum forms.
# Additional forms are needed for bibliographies, hyperref's autoref, etc.
# An additional complication is that while the "type" determines the formatting
# of the various forms, some types (eg. theorems) share the same counter.
# LaTeX defines this handling on an adhoc basis; defines \fnum@table, \fnum@figure for some types
# but \labelenumi, etc for others.

# This section synthesizes a more uniform support for reference numbers,
# references to reference numbers, title formatting etc.
# It allows you to customize each of the forms for each type encountered.
# The design reflects LaTeX needs, more than TeX, but support starts here!

# This collects up the various declared ltx:tag's into an ltx:tags
# Expands to \lx@tags{...} containing one \lx@tag@intags[role]{formatter{type}}
# for each role in the 'type_tag_formatter' value, in sorted role order.
DefMacro('\lx@make@tags {}', sub {
    my ($gullet, $type) = @_;
    my @tags       = ();
    my $formatters = LookupValue('type_tag_formatter');
    foreach my $role (sort keys %{$formatters}) {
      my $formatter = $$formatters{$role};
      push(@tags, Invocation(T_CS('\lx@tag@intags'), T_OTHER($role),
          Invocation($formatter, $type))); }
    return (T_CS('\lx@tags'), T_BEGIN, @tags, T_END); });

# Remove the last closed node, if it's empty.
# Used as an afterConstruct hook, so it receives ($document, $whatsit);
# the whatsit is not needed.
sub removeEmptyElement {
  my ($document, $whatsit) = @_;
  my $node = $document->getNode->lastChild;    # This should be the wrapper just added.
  # If nothing was added there is no node to inspect, and a trailing text node
  # is content, not an empty wrapper: only ever prune an element.
  return unless $node && ($node->nodeType == XML_ELEMENT_NODE);
  if (!$node->childNodes) {
    $document->removeNode($node); }
  return; }

# \lx@tag[open][close]{stuff}
DefConstructor('\lx@tag[][][]{}',
  "#4",
  bounded        => 1, mode => 'text',
  afterConstruct => \&removeEmptyElement);

# \lx@tag@intags{role}{stuff}
DefConstructor('\lx@tag@intags[]{}',
  "#2",
  bounded        => 1, mode => 'text',
  beforeDigest   => sub { reenterTextMode(); neutralizeFont() },
  afterConstruct => \&removeEmptyElement);

DefConstructor('\lx@tags{}',
  "#1",
  afterConstruct => \&removeEmptyElement);

#----------------------------------------------------------------------
# "refnum" is the lowest level reference number for an object is typically \the
# but be sure to use the right counter! This is how \ref will show the number.
# You'll typically customize this by defining \the (and \p@ '\lx@therefnum@@');

#----------------------------------------------------------------------
# \lx@fnum@@{type} Gets the formatted form of the refnum, as part of the object, (no @role).
# Customize by defining \fnum@ or \name and \fnum@font@
# Default uses \fnum@font@ \name prefix + space (if any) and \the.
+# When using the "name", uses \name in preference to fallback \lx@name@ +DefMacro('\lx@refnum@compose{}{}', '\expandafter\lx@refnum@compose@\expandafter{#2}{#1}'); +DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2\space#1\fi'); +####DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2~#1\fi'); + +DefMacro('\lx@fnum@@{}', + '{\normalfont\@ifundefined{fnum@font@#1}{}{\csname fnum@font@#1\endcsname}' + . '\@ifundefined{fnum@#1}{\lx@@fnum@@{#1}}{\csname fnum@#1\endcsname}}'); + +# Really seems like name should take precedence over \lx@name@, +# since users might define it. +# BUT amsthm defines \thmname{}! +DefMacro('\lx@@fnum@@ {}', + '\@ifundefined{lx@name@#1}{' + . '\@ifundefined{#1name}{' + . '\lx@the@@{#1}' + . '}{' + . '\lx@refnum@compose{\csname #1name\endcsname}{\lx@the@@{#1}}' + . '}}{' + . '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\lx@the@@{#1}}' + . '}'); + +AssignMapping('type_tag_formatter', '' => '\lx@fnum@@'); # Default! + +#---------------------------------------------------------------------- +# \lx@fnum@toc@{type} is similar, but formats the number for use within \toctitle +# Customize by defining \fnum@toc@ or \fnum@tocfont@ +# Default uses just \the, else composes using \lx@@fnum@@{type} +DefMacro('\lx@fnum@toc@@{}', + '{\normalfont\@ifundefined{fnum@tocfont@#1}{}{\csname fnum@tocfont@#1\endcsname}' + . '\@ifundefined{fnum@toc@#1}{\lx@the@@{#1}}{\csname fnum@toc@#1\endcsname}}'); + +#---------------------------------------------------------------------- +# "typerefnum" form is used by automatic cross-references, typically "type number" or similar. +# Customize by defining \typerefnum@ or \typerefnum@font@ +# Default uses either \typerefname or \name (if any, followed by space, then \the +DefMacro('\lx@typerefnum@@{}', + '{\normalfont\@ifundefined{typerefnum@font@#1}{}{\csname typerefnum@font@#1\endcsname}' + . 
'\@ifundefined{typerefnum@#1}{\lx@@typerefnum@@{#1}}{\csname typerefnum@#1\endcsname}}'); + +DefMacro('\lx@@typerefnum@@{}', + '\@ifundefined{#1typerefname}{' + . '\@ifundefined{lx@name@#1}{' + . '\@ifundefined{#1name}{' + . '}{' + . '\lx@refnum@compose{\csname #1name\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}' + . '}}{' + . '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}' + . '}}{' + . '\lx@refnum@compose{\csname #1typerefname\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}' + . '}'); + +AssignMapping('type_tag_formatter', 'typerefnum' => '\lx@typerefnum@@'); + +#---------------------------------------------------------------------- +# The following macros provide similar customization for titles & toctitles +# in particular for supporting localization for different languages. +# Redefine these if you want to assemble the name (eg. \chaptername), refnum and titles differently +#---------------------------------------------------------------------- +# \lx@format@title@@{type}{title} +# Format a title (or caption) appropriately for type. +# Customize by defining \format@title@type{title} +# Default composes \lx@fnum@@{type} space title. +DefMacro('\lx@format@title@@{}{}', + '\lx@@format@title@@{#1}' + . '{{\lx@format@title@font@@{#1}#2}}'); +DefMacro('\lx@@format@title@@{}{}', + '{\@ifundefined{format@title@#1}' + . '{\lx@@compose@title{\lx@fnum@@{#1}}{#2}}' + . '{\csname format@title@#1\endcsname{#2}}}'); + +# \lx@format@toctitle@@{type}{toctitle} +# Similar for toctitle, typically briefer +# Customize by defining \format@toctitle@type{title} +# Default composes \lx@fnum@toc@@{type} space title. +DefMacro('\lx@format@toctitle@@{}{}', + '\lx@@format@toctitle@@{#1}' + . '{{\lx@format@toctitle@font@@{#1}#2}}'); + +DefMacro('\lx@@format@toctitle@@{}{}', + '{\@ifundefined{format@toctitle@#1}' + . '{\lx@@compose@title{\lx@fnum@toc@@{#1}}{#2}}' + . 
'{\csname format@toctitle@#1\endcsname{#2}}}'); + +DefMacro('\lx@@compose@title{}{}', '\lx@tag[][ ]{#1}#2'); + +DefMacro('\lx@format@title@font@@{}', + '\@ifundefined{format@title@font@#1}{}{\csname format@title@font@#1\endcsname}'); +DefMacro('\lx@format@toctitle@font@@{}', + '\@ifundefined{format@toctitle@font@#1}{}{\csname format@toctitle@font@#1\endcsname}'); + +## NOTE that a 3rd form seems desirable: an concise form that cannot rely on context for the type. +## This would be useful for the titles in links; thus can be plain (unicode) text. + +#====================================================================== +# Normally definitions disappear; the macros are expanded or have their expected effect. +# But in a few cases (eg tabular column definitions, or LaTeX \Declarexxxx) +# they will need declarations in the (La)TeX preamble to allow (La)TeX to process snippets +# (eg. math) in order to create images. +# Returning a call to this utility from Primitives will add a preamble Processing Instruction +sub AddToPreamble { + my ($cs, @args) = @_; + return Digest(Invocation(T_CS('\lx@add@Preamble@PI'), Invocation((ref $cs ? $cs : T_CS($cs)), @args))); } + +DefConstructor('\lx@add@Preamble@PI Undigested', + ""); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/Base_XMath.pool.ltxml b/lib/LaTeXML/Engine/Base_XMath.pool.ltxml new file mode 100644 index 000000000..5d833ff9f --- /dev/null +++ b/lib/LaTeXML/Engine/Base_XMath.pool.ltxml @@ -0,0 +1,929 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | Base_XMath | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# LaTeXML Enhancemens to Math Representation to preserve Semantics +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Some of this stuff is more semantic versions of declarations in +# plain or latex. Is this the right place for them? + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Normally, the content branch contains the pure structure and meaning of a construct, +# and the presentation is generated from lower level TeX macros that only concern +# themselves with how to display the object. +# Nevertheless, it is sometimes useful to know where the tokens in the presentation branch +# came from; particularly what their presumed "meaning" is. +# For example, when search-indexing pmml, or providing links to definitions from the pmml. +# +# The following constructor (see how it's used in DefMath), adds meaning attributes +# whereever it seems sensible on the presentation branch, after it has been generated. +DefConstructor('\@ASSERT@MEANING{}{}', '#2', + reversion => '#2', + afterConstruct => sub { + my ($document, $whatsit) = @_; + my $node = $document->getNode; # This should be the wrapper just added. + my $meaning = ToString($whatsit->getArg(1)); + addMeaningRec($document, $node, $meaning); + $node; }); + +sub addMeaningRec { + my ($document, $node, $meaning) = @_; + if ($node->nodeType == XML_ELEMENT_NODE) { + my $qname = $document->getModel->getNodeQName($node); + if ($qname eq 'ltx:XMArg') { } # DONT cross through into arguments! 
+ elsif ($qname eq 'ltx:XMTok') { + if ((($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN') + && !$node->getAttribute('meaning')) { + $document->setAttribute($node, meaning => $meaning); } } + else { + foreach my $c ($node->childNodes) { + addMeaningRec($document, $c, $meaning); } } } + return; } + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Support for constructing mathematical expressions + +# Common XMath pattern for assigning attributes from Whatsit properties. +our $XMath_attributes = + " role='#role' name='#name' meaning='#meaning' omcd='#omcd'" + . " width='#width' height='#height' xoffset='#xoffset' yoffset='#yoffset'" + . " lpadding='#lpadding' rpadding='#rpadding'"; + +sub XMath_copy_keyvals { + my ($stomach, $whatsit) = @_; + my $kv = $whatsit->getArg(1); + $whatsit->setProperties($kv->getPairs) if $kv; + return; } + +# Build an ltx:XMApp, application of function/operator to arguments +# first piece of (TeX) argument is expected to be the operator +# Usually used on content side, but at least the arguments should be properly encapsulated: +# They should build individual subtrees; use ltx::XMArg, ltx:XMWrap ... if needed +DefConstructor('\lx@apply OptionalKeyVals:XMath {}{}', + "#2#3", + reversion => '#2#3', + afterDigest => sub { XMath_copy_keyvals(@_); }); + +# Build an ltx:XMTok, a mathematical symbol, with given attributes +# the argument should create text to be the content of the token. +DefConstructor('\lx@symbol OptionalKeyVals:XMath {}', + "#2", + reversion => '#2', + afterDigest => sub { + $_[1]->setFont($_[1]->getArg(2)->getFont); + XMath_copy_keyvals(@_); }); + +# Wrap the contents in an ltx:XMWrap, to stand as a single subtree & providing attributes +# The ltx:XMWrap may be collapsed, later, by parsing +DefConstructor('\lx@wrap OptionalKeyVals:XMath {}', + "#2", + reversion => '#2', + afterDigest => sub { XMath_copy_keyvals(@_); }); + +# Convert a hashref into a list of tokens of the form key=value,... 
sub I_keyvals {
  my ($keyvals) = @_;
  return () unless $keyvals;
  my @options = ();
  # Walk the keys in sorted order rather than with each(): the result then
  # neither depends on hash ordering nor on the state of the hash's iterator.
  foreach my $key (sort keys %$keyvals) {
    my $value = $$keyvals{$key};
    $value = TokenizeInternal($value) if defined $value && !ref $value;
    push(@options, T_OTHER(',')) if @options;
    # An undefined value yields an empty group, key={}.
    push(@options, T_OTHER($key), T_OTHER('='), T_BEGIN, (defined $value ? $value : ()), T_END); }
  return (@options ? Tokens(T_OTHER('['), @options, T_OTHER(']')) : ()); }

# Tokens for \lx@apply[kv]{\lx@wrap{op}}{\lx@wrap{arg}...}
sub I_apply {
  my ($kv, $op, @args) = @_;
  return Tokens(T_CS('\lx@apply'), I_keyvals($kv),
    T_BEGIN, T_CS('\lx@wrap'), T_BEGIN, $op, T_END, T_END,
    T_BEGIN, (map { (T_CS('\lx@wrap'), T_BEGIN, $_, T_END); } @args), T_END); }

# Tokens for \lx@symbol[kv]{text}; an undefined $text gives an empty group.
sub I_symbol {
  my ($kv, $text) = @_;
  return Tokens(T_CS('\lx@symbol'), I_keyvals($kv), T_BEGIN, (defined $text ? $text : ()), T_END); }

# Tokens for \lx@wrap[kv]{stuff}
sub I_wrap {
  my ($kv, @stuff) = @_;
  return Tokens(T_CS('\lx@wrap'), I_keyvals($kv), T_BEGIN, @stuff, T_END); }

# These two accept key operator_meaning, operator_omcd to give a meaning to the sub/superscript
# NOTE (BUG): We SHOULD nest paired sub/superscripts, but avoid conflicting double scripts
# To do that we need to sniff at the base, whether it already contains scripts.
# However, IsScript isn't quite sufficient if the scripts are hidden within Whatsits, duals, etc.
# Currently, LaTeXML manages to deal with the double scripts anyway;
# The reversion ALWAYS wraps the base (which will render non-optimally in images but avoid Errors)
DefConstructor('\lx@superscript OptionalKeyVals:XMath {} InScriptStyle',
  ""
    . ""
    . "#2"
    . "#3"
    . "",
  afterDigest => sub { XMath_copy_keyvals(@_); },
  reversion   => sub {
    my ($whatsit, $kv, $base, $sup) = @_;
    my $bump = $whatsit->getProperty('bump');
    $bump = 1;    # For now: ALWAYS {} wrap base in the reversion!
    (IsEmpty($sup)
      ? Revert($base)
      : (($bump ? (T_BEGIN, Revert($base), T_END) : Revert($base)), T_SUPER, revertScript($sup))); },
  properties => sub {
    my ($stomach, $kv, $base, $script) = @_;
    my $basetype = IsScript($base);
    # bump is 1 when the base is itself reported as a SUPERSCRIPT, else 0.
    my $bump = ($basetype && ($$basetype[1] eq 'SUPERSCRIPT') ? 1 : 0);
    (scriptpos => "post" . ($_[0]->getScriptLevel + $bump),
      bump => $bump); },
  sizer => sub { scriptSizer($_[0]->getArg(3), $_[0]->getArg(2), undef, 'SUPERSCRIPT', 'post'); });

DefConstructor('\lx@subscript OptionalKeyVals:XMath {} InScriptStyle',
  ""
    . ""
    . "#2"
    . "#3"
    . "",
  afterDigest => sub { XMath_copy_keyvals(@_); },
  reversion   => sub {
    my ($whatsit, $kv, $base, $sub) = @_;
    my $bump = $whatsit->getProperty('bump');
    $bump = 1;    # For now: ALWAYS {} wrap base in the reversion!
    (IsEmpty($sub)
      ? Revert($base)
      : (($bump ? (T_BEGIN, Revert($base), T_END) : Revert($base)), T_SUB, revertScript($sub))); },
  properties => sub {
    my ($stomach, $kv, $base, $script) = @_;
    my $basetype = IsScript($base);
    # bump is 1 when the base is itself reported as a SUBSCRIPT, else 0.
    my $bump = ($basetype && ($$basetype[1] eq 'SUBSCRIPT') ? 1 : 0);
    (scriptpos => "post" . ($_[0]->getScriptLevel + $bump),
      bump => $bump); },
  sizer => sub { scriptSizer($_[0]->getArg(3), $_[0]->getArg(2), undef, 'SUBSCRIPT', 'post'); });

# Ignore $kv for the moment?????
+# Tokens for \lx@subscript[keyvals]{base}{script}.
+sub I_subscript {
+  my ($kv, $base, $script) = @_;
+  return Tokens(T_CS('\lx@subscript'), I_keyvals($kv), T_BEGIN, $base, T_END, T_BEGIN, $script, T_END); }
+
+# Tokens for \lx@superscript[keyvals]{base}{script}.
+sub I_superscript {
+  my ($kv, $base, $script) = @_;
+  return Tokens(T_CS('\lx@superscript'), I_keyvals($kv), T_BEGIN, $base, T_END, T_BEGIN, $script, T_END); }
+
+# Superscript meaning power
+DefMacro('\lx@power{}{}', '\lx@superscript[operator_meaning=power]{#1}{#2}');
+# Superscript meaning functional (or applicative) power; iterated function/operator application
+DefMacro('\lx@functionalpower{}{}', '\lx@superscript[operator_meaning=functional-power]{#1}{#2}');
+
+# These are to be used on the presentation side.
+# Each is an invisible Unicode operator (U+2061..U+2064) with an empty reversion and name.
+DefMathI('\lx@ApplyFunction',  undef, "\x{2061}", reversion => '', name => '', role => 'APPLYOP');
+DefMathI('\lx@InvisibleTimes', undef, "\x{2062}", reversion => '', name => '', meaning => 'times', role => 'MULOP');
+DefMathI('\lx@InvisibleComma', undef, "\x{2063}", reversion => '', name => '', role => 'PUNCT');
+DefMathI('\lx@InvisiblePlus',  undef, "\x{2064}", reversion => '', name => '', meaning => 'plus', role => 'ADDOP');
+
+# \lx@kludged{stuff}: the template switches on isMath; the reversion is just the argument.
+DefConstructor('\lx@kludged{}',
+  "?#isMath(#1)(#1)",
+  reversion => '#1');
+# \lx@padded[lpadding]{rpadding}{stuff}: constructs #3, then records the paddings as
+# lpadding/rpadding attributes on the node that was just built.
+DefConstructor('\lx@padded[MuDimension]{MuDimension}{}',
+  '#3',
+  afterConstruct => sub {
+    my ($document, $whatsit) = @_;
+    # The node to decorate is the last child element of the current insertion point.
+    my $node = $document->getLastChildElement($document->getNode);
+    # For an XMDual, decorate its second child instead
+    # (presumably the presentation branch -- TODO confirm).
+    if ($document->getNodeQName($node) eq 'ltx:XMDual') {
+      my (@ch) = $node->childNodes;
+      $node = $ch[1]; }
+    if (my $lpadding = $whatsit->getArg(1)) {
+      $document->setAttribute($node, lpadding => $lpadding); }
+    if (my $rpadding = $whatsit->getArg(2)) {
+      $document->setAttribute($node, rpadding => $rpadding); } },
+  reversion => '#3');
+
+#======================================================================
+# Building XMDuals for Mathematical Parallel markup
+# Used when the content and presentation forms have different structure.
+
+DefKeyVal('XMath', 'reversion',              'UndigestedDefKey');
+DefKeyVal('XMath', 'content_reversion',      'UndigestedDefKey');
+DefKeyVal('XMath', 'presentation_reversion', 'UndigestedDefKey');
+# \lx@dual[keyvals]{content}{presentation}
+# beforeDigest pushes a fresh hash onto PENDING_DUAL_XMARGS; any \lx@xmarg digested inside
+# records its argument there.  afterDigest pops that hash, copies it and the keyvals
+# into the whatsit's properties, and installs a default reversion unless one was given.
+DefConstructor('\lx@dual OptionalKeyVals:XMath {}{}',
+  "#2#3",
+  beforeDigest => sub {
+    PushValue(PENDING_DUAL_XMARGS => {});
+    return; },
+  afterDigest => sub {
+    my ($stomach, $whatsit) = @_;
+    my $kv     = $whatsit->getArg(1);
+    my $xmargs = PopValue('PENDING_DUAL_XMARGS');    # Really SHOULD be a hash
+    $whatsit->setProperties(%$xmargs)      if $xmargs;    # Hopefully no name clash with XM
+    $whatsit->setProperties($kv->getPairs) if $kv;
+    my %props = $whatsit->getProperties;
+    my $cr    = $props{content_reversion};
+    my $pr    = $props{presentation_reversion};
+    my $r     = ToString($props{revert_as}) || 'content';    # ?????
+
+    # Default reversion, selected by revert_as:
+    #   'content'      => content_reversion, else the reverted content branch
+    #   'presentation' => presentation_reversion, else the reverted presentation branch
+    #   'dual'         => a new \lx@dual invocation holding both
+    #   anything else  => chosen by $LaTeXML::DUAL_BRANCH at reversion time
+    if (!defined $props{reversion}) {
+      $whatsit->setProperty(reversion => sub {
+          my ($self, $kvs, $c, $p) = @_;
+          ($r eq 'content' ? $cr || Revert($c)
+            : ($r eq 'presentation' ? $pr || Revert($p)
+              : ($r eq 'dual'
+                ? Tokens(T_CS('\lx@dual'), I_keyvals($kvs),
+                  T_BEGIN, ($cr || Revert($c)), T_END,
+                  T_BEGIN, ($pr || Revert($p)), T_END)
+                : (($LaTeXML::DUAL_BRANCH || '') eq 'presentation'    # Context dependent reversion
+                  ? $pr || Revert($p)
+                  : $cr || Revert($c))))); }); }
+    return; },
+  sizer => '#3');    # size according to presentation
+
+# These are used within XMDual
+# The XMDual represents both a content & presentation representation of some
+# possibly exotic structure ("Transfix notation"),
+# or just a somewhat complex presentation that corresponds (often) to a simpler
+# applicative content structure.
+# Invoking such a mathematical object to "arguments" requires that both the
+# content & presentation branches contain those arguments.
+# There will be an XMArg, with an ID, containing the actual markup, and an XMRef that refers to it.
+# The XMArg will usually be in the presentation branch (so that it inherits appropriate style),
+# unless the arg is "hidden" (ie. semantic, but not displayed).
+# This means that we don't know which one appears first! (See Package's dualize_arglist)
+#
+# To get a "proper id", we'll use a temporary label-like attribute (_xmkey)
+# and establish an id and idref later.
+#
+# \lx@xmarg{id}{arg}: records the argument in the innermost pending \lx@dual (if any),
+# and stores the whatsit globally under 'xref:<id>' where \lx@xmref looks it up.
+DefConstructor('\lx@xmarg{}{}', "#2",
+  reversion   => '#2',
+  afterDigest => sub {
+    my ($stomach, $whatsit) = @_;
+    my ($xmid, $arg) = $whatsit->getArgs();
+    $xmid = ToString($xmid);
+    if (my $pending = LookupValue('PENDING_DUAL_XMARGS')) {
+      if (my $xmargs = $$pending[-1]) {
+        $$xmargs{$xmid} = $arg; } }
+    AssignValue('xref:' . ToString($_[1]->getArg(1)) => $_[1], 'global'); });
+
+# \lx@xmref{id}: reverts to, and is sized as, whatever was stored under 'xref:<id>'.
+# NOTE(review): the sizer calls getSize on the looked-up value without checking it;
+# this assumes the matching \lx@xmarg has already been digested -- confirm.
+DefConstructor('\lx@xmref{}', "",
+  reversion => sub {
+    Revert(LookupValue('xref:' . ToString($_[1]))); },
+  sizer => sub { LookupValue('xref:' . ToString($_[0]->getArg(1)))->getSize; });
+
+# Connect up the XMRef/XMArg pairs (actually can be multiple XMRef's)
+# We want to set the idref of the XMRef's to point to the id of the XMArg (or other XM element),
+# but usually the XMRef is created first, and we want to let the referred to element
+# get its id computed by whatever means it prefers.
+# so we have to work both ways (use state to record associations, to avoid expensive xpath)
+# Set id's on any non-XMRef nodes that have an _xmkey
+# This gets a more natural ordering
+# (only elements whose qualified name starts with ltx:XM, and that have no xml:id yet)
+Tag('ltx:*', 'afterOpen:late' => sub {
+    my ($document, $node) = @_;
+    if (my $key = $node->getAttribute('_xmkey')) {
+      my $qname = $document->getNodeQName($node);
+      if (($qname ne 'ltx:XMRef') && ($qname =~ /^ltx:XM/) && !$node->hasAttribute('xml:id')) {
+        GenerateID($document, $node, undef, ''); } } });
+
+# When an XMDual closes: map each _xmkey to the xml:id of the first non-XMRef node carrying it,
+# then give every XMRef that still lacks an idref the id for its key and drop its _xmkey.
+Tag('ltx:XMDual', 'afterClose:late' => sub {
+    my ($document, $node) = @_;
+    my %ids  = ();
+    my @refs = ();
+    # Collect all descendants with _xmkey attribute
+    foreach my $n ($document->findnodes('descendant::*[@_xmkey]', $node)) {
+      if (($document->getNodeQName($n) eq 'ltx:XMRef') && !$n->hasAttribute('idref')) {
+        push(@refs, $n); }    # we'll fill these in next
+      else {                  # generate & record ids for all referenced nodes
+        my $key = $n->getAttribute('_xmkey');
+        if (!$ids{$key}) {
+          GenerateID($document, $n, undef, '');    # Generate id if none already.
+          $ids{$key} = $n->getAttribute('xml:id'); } } }
+    foreach my $r (@refs) {                        # Now fill in the references
+      $document->setAttribute($r, idref => $ids{ $r->getAttribute('_xmkey') });
+      $r->removeAttribute('_xmkey'); }
+});
+
+# Construction aids
+# Build an XMDual (via \lx@dual) given the content & presentation forms.
+# These forms are provided as Tokens, invoking the appropriate constructor macros,
+# and referring to any arguments using #1, #2.... (see T_XMArg for syntactic sugar)
+# The arguments (if any) are given separately; within the content & presentation
+# they are replaced by \lx@xmref and \lx@xmarg, appropriately,
+# so that they will be linked/shared in the XML tree.
+# The keyvals argument is a hash containing any properties of the construct,
+# along with reversion, content_reversion & presentation_reversion, which are
+# substituted for arguments as well.
+# I_dual($keyvals, $content, $presentation, @args)
+# Returns the Tokens of a \lx@dual invocation.
+#   $keyvals      : hash of properties, or undef; string values are tokenized, and the
+#                   reversion, content_reversion & presentation_reversion values get
+#                   their parameters substituted.
+#   $content      : content form (Tokens or string); #n becomes \lx@xmref{id_n}
+#   $presentation : presentation form (Tokens or string); #n becomes \lx@xmarg{id_n}{arg_n},
+#                   and the whole form is wrapped by I_wrap
+#   @args         : the actual arguments; each gets a fresh id from getXMArgID.
+# Strings are tokenized whenever they are defined (so '' and '0' are handled, consistently
+# with I_keyvals), keys are emitted in sorted order so the result is reproducible,
+# and keys with an undefined value are omitted.
+sub I_dual {
+  my ($keyvals, $content, $presentation, @args) = @_;
+  $content      = TokenizeInternal($content)      if defined $content      && !ref $content;
+  $presentation = TokenizeInternal($presentation) if defined $presentation && !ref $presentation;
+  my (@revargs, @pargs, @cargs);
+  foreach my $arg (@args) {
+    my $id = LaTeXML::Package::getXMArgID();
+    push(@revargs, Tokens(I_arg(ToString($id))));
+    push(@pargs,   Invocation(T_CS('\lx@xmarg'), $id, $arg));
+    push(@cargs,   Invocation(T_CS('\lx@xmref'), $id)); }
+  my $optional = undef;
+  if ($keyvals) {
+    my @options = ();
+    foreach my $key (sort keys %$keyvals) {
+      my $value = $$keyvals{$key};
+      next unless defined $value;
+      $value = TokenizeInternal($value) if !ref $value;
+      if ($key =~ /^(?:presentation_|content_|)reversion$/) {
+        $value = $value->substituteParameters(@revargs); }
+      push(@options, T_OTHER(',')) if @options;
+      push(@options, T_OTHER($key), T_OTHER('='), T_BEGIN, $value, T_END); }
+    $optional = Tokens(@options); }
+  return
+    Invocation(T_CS('\lx@dual'), $optional,
+    $content->substituteParameters(@cargs),
+    I_wrap({}, $presentation->substituteParameters(@pargs))); }
+
+# A little helper to shorten things up a bit; simply generates #1 (or whatever)
+sub I_arg {    # unconditionally create an arg token
+  return bless ["$_[0]", CC_ARG], 'LaTeXML::Core::Token'; }
+
+# Tokens for \lx@xmarg{id}{arg}; a non-reference $id is turned into a single T_OTHER token.
+sub I_xmarg {
+  my ($id, $arg) = @_;
+  return Tokens(T_CS('\lx@xmarg'),
+    T_BEGIN, (ref $id ? $id : T_OTHER($id)), T_END, T_BEGIN, $arg, T_END); }
+
+# Tokens for \lx@xmref{id}; a non-reference $id is turned into a single T_OTHER token.
+sub I_xmref {
+  my ($id) = @_;
+  return Tokens(T_CS('\lx@xmref'), T_BEGIN, (ref $id ? $id : T_OTHER($id)), T_END); }
+
+#======================================================================
+# We OUGHT to be able to do this using \llap,\rlap,\hss...
+# \lx@tweaked{keyvals}{body} dispatches on the current mode to the math or text variant.
+DefMacro('\lx@tweaked{}{}', '\ifmmode\lx@math@tweaked{#1}{#2}\else\lx@text@tweaked{#1}{#2}\fi');
+# Math variant: copies the keyvals via XMath_copy_keyvals and takes its font from the body.
+DefConstructor('\lx@math@tweaked RequiredKeyVals {}',
+  "#2",
+  afterDigest => sub {
+    my ($stomach, $whatsit) = @_;
+    my ($kv,      $body)    = $whatsit->getArgs;
+    XMath_copy_keyvals($stomach, $whatsit);
+    $whatsit->setFont($body->getFont);
+    return; },
+  reversion => '#2');
+
+# Text variant: the keyvals simply become properties of the whatsit.
+DefConstructor('\lx@text@tweaked RequiredKeyVals {}',
+  "#2",
+  afterDigest => sub {
+    my ($stomach, $whatsit) = @_;
+    my ($kv,      $body)    = $whatsit->getArgs;
+    $whatsit->setProperties($kv->getPairs); });
+
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Support for rewrite rules
+#**********************************************************************
+# Wildcard placeholders; each constructs a _WildCard_ element.
+DefConstructor('\WildCard[]', "<_WildCard_>#1");
+DefConstructorI('\WildCardA', undef, "<_WildCard_/>");
+DefConstructorI('\WildCardB', undef, "<_WildCard_/>");
+DefConstructorI('\WildCardC', undef, "<_WildCard_/>");
+
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Properties for plain characters.
+# These are allowed in plain text, but need to act a bit special in math.
+DefMathI('=', undef, '=', role => 'RELOP', meaning => 'equals');
+DefMathI('+', undef, '+', role => 'ADDOP', meaning => 'plus');
+DefMathI('-', undef, '-', role => 'ADDOP', meaning => 'minus');
+## Redefine, if we want Unicode minus
+##DefMathI('-', undef, "\x{2212}", role => 'ADDOP', meaning => 'minus');
+DefMathI('*', undef, "\x{2217}", role => 'MULOP',     meaning  => 'times');
+DefMathI('/', undef, '/',        role => 'MULOP',     meaning  => 'divide');
+DefMathI('!', undef, '!',        role => 'POSTFIX',   meaning  => 'factorial');
+DefMathI(',', undef, ',',        role => 'PUNCT');
+DefMathI('.', undef, '.',        role => 'PERIOD');
+DefMathI(';', undef, ';',        role => 'PUNCT');
+DefMathI('(', undef, '(',        role => 'OPEN',      stretchy => 'false');
+DefMathI(')', undef, ')',        role => 'CLOSE',     stretchy => 'false');
+DefMathI('[', undef, '[',        role => 'OPEN',      stretchy => 'false');
+DefMathI(']', undef, ']',        role => 'CLOSE',     stretchy => 'false');
+DefMathI('|', undef, '|',        role => 'VERTBAR',   stretchy => 'false');
+DefMathI(':', undef, ':',        role => 'METARELOP', name     => 'colon');    # Seems like good default role
+DefMathI('<', undef, '<',        role => 'RELOP',     meaning  => 'less-than');
+DefMathI('>', undef, '>',        role => 'RELOP',     meaning  => 'greater-than');
+
+# NOTE: Need to evolve Ligatures to be easier to write.
+# rough draft of tool to make ligatures more sane to write...
+# It is tempting to handle these with macros,
+# But that tends to run afoul of tricky packages like babel that make : active as well!
+# Even using mathactive doesn't help.
+#
+# TestNode($node, $qname, $content, %attrib) is true when $node exists, has qualified
+# name $qname, has text content equal to $content (not checked when $content is undef)
+# and carries every attribute in %attrib with exactly the given value.
+# An attribute that is absent from the node is compared as the empty string,
+# which avoids an uninitialized-value warning in the common "does not match" case.
+sub TestNode {
+  my ($node, $qname, $content, %attrib) = @_;
+  return $node
+    && ($LaTeXML::DOCUMENT->getModel->getNodeQName($node) eq $qname)
+    && ((!defined $content) || (($node->textContent || '') eq $content))
+    && !grep { my $v = $node->getAttribute($_); (defined $v ? $v : '') ne $attrib{$_} } keys %attrib; }
+
+# Recognize !!
+DefMathLigature("!!" => "!!", role => 'POSTFIX', meaning => 'double-factorial');
+
+# Recognize :=
+DefMathLigature(":=" => ":=", role => 'RELOP', meaning => 'assign');
+
+#======================================================================
+# Combine letters, when the fonts are right. (sorta related to mathcode)
+# well, maybe a letter followed by letters & digits?
+# The matcher walks BACKWARDS from $node over sibling XMTok's (skipping comment nodes)
+# that share the sticky font, have no name attribute, have role UNKNOWN or NUMBER
+# (or no role), and whose accumulated text stays alphanumeric.
+# It answers (count, string, attributes...) when more than one node was consumed and
+# the string starts with a letter.
+DefMathLigature(matcher => sub { my ($document, $node) = @_;
+    my $font = $document->getNodeFont($node);
+    if ($font->isSticky) {
+      my $n      = 0;
+      my $string = '';
+      my $s      = '';
+      while ($node
+        && ($document->getModel->getNodeQName($node) eq 'ltx:XMTok')
+        && ($document->getNodeFont($node)->equals($font))
+        && (!$node->hasAttribute('name'))
+        && ((($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN')
+          || (($node->getAttribute('role') || 'UNKNOWN') eq 'NUMBER'))
+        && (($s = $node->textContent . $s) =~ /^[0-9a-zA-Z]+$/)) {
+        $string = $s;
+        do { $node = $node->previousSibling; $n++;
+        } while $node && ($node->nodeType == XML_COMMENT_NODE); }
+      (($string =~ /^[a-zA-Z]/) && ($n > 1) ? ($n, $string, role => 'UNKNOWN', meaning => undef) : undef);
+} });
+
+#======================================================================
+# Combine digits in math.
+
+foreach my $digit (qw(0 1 2 3 4 5 6 7 8 9)) {
+  DefMathI($digit, undef, $digit, role => 'NUMBER', meaning => $digit); }
+
+# Would probably be best to collapse all XMHint/spaces at the earliest stage.
+# Maps the name of a spacing XMHint to the character that stands for it inside a number.
+our %space_chars = (negthinspace => '', thinspace => "\x{2009}",
+  medspace => "\x{2005}", thickspace => "\x{2004}", space => ' ');
+
+# This is getting out-of-hand;
+# (1) this gets done after document build, so we query the document/node for language
+#   rather than using something specified during digestion (eg. macros, roles...)
+# (2) the way we've specified the decimal & thousands separators (language dependent)
+#   is completely insufficient; should leverage numprint or babel or ... ?
+# (3) the way we're detecting the chars is a mess: a mix of string content & role!
+#   If we could accommodate multiple roles, maybe a separate role could be set on the tokens
+#   (a period could be a PERIOD or a DECIMAL_SEPARATOR, eg)
+
+my %decimal_separator   = (en => '.', de => ',', fr => ',', nl => ',', pt => ',', es => ',');
+my %thousands_separator = (en => ',', de => '.', fr => '.', nl => '.', pt => '.', es => '.');
+# Number ligature: walks BACKWARDS from $node, gluing NUMBER tokens of one font together with
+# thousands separators, decimal separators and the spacing hints listed in %space_chars.
+# Answers (count, string, meaning => number, role => 'NUMBER') when more than one node was
+# consumed and the number contains a digit; 'string' keeps the text as written,
+# 'meaning' is normalized (thousands separators and spaces dropped, decimal separator '.').
+DefMathLigature(matcher => sub { my ($document, $node) = @_;
+    my $lang = $document->getNodeLanguage($node);
+    $lang =~ s/-\w+$// if $lang;    # strip off region code, if any.
+    my $dec  = ($lang && $decimal_separator{$lang})   || '.';
+    my $thou = ($lang && $thousands_separator{$lang}) || ',';
+    # Only when the decimal separator is '.' may it also be recognized by role.
+    my $decrole = ($dec eq '.' ? 'PERIOD' : '');
+    #    my $skip    = Dimension('5mu')->valueOf;
+    my ($n, $string, $number, $font) = (0, '', '', undef);
+    # NOTE: We're scanning chars from END!
+    while ($node) {
+      my $qn = $document->getModel->getNodeQName($node);
+      if ($qn =~ /^(ltx:XMTok|ltx:XMWrap)$/) {
+        my $r    = ($node->getAttribute('role') || '');
+        my $f    = $document->getNodeFont($node);
+        my $text = $node->textContent;
+        if (($r eq 'NUMBER') && (!$font || ($f->equals($font)))) {    # A number in same font?
+          my $meaning = $node->getAttribute('meaning');
+          $font   = $f;
+          $string = $text . $string;
+          $number = (defined $meaning ? $meaning : '') . $number; }
+        elsif (!$n) {    # any following cases are not allowed as LAST char
+          last; }
+        # if thousands separator (but NOT simultaneously PUNCT!!!! Be paranoid about lists)
+        elsif (($text eq $thou) && ($r ne 'PUNCT')) {
+          $string = $text . $string; }    # Add to string, but omit from number
+        # if decimal separator, turn it into "standard" "."
+        # The role test must be skipped when $decrole is empty; otherwise every token
+        # WITHOUT a role (whose $r is also '') would be taken for a decimal separator.
+        elsif (($text eq $dec) || ($decrole && ($r eq $decrole))) {    # was $r eq 'PERIOD'
+          $string = $node->textContent . $string;
+          $number = '.' . $number; }
+        else {
+          last; } }
+      # OR if XMHint with 0 <= width <= thickmuskip (5mu == ?)
+      elsif ($qn eq 'ltx:XMHint') {
+        my $s;
+        if (($s = $node->getAttribute('name')) && ($s = $space_chars{$s})) {
+          $string = $s . $string; }
+        else {
+          last; } }
+      else {
+        last; }
+      do { $node = $node->previousSibling; $n++;
+      } while $node && ($node->nodeType == XML_COMMENT_NODE); }
+    if (($n > 1) && ($number =~ /\d/)) {
+      ($n, $string, meaning => $number, role => 'NUMBER'); } });
+
+# This needs to be applied AFTER numbers have been resolved!
+# If we have a non-negative integer (no signs, decimals,...)
+# followed by a fraction dividing two non-negative integers,
+# Figure it's a mixed fraction --- ADDING the fraction to the number, not multiplying!
+# The replacement wraps both nodes in an XMApp headed by an invisible plus.
+DefRewrite(select => ['descendant-or-self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
+      . '[ following-sibling::*[1][self::ltx:XMApp]'
+      . ' [child::*[1][self::ltx:XMTok[@meaning="divide"]]]'
+      . ' [child::*[2]['
+      . 'self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
+      . 'or self::ltx:XMArg[count(child::*)=1]/ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
+      . ']]'
+      . ' [child::*[3]['
+      . 'self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
+      . 'or self::ltx:XMArg[count(child::*)=1]/ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]'
+      . ']]'
+      . ']',
+    2],
+  replace => sub { my ($document, $number, $frac) = @_;
+    my $box = $document->getNodeBox($number);
+    $document->openElement('ltx:XMApp', _box => $box);
+    $document->insertMathToken("\x{2064}",    # Invisible Plus!
+      meaning => 'plus', role => "ADDOP", _box => $box);
+    $document->getNode->appendChild($number);
+    $document->getNode->appendChild($frac);
+    $document->closeElement('ltx:XMApp'); });
+
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Matrices; Generalized
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# The delimiters around a matrix may simply be notational, or for readability,
+# and don't affect the "meaning" of the array structure as a matrix.
+# In that case, we'll use an XMDual to indicate the content is simply the matrix,
+# but the presentation includes the delimiters.
+# HOWEVER, the delimiters may also signify an OPERATION on the matrix
+# in which case the application & meaning of that operator must be supplied.
+
+# keys are
+#  name  : the name of the environment (for reversion)
+#  datameaning: the (presumed) meaning of the array construct (typically 'matrix')
+#  delimitermeaning : the operator meaning due to delimiters (eg. norm)(as applied to the array)
+#  style : typically \displaystyle, \textstyle...
+#  left  : TeX code for left of matrix
+#  right : TeX code for right
+#  ncolumns : the number of columns (default is not limited)
+# The bindings below additionally read the keys
+#  alignment : column alignment; leading c, r or l decides where \hfil goes (default 'c')
+#  rowsep : passed on as an attribute of the alignment
+DefKeyVal('lx@GEN', 'style', 'UndigestedKey');
+
+# Opens a group (closed again by \lx@end@gen@matrix) and sets up the alignment template.
+DefPrimitive('\lx@gen@matrix@bindings RequiredKeyVals:lx@GEN', sub {
+    my ($stomach, $kv) = @_;
+    $stomach->bgroup;
+    my $style = $kv->getValue('style')               || T_CS('\textstyle');
+    my $align = ToString($kv->getValue('alignment')) || 'c';
+    # We really should be using ReadAlignmentTemplate (LaTeXML::Core::Alignment)
+    # but we'd have to convert it to a repeating spec somehow.
+    my @colspec = (before => Tokens(($align =~ /^(?:c|r)/ ? (T_CS('\hfil')) : ()), $style),
+      after => Tokens(($align =~ /^(?:c|l)/ ? (T_CS('\hfil')) : ())));
+    my $ncols      = ToString($kv->getValue('ncolumns'));
+    my %attributes = ();
+    foreach my $key (qw(rowsep)) {    # Probably more?
+      if (my $value = $kv->getValue($key)) {
+        $attributes{$key} = $value; } }
+    # +{...} forces an anonymous hash per column: a bare {...} opening a statement inside
+    # the map block may be parsed as a nested BLOCK, flattening the column specs into one list.
+    alignmentBindings(LaTeXML::Core::Alignment::Template->new(
+        ($ncols ? (columns => [map { +{@colspec} } 1 .. $ncols])
+          : (repeated => [{@colspec}]))),
+      'math',
+      (keys %attributes ? (attributes => {%attributes}) : ()));    # });
+    Let("\\\\",         '\@alignment@newline');
+    Let('\lx@intercol', '\lx@math@intercol');
+    Let('\@row@before', '\@empty');    # Disable special row treatment (eg. numbering) unless requested
+    Let('\@row@after',  '\@empty');
+});
+
+DefPrimitive('\lx@end@gen@matrix', sub { $_[0]->egroup; });
+
+DefMacro('\lx@gen@plain@matrix{}{}',
+  '\lx@gen@matrix@bindings{#1}'
+    . '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\@finish@alignment}'
+    #    . '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\cr\@finish@alignment}'
+    . '\lx@end@gen@matrix');
+
+# The delimiters on a matrix are presumably just for notation or readability (not an operator);
+# the array data itself is the matrix.
+# afterDigest sets needXMDual (and a fresh xmkey) when datameaning or delimitermeaning is
+# given, and stores the current 'Alignment' value, which the reversion reverts.
+# NOTE(review): several pieces of the template below are empty strings;
+# confirm that the XML markup of the template has not been lost.
+DefConstructor('\lx@gen@plain@matrix@ RequiredKeyVals:lx@GEN {}',
+  "?#needXMDual("
+    . ""
+    . "?#delimitermeaning()()"
+    . "?#datameaning()()"
+    . ""
+    . "?#delimitermeaning()()"
+    . "?#datameaning()()"
+    . "#left#2#right"
+    . ""
+    . ")("
+    . "#2"
+    . ")",
+  properties => sub { %{ $_[1]->getKeyVals }; },
+  reversion  => sub {
+    my ($whatsit, $kv, $body) = @_;
+    my $name      = ToString($kv->getValue('name'));
+    my $alignment = $whatsit->getProperty('alignment');
+##    (T_CS('\\' . $name), T_BEGIN, Revert($body), T_END); },
+##    (T_CS('\\' . $name), T_BEGIN, Revert($alignment), T_END); },
+    (T_CS('\\' . $name), T_BEGIN, $alignment->revert, T_END); },
+
+  afterDigest => sub {
+    my ($stomach, $whatsit) = @_;
+    my $kv = $whatsit->getArg(1);
+    if ($kv->getValue('datameaning') || $kv->getValue('delimitermeaning')) {
+      $whatsit->setProperties(
+        needXMDual => 1,
+        xmkey      => LaTeXML::Package::getXMArgID()); }
+    $whatsit->setProperties(alignment => LookupValue('Alignment'));
+    return; });
+
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Cases: Generalized
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# keys are
+#  name  : the name of the command (for reversion)
+#  meaning: the (presumed) meaning of the construct
+#  style : \textstyle or \displaystyle
+#  conditionmode : mode of 2nd column, text or math
+#  left  : TeX code for left of cases
+#  right : TeX code for right
+
+# These bracket the 2nd (condition) column when conditionmode is text:
+# the first begins text mode and captures the body, the second ends text mode.
+DefConstructorI('\lx@cases@condition', undef,
+  "#body",
+  alias => '', beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1);
+DefConstructorI('\lx@cases@end@condition', undef, "", alias => '',
+  beforeDigest => sub { $_[0]->endMode('text'); });
+
+# Opens a group (closed again by \lx@end@gen@cases) and sets up a two column template:
+# value, then condition.
+DefPrimitive('\lx@gen@cases@bindings RequiredKeyVals:lx@GEN', sub {
+    my ($stomach, $kv) = @_;
+    $stomach->bgroup;
+    my $style = $kv->getValue('style') || T_CS('\textstyle');
+    $style = T_CS($style) unless ref $style;
+    my $condtext = ToString($kv->getValue('conditionmode')) eq 'text';
+    alignmentBindings(LaTeXML::Core::Alignment::Template->new(
+        columns => [
+          { before => Tokens($style), after => Tokens(T_CS('\hfil')) },
+          { before => Tokens($style,
+              ($condtext ? (T_CS('\lx@cases@condition')) : ())),
+            after => Tokens(T_CS('\lx@column@trimright'),
+              ($condtext ? (T_CS('\lx@cases@end@condition')) : ()),
+              T_CS('\hfil')) }]),
+      'math');
+    Let("\\\\",         '\@alignment@newline');
+    Let('\lx@intercol', '\lx@math@intercol');
+    DefMacro('\@row@before', '');    # Don't inherit counter stepping from containing environments
+    DefMacro('\@row@after',  '');
+});
+
+DefMacro('\lx@gen@plain@cases{}{}',
+  '\lx@gen@cases@bindings{#1}'
+    . '\lx@gen@plain@cases@{#1}{\@start@alignment#2\@finish@alignment}'
+    . '\lx@end@gen@cases');
+DefPrimitive('\lx@end@gen@cases', sub { $_[0]->egroup; });
+
+# The logical structure for cases extracts the columns of the alignment
+# to give alternating value,condition (an empty condition is replaced by "otherwise" !?!?!)
+DefConstructor('\lx@gen@plain@cases@ RequiredKeyVals:lx@GEN {}',
+  '#left#2#right',
+  properties     => sub { %{ $_[1]->getKeyVals }; },
+  afterConstruct => sub {
+    my ($document) = @_;
+    if (my $point = $document->getElement->lastChild) {
+      # Get the sequence of alternating (case, condition).
+      # Expecting ltx:XMArray/ltx:XMRow/ltx:XMCell [should have /ltx:XMArg, but could be empty!!!]
+      my @cells = $document->findnodes('ltx:XMArray/ltx:XMRow/ltx:XMCell', $point);
+      my @stuff = map { ($_->hasChildNodes ? createXMRefs($document, element_nodes($_))
+          : ['ltx:XMText', {}, 'otherwise']) } @cells;
+      $document->replaceTree(['ltx:XMDual', {},
+          ['ltx:XMApp', {}, ['ltx:XMTok', { meaning => 'cases' }], @stuff],
+          $point],
+        $point); } },
+  reversion => sub {
+    my ($whatsit, $kv, $body) = @_;
+    # NOTE(review): the 'name' key is documented above as being "for reversion" and is
+    # fetched here, but the reversion always emits \cases -- confirm which is intended.
+    my $name = $kv->getValue('name');
+    (T_CS('\cases'), T_BEGIN, Revert($body), T_END); });
+
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Support for MathFork.
+#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# [Note: this block of code seems like it belongs somewhere else]
+# A MathFork supports document-level alignment of math,
+# by collecting equations into an equationgroup.
+# Each equation can contain
+# one or more MathFork structures which separate the semantically meaningful
+# equation (if possible) from the collection of rows and/or column fragments
+# for alignment.  The goal is to be able to present the aligned structure
+# composed of various mathematical fragments in a grid, and yet still represent
+# the (presumably) meaningful complete formula.
+#
+# The structure, as built by the functions below, looks like
+#   <ltx:MathFork>
+#     <ltx:Math><ltx:XMath>...</ltx:XMath></ltx:Math>
+#     <ltx:MathBranch>..</ltx:MathBranch>
+#   </ltx:MathFork>
+# The initial, "main", Math will contain a complete formula (hopefully).
+# The MathBranch will typically contain one or more ltx:tr, each of which
+# contains one or more ltx:td, each of which typically contains an ltx:Math
+# representing a cell of the aligned structure.
+
+#======================================================================
+# openMathFork($document,$equation) adds a new, empty ltx:MathFork to $equation
+# and answers ($mainfork, $branch): the fork's initial ltx:Math (already holding
+# an open ltx:XMath) and its ltx:MathBranch.
+# Callers typically then feed the math fragments found in $equation
+# into both of them with addColumnToMathFork.
+sub openMathFork {
+  my ($doc, $eqn) = @_;
+  my $forknode = $doc->openElementAt($eqn, 'ltx:MathFork');
+  # The main Math starts out with an EMPTY math Whatsit as its box.
+  my $main = $doc->openElementAt($forknode, 'ltx:Math', _box => MathWhatsit());
+  $doc->openElementAt($main, 'ltx:XMath');
+  my $mathbranch = $doc->openElementAt($forknode, 'ltx:MathBranch');
+  return ($main, $mathbranch); }
+
+# closeMathFork($document,$equation,$mainfork,$branch) closes the elements
+# that openMathFork opened, innermost first.
+sub closeMathFork {
+  my ($doc, $eqn, $main, $mathbranch) = @_;
+  $doc->closeElementAt($mathbranch);
+  my $xmath = $doc->getFirstChildElement($main);
+  $doc->closeElementAt($xmath);
+  $doc->closeElementAt($main);
+  # Rather than closing $main's parentNode, look the fork up again from the equation:
+  # sometimes a DocumentFragment turns up as the parent of $main.
+  my @forks = $doc->findnodes('ltx:MathFork', $eqn);
+  $doc->closeElementAt($forks[-1]);
+  # A fork left with a single child came up empty; drop it.
+  my $container = $mathbranch->parentNode;
+  my @children  = $container->childNodes;
+  $container->unbindNode if scalar(@children) == 1;
+  return; }
+
+# MathWhatsit(@items) synthesizes an inline math Whatsit from math Boxes, Lists or Whatsits.
+# The items are flattened, \@hidden@bgroup Whatsits are replaced by their bodies (!),
+# and any \displaystyle items are reduced to a single leading one (!).
+# Mainly useful for giving a Box to an ltx:Math that was itself synthesized
+# from other math content within a ltx:MathFork.
+sub MathWhatsit {
+  my (@items) = @_;
+  my $hidden = LookupDefinition(T_CS('\@hidden@bgroup'));
+  # Flatten, unwrapping the hidden groups along the way.
+  my @flat = ();
+  foreach my $item (@items) {
+    next unless $item;
+    foreach my $piece ($item->unlist) {
+      if ((ref $piece eq 'LaTeXML::Core::Whatsit') && ($piece->getDefinition eq $hidden)) {
+        push(@flat, $piece->getBody->unlist); }
+      else {
+        push(@flat, $piece); } } }
+  # Take the locator from the first piece that provides one.
+  my $locator = undef;
+  foreach my $piece (@flat) {
+    $locator = $piece->getLocator;
+    last if $locator; }
+  # Keep only the first \displaystyle, moved to the front.
+  my @display = grep { UnTeX($_) eq '\displaystyle' } @flat;
+  if (@display) {
+    my @others = grep { UnTeX($_) ne '\displaystyle' } @flat;
+    @flat = ($display[0], @others); }
+  return LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS('\@@BEGININLINEMATH')), [],
+    body    => List(@flat, mode => 'math'),
+    trailer => T_CS('\@@ENDINLINEMATH'),
+    locator => $locator, isMath => 1); }
+
+#======================================================================
+# Add a new table column (ltx:td) into the ltx:MathBranch of a ltx:MathFork.
+# The insertion point will be at $inbranch, presumably an ltx:tr
+# [created in the ltx:MathBranch using: $document->openElementAt($branch,'ltx:tr'); ]
+# The content of $cell (an ltx:_Capture_) is typically a single ltx:Math.
+# (but occasionally mixed math & ltx:text; some cases may need more semantic analysis?)
+# The content of $cell will be MOVED into the new column (ltx:td), (w/ ID's intact)
+# and CLONED (w/modified ID's) onto the end of the first child of the main branch, $mainfork,
+# of the ltx:MathFork [A Math Whatsit is also synthesized for the main branch, for TeX, etc!).
+# Thus, the collection of rows/columns fragments represents the alignment,
+# while the main branch synthesizes the (presumed) semantic whole.
+# The now-empty $cell is then removed from its parent & the document.
+#
+# Arguments: $document; $mainfork, the main ltx:Math of the fork; $inbranch, the node that
+# receives the new ltx:td; $cell, the captured cell.  The cell's align and colspan
+# attributes are copied onto the ltx:td.  Returns nothing.
+sub addColumnToMathFork {
+  my ($document, $mainfork, $inbranch, $cell) = @_;
+  my $td = $document->openElementAt($inbranch, 'ltx:td');
+  if (my $align = $cell->getAttribute('align')) {
+    $document->setAttribute($td, align => $align); }
+  if (my $colspan = $cell->getAttribute('colspan')) {
+    $document->setAttribute($td, colspan => $colspan); }
+  # Remove the _Capture_ from the document; parts will get cloned &/or reinserted
+  $cell->unbindNode;
+  # Usually, we will have captured a single ltx:Math node, but occasionally text?
+  # But in perverse cases, might have NOTHING!
+  foreach my $node ($cell->childNodes) {
+    # Add a Clone of the cell's contents to the main branch (This will get modified id's)
+    # The suffix is localized, so it is in effect only for this iteration.
+    local $LaTeXML::Core::Document::ID_SUFFIX = '.mf';
+    # Usually, an ltx:Math element will be the complete content of the _Capture_ (cell)
+    my $type = $document->getNodeQName($node);
+    my $box;
+    if ($type eq 'ltx:Math') {
+      if (my $xmath = $document->getFirstChildElement($node)) {
+        # But we CLONE the contents of its ltx:XMath onto the end of
+        # the mainfork ltx:Math/ltx:XMath (under $mainfork), modifying id's along the way.
+        $document->appendClone($document->getFirstChildElement($mainfork),
+          $document->getChildElements($xmath));
+        # Add the boxes from this cell to the previously collected ones in the main branch.
+        $box = $document->getNodeBox($node)->getBody; } }
+    # The next two cases are unusual (slightly unexpected?),
+    # typically coming from abused eqnarrays? May need more analysis preceding the MathFork'ing!
+    # Note that the 'next' in these two cases skips the node entirely:
+    # empty text is neither cloned into the main branch nor moved into the td.
+    elsif (($type eq 'ltx:text') || ($type eq 'ltx:p')) {
+      next if $node->textContent eq '';
+      my $txt = $document->openElementAt($document->getFirstChildElement($mainfork), 'ltx:XMText');
+      $document->appendClone($txt, $node);
+      $document->closeElementAt($txt);
+      $box = $document->getNodeBox($node); }
+    elsif ($type eq '#PCDATA') {
+      my $string = $node->textContent;
+      #      next if $string eq '';
+      next if $string =~ /^\s*$/;
+      my $txt = $document->openElementAt($document->getFirstChildElement($mainfork), 'ltx:XMText');
+      $txt->appendText($string);
+      $document->closeElementAt($txt);
+      $box = Box($string); }
+    elsif ($type eq '#Comment') { }
+    else {
+      Warn('unexpected', $type, $cell,
+        "Don't know how to synthesize equation with $type in column"); }
+    # Add the boxes from this cell to the previously collected ones in the main branch.
+    if ($box) {
+      my $composed = MathWhatsit($document->getNodeBox($mainfork)->getBody, $box);
+      $document->setNodeBox($mainfork,             $composed);
+      $document->setNodeBox($mainfork->firstChild, $composed); }    # And also to the XMath element!
+    # Finally MOVE (really copy) the node from the _Capture_ ($cell) to the td (in the fork)
+    # this keeps the same IDs as original; (& appendTree may remove id's from $node!)
+    $document->unRecordNodeIDs($node);
+    $document->appendTree($td, $node); }
+  # We can now remove the _Capture_ (and anything still in it?)
+  #  $cell->unbindNode;
+  $document->closeElementAt($td);
+  return; }
+
+#======================================================================
+# Higher level support for equationgroups
+# equationgroups hold a collection of equations
+# each of which will likely have MathFork within that separates
+# the complete semantic expression from a collection of rows & column cells.
+# The latter are used to present an aligned set of equations;
+# the former hopefully will be useful for the math....?
#
# Typically, there will be some sort of alignment macros, using &
# that will be set up to INITIALLY build an arrangement like:
#
#   ltx:equationgroup > ltx:equation > ltx:_Capture_ (cell math...)
# that is, an ltx:equation for each row.
# Afterwards, we can analyze the cells and determine how the cells and/or rows
# will be divided up into "real" equations, and insert some MathFork's to reflect.

# For example, the ltx:equationgroup represents a whole eqnarray,
# and (initially, at least) the rows are represented as ltx:equation's.
# Some analysis hopefully allows us to recognize

# Given an ltx:equationgroup containing several ltx:equations (representing rows),
# equationgroupJoinRows combines one or more of those rows into a
# semantically meaningful equation and sets up the appropriate MathForks within.
# This is typically useful for eqnarray, after you have analyzed
# which subsequences of ltx:equations actually correspond to single semantic equations.
#
# Arguments:
#   $document      : the document being built
#   $equationgroup : the ltx:equationgroup node containing the rows
#   @equations     : the ltx:equation rows (children of $equationgroup) to be joined
# Returns nothing.  The rows are unbound from the tree; a single new ltx:equation,
# inserted before the first row, takes over their labels, xml:id, ID counters and tags,
# and holds one MathFork with one ltx:tr per original row.
sub equationgroupJoinRows {
  my ($document, $equationgroup, @equations) = @_;
  # Nothing to join; don't leave an empty ltx:equation behind.
  return unless @equations;
  # Make a new equation, with a single MathFork container
  my $equation = $document->openElementAt($equationgroup, 'ltx:equation');
  $equationgroup->insertBefore($equation, $equations[0]);    # Move to correct position.
  # move labels, id, refnum to new equation
  my ($labels, $id, $idctr, $idctrm, $tags);
  foreach my $eq (@equations) {
    # Labels accumulate (space separated); for id & counters, the last row having one wins.
    if (my $l = $eq->getAttribute('labels')) {
      $labels = ($labels ? "$labels $l" : $l); }
    $id = $eq->getAttribute('xml:id') if $eq->hasAttribute('xml:id');
    $eq->removeAttribute('xml:id')    if $id;
    # Likewise keep the last ltx:tags actually found, rather than letting
    # a later untagged row reset it to undef (which would drop the equation number).
    if (my $t = $document->findnode('ltx:tags', $eq)) {
      $tags = $t; }
    # Annoying bookkeeping (should be more built in?)
    $idctr  = $eq->getAttribute('_ID_counter_')   if $eq->hasAttribute('_ID_counter_');
    $idctrm = $eq->getAttribute('_ID_counter_m_') if $eq->hasAttribute('_ID_counter_m_'); }
  $document->unRecordID($id)                                      if $id;
  $document->setAttribute($equation, labels           => $labels) if $labels;
  $document->setAttribute($equation, 'xml:id'         => $id)     if $id;
  $document->setAttribute($equation, '_ID_counter_'   => $idctr)  if $idctr;
  $document->setAttribute($equation, '_ID_counter_m_' => $idctrm) if $idctrm;
  $equation->appendChild($tags)                                   if $tags;

  # Scan equations to see which ones likely are continuations of previous
  my ($mainfork, $branch) = openMathFork($document, $equation);
  foreach my $eq (@equations) {
    # remove equation; parts will be added in by adding to mathfork (hopefully taking care of ids)
    $eq->unbindNode;
    my $tr    = $document->openElementAt($branch, 'ltx:tr');
    my @cells = $document->findnodes('ltx:_Capture_', $eq);
    # A row may have no captured cells at all; only inspect the first cell if there is one.
    $document->setAttribute($tr, class => 'ltx_eqn_lefteqn')
      if @cells && (($cells[0]->getAttribute('class') || '') =~ /\blefteqn\b/);
    foreach my $cell (@cells) {
      addColumnToMathFork($document, $mainfork, $tr, $cell); }
    $document->closeElementAt($tr); }
  closeMathFork($document, $equation, $mainfork, $branch);
  $document->closeElementAt($equation);
  return; }

# Given an equation generated in an equationgroup,
# collect each $ncols columns into a MathFork structure,
# with the formatted portion being the columns.
# This is typically useful for AMS's align structures,
# which contain several columns, each pair of which represent a semantic equation.
#
# Arguments:
#   $document : the document being built
#   $ncols    : number of consecutive cells per MathFork; must be non-zero
#               (it is used as a modulus)
#   $equation : the ltx:equation whose ltx:_Capture_ cells are regrouped
# Returns nothing.
sub equationgroupJoinCols {
  my ($document, $ncols, $equation) = @_;
  my ($col, $mainfork, $branch) = (0, undef, undef);
  foreach my $cell ($document->findnodes('ltx:_Capture_', $equation)) {
    next unless $document->getNodeQName($cell) =~ /(.*?:)?_Capture_$/;
    if (($col++ % $ncols) == 0) {    # Create new MathFork every $ncols cells.
      closeMathFork($document, $equation, $mainfork, $branch) if $mainfork;
      ($mainfork, $branch) = openMathFork($document, $equation); }
    addColumnToMathFork($document, $mainfork, $branch, $cell); }
  # Close the last (possibly partially filled) MathFork, if any was opened.
  closeMathFork($document, $equation, $mainfork, $branch) if $mainfork;
  return; }

# (remainder of the collapsed diff line, kept verbatim)
#********************************************************************** + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; + diff --git a/lib/LaTeXML/Package/BibTeX.pool.ltxml b/lib/LaTeXML/Engine/BibTeX.pool.ltxml similarity index 100% rename from lib/LaTeXML/Package/BibTeX.pool.ltxml rename to lib/LaTeXML/Engine/BibTeX.pool.ltxml diff --git a/lib/LaTeXML/Package/LaTeX.pool.ltxml b/lib/LaTeXML/Engine/LaTeX.pool.ltxml similarity index 99% rename from lib/LaTeXML/Package/LaTeX.pool.ltxml rename to lib/LaTeXML/Engine/LaTeX.pool.ltxml index 3fed16f0e..eb3810484 100644 --- a/lib/LaTeXML/Package/LaTeX.pool.ltxml +++ b/lib/LaTeXML/Engine/LaTeX.pool.ltxml @@ -29,7 +29,7 @@ use List::Util qw(min max); #********************************************************************** LoadPool('TeX'); - +LOAD_LATEX(); # Apparently LaTeX does NOT define \magnification, # and babel uses that to determine whether we're runing LaTeX!!! Let('\magnification', '\@undefined'); @@ -368,7 +368,7 @@ DefConstructorI(T_CS('\end{document}'), undef, sub { if (my $ops = LookupValue('@at@end@document')) { push(@boxes, $stomach->digest(Tokens(@$ops))); } # Should we try to indent the last paragraph? If so, it goes like this: - push(@boxes, $stomach->digest(T_CS('\normal@par'))); + push(@boxes, $stomach->digest(T_CS('\lx@normal@par'))); # Now we check whether we're down to the last stack frame. # It is common for unclosed { or even environments # and we want to at least compress & avoid unnecessary errors & warnings. @@ -1467,7 +1467,7 @@ sub beginItemize { my $postfix = ToString(Tokens(roman($level))); my $usecounter = ($options{nolevel} ? $counter : $counter . $postfix); Let('\item' => "\\" .
$type . '@item') if defined $type; - Let('\par', '\normal@par'); # In case within odd environment. + Let('\par', '\lx@normal@par'); # In case within odd environment. DefMacroI('\@listctr', undef, Tokens(Explode($usecounter))); # Now arrange that this list's id's are relative to the current (outer) item (if any) # And that the items within this list's id's are relative to this (new) list. @@ -4384,8 +4384,10 @@ DefConstructor('\lx@mark@nocite Semiverbatim', #====================================================================== # C.11.4 Splitting the input #====================================================================== -Let('\@@input', '\input'); # Save TeX's version. - # LaTeX's \input is a bit different... +if (!($LaTeXML::DEBUG{compiled} || $LaTeXML::DEBUG{compiling})) { + Let('\@@input', '\input'); # Save TeX's version. + # LaTeX's \input is a bit different... +} # Input, now DefPrimitive('\ltx@input {}', sub { Input(Expand($_[1])); }); DefMacroI('\input', undef, '\@ifnextchar\bgroup\@iinput\@@input'); @@ -4885,8 +4887,8 @@ DefPrimitive('\newsavebox DefToken', sub { AssignValue(allocated_boxes => $n, 'global'); DefRegisterI($_[1], undef, Number($n)); AssignValue('box' . $n, List()); }); - -RawTeX(<<'EOL'); +if (!($LaTeXML::DEBUG{compiled} || $LaTeXML::DEBUG{compiling})) { + RawTeX(<<'EOL'); \def\newsavebox#1{\@ifdefinable{#1}{\newbox#1}} \DeclareRobustCommand\savebox[1]{% \@ifnextchar(%) @@ -4916,7 +4918,7 @@ RawTeX(<<'EOL'); \def\endlrbox{\unskip\color@endgroup} \DeclareRobustCommand\usebox[1]{\leavevmode\copy #1\relax} EOL - +} Let('\lx@parboxnewline', '\lx@newline'); # Obsolete, but in case still used # NOTE: There are 2 extra arguments (See LaTeX Companion, p.866) # for height and inner-pos. We're ignoring inner-pos, for now, though. @@ -6402,39 +6404,41 @@ RequirePackage('textcomp'); # of pre-read/pre-processed latex.ltx ! 
# # For now, a few macros required by other packages will be included: -DefMacroI(T_CS('\hook_gput_code:nnn'), '{}{}{}', ''); -DefMacro('\NewHook{}', ''); -DefMacro('\NewReversedHook{}', ''); -DefMacro('\NewMirroredHookPair{}{}', ''); -DefMacro('\ActivateGenericHook{}', ''); -DefMacro('\DisableGenericHook{}', ''); -DefMacro('\AddToHook{}[]{}', ''); -DefMacro('\AddToHookNext{}{}', ''); -DefMacro('\ClearHookNext{}', ''); -DefMacro('\RemoveFromHook{}[]', ''); -DefMacro('\SetDefaultHookLabel{}', ''); -DefMacro('\PushDefaultHookLabel{}', ''); -DefMacro('\PopDefaultHookLabel', ''); -DefMacro('\UseHook{}', ''); -DefMacro('\UseOneTimeHook{}', ''); -DefMacro('\ShowHook{}', ''); -DefMacro('\LogHook{}', ''); -DefMacro('\DebugHooksOn', ''); -DefMacro('\DebugHooksOff', ''); -DefMacro('\DeclareHookRule{}{}{}{}', ''); -DefMacro('\DeclareDefaultHookRule{}{}{}', ''); -DefMacro('\ClearHookRule{}{}{}', ''); -DefMacro('\IfHookEmptyTF{}{}{}', '#3'); -DefMacro('\IfHookExistsTF{}{}{}', '#3'); -DefMacro('\MakeTextLowercase', '\lowercase'); -DefMacro('\MakeTextUppercase', '\uppercase'); - -DefConditional('\if@includeinrelease'); -Let('\@kernel@after@enddocument', '\@empty'); -Let('\@kernel@after@enddocument@afterlastpage', '\@empty'); -Let('\@kernel@before@begindocument', '\@empty'); -Let('\@kernel@after@begindocument', '\@empty'); -Let('\conditionally@traceon', '\@empty'); -Let('\conditionally@traceoff', '\@empty'); +if (!($LaTeXML::DEBUG{compiled} || $LaTeXML::DEBUG{compiling})) { + DefMacroI(T_CS('\hook_gput_code:nnn'), '{}{}{}', ''); + DefMacro('\NewHook{}', ''); + DefMacro('\NewReversedHook{}', ''); + DefMacro('\NewMirroredHookPair{}{}', ''); + DefMacro('\ActivateGenericHook{}', ''); + DefMacro('\DisableGenericHook{}', ''); + DefMacro('\AddToHook{}[]{}', ''); + DefMacro('\AddToHookNext{}{}', ''); + DefMacro('\ClearHookNext{}', ''); + DefMacro('\RemoveFromHook{}[]', ''); + DefMacro('\SetDefaultHookLabel{}', ''); + DefMacro('\PushDefaultHookLabel{}', ''); + 
DefMacro('\PopDefaultHookLabel', ''); + DefMacro('\UseHook{}', ''); + DefMacro('\UseOneTimeHook{}', ''); + DefMacro('\ShowHook{}', ''); + DefMacro('\LogHook{}', ''); + DefMacro('\DebugHooksOn', ''); + DefMacro('\DebugHooksOff', ''); + DefMacro('\DeclareHookRule{}{}{}{}', ''); + DefMacro('\DeclareDefaultHookRule{}{}{}', ''); + DefMacro('\ClearHookRule{}{}{}', ''); + DefMacro('\IfHookEmptyTF{}{}{}', '#3'); + DefMacro('\IfHookExistsTF{}{}{}', '#3'); + DefMacro('\MakeTextLowercase', '\lowercase'); + DefMacro('\MakeTextUppercase', '\uppercase'); + + DefConditional('\if@includeinrelease'); + Let('\@kernel@after@enddocument', '\@empty'); + Let('\@kernel@after@enddocument@afterlastpage', '\@empty'); + Let('\@kernel@before@begindocument', '\@empty'); + Let('\@kernel@after@begindocument', '\@empty'); + Let('\conditionally@traceon', '\@empty'); + Let('\conditionally@traceoff', '\@empty'); +} #********************************************************************** 1; diff --git a/lib/LaTeXML/Engine/TeX.pool.ltxml b/lib/LaTeXML/Engine/TeX.pool.ltxml new file mode 100644 index 000000000..696139ccf --- /dev/null +++ b/lib/LaTeXML/Engine/TeX.pool.ltxml @@ -0,0 +1,183 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; +use Unicode::Normalize; +use LaTeXML::Util::Pathname; +use List::Util qw(min max); + +###$LaTeXML::DEBUG{compiled} = 1 unless $LaTeXML::DEBUG{compiling} || $LaTeXML::DEBUG{nocompiled}; + +#********************************************************************** +# CORE TeX; Built-in commands. +#********************************************************************** +#LaTeXML::Package::LoadPoolX('foo'); +LoadPool('Base_Schema'); +LoadPool('Base_ParameterTypes'); +LoadPool('Base_Utility'); +LoadPool('Base_XMath'); +LoadPool('TeX_Box'); +LoadPool('TeX_Character'); +LoadPool('TeX_Debugging'); +LoadPool('TeX_FileIO'); +LoadPool('TeX_Fonts'); +LoadPool('TeX_Glue'); +LoadPool('TeX_Hyphenation'); +LoadPool('TeX_Inserts'); +LoadPool('TeX_Job'); +LoadPool('TeX_Kern'); +LoadPool('TeX_Logic'); +LoadPool('TeX_Macro'); +LoadPool('TeX_Marks'); +LoadPool('TeX_Math'); +LoadPool('TeX_Page'); +LoadPool('TeX_Paragraph'); +LoadPool('TeX_Penalties'); +LoadPool('TeX_Registers'); +LoadPool('TeX_Tables'); +LoadPool('eTeX'); # unless.... ? +LoadPool('pdfTeX'); # unless.... ? + +LoadPool('Base_Deprecated'); + +##if ($LaTeXML::DEBUG{compiled}) { +## loadDump('plain.tex'); } +##elsif (!$LaTeXML::DEBUG{compiling}) { +LoadPool('plain'); +##} +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Orphans? +#====================================================================== + +# This is LaTeX, but used a little in the Primitives? +# define it here (only approxmiately), since it's already useful. +Let('\protect', '\relax'); + +# Is this real, or an accident/typo ? If real, Where does it come from? 
DefRegister('\everyhelp', Tokens());

# Cleanup the pre & post tokens for halign columns in math mode.
# If a pair of $..$ enclose stuff that is "OK" in math mode, we don't need the $.
# Note that the 1st $ is switching OUT of math mode!
# BUT: This is never used???
#
# Takes a list of tokens and returns the list with every directly adjacent
# pair of math-shift tokens removed.  Math-shift positions are paired up from
# the end; when their count is odd, the first one is left unpaired.
sub stripDupMath {
  my @tokens  = @_;
  my @math_at = grep { $tokens[$_]->defined_as(T_MATH) } (0 .. $#tokens);
  shift(@math_at) if @math_at % 2;    # Get pairs!
  while (@math_at) {
    my $closer = pop(@math_at);
    my $opener = pop(@math_at);
    next unless $closer == $opener + 1;    # only drop a pair with nothing in between
    splice(@tokens, $opener, 2); }
  return @tokens; }

DefMacro('\hiderel{}', "#1");    # Just ignore, for now...

#======================================================================
# LaTeX Trigger Hook
# Auto-load the format according to the initial control-sequences seen
#======================================================================
# No, \documentclass isn't really a primitive -- It's not even TeX!
# But we define a number of stubs here that will automatically load
# the LaTeX pool (or AmSTeX.pool) (which will presumably redefine them), and then
# stuff the token back to be reexecuted.
unless ($LaTeXML::DEBUG{compiling}) {
  my @latex_triggers = qw(documentclass
    newcommand renewcommand newenvironment renewenvironment
    NeedsTeXFormat ProvidesFile
    ProvidesPackage RequirePackage PassOptionsToPackage
    makeatletter makeatother
    typeout begin listfiles nofiles);
  foreach my $ltxtrigger (@latex_triggers) {
    DefAutoload($ltxtrigger, 'LaTeX.pool.ltxml'); }
}
if (!$LaTeXML::DEBUG{compiling} && !$LaTeXML::DEBUG{compiled}) {
  my @expl3_triggers = qw(ExplSyntaxOn
    ProvidesExplClass ProvidesExplPackage);
  foreach my $ltx3trigger (@expl3_triggers) {
    # DG: note that these auto-loads are not perfect --
    # if they are triggered with a raw .sty file for example,
    # the expl3 support will "expire" at the end of the current scope,
    # and e.g. \ExplSyntaxOn will once again be undefined.
    DefAutoload($ltx3trigger, 'expl3.pool.ltxml'); }
}
unless ($LaTeXML::DEBUG{compiling}) {
  # Seemingly good candidates to trigger AmSTeX ??
  my @amstex_triggers = qw(BlackBoxes NoBlackBoxes
    TagsAsMath TagsAsText TagsOnLeft TagsOnRight CenteredTagsOnSplits TopOrBottomTagsOnSplits
    LimitsOnInts NoLimitsOnInts LimitsOnNames NoLimitsOnNames LimitsOnSums NoLimitsOnSums
    loadbold loadeufb loadeufm loadeurb loadeurm loadeusb
    loadeusm loadmathfont loadmsam loadmsbm);
  foreach my $amstrigger (@amstex_triggers) {
    DefAutoload($amstrigger, 'AmSTeX.pool.ltxml'); }

  # Darn; we need to be even more clever, since we need to simulate an amstex command, as well.
  # For example \documentstyle[...]{amsppt} must switch to AMSTeX mode, _NOT_ LaTeX mode!!!!
  # After loading the chosen pool, the original invocation is handed back for re-execution.
  DefMacro('\documentstyle OptionalSemiverbatim SkipSpaces Semiverbatim', sub {
      my ($gullet, $options, $class) = @_;
      my $pool = (ToString($class) =~ /^amsppt$/ ? "AmSTeX" : "LaTeX");
      LoadPool($pool);
      my @optional = ($options ? (T_OTHER('['), $options->unlist, T_OTHER(']')) : ());
      return (T_CS('\\documentstyle'), @optional, T_BEGIN, $class->unlist, T_END); });
}
# Technically should be in LaTeX.pool, but we try to maintain the bookkeeping from the very start,
# in order to avoid partially defined behavior when --preload directives are mixed with \usepackage{} loads
if (!$LaTeXML::DEBUG{compiled} && !$LaTeXML::DEBUG{compiling}) {
  DefMacro('\@pushfilename',
    '\xdef\@currnamestack{{\@currname}{\@currext}{\the\catcode`\@}\@currnamestack}');
  DefMacro('\@popfilename', '\expandafter\@p@pfilename\@currnamestack\@nil');
  # NOTE(review): whitespace inside this multi-line TeX body was collapsed in the
  # text this was restored from; confirm the line breaks against the upstream file.
  DefMacro('\@p@pfilename {}{}{} Until:\@nil',
    '\gdef\@currname{#1}%
    \gdef\@currext{#2}%
    \catcode`\@#3\relax
    \gdef\@currnamestack{#4}');
}
DefMacroI(T_CS('\@currnamestack'), undef, Tokens());
Let('\@currname', '\@empty');
Let('\@currext',  '\@empty');

#======================================================================
# After all other rewrites have acted, a little cleanup
# [This suggests that it should be (one of) the LAST (math) rewrite applied?
# Do we need to define it last?]
# Collapse an ltx:XMWrap that has exactly one child element:
# the wrapper's public attributes are transferred to that child.
# Internal attributes (names starting with "_") are never copied.
# xml:id needs care: if the child already has an id, that id is kept and any
# idref pointing at the wrapper's id is redirected to it; otherwise the child
# inherits the wrapper's id.  Either way the id is removed from the wrapper
# and unrecorded first, so that it does not clash.
DefRewrite(xpath => 'descendant-or-self::ltx:XMWrap[count(child::*)=1]',
  replace => sub {
    my ($document, $wrap) = @_;
    if (my $node = $document->getFirstChildElement($wrap)) {
      # Copy attributes but NOT internal ones,
      # NOR xml:id, else we get clashes
      foreach my $attribute ($wrap->attributes) {
        if ($attribute->nodeType == XML_ATTRIBUTE_NODE) {
          my $attr = $document->getNodeQName($attribute);
          # NOTE(review): this copy duplicates the final else branch below;
          # presumably harmless, kept as it was.
          $document->setAttribute($node, $attr => $attribute->getValue)
            unless ($attr eq 'xml:id') || $attr =~ /^_/;
          if ($attr =~ /^_/) { }
          elsif ($attr eq 'xml:id') {
            my $id = $attribute->getValue;
            if (my $previd = $node->getAttribute('xml:id')) {    # Keep original id
              # but swap any references to the one on the wrapper!
              foreach my $ref ($document->findnodes("//*[\@idref='$id']")) {
                $ref->setAttribute(idref => $previd); }
              # Was 'xml"id' (typo), which left the unrecorded id on the wrapper.
              $wrap->removeAttribute('xml:id');
              $document->unRecordID($id); }
            else {
              $wrap->removeAttribute('xml:id');
              $document->unRecordID($id);
              $document->setAttribute($node, 'xml:id' => $id); } }
          else {
            $document->setAttribute($node, $attr => $attribute->getValue); } } }
      # But keep $node's font from being overwritten.
      $document->setNodeFont($wrap, $document->getNodeFont($node));
      ## WHY THIS????
      $document->getNode->appendChild($node);
} });

# (remainder of the collapsed diff lines up to the end of the TeX_Box file header, kept verbatim)
#********************************************************************** + +1; diff --git a/lib/LaTeXML/Engine/TeX_Box.pool.ltxml b/lib/LaTeXML/Engine/TeX_Box.pool.ltxml new file mode 100644 index 000000000..35a8ccbda --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Box.pool.ltxml @@ -0,0 +1,703 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Box | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;
#use Unicode::Normalize;
#use LaTeXML::Util::Pathname;
#use List::Util qw(min max);

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Box Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#======================================================================
# These define the handler for { } (or anything of catcode BEGIN, END)

# These are actually TeX primitives, but we treat them as a Whatsit so they
# remain in the constructed tree.
#DefConstructor('{','#body', beforeDigest=>sub{$_[0]->bgroup;}, captureBody=>1);
######DefPrimitive('{', sub {
# Begin-group: opens a group, then digests the following body; the result is a List
# of an empty marker Box (for the opening token) followed by that body,
# with its mode set from whether we were in math when the group began.
DefPrimitive(T_BEGIN, sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    my $open   = Box(undef, undef, undef, T_BEGIN, isEmpty => 1, alignmentSkippable => 1);
    my $ismath = $STATE->lookupValue('IN_MATH');
    my @body   = $stomach->digestNextBody();
    List($open, @body, mode => ($ismath ? 'math' : 'text')); });

#######DefPrimitive('}', sub {
# End-group: the font is looked up BEFORE the group is closed,
# so the empty marker Box carries the font in effect inside the group.
DefPrimitive(T_END, sub {
    my $f = LookupValue('font');
    $_[0]->egroup;
    Box(undef, $f, undef, T_END, isEmpty => 1, alignmentSkippable => 1); });

# These are for those screwy cases where you need to create a group like box,
# more than just bgroup, egroup,
# BUT you DON'T want extra {, } showing up in any untex-ing.
DefConstructor('\@hidden@bgroup', '#body',
  beforeDigest => sub { $_[0]->bgroup; },
  captureBody  => 1,
  reversion    => sub { Revert($_[0]->getProperty('body')); });
DefConstructor('\@hidden@egroup', '',
  afterDigest => sub { $_[0]->egroup; },
  reversion   => '');

#======================================================================
DefMacro('\lx@nounicode {}', '\ifmmode\lx@math@nounicode#1\else\lx@text@nounicode#1\fi');

# NOTE(review): the replacement templates of \lx@framed, \lx@hflipped and
# \lx@text@nounicode below are reproduced exactly as found here; they look as if
# XML markup was stripped from them (the 'frame' property is otherwise unused).
# Confirm against the upstream file.
DefConstructor('\lx@framed[]{}',
  "#2",
  properties => { frame => sub { ToString($_[1] || 'rectangle'); } });
DefConstructor('\lx@hflipped{}',
  "#1");

# Warn, once per control sequence, that the given symbol has no Unicode equivalent.
# The 'missing_unicode' mapping remembers which ones were already reported.
sub reportNoUnicode {
  my ($cs) = @_;
  $cs = ToString($cs);
  return if LookupMapping('missing_unicode' => $cs);
  Warn('expected', 'unicode', $cs,
    "There's no Unicode equivalent for the symbol '$cs'");
  AssignMapping('missing_unicode' => $cs => 1);
  return; }
# Slightly contrived so that this can be used within a DefMath
# and still declare & get the semantic properties.
DefPrimitive('\lx@math@nounicode DefToken', sub {
    my ($stomach, $cs) = @_;
    reportNoUnicode($cs);
    Box(ToString($cs), undef, undef, $cs, class => 'ltx_nounicode'); });

DefConstructor('\lx@text@nounicode DefToken',
  "#1",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    reportNoUnicode(ToString($whatsit->getArg(0))); });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Box creation commands
# ----------------------------------------------------------------------
# \hbox      c  constructs a box holding horizontal material.
# \vbox      c  constructs a box holding vertical material.
# \vtop      c  is an alternate way to construct a box holding vertical material.

# \everyhbox pt holds tokens inserted at the start of every hbox.
# \everyvbox pt holds tokens inserted at the start of every vbox.
# ======================================================================

# Revert the property named $keyword of $whatsit: the exploded keyword
# followed by the reverted value, or nothing when the property is unset.
sub revert_spec {
  my ($whatsit, $keyword) = @_;
  my $value = $whatsit->getProperty($keyword);
  return () unless $value;
  return (Explode($keyword), Revert($value)); }

# Optional "to <dimen>" or "spread <dimen>" following a box command,
# read into a KeyVals object holding that single keyword.
DefParameterType('BoxSpecification', sub {
    my ($gullet) = @_;
    if (my $key = $gullet->readKeyword('to', 'spread')) {
      my $keyvals = LaTeXML::Core::KeyVals->new(undef, undef, skipMissing => 1);
      $keyvals->setValue($key, $gullet->readDimension);
      $keyvals; } },
  reversion => sub {
    my ($spec) = @_;
    return unless $spec;
    # 'to' takes precedence over 'spread', should both have a value.
    foreach my $keyword ('to', 'spread') {
      if (my $dimen = $spec->getValue($keyword)) {
        return Tokens(Tokenize($keyword), Revert($dimen)); } }
    return; },
  optional => 1, undigested => 1);

# Risky: I think this needs to be digested as a body to work like TeX (?)
# but parameter think's it's just parsing from gullet...
#
# Skips ahead to the opening brace, pushes back any pending 'BeforeNextBox' token
# and the $everybox tokens (if given), then invokes the begin-group token and
# returns the first item it produces.
sub readBoxContents {
  no warnings 'recursion';
  my ($gullet, $everybox) = @_;
  # Skip till { or \bgroup
  while (my $skipped = $gullet->readToken) {
    last if $skipped->defined_as(T_BEGIN); }
  # Now, insert some extra tokens, if any, possibly from \afterassignment
  if (my $token = LookupValue('BeforeNextBox')) {
    AssignValue(BeforeNextBox => undef, 'global');
    $gullet->unread($token); }
  # AND, insert any extra tokens passed in, due to everyhbox or everyvbox
  if ($everybox) {
    $gullet->unread($everybox->unlist); }
  my ($contents) = $STATE->getStomach->invokeToken(T_BEGIN);
  return $contents; }

DefRegister('\everyhbox', Tokens());
DefRegister('\everyvbox', Tokens());

DefParameterType('HBoxContents', sub {
    my ($gullet) = @_;
    return readBoxContents($gullet, LookupValue('\everyhbox')); },
  undigested => 1);    # Cause it already is digested!
DefParameterType('VBoxContents', sub {
    my ($gullet) = @_;
    return readBoxContents($gullet, LookupValue('\everyvbox')); },
  undigested => 1);    # Cause it already is digested!

# This re-binds a number of important control sequences to their default text binding.
# This is useful within common boxing or footnote macros that can appear within
# alignments or special environments that have redefined many of these.
# You'll typically want this within a group or bounded=>1.
AssignValue(TEXT_MODE_BINDINGS  => []);
AssignValue(HTEXT_MODE_BINDINGS => []);
AssignValue(VTEXT_MODE_BINDINGS => []);
PushValue(HTEXT_MODE_BINDINGS => [T_MATH, T_CS('\@dollar@in@textmode')]);
PushValue(VTEXT_MODE_BINDINGS => [T_MATH, T_CS('\@dollar@in@normalmode')]);
###PushValue(TEXT_MODE_BINDINGS => [T_CS('\centerline'), T_CS('\relax')]);

# Re-establish the text-mode bindings: each entry of the mode-specific list
# (VTEXT_MODE_BINDINGS when $verticalmode is true, else HTEXT_MODE_BINDINGS),
# followed by each entry of TEXT_MODE_BINDINGS, is a [token, definition] pair
# that gets Let.  Returns nothing.
sub reenterTextMode {
  my ($verticalmode) = @_;
  my $modekey = ($verticalmode ? 'VTEXT_MODE_BINDINGS' : 'HTEXT_MODE_BINDINGS');
  foreach my $binding (@{ LookupValue($modekey) }, @{ LookupValue('TEXT_MODE_BINDINGS') }) {
    Let($$binding[0], $$binding[1]); }
  return; }

# Similarly, for metadata appearing within peculiar environments, fonts, etc
# You'll typically want this within a group or bounded=>1.
sub neutralizeFont {
  AssignValue(font     => LaTeXML::Common::Font->textDefault(), 'local');
  AssignValue(mathfont => LaTeXML::Common::Font->mathDefault(), 'local');
  return; }

# Safe hash lookup: $thing (if true) is treated as a hash reference
# and the value stored under $key is returned; otherwise $thing itself is returned.
sub REF {
  my ($thing, $key) = @_;
  return $thing && $$thing{$key}; }

# True when the document's current element is in the svg namespace.
sub inSVG {
  my $document = $LaTeXML::DOCUMENT;
  my $context  = $document->getElement;
  return $context && $document->getNodeQName($context) =~ /^svg:/; }

# Collapse redundant svg:g nodes that have only certain
# non-cummulative attributes
Tag('svg:g', afterClose => \&collapseSVGGroup);
my %collapsible_group_attributes = map { ($_ => 1); }
  qw(fill fill-rule fill-opacity
  stroke stroke-width stroke-linecap stroke-linejoin stroke-miterlimit
  stroke-dasharray stroke-dashoffset stroke-opacity
  color);

# Does $child carry as many collapsible attributes that are also set on the
# enclosing group ($nodeattr: hash ref of the group's non-internal attributes)
# as the group has attributes in total?  ie. does it completely mask the group?
sub svgGroupMasksAttributes {
  my ($child, $nodeattr) = @_;
  my $nmasked = 0;
  foreach my $attr ($child->attributes) {
    my $key = $attr->getName;
    if (($key !~ /^_/) && $collapsible_group_attributes{$key} && defined $$nodeattr{$key}) {
      $nmasked++; } }
  return $nmasked == scalar(keys %$nodeattr); }

# Collapse/remove/unwrap unneeded svg:g's to reduce depth of tree
# Steps, in order:
#  (1) remove empty svg:g children;
#  (2) move leading (and trailing) svg:g children that completely mask this
#      group's attributes out before (and after) the group;
#  (3) unwrap nested svg:g's whose attributes all repeat this group's values;
#  (4) if a single svg:g child remains and its attributes can migrate, merge them
#      into this group (transforms are concatenated) and unwrap the child.
# A group having a clip-path is left entirely alone.
sub collapseSVGGroup {
  my ($document, $node) = @_;
  # Record the attributes on $node, for later use.
  my %nodeattr = ();
  foreach my $attr ($node->attributes) {
    my $key = $attr->getName;
    $nodeattr{$key} = $attr->getValue if ($key !~ /^_/); }
  return if defined $nodeattr{'clip-path'};    # Needs separate svg:g node (?)
  my @children = element_nodes($node);
  # Remove empty svg:g children
  my $nempty = 0;
  foreach my $c (@children) {
    if (($document->getNodeQName($c) eq 'svg:g') && !scalar(element_nodes($c))) {
      $nempty++;
      $document->removeNode($c); } }
  @children = element_nodes($node) if $nempty;
  # Move ahead, all leading children whose svg:g attributes completely mask $node's attributes.
  while (scalar(@children) && ($document->getNodeQName($children[0]) eq 'svg:g')) {
    last unless svgGroupMasksAttributes($children[0], \%nodeattr);
    $node->parentNode->insertBefore(shift(@children), $node); }    # move it outside!
  # Same story for trailing children, but move behind
  while (scalar(@children) && ($document->getNodeQName($children[-1]) eq 'svg:g')) {
    last unless svgGroupMasksAttributes($children[-1], \%nodeattr);
    $node->parentNode->insertAfter(pop(@children), $node); }
  # Now remove any redundant svg:g's (same attributes & values) [some left after above]
  my $nredundant = 0;
  foreach my $c (@children) {
    if ($c && ($document->getNodeQName($c) eq 'svg:g')) {    # for every nested svg:g
      my $issame = 1;
      foreach my $attr ($c->attributes) {
        my $key = $attr->getName;
###        if (($key !~ /^_/) && ($attr->getValue ne ($nodeattr{$key} || ''))) {
        # A transform is never redundant, even when it repeats the group's.
        if (($key !~ /^_/)
          && (($attr->getValue ne ($nodeattr{$key} || '')) || ($key eq 'transform'))) {
          $issame = 0; } }
      if ($issame) {    # child is completely redundant.
        $document->unwrapNodes($c);
        $nredundant++; } } }
  @children = element_nodes($node) if $nredundant;
  # Could check if $node is empty now?
  # Then if only one left, and it's attributes can be migrated to $node, unwrap it
  if ((scalar(@children) == 1) && ($document->getNodeQName($children[0]) eq 'svg:g')) {
    my $c         = $children[0];
    my %av        = ();
    my $mergeable = 1;
    foreach my $attr ($c->attributes) {
      my $key = $attr->getName;
      if (($key =~ /^_/) || $collapsible_group_attributes{$key}) {
        $av{$key} = $attr->getValue; }
      elsif ($key eq 'transform') {
        $av{$key} = ($nodeattr{$key} ? $nodeattr{$key} . ' ' : '') . $attr->getValue; }
      else {
        $mergeable = 0; } }
    if ($mergeable) {
      foreach my $key (sort keys %av) {
        $nodeattr{$key} = $av{$key};
        $node->setAttribute($key => $av{$key}); }
      $document->unwrapNodes($c); } }
  return; }

# \hbox: the element created depends on the insertion context:
#   svg:g inside svg; otherwise, in a context marked _vertical_mode_,
#   ltx:inline-block where #PCDATA is allowed, else ltx:p; otherwise ltx:text.
DefConstructor('\hbox BoxSpecification HBoxContents', sub {
    #    "#2",
    no warnings 'recursion';
    my ($document, $spec, $contents, %props) = @_;
    my $model   = $document->getModel;
    my $context = $document->getElement;
    my $current = $context;

    # What is the CORRECT (& general) way to ask whether we're in "vertical mode"??
    #    my $vmode = $tag eq 'ltx:inline-block'; # ie, explicitly \vbox !?!?!?!
    my $issvg  = $current && $document->getNodeQName($current) =~ /^svg:/;
    my $vmode  = $current && $current->getAttribute('_vertical_mode_');
    my $inline = $document->canContain($current, '#PCDATA');
    my $newtag = ($issvg ? 'svg:g' : ($vmode ? ($inline ? 'ltx:inline-block' : 'ltx:p') : 'ltx:text'));
    my $node   = $document->openElement($newtag, _noautoclose => 1, width => $props{width});
    $document->absorb($contents);
    if (!$issvg) {
      # Close svg:g's still open from the contents, stopping at one marked _beginscope.
      while (!$document->getElement()->hasAttribute('_beginscope') && $document->maybeCloseElement('svg:g')) { }
      $document->maybeCloseElement('svg:svg'); }
    if ($issvg) {    # ODDLY, svg:g isnt necessarily balanced in tikz?
      $document->maybeCloseElement('svg:g'); }
    else {
      $document->maybeCloseNode($node); }
  },
  mode  => 'text', bounded => 1,
  sizer => '#2',
  # Workaround for $ in alignment; an explicit \hbox gives us a normal $.
  # And also things like \centerline that will end up bumping up to block level!
  beforeDigest => sub { reenterTextMode(); },

  # Width comes from "to <dimen>", or from the content's width plus "spread <dimen>".
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $spec = $whatsit->getArg(1);
    my $box  = $whatsit->getArg(2);
    if (my $w = GetKeyVal($spec, 'to')) {
      $whatsit->setWidth($w); }
    elsif (my $s = GetKeyVal($spec, 'spread')) {
      $whatsit->setWidth($box->getWidth->add($s)); }
    $whatsit->setProperty(content_box => $box);
    return; });

# Cleanup foreignObjects: remove empty (or only

# Cleanup svg:foreignObject: remove it when empty (or holding only an empty ltx:p); and determine size
Tag('svg:foreignObject', autoOpen => 1, autoClose => 1,
  afterClose => sub {
    my ($document, $node, $whatsit) = @_;
    ## NOTE: The revised schema now allows plain text within foreignObject; watch out for mixed
    my @fo = $node->childNodes;    # What's in the foreignObject?
    if (!@fo) {                    # Empty?
      $document->removeNode($node);    # just remove whole thing
      return; }
    if (!grep { $_->nodeType != XML_TEXT_NODE } @fo) {    # All text nodes?
      $node = $document->renameNode($node, 'svg:text');
      $node->setAttribute(transform => "matrix(1 0 0 -1 0 0)");
      return; }
    if ((scalar(@fo) == 1) && ($document->getNodeQName($fo[0]) eq 'ltx:p')) {    # Single ltx:p ?
      my @p_c = element_nodes($fo[0]);
      if (scalar(@p_c) == 0) {                                                   # or Empty ltx:p ?
        $document->removeNode($node);
        return; }
      # Else, single ltx:picture or ltx:text ?
      elsif (scalar(@p_c) == 1) {
        my $tag = $document->getNodeQName($p_c[0]);
        if (($tag eq 'ltx:picture') || ($tag eq 'ltx:text')) {
          my @pic_c = element_nodes($p_c[0]);
          # With single svg:svg ?
          if ((scalar(@pic_c) == 1) && ($document->getNodeQName($pic_c[0]) eq 'svg:svg')) {
            # then the foreignObject is replaced by that svg's own children
            $document->replaceNode($node, element_nodes($pic_c[0]));
            return; } } } }
    # Otherwise, we've still got an svg:foreignObject;
    # Make sure we get a size, in case autoOpen'd
    if ($whatsit) {
      my ($w, $h, $d) = $whatsit->getSize;
      my $y  = $STATE->lookupDefinition(T_CS('\baselineskip'))->valueOf->pxValue;
      my $ht = $h->add($d);
      $node->setAttribute(width  => $w->pxValue)  unless $node->hasAttribute('width');
      $node->setAttribute(height => $ht->pxValue) unless $node->hasAttribute('height');
      $node->setAttribute(transform => "matrix(1 0 0 -1 0 $y)");
      $node->setAttribute(overflow  => 'visible'); } });

# This attempts to be a generalize vbox construction;
# The idea is to receeive block-like material, possibly wrapped in appropriate
# container which gets attributes.
# The contents are constructed in an ltx:_CaptureBlock_ element,
# designed to accept all reasonable block material from several levels,
# and then determine which container element is most apprpriate for both the conent & context
# from block, logical-block or sectional-block, or the inline- variants.
#
# Arguments: the document, the digested contents, and attributes for the container.
# Returns the child nodes that were captured (nothing when there is no current element,
# in which case the contents are simply absorbed).
sub insertBlock {
  my ($document, $contents, %blockattr) = @_;
  my $model   = $document->getModel;
  my $context = $document->getElement;    # Where we originally start inserting.
  if (!$context) {
    # edge case: if we start the doc with a block, the context is empty
    $document->absorb($contents);
    return; }
  my @attrnames      = keys %blockattr;
  my $context_tag    = $document->getNodeQName($context);
  my $is_svg         = ($context_tag =~ /^svg:/);     # svg is slightly tricky
  my $ignorable_attr = $is_svg || !scalar(@attrnames);    # if we do not REQUIRE the attributes
  if (($context_tag =~ /^ltx:XM/) && ($context_tag ne 'ltx:XMText')) {    # but math always needs this
    $context     = $document->openElement('ltx:XMText');
    $context_tag = $document->getNodeQName($context); }
  my $inline    = $is_svg || $document->canContain($context_tag, '#PCDATA');
  my $container = $document->openElement('ltx:_CaptureBlock_', '_vertical_mode_' => 1, %blockattr);
  $document->absorb($contents);
  my @nodes     = $container->childNodes;
  my @node_tags = map { $document->getNodeQName($_); } @nodes;
  my $nnodes    = scalar(@nodes);
  $document->closeToNode($container, 1);
  $document->closeNode($container);
  $document->closeToNode($context, 1);
  my $newcontainer;

  if ($nnodes < 1) {    # Insertion came up empty?
    $document->removeNode($container); }    # then remove the new block entirely
  elsif ($ignorable_attr                    # No attributes, contents allowed in context?
    && !grep { !$document->canContain($context, $_); } @node_tags) {
    $document->unwrapNodes($container); }    # No container needed, at all.
  elsif (($nnodes == 1)                      # Single node, allowed in context & accepts attributes
    && $document->canContain($context, $nodes[0])
    && ($ignorable_attr || !grep { !$document->canHaveAttribute($nodes[0], $_) } keys %blockattr)) {
    # Add attributes and unwrap the single node
    foreach my $name (keys %blockattr) {
      $document->setAttribute($nodes[0], $name => $blockattr{$name}); }
    $document->unwrapNodes($container); }
  elsif (($nnodes == 1)    # Single node, but needs auto-wrapper which accepts attributes?
    && ($newcontainer = $document->canContainSomehow($context, $nodes[0]))
    && ($ignorable_attr || !grep { !$document->canHaveAttribute($newcontainer, $_) } keys %blockattr)) {
    $document->renameNode($container, $newcontainer, 1); }    # rename the capture to that container
  else {                                                      # Otherwise, rename the capture
    if ($is_svg && grep { $_ =~ /^ltx:/; } @node_tags) {      # MAY need foreignObject wrapper
      $context     = $document->wrapNodes('svg:foreignObject', $container);
      $context_tag = $document->getNodeQName($context); }
    my @candidates =
      ($inline
      ? (qw(ltx:inline-block ltx:inline-logical-block ltx:inline-sectional-block))
      : (qw(ltx:block ltx:logical-block ltx:sectional-block ltx:figure)));
    # Filtered containers that can contain the content
    my @filtered_candidates = ();
    foreach my $candidate (@candidates) {
      my @rejected = grep { !$document->canContainSomehow($candidate, $_); } @node_tags;
      push(@filtered_candidates, $candidate) unless @rejected; }
    # and are allowed in the context
    my @allowed_candidates
      = grep { ($document->canContain($context_tag, $_) ? $_ : ()); } @filtered_candidates;
    # Prefer a container also allowed in the context; else the first that can hold the content.
    if (my $tag = $allowed_candidates[0] || $filtered_candidates[0]) {
      $document->renameNode($container, $tag, 1); }    # Rename the capture to the correct container
    else {                                             # we didn't know what to do?
      Warn('malformed', '_CaptureBlock_', $document, "Did not find a block-like candidate in $context_tag (with attributes (" . join(";", map { "$_=$blockattr{$_}" } keys %blockattr) . ')');
      $document->renameNode($container, 'ltx:block', 1);
    }
  }
  return @nodes; }

# \vbox: contents are inserted as a block attached at the bottom;
# height comes from "to <dimen>", or from the content's height plus "spread <dimen>".
DefConstructor('\vbox BoxSpecification VBoxContents', sub {
    my ($document, $spec, $contents, %props) = @_;
    my @block = insertBlock($document, $contents, vattach => 'bottom'); },
  sizer       => '#2',
  properties  => { layout => 'vertical', vattach => 'bottom' },
  mode        => 'text',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my ($spec, $box) = ($whatsit->getArg(1), $whatsit->getArg(2));
    if (my $h = GetKeyVal($spec, 'to')) {
      $whatsit->setHeight($h); }
    elsif (my $s = GetKeyVal($spec, 'spread')) {
      $whatsit->setHeight($box->getHeight->add($s)); }
    $whatsit->setProperty(content_box => $box);
    return; });

# \vtop: like \vbox, but the block is attached at the top.
# NOTE(review): the block gets vattach 'top' while the whatsit property says 'baseline';
# confirm whether that difference is intended.
DefConstructor('\vtop BoxSpecification VBoxContents', sub {
    my ($document, $spec, $contents, %props) = @_;
    insertBlock($document, $contents, vattach => 'top'); },
  sizer       => '#2',
  properties  => { layout => 'vertical', vattach => 'baseline' },
  mode        => 'text',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my ($spec, $box) = ($whatsit->getArg(1), $whatsit->getArg(2));
    if (my $h = GetKeyVal($spec, 'to')) {
      $whatsit->setHeight($h); }
    elsif (my $s = GetKeyVal($spec, 'spread')) {
      $whatsit->setHeight($box->getHeight->add($s)); }
    $whatsit->setProperty(content_box => $box);
    return; });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Commands to store and use boxes
# ----------------------------------------------------------------------
# \setbox    c  assigns an hbox, vbox, or vtop to a box register.
# \dp        iq is the depth of a box.
# \ht        iq is the height of a box.
# \wd        iq is the width of a box.
# \box       c  puts the box's contents in the current list and empties the box.
# \copy      c  puts the box's contents in the current list but does not empty the box .
# \unhbox    c  puts unwrapped hbox contents in the current list and empties the box.
# \unhcopy  c  puts unwrapped hbox contents in the current list but does not empty the box.
# \unvbox   c  puts unwrapped vbox contents in the current list and empties the box.
# \unvcopy  c  puts unwrapped vbox contents in the current list but does not empty the box.
# \lastbox  c  is void or the last hbox or vbox on the current list.
# ======================================================================

# \lastbox: pops the most recent item off @LaTeXML::LIST
# (hopefully, that is the box the caller had in mind!)
DefPrimitive('\lastbox', sub {
    return pop(@LaTeXML::LIST); });

# \setbox<number>=<box>: digests whatever the next expanded token produces
# and stores the first result as the value 'box<number>'.
# Anything else produced by that invocation is returned to the caller.
DefPrimitive('\setbox Number SkipSpaces SkipMatch:=', sub {
    my ($stomach, $number) = @_;
    no warnings 'recursion';
    my $register = 'box' . $number->valueOf;
    # If there are any afterAssignment tokens, move them over (as BeforeNextBox)
    # so that the BoxContents parameter will use them.
    my $pending = LookupValue('afterAssignment');
    if ($pending) {
      AssignValue('afterAssignment' => undef, 'global');
      AssignValue('BeforeNextBox'   => $pending); }
    # Digesting the box content resets the prefix flags, so remember \global first
    # (the only relevant prefix), then clear the prefixes before invoking.
    # Should afterDigest be responsible for resetting flags?
    my $scope = $STATE->getPrefix('global') && 'global';
    $STATE->clearPrefixes;
    my ($contents, @remainder) = $stomach->invokeToken($stomach->getGullet->readXToken);
    AssignValue($register => $contents, $scope);
    return @remainder; });

# = \ht | \wd | \dp
# Each of these reads or writes one dimension of the object stored as 'box<number>';
# reading a missing box gives Dimension(0), writing to a missing box does nothing.
DefRegister('\ht Number', Dimension(0),
  getter => sub {
    my ($index) = @_;
    my $contents = $index && LookupValue('box' . $index->valueOf);
    return $contents->getHeight if $contents;
    return Dimension(0); },
  setter => sub {
    my ($value, $scope, $index) = @_;
    my $contents = $index && LookupValue('box' . $index->valueOf);
    if ($contents) {
      $contents->setHeight($value); }
    return; });
DefRegister('\wd Number', Dimension(0),
  getter => sub {
    my ($index) = @_;
    my $contents = $index && LookupValue('box' . $index->valueOf);
    return $contents->getWidth if $contents;
    return Dimension(0); },
  setter => sub {
    my ($value, $scope, $index) = @_;
    my $contents = $index && LookupValue('box' . $index->valueOf);
    if ($contents) {
      $contents->setWidth($value); }
    return; });

DefRegister('\dp Number', Dimension(0),
  getter => sub {
    my ($index) = @_;
    my $contents = $index && LookupValue('box' . $index->valueOf);
    return $contents->getDepth if $contents;
    return Dimension(0); },
  setter => sub {
    my ($value, $scope, $index) = @_;
    my $contents = $index && LookupValue('box' . $index->valueOf);
    if ($contents) {
      $contents->setDepth($value); }
    return; });

# Recolor $box (and everything inside it) with the color of the current font,
# but only when the current font has a color and that color is not Black.
sub adjustBoxColor {
  my ($box) = @_;
  my $font  = LookupValue('font');
  my $color = $font && $font->getColor;
  return unless $color;
  return if Black->equals($color);
  adjustBoxColor_rec($color, {}, $box);
  return; }

# Worker for adjustBoxColor: walks @boxes recursively, merging $color into the font
# of each Box and Whatsit found.  $adjusted is a hash of the items already visited.
sub adjustBoxColor_rec {
  no warnings 'recursion';
  my ($color, $adjusted, @boxes) = @_;
  foreach my $item (@boxes) {
    next unless defined $item;
    # Whatsits are searched through their args AND their properties; never recolor twice!
    next if $$adjusted{$item};
    $$adjusted{$item} = 1;
    my $class = ref $item;
    # Only blessed things that claim to be boxes are of interest.
    next unless $class && ($class !~ /(?:SCALAR|HASH|ARRAY|CODE|REF|GLOB|LVALUE)/) && $class->isaBox;
    # NASTY access to internal structure; but worth a whole API for this one hack???
    if ($class eq 'LaTeXML::Core::Box') {
      adjustBoxColor_internal($color, $item); }
    elsif ($class eq 'LaTeXML::Core::List') {
      adjustBoxColor_rec($color, $adjusted, $item->unlist); }
    elsif ($class eq 'LaTeXML::Core::Whatsit') {
      adjustBoxColor_internal($color, $item);
      # then descend into contained boxes (args AND properties!)
      adjustBoxColor_rec($color, $adjusted, $item->getArgs);
      adjustBoxColor_rec($color, $adjusted, values %{ $item->getPropertiesRef }); }
    elsif ($class eq 'LaTeXML::Core::Alignment') {
      foreach my $row (@{ $$item{rows} }) {
        foreach my $cell (@{ $$row{columns} }) {
          next unless $$cell{boxes};
          adjustBoxColor_rec($color, $adjusted, $$cell{boxes}->unlist); } } }
  }
  return; }

# Replace the font of a single box by that same font with $color merged in;
# a box that has no font is left untouched.
sub adjustBoxColor_internal {
  my ($color, $box) = @_;
  my $font = $box->getFont;
  $box->setFont($font->merge(color => $color)) if $font;
  return; }

# \box<number>: yield the stored box (recolored for the current font) and empty the register.
DefPrimitive('\box Number', sub {
    my ($stomach, $number) = @_;
    my $register = 'box' . $number->valueOf;
    my $contents = LookupValue($register);
    adjustBoxColor($contents);
    AssignValue($register, undef);
    return ($contents ? $contents : List()); });

# \copy<number>: like \box, but the register keeps its contents.
DefPrimitive('\copy Number', sub {
    my ($stomach, $number) = @_;
    my $contents = LookupValue('box' . $number->valueOf);
    adjustBoxColor($contents);
    return ($contents ? $contents : List()); });

# \unhbox<8bit>, \unhcopy<8bit>
# These yield the unlisted contents of the stored box rather than the box itself.
DefPrimitive('\unhbox Number', sub {
    my ($stomach, $number) = @_;
    my $register = 'box' . $number->valueOf;
    my $contents = LookupValue($register);
    adjustBoxColor($contents);
    AssignValue($register, undef);
    return $contents->unlist if defined $contents;
    return List(); });

DefPrimitive('\unhcopy Number', sub {
    my ($stomach, $number) = @_;
    my $contents = LookupValue('box' . $number->valueOf);
    adjustBoxColor($contents);
    return $contents->unlist if defined $contents;
    return List(); });

# \unvbox<8bit>, \unvcopy<8bit>
# Implemented exactly as their horizontal counterparts above.
DefPrimitive('\unvbox Number', sub {
    my ($stomach, $number) = @_;
    my $register = 'box' . $number->valueOf;
    my $contents = LookupValue($register);
    adjustBoxColor($contents);
    AssignValue($register, undef);
    return $contents->unlist if defined $contents;
    return List(); });

DefPrimitive('\unvcopy Number', sub {
    my ($stomach, $number) = @_;
    my $contents = LookupValue('box' . $number->valueOf);
    adjustBoxColor($contents);
    return $contents->unlist if defined $contents;
    return List(); });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Various box related parameters
# ----------------------------------------------------------------------
# \prevdepth     iq  is the depth of the last box added to the current vertical list.
# \boxmaxdepth   pd  is the maximum possible depth of a vertical box.
# \badness       iq  is 0-10,000 and represents the badness of the glue settings in the last constructed box.
# \hbadness      pi  is the badness above which bad hboxes are reported.
# \vbadness      pi  is the badness above which bad vboxes are reported.
# \hfuzz         pd  is the overrun allowed before overfull hboxes are reported.
# \vfuzz         pd  is the overrun allowed before overfull vboxes are reported.
# \overfullrule  pd  is the width of the rule appended to an overfull box.
+# ====================================================================== + +DefRegister('\prevdepth' => Dimension(0)); +DefRegister('\boxmaxdepth' => Dimension('16383.99999pt')); + +DefRegister('\hfuzz' => Dimension('0.1pt')); +DefRegister('\vfuzz' => Dimension('0.1pt')); +DefRegister('\overfullrule' => Dimension('5pt')); + +DefRegister('\badness' => Number(0), readonly => 1); +DefRegister('\hbadness' => Number(1000)); +DefRegister('\vbadness' => Number(1000)); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Rules and Leaders +# ---------------------------------------------------------------------- +# \hrule c makes a rule box in vertical mode. +# \vrule c makes a rule box in horizontal mode. +# \cleaders c insert centered leaders. +# \leaders c fill space using specified glue with a box or rule. +# \xleaders c insert expanded leaders. +# ====================================================================== + +DefParameterType('RuleSpecification', sub { + my ($gullet) = @_; + my $keyvals = LaTeXML::Core::KeyVals->new(undef, undef, skipMissing => 1); + while (my $key = $gullet->readKeyword('width', 'height', 'depth')) { + $keyvals->setValue($key, $gullet->readDimension); } + $keyvals; }, + optional => 1, undigested => 1); + +# \hrule, \vrule are awkward in trying to deal with 3 cases +# * as rules within an alignment/table +# * as separating lines within text +# * as graphical lines within svg +# and each has different requirements for size +DefConstructor('\vrule RuleSpecification', + "?#invisible()(?#isVerticalRule()" + . "(?&inSVG()()" + . "()))", + afterConstruct => sub { # NOTE: Only For xy development! 
+ Warn('unexpected', 'vrule', $_[0], "Encountered \\vrule in SVG") if inSVG(); }, + afterDigest => sub { + my ($stomach, $whatsit) = @_; + my $dims = $whatsit->getArg(1); + my $width = GetKeyVal($dims, 'width'); # || Dimension('0.4pt'); + my $height = GetKeyVal($dims, 'height'); + my $depth = GetKeyVal($dims, 'depth'); + $whatsit->setProperties( + rwidth => $width, cwidth => $width || Dimension('0.4pt'), + rheight => $height, cheight => ($height), sheight => ($height ? $height->pxValue : 0), + rdepth => $depth, cdepth => ($depth || Dimension(0))); + my $w = ($width ? $width->ptValue : undef); + my $h = ($height ? $height->ptValue : undef); + my $d = ($depth ? $depth->ptValue : undef); + if (my $alignment = LookupValue('Alignment')) { + if (((!defined $h) && (!defined $w)) || ((defined $h) && ($h > 20)) + || ((defined $h) && (defined $w) && ($h > 3 * $w))) { + $whatsit->setProperty(isVerticalRule => 1) } } # Marked as rule within alignment + elsif ((defined $w) && ($w == 0)) { + $whatsit->setProperty(invisible => 1); } + if (my $color = LookupValue('font')->getColor) { + if (!Black->equals($color)) { + $whatsit->setProperty(color => $color); } } + return; }); + +DefConstructor('\hrule RuleSpecification', + "?#isHorizontalRule()" + . "(?&inSVG()()" + . "())", + afterConstruct => sub { # NOTE: Only For xy development! + Warn('unexpected', 'hrule', $_[0], "Encountered \\hrule in SVG") if inSVG(); }, + afterDigest => sub { + my ($stomach, $whatsit) = @_; + my $dims = $whatsit->getArg(1); + my $width = GetKeyVal($dims, 'width'); + my $height = GetKeyVal($dims, 'height'); + my $depth = GetKeyVal($dims, 'depth'); + $whatsit->setProperties( + rwidth => $width || '100%', cwidth => $width, swidth => ($width ? $width->pxValue : 0), + rheight => $height || '1px', cheight => ($height || Dimension('0.4pt')), + rdepth => $depth, cdepth => ($depth || Dimension(0))); + my $w = ($width ? $width->ptValue : undef); + my $h = ($height ? $height->ptValue : undef); + my $d = ($depth ? 
$depth->ptValue : undef); + if (my $alignment = LookupValue('Alignment')) { + # What is the intended logic here? + if (((!defined $h) && (!defined $w)) || ((defined $w) && ($w > 20)) + || ((defined $h) && (defined $w) && ($w > 3 * $h))) { + $alignment->addLine('t'); + $whatsit->setProperty(isHorizontalRule => 1) } } # Marked as rule within alignment + if (my $color = LookupValue('font')->getColor) { + if (!Black->equals($color)) { + $whatsit->setProperty(color => $color); } } + return; }); + +# ====================================================================== +# Various leaders, ignored for now... +DefPrimitiveI('\leaders', undef, undef); +DefPrimitiveI('\cleaders', undef, undef); +DefPrimitiveI('\xleaders', undef, undef); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; + diff --git a/lib/LaTeXML/Engine/TeX_Character.pool.ltxml b/lib/LaTeXML/Engine/TeX_Character.pool.ltxml new file mode 100644 index 000000000..c73691022 --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Character.pool.ltxml @@ -0,0 +1,211 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Character | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; +use Unicode::Normalize; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Character Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#====================================================================== +# \ (ctrl space) c inserts a control space. +# \char c provides access to one of the 256 characters in a font. +#---------------------------------------------------------------------- +DefPrimitiveI('\ ', undef, sub { + Box(UTF(0xA0), undef, undef, T_CS('\ '), + name => 'space', isSpace => 1, width => Dimension('0.5em')); }); + +DefPrimitive('\char Number', sub { + Box(FontDecode($_[1]->valueOf), undef, undef, + Tokens(T_CS('\char'), $_[1]->revert, T_CS('\relax'))); }); + +#====================================================================== +# \accent c places an accent on a character. +#---------------------------------------------------------------------- + +#---------------------------------------------------------------------- +# Accents. LaTeX Table 3.1, p.38 +#---------------------------------------------------------------------- +# All of TeX's accents can (sorta) be handled by Unicode's combining accents +# (which follow the character to be accented). +# We'll let unicode normalization do the combination, if needed. +# Also, note that \t is intended to combine multiple chars, but it appears to +# work (via mozilla !?) best when the combining char is after the 1st char. +# Further, the accents \d and \b seem to center the under dot or bar under multiple +# chars --- how should this be handled in Unicode? 

# Since people sometimes try to get fancy by using an empty argument,
# for each, I'm providing the combining code and an equivalent(?) spacing one.
# (doesn't look quite the same to use a combining char after a space)

# Build a Box in which an accent has been applied to $letter.
#   $stomach        : used to digest $letter
#   $letter         : the material to be accented
#   $combiningchar  : combining accent, inserted right after the first character
#   $standalonechar : used instead when the digested text is empty or only whitespace
#   $reversion      : passed through as the reversion of the resulting Box
# The digested letter is expected to stringify to plain text; the result
# keeps the font and locator of the digested letter.
sub applyAccent {
  my ($stomach, $letter, $combiningchar, $standalonechar, $reversion) = @_;
  my $digested = $stomach->digest($letter);
  my $where    = $digested->getLocator;
  my $font     = $digested->getFont;
  my $text     = $digested->toString;
  $text =~ tr/\x{0131}\x{0237}/ij/;    # dotless i, j become ordinary i, j
  $text =~ s/\s/ /g;
  my $accented;
  if ($text =~ /^\s*$/) {              # nothing to put the accent on
    $accented = $standalonechar; }
  else {
    my ($first, @others) = split(//, $text);
    # The combiner follows the 1st char; normalization composes the pair when it can.
    $accented = NFC($first . $combiningchar . join('', @others)); }
  return Box($accented, $font, $where, $reversion); }

# Defines an accent command using a combining char that follows the
# 1st char of the argument.  In cases where there is no argument, $standalonechar is used.
sub DefAccent {
  my ($accent, $combiningchar, $standalonechar, %options) = @_;
  # An accent goes above unless explicitly declared otherwise.
  my $above = $options{above};
  $above = 1 if !(defined $above) && !$options{below};
  # Record standalone => combining in the table matching the accent's position;
  # \accent uses these tables to convert an accent char to a combining char.
  my $table = ($above ? 'accent_combiner_above' : 'accent_combiner_below');
  AssignMapping($table, $standalonechar => $combiningchar);
  # The command itself merely expands into \lx@applyaccent with the details spelled out.
  my $expansion = Tokens(T_CS('\lx@applyaccent'), T_OTHER($accent),
    T_OTHER($combiningchar), T_OTHER($standalonechar),
    T_BEGIN, T_ARG(1), T_END);
  DefMacroI($accent, "{}", $expansion, protected => 1);
  return; }

# Does the actual work for commands made by DefAccent:
# receives the accent's name, the two accent chars (as tokens) and the letter.
DefPrimitiveI('\lx@applyaccent', "DefToken Token Token {}", sub {
    my ($stomach, $accent, $combiningchar, $standalonechar, $letter) = @_;
    my $reversion = Tokens(T_CS($accent->getString), T_BEGIN, $letter, T_END);
    return applyAccent($stomach, $letter,
      $combiningchar->getString, $standalonechar->getString, $reversion); },
  mode => 'text');

# This will fail if there really are "assignments" after the number!
# We're given a number pointing into the font, from which we can derive the standalone char.
# From that, we want to figure out the combining character, but there could be one for
# both the above & below cases!  We'll prefer the above case.
DefPrimitive('\accent Number {}', sub {
    my ($stomach, $num, $letter) = @_;
    my $n        = $num->valueOf;
    my $fontinfo = lookupFontinfo(LookupValue('textfont_0'));
    my $acc;
    if ($fontinfo && $$fontinfo{encoding}) {
      $acc = FontDecode($n, $$fontinfo{encoding}); }
    else {
      $acc = chr($n); }
    my $reversion = Invocation(T_CS('\accent'), $num, $letter);
    # NOTE: REVERSE LOOKUP in above accent list for the non-spacing accent char
    # BUT, \accent always (?) makes an above type accent... doesn't it?
    my $combiner = LookupMapping('accent_combiner_above', $acc)
      || LookupMapping('accent_combiner_below', $acc);
    return applyAccent($stomach, $letter, $combiner, $acc, $reversion) if $combiner;
    # Unknown accent: complain, and yield the bare letter.
    Warn('unexpected', "accent$n", $stomach, "Accent '$n' not recognized");
    return Box(ToString($letter), undef, undef, $reversion); });

#======================================================================
# \chardef  iq  provides an alternate way to define a control sequence that returns a character.
#----------------------------------------------------------------------

# Almost like a register (and \countdef), but different...
# (including the preassignment to \relax!)
DefPrimitive('\chardef Token SkipSpaces SkipMatch:=', sub {
    my ($stomach, $newcs) = @_;
    # Let the new cs to \relax (w/o AfterAssignment) BEFORE the number is read.
    my $relax = $STATE->lookupMeaning(T_CS('\relax'));
    $STATE->assignMeaning($newcs, $relax);
    my $value   = $stomach->getGullet->readNumber();
    my $chardef = LaTeXML::Core::Definition::CharDef->new($newcs, $value);
    $STATE->installDefinition($chardef);
    AfterAssignment();
    return; });

#======================================================================
# Upper/Lowercase
#----------------------------------------------------------------------
# \lowercase  c  converts tokens to lowercase.
# \uppercase  c  converts tokens to uppercase.
# \uppercase, \lowercase

# Map $token through the uccode table: a defined, nonzero code gives a new token
# for that character with the same catcode; otherwise $token itself is returned.
sub ucToken {
  my ($token) = @_;
  my $code = $STATE->lookupUCcode($token->getString);
  return $token unless (defined $code) && ($code != 0);
  return Token(chr($code), $token->getCatcode); }

# Same as ucToken, but through the lccode table.
sub lcToken {
  my ($token) = @_;
  my $code = $STATE->lookupLCcode($token->getString);
  return $token unless (defined $code) && ($code != 0);
  return Token(chr($code), $token->getCatcode); }

# Note that these are NOT expandable, even though the "return" tokens!
# The converted tokens are pushed back onto the input; nothing is returned for digestion.
DefPrimitive('\uppercase GeneralText', sub {
    my ($stomach, $tokens) = @_;
    my $gullet = $stomach->getGullet;
    $gullet->unread(map { ucToken($_) } $tokens->unlist);
    return; });

DefPrimitive('\lowercase GeneralText', sub {
    my ($stomach, $tokens) = @_;
    my $gullet = $stomach->getGullet;
    $gullet->unread(map { lcToken($_) } $tokens->unlist);
    return; });

#======================================================================
# Converting things to strings (tokens, really)
#----------------------------------------------------------------------
# \number        c  produces the decimal equivalent of numbers.
# \romannumeral  c  converts a number to lowercase roman numerals.
# \string        c  converts a control sequence to characters.

DefMacro('\number Number', sub {
    my ($gullet, $number) = @_;
    return Explode($number->valueOf); });
DefMacro('\romannumeral Number', sub {
    my ($gullet, $number) = @_;
    return roman($number->valueOf); });

# The character currently selected by \escapechar,
# or the empty string when that register lies outside 0..255.
# Hmm... I wonder, should getString itself be dealing with escapechar?
sub escapechar {
  my $code = LookupRegister('\escapechar')->valueOf;
  return '' if ($code < 0) || ($code > 255);
  return chr($code); }

# 1) Knuth, The TeXBook, page 40, paragraph 1, Chapter 7: How TEX Reads What You Type.
#  suggests all characters except spaces are returned in category code Other, i.e. Explode()
DefMacro('\string Token', sub {
    my ($gullet, $token) = @_;
    my $name = $token->toString;
    # A leading backslash is swapped for the current escape character.
    if ($name =~ s/^\\//) {
      $name = escapechar() . $name; }
    return Explode($name); });
#======================================================================
# Character properties
#----------------------------------------------------------------------
# \catcode  iq  holds the category code for a character.
# \lccode   iq  holds the lowercase value for a character.
# \sfcode   iq  holds the space factor value for a character.
# \uccode   iq  holds the uppercase value for a character.

# Each getter receives the character number; each setter receives (value, scope, number).
DefRegister('\catcode Number', Number(0),
  getter => sub {
    my ($charnum) = @_;
    my $code = LookupCatcode(chr($charnum->valueOf));
    return Number(defined $code ? $code : CC_OTHER); },
  setter => sub {
    my ($value, $scope, $charnum) = @_;
    return AssignCatcode(chr($charnum->valueOf) => $value->valueOf, $scope); });
# Not used anywhere (yet)
DefRegister('\sfcode Number', Number(0),
  getter => sub {
    my ($charnum) = @_;
    my $code = $STATE->lookupSFcode(chr($charnum->valueOf));
    return Number(defined $code ? $code : 0); },
  setter => sub {
    my ($value, $scope, $charnum) = @_;
    return $STATE->assignSFcode(chr($charnum->valueOf) => $value->valueOf, $scope); });
DefRegister('\lccode Number', Number(0),
  getter => sub {
    my ($charnum) = @_;
    my $code = $STATE->lookupLCcode(chr($charnum->valueOf));
    return Number(defined $code ? $code : 0); },
  setter => sub {
    my ($value, $scope, $charnum) = @_;
    return $STATE->assignLCcode(chr($charnum->valueOf) => $value->valueOf, $scope); });
DefRegister('\uccode Number', Number(0),
  getter => sub {
    my ($charnum) = @_;
    my $code = $STATE->lookupUCcode(chr($charnum->valueOf));
    return Number(defined $code ? $code : 0); },
  setter => sub {
    my ($value, $scope, $charnum) = @_;
    return $STATE->assignUCcode(chr($charnum->valueOf) => $value->valueOf, $scope); });

#======================================================================
# Special character codes
#----------------------------------------------------------------------
# \endlinechar  pi  is the character added to the end of input lines.
# \escapechar   pi  is the character used for category 0 characters when outputting control sequences.
# \newlinechar  pi  is the character which begins a new line of output.
+ +DefRegister('\endlinechar' => Number(ord("\r"))); +DefRegister('\escapechar' => Number(ord('\\'))); +DefRegister('\newlinechar' => Number(-1)); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml b/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml new file mode 100644 index 000000000..6e466bab8 --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml @@ -0,0 +1,231 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Debugging | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Debugging Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +DefConstructor('\lx@ERROR{}{}', "#2"); + +#====================================================================== +# running modes +#---------------------------------------------------------------------- +# \batchmode c acts like pressing Q in response to an error. +# \errorstopmode c switches to normal interaction for processing errors. +# \nonstopmode c acts like pressing R in response to an error. +# \scrollmode c acts like pressing S in response to an error. +# \pausing pi if positive, the program halts after every line is read from the input file and waits for a response from the user. 

# These are no-ops; Basically, LaTeXML runs in scrollmode
DefPrimitiveI('\errorstopmode', undef, undef);
DefPrimitiveI('\scrollmode',    undef, undef);
DefPrimitiveI('\nonstopmode',   undef, undef);
DefPrimitiveI('\batchmode',     undef, undef);
DefRegister('\pausing' => Number(0));

#======================================================================
# Messages
#----------------------------------------------------------------------
# \message            c   writes an expanded token list on the terminal and to the log file.
# \errmessage         c   displays text on the terminal and interrupts the program.
# \errhelp            pt  is text displayed on the terminal if h is pressed after an \errmessage.
# \errorcontextlines  pi  is the number of lines to display on the terminal at an error.

# Converts $tokens to a string in the fashion of \message and others:
# doubles #, converts to string; adds a space after each control sequence,
# in the spirit of the B Book, "show_token_list" routine, in 292.
# [This could be a $tokens->unpackParameters, but for the curious space treatment]
#   $tokens : a token list (anything with an unlist method yielding tokens)
# Returns the string that UnTeX produces for the adjusted token list.
sub writableTokens {
  my ($tokens) = @_;
  my @tokens = $tokens->unlist;
  @tokens = map {
    my $cc = $$_[1];    # a token's catcode is its 2nd slot, its string the 1st
    if    ($cc == CC_CS)    { ($_, T_SPACE); }                   # control sequence: add a space
    elsif ($cc == CC_SPACE) { (T_SPACE); }                       # any space: a plain space
    elsif ($cc == CC_PARAM) { ($_, $_); }                        # parameter char: doubled
    elsif ($cc == CC_ARG)   { (T_PARAM, T_OTHER($$_[0])); }      # packed argument: param char + its string
    else                    { $_; }
  } @tokens;
  return UnTeX(Tokens(@tokens), 1); }

# \message: expand the argument and write it to the log.
DefPrimitive('\message{}', sub {
    my ($stomach, $stuff) = @_;
    NoteLog(writableTokens(Expand($stuff)));
    return; });

DefRegister('\errhelp' => Tokens());
# \errmessage: only notes "<expanded message>: <expanded \errhelp>"; processing continues.
DefPrimitive('\errmessage{}', sub {
    my ($stomach, $stuff) = @_;
    Note(ToString(Expand($stuff)) . ": " . ToString(Expand(Tokens(T_CS('\the'), T_CS('\errhelp')))));
    return; });

DefRegister('\errorcontextlines' => Number(5));

#======================================================================
# meaning
#----------------------------------------------------------------------
# \meaning  c  adds characters describing a token to the output stream.

# Descriptive phrase for each catcode, indexed by catcode number;
# an undef entry means the token is described by its string alone.
our @CATCODE_MEANING = (
  "the escape character",      "begin-group character",
  "end-group character",       "math shift character",
  "alignment tab character",   "end-of-line character",
  "macro parameter character", "superscript character",
  "subscript character",       "ignored character",
  "blank space",               "the letter",
  "the character",             "active character",
  "comment character",         "invalid character",
  undef,                       "latexml marker character",
  "macro parameter character");

# Not sure about this yet...
# NOTE: Lots of back-and-forth mangle with definition vs cs; don't do that!
# \meaning<token> expands to the (exploded) description of the token's current meaning;
# a token without any meaning gives 'undefined'.
DefMacro('\meaning Token', sub {
    my ($gullet, $tok) = @_;
    my $meaning = 'undefined';
    if (my $definition = ($tok->defined_as(T_ALIGN) ? $tok : LookupMeaning($tok))) {
      my $type = ref $definition;
      $type =~ s/^LaTeXML:://;
      # Pre-step: We can't extract the bodies of definitions which are defined via Perl subroutines.
      # So do the next best thing -- represent them as their tokens.
      if ($type =~ /(primitive|conditional|constructor)$/i) {
        $definition = $definition->getCSorAlias;
        $type       = ref $definition;
        $type =~ s/^LaTeXML:://;
        # ... unless the token has font information recorded for it.
        if (my $fontinfo = LookupValue('fontinfo_' . ToString($definition))) {
          $meaning = 'select font ' . ($$fontinfo{fontname} || 'fontname');
          $meaning .= ' at ' . $$fontinfo{at} if $$fontinfo{at};
          $type = 'font'; } }    # matches none of the tests below, so $meaning stands
      # The actual tests start here
      if ($type =~ /token$/i) {
        my $cc         = $definition->getCatcode;
        my $char       = $definition->toString;
        my $meaning_cc = $CATCODE_MEANING[$cc] || '';
        $meaning_cc .= ' ' if $meaning_cc;    # append space separator if defined
        $meaning = $meaning_cc . $char; }
      elsif ($type =~ /register$/i) {
        $meaning = $definition->getAddress; }
      elsif ($type =~ /expandable$/i) {
        # short-circuit some troublesome discrepancies with TeX, which end up macros on LaTeXML's end,
        # but \meaning expects as primitives in the CTAN ecosystem.
        my $cs = ToString($definition->getCSorAlias);
        # These exceptions could be extended further, as we add more .sty/.cls support
        return Explode($cs) if $cs =~ /^\\(?:(?:un)?expanded|detokenize)$/;
        my $expansion  = $definition->getExpansion;
        my $ltxps      = $definition->getParameters;
        my $arg_index  = 0;
        my @spec_parts = ();
        my @params     = $ltxps ? $ltxps->getParameters : ();
        my $p_trailer  = '';

        # Rebuild a TeX-style parameter text (#1#2...) from LaTeXML's parameter specs.
        for my $param (@params) {
          my $p_spec = $$param{spec};
          if ($p_spec eq 'RequireBrace') {
            # tex's \meaning prints out the required braces for "\def\a#{}" variants
            $p_trailer = '{';
            $p_spec    = '{'; }
          elsif ($p_spec eq 'UntilBrace') {    # should only ever be used in the last argument?
            $p_trailer = '{';
            $p_spec    = "#" . (++$arg_index) . '{'; }
          elsif ($p_spec =~ s/^Match://) { }    # just match, don't increment arg index
          elsif ($p_spec =~ s/^\w?Until(\w*)://) {    # implied argument at this slot
            $p_spec = "#" . (++$arg_index) . $p_spec; }
          else {                                      # regular parameter, increment
            # skip the latexml-only requirement params,
            # but only here, since Match also have "novalue" set.
            next if $$param{novalue};
            $p_spec = "#" . (++$arg_index); }
          push @spec_parts, $p_spec; }
        my $spec = join("", @spec_parts);
        $spec =~ s/\{\}//g;
        $spec =~ s/Token//g;
        my $prefixes = join('',
          ($definition->isProtected ? '\protected' : ()),
          ($definition->isLong      ? '\long'      : ()),
          ($definition->isOuter     ? '\outer'     : ()),
        );
        my $expansion_str = '';
        if (ref $expansion eq 'LaTeXML::Core::Tokens') {
          $expansion_str = writableTokens($expansion); }
        else {
          $expansion_str = ToString($expansion); }
        $meaning = ($prefixes ? $prefixes . ' ' : '') .
          "macro:$spec->$expansion_str$p_trailer"; }
      elsif ($type =~ /chardef$/i) {    # from \chardef or \mathchardef
        my $prefix = ($$definition{mathglyph} ? '\mathchar' : '\char');
        # The " prefix is TeX's notation for a hexadecimal number, so the value has to be
        # written in (uppercase) hex: \chardef\a=65 has the meaning \char"41, not \char"65.
        $meaning = $prefix . '"' . sprintf('%X', $definition->valueOf->valueOf); } }
    # One catch: make sure all \s in the meaning string are normalized to a simple space ' '
    $meaning =~ s/\s/ /g;
    return Explode($meaning); });

#======================================================================
# Showing internal things
#----------------------------------------------------------------------

# \show            c   writes a token's definition on the terminal and to the log file.
# \showbox         c   writes the contents of a box to the log file.
# \showlists       c   writes information about current lists to the log file.
# \showthe         c   writes a value on the terminal and to the log file and interrupts the program.
# \showboxbreadth  pi  is the maximum number of items per level written by \showbox and \showlists.
# \showboxdepth    pi  is the maximum level written by \showbox and \showlists.

# Debugging aids; \showlists and \showthe are ignored.
# \show notes "> \cs=<meaning>" (the "\cs=" part only for control sequences), then the locator.
DefPrimitive('\show Token', sub {
    my $stuff = Invocation(T_CS('\meaning'), $_[1]);
    Note("> " . ($_[1][1] == CC_CS ? ToString($_[1]) . '=' : '') . writableTokens(Expand($stuff)));
    Note($_[0]->getLocator->toString());
    return; });
# \showbox writes the stringified content of the box register as a Debug message.
DefPrimitive('\showbox Number', sub {
    my $n     = $_[1]->valueOf;
    my $stuff = LookupValue('box' . $n);
    Debug("Box $n = " . ToString($stuff));
    undef; });
DefPrimitive('\showlists',     undef);
DefPrimitive('\showthe Token', undef);

DefRegister('\showboxbreadth' => Number(5));
DefRegister('\showboxdepth'   => Number(3));

#======================================================================
# Tracing
#----------------------------------------------------------------------
# \tracingcommands   pi  if positive, writes commands to the log file.
# \tracinglostchars  pi  if positive, writes characters not in the current font to the log file.
+# \tracingmacros pi if positive, writes to the log file when expanding macros and arguments . +# \tracingonline pi if positive, writes diagnostic output to the terminal as well as to the log file. +# \tracingoutput pi if positive, writes contents of shipped out boxes to the log file. +# \tracingpages pi if positive, writes the page-cost calculations to the log file. +# \tracingparagraphs pi if positive, writes a summary of the line-breaking calculations to the log file. +# \tracingrestores pi if positive, writes save-stack details to the log file. +# \tracingstats pi if positive, writes memory usage statistics to the log file. + +DefRegister('\tracingmacros', Number(0), + getter => sub { Number((LookupValue('TRACING') || 0) & TRACE_MACROS); }, + setter => sub { my $p = (LookupValue('TRACING') || 0); + AssignValue(TRACING => ($_[0]->valueOf ? $p | TRACE_MACROS : $p & ~TRACE_MACROS)); }); +DefRegister('\tracingcommands', Number(0), + getter => sub { Number((LookupValue('TRACING') || 0) & TRACE_COMMANDS); }, + setter => sub { my $p = (LookupValue('TRACING') || 0); + AssignValue(TRACING => ($_[0]->valueOf ? 
$p | TRACE_COMMANDS : $p & ~TRACE_COMMANDS)); }); + +DefRegister('\tracingonline' => Number(0)); +DefRegister('\tracingstats' => Number(0)); +DefRegister('\tracingparagraphs' => Number(0)); +DefRegister('\tracingpages' => Number(0)); +DefRegister('\tracingoutput' => Number(0)); +DefRegister('\tracinglostchars' => Number(1)); +DefRegister('\tracingrestores' => Number(0)); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_FileIO.pool.ltxml b/lib/LaTeXML/Engine/TeX_FileIO.pool.ltxml new file mode 100644 index 000000000..d910db78b --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_FileIO.pool.ltxml @@ -0,0 +1,232 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_FileIO | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; +#use Unicode::Normalize; +use LaTeXML::Util::Pathname; +#use List::Util qw(min max); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# File I/O Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#====================================================================== +# Low-level input +#---------------------------------------------------------------------- +# \openin c opens an auxiliary file for reading. +# \closein c closes an auxiliary file opened for reading. 
+# \read c reads one or more lines from an auxiliary file.
+# \endinput c stops input from a file at the end of the current line.
+# \inputlineno iq holds the line number of the line last read in the current input file.
+
+# TeX I/O primitives
+
+# \openin <port> [=] <filename>
+# Attaches a Mouth reading <filename> to input port <port>; the Mouth is stored
+# globally under the state key 'input_file:<port>'.
+# Any Mouth already attached to the port is finished and detached first, so that
+# a file which cannot be found leaves the port closed rather than still pointing
+# at the stream of an earlier \openin.
+DefPrimitive('\openin Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
+    my ($stomach, $port, $filename) = @_;
+    $port     = ToString($port);
+    $filename = ToString($filename);
+    my $key = 'input_file:' . $port;
+    if (my $previous = LookupValue($key)) {
+      $previous->finish;
+      AssignValue($key => undef, 'global'); }
+    # Rely on FindFile to enforce any access restrictions
+    # It's tempting to put noltxml=>1 here, since who would want to read in an .ltxml file's perl?
+    # However, \openin is often used by low-level code to check for existence of a file
+    # when we SHOULD find an .ltxml version!
+    # Hopefully, if they get one, they won't actually try to read its content...
+    if (my $path = FindFile($filename)) {
+      my $mouth = LaTeXML::Core::Mouth->create($path,
+        content => LookupValue($path . '_contents'));
+      AssignValue($key => $mouth, 'global'); }
+    return; });
+
+# \closein <port>
+# Finishes the Mouth attached to input port <port>, if any, and clears the port.
+DefPrimitive('\closein Number', sub {
+    my ($stomach, $port) = @_;
+    # close the mouth (if any) and clear the variable
+    $port = ToString($port);
+    if (my $mouth = LookupValue('input_file:' . $port)) {
+      $mouth->finish;
+      AssignValue('input_file:' . $port => undef, 'global'); }
+    return; });
+
+# \read <port> to <cs>
+# Reads tokens from the Mouth on <port> until an end of line is reached at brace
+# level zero (or the Mouth runs out), then defines <cs> as a parameterless macro
+# expanding to those tokens.  Nothing is read or defined if the port has no Mouth.
+DefPrimitive('\read Number SkipKeyword:to SkipSpaces Token', sub {
+    my ($stomach, $port, $token) = @_;
+    $port = ToString($port);
+    if (my $mouth = LookupValue('input_file:' . $port)) {
+      # The reader settings are assigned inside a group, which is closed after reading.
+      $stomach->bgroup;
+      AssignValue(PRESERVE_NEWLINES => 2);    # Special EOL/EOF treatment for \read
+      AssignValue(INCLUDE_COMMENTS  => 0);
+      my @tokens = ();
+      my ($t, $level) = (undef, 0);
+      while ($t = $mouth->readToken) {
+        my $cc = $t->getCatcode;
+        push(@tokens, $t) unless $cc == CC_MARKER;    # End of line marker
+        $level++ if $cc == CC_BEGIN;
+        $level-- if $cc == CC_END;
+        last     if !$level && $mouth->isEOL; }
+      $stomach->egroup;
+      DefMacroI($token, undef, Tokens(@tokens), nopackParameters => 1); }
+    return; });
+
+# Note that TeX doesn't actually close the mouth;
+# it just flushes it so that it will close the next time it's read!
+DefMacroI('\endinput', undef, sub { $_[0]->flushMouth; });
+
+# \inputlineno is read-only; its value is the fromLine field of the gullet's
+# current locator, or 0 when there is no locator.
+DefRegister('\inputlineno', Number(0),
+  getter => sub {
+    my $locator = $STATE->getStomach->getGullet->getLocator();
+    Number($locator ? $$locator{fromLine} : 0); },
+  readonly => 1);
+
+#======================================================================
+# Low-level output
+#----------------------------------------------------------------------
+# \openout c opens an auxiliary file for writing.
+# \closeout c closes an auxiliary file opened for writing.
+# \write c writes material to an auxiliary file.
+# \immediate c performs the following output command without waiting for \shipout.
+
+# For output files, we'll write the data to a cached internal copy
+# rather than to the actual file system.
+
+# \openout <port> [=] <filename>
+# Records <filename> under 'output_file:<port>' and resets the cached contents
+# ('<filename>_contents') to the empty string.
+DefPrimitive('\openout Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
+    my ($stomach, $port, $filename) = @_;
+    $port     = ToString($port);
+    $filename = ToString($filename);
+    AssignValue('output_file:' . $port => $filename, 'global');
+    AssignValue($filename . '_contents' => "",       'global');
+    return; });
+
+# \closeout <port>
+# Forgets the filename bound to the port; the cached contents are left in place.
+DefPrimitive('\closeout Number', sub {
+    my ($stomach, $port) = @_;
+    $port = ToString($port);
+    AssignValue('output_file:' . $port => undef, 'global');
+    return; });
+
+# \write <port> {<tokens>}
+# Expands <tokens> and appends the result plus a newline to the cached contents
+# of the file open on <port>; if no file is open on that port, the expanded
+# text is reported with Note instead.
+DefPrimitive('\write Number {}', sub {
+    my ($stomach, $port, $tokens) = @_;
+    $port = ToString($port);
+    if (my $filename = LookupValue('output_file:' . $port)) {
+      my $handle   = $filename . '_contents';
+      my $contents = LookupValue($handle);
+      AssignValue($handle => $contents . UnTeX(Expand($tokens), 1) . "\n", 'global'); }
+    else {
+      Note(UnTeX(Expand($tokens))); }
+    return; });
+
+# Since we don't paginate, we're effectively always "shipping out",
+# so all operations are \immediate
+DefPrimitive('\immediate', undef);
+
+#======================================================================
+# High-level input
+#----------------------------------------------------------------------
+# \input c inserts a file at the current position in the source file.
+
+# Accepts both the TeX form (\input name) and the LaTeX form (\input{name});
+# the braced form also triggers loading of the LaTeX pool if it is not loaded yet.
+DefMacro('\input TeXFileName', sub {
+    my $filename = $_[1];
+    my @t        = $filename->unlist;
+    # If given a LaTeX-style argument, strip braces
+    if (@t && $t[0] && $t[0]->getCatcode == CC_BEGIN && $t[-1]->getCatcode == CC_END) {
+      $filename = Tokens(@t[1 .. $#t - 1]);
+      # and load LaTeX.pool if not already
+      if (!LookupValue('LaTeX.pool_loaded')) {
+        LoadPool("LaTeX"); } }
+    Input($filename, reloadable => 1); });
+
+#======================================================================
+# Special output
+#----------------------------------------------------------------------
+# \special c sends material to the dvi file for special processing.
+
+# \special{<text>}
+# Only one form is recognized: a graphics inclusion given as "psfile=<file>",
+# optionally accompanied by voffset/hoffset/hscale/vscale/hsize/vsize/angle
+# settings.  It is rewritten into a call of \ltx@special@graphics (below);
+# any other \special is reported with Info and otherwise ignored.
+DefPrimitive('\special {}', sub {
+    my ($stomach, $arg) = @_;
+    my $special_str = ToString($arg);
+    # recognize one special graphics inclusion case
+    # A value ends at the next whitespace or closing brace, or at the end of the
+    # string, so that "psfile=x.ps" is also seen when it is the last item.
+    if ($special_str =~ /\bpsfile=(.+?)(?:\s|\}|\z)/) {
+      my $graphic = $1;
+      RequirePackage('graphicx', searchpaths_only => 1);
+      my @kv;
+      for my $prop (qw(voffset hoffset hscale vscale hsize vsize angle)) {
+        if ($special_str =~ /\b$prop=(.+?)(?:\s|\}|\z)/) {
+          push(@kv, T_OTHER(',')) if @kv;
+          push(@kv, T_OTHER($prop), T_OTHER("="), T_OTHER($1)); } }
+      # Wrap the collected settings as an optional [key=value,...] argument
+      @kv = (T_OTHER("["), @kv, T_OTHER("]")) if @kv;
+      $stomach->getGullet->unread(
+        T_CS('\ltx@special@graphics'), @kv, T_BEGIN, T_OTHER($graphic), T_END); }
+    else {
+      Info('ignored', 'special', $stomach, 'Unrecognized TeX Special', $arg); }
+    return; });
+
+# adapted from graphicx.sty.ltxml
+DefKeyVal('SpecialPS', 'angle',   '');
+DefKeyVal('SpecialPS', 'voffset', '');
+DefKeyVal('SpecialPS', 'hoffset', '');
+DefKeyVal('SpecialPS', 'hsize',   '');
+DefKeyVal('SpecialPS', 'vsize',   '');
+DefKeyVal('SpecialPS', 'hscale',  '');
+DefKeyVal('SpecialPS', 'vscale',  '');
+# \ltx@special@graphics[<SpecialPS keyvals>]{<path>}
+# Internal target of the psfile \special above.  The properties computed are:
+#   path       : the path with surrounding whitespace and double quotes removed;
+#   candidates : comma-separated files found for that path on GRAPHICSPATHS,
+#                made relative to SOURCEDIRECTORY when that is set;
+#   options    : the psfile settings remapped to includegraphics-style options.
+DefConstructor('\ltx@special@graphics OptionalKeyVals:SpecialPS Semiverbatim',
+  "",
+  sizer      => \&image_graphicx_sizer,
+  properties => sub {
+    my ($stomach, $kv, $path) = @_;
+    $path = ToString($path); $path =~ s/^\s+//; $path =~ s/\s+$//;
+    $path =~ s/("+)(.+)\g1/$2/;
+    my $searchpaths = LookupValue('GRAPHICSPATHS');
+    my @candidates  = pathname_findall($path, types => ['*'], paths => $searchpaths);
+    if (my $base = LookupValue('SOURCEDIRECTORY')) {
+      @candidates = map { pathname_relative($_, $base) } @candidates; }
+    my $options = '';
+    if ($kv) {    # remap psfile options to includegraphics options:
+      # hscale/vscale are divided by 100 to give the xscale/yscale factors
+      if (my $hscale = $kv->getValue('hscale')) {
+        $hscale = $hscale && int(ToString($hscale)) / 100;
+        $options .= ',' if $options;
+        $options .= "xscale=$hscale"; }
+      if (my $vscale = $kv->getValue('vscale')) {
+        $vscale = $vscale && int(ToString($vscale)) / 100;
+        $options .= ',' if $options;
+        $options .= "yscale=$vscale"; }
+      if (my $hsize = $kv->getValue('hsize')) {
+        $hsize = ToString($hsize);
+        $options .= ',' if $options;
+        $options .= "width=$hsize"; }
+      if (my $vsize = $kv->getValue('vsize')) {
+        $vsize = ToString($vsize);
+        $options .= ',' if $options;
+        $options .= "height=$vsize"; }
+      if (my $angle = $kv->getValue('angle')) {
+        $angle = ToString($angle);
+        $options .= ',' if $options;
+        $options .= "angle=$angle"; }
+      # The offsets are negated and used as the left/bottom amounts of a trim
+      my $voffset = $kv->getValue('voffset') || 0;
+      $voffset = $voffset && int(ToString($voffset));
+      my $hoffset = $kv->getValue('hoffset') || 0;
+      $hoffset = $hoffset && int(ToString($hoffset));
+      if ($voffset || $hoffset) {
+        my $left   = -$hoffset;
+        my $bottom = -$voffset;
+        $options .= "," if $options;
+        $options .= "trim=$left $bottom 0 0,clip=true"; } }
+    (options => $options, path => $path, candidates => join(',', @candidates)); },
+  mode => 'text');
+# Since these ultimately generate external resources, it can be useful to have a handle on them.
+Tag('ltx:graphics', afterOpen => sub { GenerateID(@_, 'g'); });
+
+#======================================================================
+# output processing
+#----------------------------------------------------------------------
+# \shipout c sends the contents of a box to the dvi file.
+# \output pt holds the token list used to typeset one page.
+
+# DefPrimitive('\shipout ??
+DefRegister('\output', Tokens()); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml new file mode 100644 index 000000000..4772129dd --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml @@ -0,0 +1,294 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Fonts | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Fonts Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#====================================================================== +# Font declaration +#---------------------------------------------------------------------- +# \font iq loads information about a font into TeX's memory. +# \fontname c returns the system file name for a font. +# \fontdimen iq holds font parameters. +# \nullfont iq is a predefined font with no characters. + +sub lookupFontinfo { + my ($token) = @_; + my $defn = LookupDefinition($token); + # return LookupValue(($defn ? ToString($defn) : ToString($token)) . '_fontinfo'); } + return LookupValue('fontinfo_' . ($defn ? 
$defn->getCSName : ToString($token))); }
+
+# Parameter type FontToken: reads one token naming a font.
+# \textfont, \scriptfont and \scriptscriptfont are followed by a family number and
+# are replaced by the value stored under '<type>font_<family>'; \font is replaced
+# by the value stored under 'textfont_0'.  Any other token is returned as read.
+DefParameterType('FontToken', sub {
+    my ($gullet) = @_;
+    my $token = $gullet->readToken;
+    if ($token->toString =~ /^\\(text|script|scriptscript)font$/) {
+      my $type = $1;
+      if (my $fam = $gullet->readNumber) {
+        $token = LookupValue($type . 'font_' . $fam->valueOf); } }
+    elsif ($token->toString eq '\\font') {
+      $token = LookupValue('textfont_0'); }    # ??? I assume should get current font?
+    $token; });                                #?
+
+# This should eventually actually load the font metrics,
+# and tie-in to the FontMetrics data used by Font.
+
+# \font <cs> [=] <name> [at <dimen>] [scaled <number>]
+# Decodes <name> into font properties, stores them (together with a small array
+# of placeholder font parameters, kept as raw values) under 'fontinfo_<cs>', and
+# defines <cs> as a primitive that selects that font.
+DefPrimitive('\font SkipSpaces Token SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
+    my ($stomach, $cs, $name) = @_;
+    my $gullet = $stomach->getGullet;
+    $name = ToString($name);
+    my ($at, $scaled);
+    if ($gullet->readKeyword('at'))     { $at     = $gullet->readDimension; }
+    if ($gullet->readKeyword('scaled')) { $scaled = $gullet->readNumber; }
+    my %props = LaTeXML::Common::Font::decodeFontname($name,
+      $at && $at->ptValue, $scaled && $scaled->valueOf / 1000);
+    if (!keys %props) {    # Failed?
+      Info('unexpected', $name, $stomach, "Unrecognized font name '$name'",
+        "Font switch macro " . ToString($cs) . " will have no effect"); }
+    else {
+      $props{fontname} = $name; }
+    # Scale factor applied to the placeholder parameters below
+    my $f = ($at ? $at->divide(Dimension('1em'))->valueOf
+      : ($scaled ? $scaled->valueOf / 1000
+        : 1));
+    my $fontinfo = \%props;
+#####    $$fontinfo{data} = [map { $_->multiply($f); }
+    $$fontinfo{data} = [map { $_->multiply($f)->valueOf; }
+        Dimension(0), Dimension('0.5em'), Dimension(0),
+        Dimension(0), Dimension('1ex'),   Dimension('1em')];
+    $gullet->skipSpaces;
+    # Store the font info & metrics
+    AssignValue('fontinfo_' . ToString($cs) => $fontinfo);
+    # The font $cs should select the font
+    DefPrimitiveI($cs, undef, undef, font => $fontinfo);
+    return; });
+
+DefMacroI('\fontname', undef, sub { Explode("fontname not implemented"); });
+# Access to the font parameters; Curiously, can be used as scratch arrays (eg LaTeX3)
+# Parameters are numbered from 1 and kept as raw values in the 'data' array of
+# the font's info.  An index below 1 is not a valid parameter: it reads as 0 and
+# assignments to it are ignored, instead of letting $p - 1 count back from the
+# end of the array.
+DefRegister('\fontdimen Number FontToken' => Dimension(0),
+  getter => sub {
+    my ($p, $font) = @_;
+    my $info = lookupFontinfo($font);
+    $p = ToString($p);
+    my $data = $info && $$info{data};
+####    return ($data && $$data[$p - 1]) || Dimension(0); },
+    return Dimension(($data && ($p >= 1) && $$data[$p - 1]) || 0); },
+  setter => sub {
+    my ($value, $scope, $p, $font) = @_;
+    my $info = lookupFontinfo($font);
+    $p = ToString($p);
+    my $data = $info && $$info{data};
+    if ($data && ($p >= 1)) {
+      # Fill any gap between the current end of the array and the new entry with 0
+      my $len = scalar(@$data);
+      if ($len < $p) {
+        for (my $i = $len ; $i < $p ; $i++) {
+          $$data[$i] = 0; } }
+####            $$data[$p - 1] = $value; } }
+      $$data[$p - 1] = $value->valueOf; } }
+);
+
+# Not sure what this should be...
+DefPrimitiveI('\nullfont', undef, undef, font => { family => 'nullfont' });
+
+#======================================================================
+# Italic correction
+#----------------------------------------------------------------------
+# / (italic corr.) c inserts an italic correction.
+DefPrimitiveI('\/', undef, sub { + Box("", undef, undef, T_CS('\/'), + isSpace => 1, name => 'italiccorr', width => Dimension('0em')); }); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Font encoding and FontMaps + +DefMacro('\fontencoding{}', '\@@@fontencoding{#1}'); + +DefPrimitive('\@@@fontencoding{}', sub { + my ($stomach, $encoding) = @_; + $encoding = ToString(Expand($encoding)); + if (LoadFontMap($encoding)) { + MergeFont(encoding => $encoding); } + else { + MergeFont(encoding => 'OT1'); } # Default to OT1 encoding if no map found + return; }); + +DefMacroI('\f@encoding', undef, sub { ExplodeText(LookupValue('font')->getEncoding); }); +DefMacroI('\cf@encoding', undef, sub { ExplodeText(LookupValue('font')->getEncoding); }); + +# Used for SemiVerbatim text +DeclareFontMap('ASCII', + [undef, undef, undef, undef, undef, undef, undef, undef, + undef, undef, undef, undef, undef, undef, undef, undef, + undef, undef, undef, undef, undef, undef, undef, undef, + undef, undef, undef, undef, undef, undef, undef, undef, + " ", '!', "\"", '#', '$', '%', '&', "'", + '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', ':', ';', '<', '=', '>', '?', + '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', '[', "\\", ']', "^", "_", + "`", 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', "{", "|", "}", "~", undef]); + +# Note that several entries are used for accents, and in practice will actually +# be used in something like an m:mover; thus they needn't (shouldn't?) 
be "small" +# There are also some questions about which choices are best +# grave & acute accents (entry 0x12 & 0x13) (often typed using 0x60 & 0x27) +# are probably best using U+60(grave accent) & U+B4(acute accent) +# but could be U+2035 (reversed prime) & U+2032 (prime). (particularly for math?) +# [we do use these for \prime, however!] +# or U+02CB (modifier letter grave accent) & U+02CA (modifier letter acute accent) +# Similarly, hat & tilde (entries 0x5E & 0x7E) +# typed using ^ 0x5E circumflex accent) & ~ 0x7E tilde +# are probably best just sticking with U+5E & U+7E +# but could be U+02C6 (modifier letter circumflex accent) U+02DC (small tilde) +# [Note that generally we're using codepoints characterized as "modifier letter" +# only when no other spacing point is available.] +DeclareFontMap('OT1', + ["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}", + "\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{FB00}", "\x{FB01}", "\x{FB02}", "\x{FB03}", "\x{FB04}", + "\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}", + UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8), + UTF(0xA0) . 
"\x{0335}", '!', "\x{201D}", '#', '$', '%', '&', "\x{2019}", + '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', ':', ';', UTF(0xA1), '=', UTF(0xBF), '?', + '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', '[', "\x{201C}", ']', "^", "\x{02D9}", + "\x{2018}", 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', "\x{2013}", "\x{2014}", "\x{02DD}", UTF(0x7E), UTF(0xA8)]); + +DeclareFontMap('OT1', + ["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}", + "\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{2191}", "\x{2193}", "'", UTF(0xA1), UTF(0xBF), + "\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}", + UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8), + "\x{2423}", '!', "\"", '#', '$', '%', '&', "\x{2019}", + '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', ':', ';', "<", '=', ">", '?', + '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', '[', "\\", ']', "^", "_", + "\x{2018}", 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', "{", "|", "}", "~", UTF(0xA8)], + family => 'typewriter'); + +DeclareFontMap('OML', + [ # \Gamma \Delta \Theta \Lambda \Xi \Pi \Sigma \Upsilon + "\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}", + # \Phi \Psi \Omega alpha beta gamma delta epsilon + "\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{03B1}", "\x{03B2}", "\x{03B3}", "\x{03B4}", "\x{03F5}", + # zeta eta theta iota kappa lambda mu nu + "\x{03B6}", "\x{03B7}", "\x{03B8}", "\x{03B9}", "\x{03BA}", "\x{03BB}", 
"\x{03BC}", "\x{03BD}", + # xi pi rho sigma tau upsilon phi chi + "\x{03BE}", "\x{03C0}", "\x{03C1}", "\x{03C3}", "\x{03C4}", "\x{03C5}", "\x{03D5}", "\x{03C7}", + # psi omega varepsilon vartheta varpi varrho varsigma varphi + "\x{03C8}", "\x{03C9}", "\x{03B5}", "\x{03D1}", "\x{03D6}", "\x{03F1}", "\x{03C2}", "\x{03C6}", + # l.harp.up l.harp.dn r.harp.up r.harp.dn lhook rhook rt.tri lf.tri + "\x{21BC}", "\x{21BD}", "\x{21C0}", "\x{21C1}", "\x{2E26}", "\x{2E27}", "\x{25B7}", "\x{25C1}", + # old style numerals! (no separate codepoints ?) + # 0 1 2 3 4 5 6 7 + '0', '1', '2', '3', '4', '5', '6', '7', + # 8 9 . , < / > star + '8', '9', '.', ',', UTF(0x3C), UTF(0x2F), UTF(0x3E), "\x{22C6}", + # partial A B C D E F G + "\x{2202}", 'A', 'B', 'C', 'D', 'E', 'F', 'G', + # H I J K L M N O + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + # P Q R S T U V W + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + # X Y Z flat natural sharp smile frown + 'X', 'Y', 'Z', "\x{266D}", "\x{266E}", "\x{266F}", "\x{2323}", "\x{2322}", + # ell a b c d e f g + "\x{2113}", 'a', 'b', 'c', 'd', 'e', 'f', 'g', + # h i j k l m n o + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + # p q r s t u v w + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + # x y z dotless i dotless j weier-p arrow acc. inv.breve + 'x', 'y', 'z', "\x{0131}", "j", "\x{2118}", "\x{2192}", UTF(0xA0) . 
"\x{0311}"]); +DeclareFontMap('OMS', + [ #minus dot times ast divide diamond plus-minus minus-plus + "-", "\x{22C5}", UTF(0xD7), "\x{2217}", UTF(0xF7), "\x{22C4}", UTF(0xB1), "\x{2213}", + # oplus ominus otimes oslash odot bigcirc circ bullet + "\x{2295}", "\x{2296}", "\x{2297}", "\x{2298}", "\x{2299}", "\x{25CB}", "\x{2218}", "\x{2219}", + # asymp equiv subseteq supseteq leq geq preceq succeq + "\x{224D}", "\x{2261}", "\x{2286}", "\x{2287}", "\x{2264}", "\x{2265}", "\x{2AAF}", "\x{2AB0}", + # sim approx subset supset ll gg prec succ + "\x{223C}", "\x{2248}", "\x{2282}", "\x{2283}", "\x{226A}", "\x{226B}", "\x{227A}", "\x{227B}", + # leftarrow rightarrow uparrow downarrow leftrightar nearrow searrow simeq + "\x{2190}", "\x{2192}", "\x{2191}", "\x{2193}", "\x{2194}", "\x{2197}", "\x{2198}", "\x{2243}", + # Leftarrow Rightarrow Uparrow Downarrow Leftrightar nwarrow swarrow propto + "\x{21D0}", "\x{21D2}", "\x{21D1}", "\x{21D3}", "\x{21D4}", "\x{2196}", "\x{2199}", "\x{221D}", + # prime infty in ni bigtri.up bigtri.dn slash mapsto + "\x{2032}", "\x{221E}", "\x{2208}", "\x{220B}", "\x{25B3}", "\x{25BD}", "/", "\x{21A6}", + # forall exists not emptyset Re Im top bot + "\x{2200}", "\x{2203}", UTF(0xAC), "\x{2205}", "\x{211C}", "\x{2111}", "\x{22A4}", "\x{22A5}", + # aleph cal A cal B cal C cal D cal E cal F cal G + "\x{2135}", "\x{1D49C}", "\x{212C}", "\x{1D49E}", "\x{1D49F}", "\x{2130}", "\x{2131}", "\x{1D4A2}", + # cal H cal I cal J cal K cal L cal M cal N cal O + "\x{210B}", "\x{2110}", "\x{1D4A5}", "\x{1D4A6}", "\x{2112}", "\x{2133}", "\x{1D4A9}", "\x{1D4AA}", + # cal P cal Q cal R cal S cal T cal U cal V cal W +"\x{1D4AB}", "\x{1D4AC}", "\x{211B}", "\x{1D4AE}", "\x{1D4AF}", "\x{1D4B0}", "\x{1D4B1}", "\x{1D4B2}", + # cal X cal Y cal Z cup cap uplus wedge vee + "\x{1D4B3}", "\x{1D4B4}", "\x{1D4B5}", "\x{222A}", "\x{2229}", "\x{228C}", "\x{2227}", "\x{2228}", + # vdash dashv lfloor rfloor lceil rceil lbrace rbrace + "\x{22A2}", "\x{22A3}", "\x{230A}", "\x{230B}", 
"\x{2308}", "\x{2309}", "{", "}", + # langle rangle | \| updownarrow Updownarrow backslash wr + "\x{27E8}", "\x{27E9}", "|", "\x{2225}", "\x{2195}", "\x{21D5}", UTF(0x5C), "\x{2240}", + # surd amalg nabla int sqcup sqcap sqsubseteq sqsupseteq + "\x{221A}", "\x{2210}", "\x{2207}", "\x{222B}", "\x{2294}", "\x{2293}", "\x{2291}", "\x{2292}", + # section dagger ddagger para clubsuit diam.suit heartsuit spadesuit + UTF(0xA7), "\x{2020}", "\x{2021}", UTF(0xB6), "\x{2663}", "\x{2662}", "\x{2661}", "\x{2660}"]); + +DeclareFontMap('OMX', + [ # ( ) [ ] lfloor rfloor lceil rceil + "(", ")", "[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", + #lbrace rbrace langle rangle | || / \ + "{", "}", "\x{27E8}", "\x{27E9}", "|", "\x{2225}", "/", UTF(0x5C), + "(", ")", "(", ")", "[", "]", "\x{230A}", "\x{230B}", + "\x{2308}", "\x{2309}", "{", "}", "\x{27E8}", "\x{27E9}", "/", UTF(0x5C), + "(", ")", "[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", + "{", "}", "\x{27E8}", "\x{27E9}", "/", UTF(0x5C), "/", UTF(0x5C), + # next two rows are just fragments + # l.up.paren r.up.paren l.up.brak r.up.brak l.bot.brak r.bot.brak l.brak.ext r.brak.ext + "\x{239B}", "\x{239E}", "\x{23A1}", "\x{23A4}", "\x{23A3}", "\x{23A6}", "\x{23A2}", "\x{23A5}", + # l.up.brace r.up.brace l.bot.brace r.bot.brace l.brace.mid r.brace.mid brace.ext v.arrow.ext + "\x{23A7}", "\x{23AB}", "\x{23A9}", "\x{23AD}", "\x{23A8}", "\x{23AC}", "\x{23AA}", "\x{23D0}", + # l.bot.paren r.bot.paren l.paren.ext r.paren.ext + "\x{239D}", "\x{23A0}", "\x{239C}", "\x{239F}", "\x{27E8}", "\x{27E9}", "\x{2294}", "\x{2294}", + "\x{222E}", "\x{222E}", "\x{2299}", "\x{2299}", "\x{2295}", "\x{2295}", "\x{2297}", "\x{2297}", + "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}", + "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}", + "\x{2210}", "\x{2210}", UTF(0x5E), UTF(0x5E), UTF(0x5E), UTF(0x7E), UTF(0x7E), UTF(0x7E), + "[", "]", 
"\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", "{", "}", +# [missing rad frags] double arrow ext. + "\x{23B7}", "\x{23B7}", "\x{23B7}", "\x{23B7}", "\x{23B7}", undef, undef, undef, + # [missing tips for horizontal curly braces] + "\x{2191}", "\x{2193}", undef, undef, undef, undef, "\x{21D1}", "\x{21D3}"]); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_Glue.pool.ltxml b/lib/LaTeXML/Engine/TeX_Glue.pool.ltxml new file mode 100644 index 000000000..fbf41e941 --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Glue.pool.ltxml @@ -0,0 +1,128 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Glue | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Glue Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#====================================================================== +# Inserting, removing glue +#---------------------------------------------------------------------- +# \hskip c inserts horizontal glue in a horizontal or math list. +# \vskip c inserts vertical glue in a vertical list. +# \unskip c removes a glue item from the current list. + +# a candidate for use by \hskip, \hspace, etc... ? 
+# Converts a dimension into a string of Unicode space characters of roughly the
+# same width, measured in ems of the current font's size.
+# Returns nothing for widths under 0.01em (which includes negative widths),
+# a single narrow space for widths under 0.4em, and otherwise a run of
+# no-break spaces.
+sub DimensionToSpaces {
+  my ($dimen) = @_;
+  my $fs  = LookupValue('font')->getSize;    # 1 em
+  my $pt  = $dimen->ptValue;
+  my $ems = $pt / $fs;
+  if    ($ems < 0.01) { return; }
+  elsif ($ems < 0.17) { return "\x{2006}"; }    # 6/em
+  elsif ($ems < 0.30) { return "\x{2005}"; }    # 4/em
+  elsif ($ems < 0.40) { return "\x{2004}"; }    # 3/em (same as nbsp?)
+  else {
+    my $n = int(($ems + 0.3) / 0.333);           # 10pts per space...?
+    return (UTF(0xA0) x $n); } }
+
+# \hskip handled similarly to \kern
+# \hskip can be ignored in certain situations...
+# Directly inside an svg:g the skip becomes a translate() appended to the group's
+# transform; elsewhere inside SVG it is dropped with a warning; otherwise it is
+# absorbed as the space characters chosen by DimensionToSpaces.
+DefConstructor('\hskip Glue', sub {
+    my ($document, $length, %props) = @_;
+    my $parent = $document->getNode;
+    # Debug("HSKIP ".ToString($length)." at ".$document->getNodeQName($parent));
+    if ($document->getNodeQName($parent) eq 'svg:g') {
+      if (my $x = $length->pxValue) {
+        # HACK HACK HACK
+        my $transform = $parent->getAttribute('transform');
+        $parent->setAttribute(transform => ($transform ? $transform . ' ' : '') . "translate($x,0)");
+    } }
+    elsif (inSVG()) {
+      # Reported under 'hskip' (not 'kern') so the log names the command that was lost
+      Warn('unexpected', 'hskip', $document, "Lost hskip in SVG " . ToString($length)); }
+
+    else {
+      # $document->openText(DimensionToSpaces($length), $props{font}); } },
+      $document->absorb(DimensionToSpaces($length)); } },
+  properties => sub {
+    my ($stomach, $length) = @_;
+    (width => $length, isSpace => 1); });
+
+# If this is the right solution...
+# then we also should put the desired spacing on a style attribute?!?!?!
+# A skip of more than 10pt closes the current ltx:para if that is possible,
+# or else inserts an ltx:break where one is allowed; smaller skips add nothing.
+DefConstructor('\vskip Glue', sub {
+    my ($document, $length) = @_;
+    $length = $length->ptValue;
+    if ($length > 10) {    # Or what!?!?!?!
+      if ($document->isCloseable('ltx:para')) {
+        $document->closeElement('ltx:para'); }
+      elsif ($document->isOpenable('ltx:break')) {
+        $document->insertElement('ltx:break'); } }
+    return; },
+  properties => sub { (height => $_[1], isSpace => 1, isVerticalSpace => 1, isBreak => 1); });
+
+## Worrisome, but...
+# Drops boxes from the end of the list being built for as long as the last one is empty.
+DefPrimitiveI('\unskip', undef, sub {
+    while (my $last = $LaTeXML::LIST[-1]) {
+      last unless IsEmpty($last);
+      pop(@LaTeXML::LIST); }
+    return; });
+
+#======================================================================
+# Horizontal skips
+#----------------------------------------------------------------------
+# \hfil d inserts first order infinitely stretchable horizontal glue in a horizontal or math list.
+# \hfill d inserts second order infinitely stretchable horizontal glue in a horizontal or math list.
+# \hfilneg d cancels the stretchability of \hfil.
+# \hss d inserts infinitely stretchable and shrinkable horizontal glue in a horizontal or math list.
+
+DefPrimitiveI('\hss',     undef, undef);
+DefPrimitiveI('\hfilneg', undef, undef);
+
+# \hfil and \hfill both produce a single-space Box flagged as a filling space,
+# differing only in the control sequence recorded on the Box.
+# Alternatives tried for the Box content, kept for reference:
+###    Box("\x{200B}", undef, undef, T_CS('\hfil'), isSpace => 1, isFill => 1); });
+###    Box("\x{200A}", undef, undef, T_CS('\hfil'), isSpace => 1, isFill => 1); });
+for my $fill ('\hfil', '\hfill') {
+  DefPrimitiveI($fill, undef, sub {
+      Box(' ', undef, undef, T_CS($fill), isSpace => 1, isFill => 1); }); }
+
+#======================================================================
+# Vertical skips
+#----------------------------------------------------------------------
+# \vfil d inserts first order infinitely stretchable vertical glue in a vertical list.
+# \vfill d inserts second order infinitely stretchable vertical glue in a vertical list.
+# \vfilneg d cancels the stretchability of \vfil.
+# \vss d insert infinitely stretchable and shrinkable vertical glue in a vertical list.
+
+# Stuff to ignore for now...
# Vertical fill glue: each is declared with no implementation (undef body).
DefPrimitiveI('\vfil', undef, undef);
DefPrimitiveI('\vfill', undef, undef);
DefPrimitiveI('\vss', undef, undef);
DefPrimitiveI('\vfilneg', undef, undef);

#======================================================================
# Lastskip
#----------------------------------------------------------------------
# \lastskip iq is 0.0 pt or the last glue or muglue on the current list.

# Declared read-only with a constant zero glue; nothing in this file updates it.
DefRegister('\lastskip' => Glue(0), readonly => 1);

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml b/lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml
new file mode 100644
index 000000000..f89c7b0dd
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml
@@ -0,0 +1,75 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Hyphenation                                                     | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# | Public domain software, produced as part of work done by the        | #
# | United States Government & not subject to copyright in the US.      | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Hyphenation Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# explicit hyphenation
#----------------------------------------------------------------------
# \- (discretionary hyphen) d inserts a discretionary hyphen.
# \discretionary c specifies a discretionary break in a paragraph.
# \- is declared with no implementation; \discretionary expands to its third
# argument only (the first two arguments are discarded).
DefPrimitiveI('\-', undef, undef);
DefMacro('\discretionary{}{}{}', '#3');    # No hyphenation here!

#======================================================================
# hyphenation tables
#----------------------------------------------------------------------
# \hyphenation c adds words to the hyphenation exception dictionary for the current language.
# \patterns c is used in INITEX to add patterns to the pattern dictionary for the current language.

# Stub definitions ???
# Both read their argument and expand to nothing.
DefMacro('\hyphenation GeneralText', Tokens());
DefMacro('\patterns{}', Tokens());

#======================================================================
# language choice
#----------------------------------------------------------------------
# \setlanguage c inserts a language whatsit in restricted horizontal mode.
# \language pi selects a language to use with hyphenation and \patterns.

DefRegister('\language' => Number(0));
# Reads a number; declared with no implementation.
DefPrimitive('\setlanguage Number', undef);

#======================================================================
# codepoints used for hyphenation
#----------------------------------------------------------------------
# \hyphenchar iq holds the current hyphen character used with hyphenation.
# \defaulthyphenchar pi is the \hyphenchar value to use when a new font is loaded.
# \lefthyphenmin pi is the minimum number of characters that must appear before the first hyphen in an hyphenated word.
# \righthyphenmin pi is the minimum number of characters that must appear after the last hyphen in an hyphenated word.
# \uchyph pi prevents hyphenation of uppercase words unless this is positive.

# \hyphenchar<font>: per-font register backed by the font-info record that
# lookupFontinfo() returns for the font token.
#  getter: the stored hyphenchar if there is a record with a true value,
#          otherwise the code of '-'.
#  setter: stores the value in the record; silently does nothing when
#          lookupFontinfo() finds no record.  $scope is not used.
DefRegister('\hyphenchar FontToken' => Number(ord('-')),
  getter => sub {
    my ($font) = @_;
    my $info = lookupFontinfo($font);
    return ($info && $$info{hyphenchar}) || Number(ord('-')); },
  setter => sub {
    my ($value, $scope, $font) = @_;
    if (my $info = lookupFontinfo($font)) {
      $$info{hyphenchar} = $value; } }
);

DefRegister('\defaulthyphenchar' => Number(ord('-')));
DefRegister('\lefthyphenmin' => Number(0));
DefRegister('\righthyphenmin' => Number(0));
DefRegister('\uchyph' => Number(1));

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Inserts.pool.ltxml b/lib/LaTeXML/Engine/TeX_Inserts.pool.ltxml
new file mode 100644
index 000000000..85f0f7ecc
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Inserts.pool.ltxml
@@ -0,0 +1,60 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Inserts                                                         | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# | Public domain software, produced as part of work done by the        | #
# | United States Government & not subject to copyright in the US.      | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Inserts Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# Inserting material
#----------------------------------------------------------------------
# \insert c places material into an insertions class.
# \insert<8bit>{}
DefPrimitive('\insert Number', undef);    # Just let the insertion get processed(?)

#======================================================================
# Splitting a box
#----------------------------------------------------------------------
# \vsplit c removes a specified amount of material from a box register .
# \splitbotmark c is the mark text of the last mark in the most recent \vsplit operation .
# \splitfirstmark c is the mark text of the first mark in the most recent \vsplit operation .

# \vsplit <number> to <dimen>: for now this behaves like \box, returning the
# whole content of the numbered box register (or an empty List when the
# register holds nothing).  The target dimension is read but not used.
# NOTE(review): adjustBoxColor is called even when the register is empty;
# presumably it tolerates undef -- confirm against its definition.
DefPrimitive('\vsplit Number Match:to Dimension', sub {
    my $register = 'box' . $_[1]->valueOf;
    my $contents = LookupValue($register);
    adjustBoxColor($contents);
    return $contents || List(); });

# Split marks always expand to nothing.
DefMacroI('\splitfirstmark', undef, Tokens());
DefMacroI('\splitbotmark', undef, Tokens());

#======================================================================
# Insertion parameters
#----------------------------------------------------------------------
# \insertpenalties iq is a quantity used by TeX in two different ways.
# \splitmaxdepth pd is the maximum depth of boxes created by \vsplit.
# \splittopskip pg is special glue placed inside the box created by \vsplit.
# \holdinginserts pi is positive if insertions should remain dormant when \output is called.

# Insertion parameters: plain registers with their initial values.
DefRegister('\insertpenalties' => Number(0));
DefRegister('\splitmaxdepth' => Dimension('16383.99999pt'));
DefRegister('\splittopskip' => Glue('10pt'));
DefRegister('\holdinginserts' => Number(0));

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Job.pool.ltxml b/lib/LaTeXML/Engine/TeX_Job.pool.ltxml
new file mode 100644
index 000000000..99b178dab
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Job.pool.ltxml
@@ -0,0 +1,154 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Job                                                             | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# | Public domain software, produced as part of work done by the        | #
# | United States Government & not subject to copyright in the US.      | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Job Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# The current Job
#----------------------------------------------------------------------
# \jobname c is the underlying file name for a job.
# \time pi holds the current time in minutes after midnight (0-1439).
# \day pi holds the current day of the month (1-31).
# \month pi holds the current month of the year (1-12).
# \year pi holds the current year (e.g., 2000).
# \mag pi holds the magnification ratio times 1000.

DefMacroI('\jobname', undef, Tokens());    # Set to the filename by initialization
DefRegister('\time' => Number(0));
DefRegister('\day' => Number(0));
DefRegister('\month' => Number(0));
DefRegister('\year' => Number(0));
DefRegister('\mag' => Number(1000));

# Initialise the date registers once, when this file is loaded.
# If SOURCE_DATE_EPOCH is defined it is used (as UTC); otherwise the local
# wall-clock time is used.
# This may mess up Daemon state?
{ my ($min, $hour, $mday, $mon, $year)
    = (defined $ENV{SOURCE_DATE_EPOCH} ? gmtime($ENV{SOURCE_DATE_EPOCH}) : localtime())[1 .. 5];
  AssignValue('\day' => Number($mday), 'global');
  AssignValue('\month' => Number($mon + 1), 'global');             # tm month is 0-based
  AssignValue('\year' => Number(1900 + $year), 'global');          # tm year counts from 1900
  AssignValue('\time' => Number(60 * $hour + $min), 'global'); }   # minutes after midnight

our @MonthNames = (qw( January February March April May June
    July August September October November December));

# Return a string for today's date, e.g. "January 1, 2000",
# built from the current \month, \day and \year registers.
sub today {
  my ($month, $day, $year) = map { LookupValue($_)->valueOf } ('\month', '\day', '\year');
  return $MonthNames[$month - 1] . " " . $day . ', ' . $year; }

#======================================================================
# Random Job related things
#----------------------------------------------------------------------
# \end c terminates the current job.
# \everyjob pt holds tokens which are inserted at the start of every job.
# \deadcycles iq is the number of times \output was called since the last \shipout.
# \maxdeadcycles pi is the maximum allowed value of \deadcycles before an error is generated.

# \end flushes the gullet and contributes nothing to the output.
DefPrimitiveI('\end', undef, sub { $_[0]->getGullet->flush; return; });
DefRegister('\everyjob' => Tokens());
DefRegister('\deadcycles' => Number(0));
DefRegister('\maxdeadcycles' => Number(0));

#======================================================================
# Dumping
#----------------------------------------------------------------------
# \dump c outputs a format file in INITEX; otherwise it is equivalent to \end.

# \dump is not supported: warn and expand to nothing.
DefMacro('\dump', sub {
    Warn('unexpected', 'dump', $_[0], "Do not know how to \\dump yet, sorry");
    return; });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
####$LaTeXML::DEBUG{compiled} = 1 unless $LaTeXML::DEBUG{compiling} || $LaTeXML::DEBUG{nocompiled};

# Load a previously dumped State for $file and install it globally.
# The dump is a Perl file, evaluated with do(), that is expected to leave a
# hash reference in the package variable $FREEZE: table name => { key => value }.
# Installation happens in two passes:
#  1. every entry not already defined in $STATE is assigned, except entries of
#     the 'meaning' table whose value is not a reference;
#  2. those deferred non-reference 'meaning' entries name another control
#     sequence (a \let); its current meaning is looked up and assigned.
# If the dump cannot be read or compiled, or yields no table, the failure is
# reported via Debug and nothing is installed.  Returns nothing.
sub loadDump {
  my ($file) = @_;
  my $stage = "Loading compiled $file";
  ProgressSpinup($stage);
  $file = ToString($file);
  my (undef, $name, $type) = pathname_split($file);    # directory part is not used
  # NOTE(review): the dump is read from a fixed location under /tmp and then
  # *executed* by do(); on a shared machine another user could plant that file.
  # The location presumably has to match whatever writes the dump -- confirm
  # before changing it.
  my $path = '/tmp/' . $name . '.' . $type . '.ltxmlc';
  Debug("Loading $file from $path");
  # DDS version
  our $FREEZE = undef;
  my $istage = "reading";
  ProgressSpinup($istage);
  my $result = do($path);
  # do() reports compile/runtime errors in $@ but an unreadable file in $!.
  my $failure = $@ || $!;
  ProgressSpindown($istage);
  if (!$result) { Debug("Load failed with $failure"); }
  if (ref $FREEZE ne 'HASH') {    # nothing usable was loaded; do not install anything
    Debug("No dumped state found in $path");
    ProgressSpindown($stage);
    return; }
  my $nsym = 0;
  my $nlet = 0;
  Debug("Got $FREEZE");
  $istage = "installing";
  ProgressSpinup($istage);

  foreach my $table_name (sort keys %$FREEZE) {
    my $data = $$FREEZE{$table_name};
    foreach my $key (sort keys %$data) {
      # Risky? And Risky if NOT!!! (loops)
      if (defined $$STATE{$table_name}{$key}[0]) {    # SKIP if already defined (by binding?)
            # Debug("SKIPPING latex.ltx redefinition of $key in $table_name");
        next; }
      my $value = $$data{$key};
      next if ($table_name eq 'meaning') && !ref $value;    # a \let; handled in the second pass
      $nsym++;
      LaTeXML::Core::State::assign_internal($STATE, $table_name, $key, $value, 'global'); } }
  ProgressSpindown($istage);
  $istage = "\\lets";
  ProgressSpinup($istage);
  # Deferred lookup of \let symbols
  foreach my $table_name (qw(meaning)) {
    my $data = $$FREEZE{$table_name} || {};    # the dump may contain no 'meaning' table
    foreach my $key (sort keys %$data) {
      my $value = $$data{$key};
      next if !$value || ref $value;
      $nsym++;
      $nlet++;
      my $truevalue = $STATE->lookupMeaning(T_CS($value));
      Debug("Missing binding '$key' => '$value'") unless $truevalue;
      # Assigned even when the target is missing, as before.
      LaTeXML::Core::State::assign_internal($STATE, $table_name, $key, $truevalue, 'global'); } }
  ProgressSpindown($istage);
  Debug("Retrieved $nsym ($nlet \\lets) from $path!");
  ProgressSpindown($stage);
  return; }

# Load the eTeX and pdfTeX pools and then, only when $LaTeXML::DEBUG{compiled}
# is set and $LaTeXML::DEBUG{compiling} is not, install the dumped latex.ltx
# state and re-establish a few definitions on top of it.  Returns nothing.
sub LOAD_LATEX {
  LoadPool('eTeX');      # unless.... ?
  LoadPool('pdfTeX');    # unless.... ?
  if (!$LaTeXML::DEBUG{compiling}
    && $LaTeXML::DEBUG{compiled}) {
    # Try this after (most) primitives & parameter types have been defined
    Let('\@@input', '\input');    # Save TeX's version.
    DefMacro('\try@load@fontshape', '', locked => 1);
    DefMacro('\define@newfont',     '', locked => 1);
    ##loadCompiled('latex.ltx');
    loadDump('latex.ltx');
    # Then restore some things (not lockable?)
    # Some are code compatibility issues, some are gratuitous test case differences
    $STATE->assignValue(font     => LaTeXML::Common::Font->textDefault(), 'global');
    $STATE->assignValue(mathfont => LaTeXML::Common::Font->mathDefault(), 'global');
    DefMacroI('\f@encoding',  undef, sub { ExplodeText(LookupValue('font')->getEncoding); });
    DefMacroI('\cf@encoding', undef, sub { ExplodeText(LookupValue('font')->getEncoding); });

    DefMacro('\hline', '\noalign{\@@alignment@hline}');
    DefMacroI('\ldots', undef, '\lx@ldots');    # for tests?

    Let('\par', '\lx@normal@par');
    # LoadPool('LaTeX');
  }
  return; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Kern.pool.ltxml b/lib/LaTeXML/Engine/TeX_Kern.pool.ltxml
new file mode 100644
index 000000000..b25a16739
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Kern.pool.ltxml
@@ -0,0 +1,100 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Kern                                                            | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# | Public domain software, produced as part of work done by the        | #
# | United States Government & not subject to copyright in the US.      | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Kern Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# Basic kerning
#----------------------------------------------------------------------
# \kern c adds a kern item to the current list.
# \unkern c removes a kern from the current list.
# \lastkern iq is 0.0 pt or the last kern on the current list.

# \kern is heavily used by xy.
# Completely HACK version for the moment
# Note that \kern should add vertical spacing in vertical modes!
# \kern <dimen>: inside an svg:g element a non-zero kern is appended to the
# element's transform attribute as a horizontal translate(); elsewhere inside
# SVG it is dropped with a warning; outside SVG it produces nothing.
DefConstructor('\kern Dimension', sub {
    my ($document, $length) = @_;
    my $node = $document->getNode;
    if ($document->getNodeQName($node) eq 'svg:g') {
      my $dx = $length->pxValue;
      if ($dx) {
        # HACK HACK HACK
        my $previous = $node->getAttribute('transform');
        my $prefix   = ($previous ? $previous . ' ' : '');
        $node->setAttribute(transform => $prefix . "translate($dx,0)"); } }
    elsif (inSVG()) {
      Warn('unexpected', 'kern', $document, "Lost kern in SVG " . ToString($length)); }
});
DefPrimitiveI('\unkern', undef, undef);
DefRegister('\lastkern' => Dimension(0), readonly => 1);

#======================================================================
# Moving Vertically
#----------------------------------------------------------------------
# \raise c shifts a box up and appends it to the current horizontal or math list.
# \lower c shifts a box down and appends it to the current horizontal or math list.
# \lower <dimen> <box>
# \raise <dimen> <box>
# But <box> apparently must really explicitly be an \hbox, \vbox or \vtop (?)
# OR something that expands into one!!

# Size of $box after being raised by $y: the width is unchanged, the height
# grows and the depth shrinks by $y, and neither is allowed to go below zero.
# Returns (width, height, depth).
sub raisedSizer {
  my ($box, $y) = @_;
  my ($width, $height, $depth) = $box->getSize;
  my $zero = Dimension(0);
  return ($width,
    $height->add($y)->larger($zero),
    $depth->subtract($y)->larger($zero)); }

# NOTE(review): in the \lower and \raise templates below both branches of the
# ?&inSVG() conditional are just "#2", and the y/transform properties set in
# afterDigest are never referenced.  This looks as if XML markup was stripped
# from the template strings somewhere -- confirm against the upstream file.
DefConstructor('\lower Dimension MoveableBox',
  "?&inSVG()(#2)"
    . "(#2)",
  sizer => sub { raisedSizer($_[0]->getArg(2), $_[0]->getArg(1)->negate); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $y   = $whatsit->getArg(1)->multiply(-1);    # lowering is a negative raise
    my $ypx = $y->pxValue;
    $whatsit->setProperties(y => $y, transform => ($ypx ? "translate(0,$ypx)" : undef)); });

DefConstructor('\raise Dimension MoveableBox',
  "?&inSVG()(#2)"
    . "(#2)",
  sizer => sub { raisedSizer($_[0]->getArg(2), $_[0]->getArg(1)); },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $y   = $whatsit->getArg(1);
    my $ypx = $y->pxValue;
    $whatsit->setProperties(y => $y, transform => ($ypx ? "translate(0,$ypx)" : undef)); });

#======================================================================
# Moving Horizontally
#----------------------------------------------------------------------
# \moveleft c shifts a box left and appends it to the current vertical list.
# \moveright c shifts a box right and appends it to the current vertical list.
# \moveleft <dimen> <box>, \moveright <dimen> <box>
# NOTE(review): as above, the templates are a bare "#2" and the x property is
# never referenced -- markup was presumably lost; confirm against upstream.
DefConstructor('\moveleft Dimension MoveableBox',
  "#2",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    $whatsit->setProperty(x => $whatsit->getArg(1)->multiply(-1)); });
DefConstructor('\moveright Dimension MoveableBox',
  "#2",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    $whatsit->setProperty(x => $whatsit->getArg(1)); });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Logic.pool.ltxml b/lib/LaTeXML/Engine/TeX_Logic.pool.ltxml
new file mode 100644
index 000000000..ad8415008
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Logic.pool.ltxml
@@ -0,0 +1,139 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Logic                                                           | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# | Public domain software, produced as part of work done by the        | #
# | United States Government & not subject to copyright in the US.      | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Logic Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#======================================================================
# Basic logic
#----------------------------------------------------------------------
# \iftrue c is a conditional which is always true.
# \iffalse c is a conditional which is always false.
# \else c begins the false part of a conditional.
# \fi c is the concluding command of a conditional.
# \or c separates cases in an \ifcase conditional.

DefConditionalI('\iftrue', undef, sub { 1; });
DefConditionalI('\iffalse', undef, sub { 0; });
DefConditional('\else', undef);    # BUILT-IN to Definition
DefConditional('\or', undef);      # BUILT-IN to Definition
DefConditional('\fi', undef);      # BUILT-IN to Definition

#======================================================================
# Token testing
#----------------------------------------------------------------------
# \if c tests if two tokens have the same character codes (i.e., values 0-256).
# \ifx c tests if two tokens are the same.
# \ifcat c tests if two tokens have the same category codes (i.e., values 0-16).

# Parameter type for \if and \ifcat: reads one token via readXToken(0, 1).
# When nothing can be read, reports an error and substitutes \@empty.
DefParameterType('ExpandedIfToken', sub {
    my ($gullet) = @_;
    my $token = $gullet->readXToken(0, 1);
    return $token if $token;
    Error('expected', 'ExpandedIfToken', $gullet,
      "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty");
    return T_CS('\@empty'); });

DefConditional('\if ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCharcode == $_[2]->getCharcode; });
DefConditional('\ifx Token Token', sub { XEquals($_[1], $_[2]); });
DefConditional('\ifcat ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCatcode == $_[2]->getCatcode; });

#======================================================================
# Number testing
#----------------------------------------------------------------------
# \ifnum c compares two integers.
# \ifodd c tests for an odd integer.
# \ifcase c begins a multi-case conditional.

# Numeric comparison shared by \ifnum and \ifdim.
# Either operand may be a plain number or an object providing ->valueOf.
# The relation token must be '<', '=' or '>' (catcode OTHER), or one of the
# control sequences \@@< and \@@>; anything else is reported as an error and
# yields nothing (i.e. false).
sub compare {
  my ($left, $relation, $right) = @_;
  $left  = $left->valueOf  if ref $left;
  $right = $right->valueOf if ref $right;
  return $left < $right
    if $relation->equals(T_OTHER('<')) || $relation->equals(T_CS('\@@<'));
  return $left == $right
    if $relation->equals(T_OTHER('='));
  return $left > $right
    if $relation->equals(T_OTHER('>')) || $relation->equals(T_CS('\@@>'));
  Error('expected', '', $STATE->getStomach->getGullet,
    "Expected a relational token for comparision", "Got " . Stringify($relation));
  return; }

DefConditional('\ifnum Number Token Number', sub { compare($_[1], $_[2], $_[3]); });
DefConditional('\ifodd Number', sub { $_[1]->valueOf % 2; });
DefConditional('\ifcase Number', undef);    # BUILT-IN to Definition
#======================================================================
# Dimension testing
#----------------------------------------------------------------------
# \ifdim c compares two dimensions.
DefConditional('\ifdim Dimension Token Dimension', sub { compare($_[1], $_[2], $_[3]); });

#======================================================================
# Box testing
#----------------------------------------------------------------------
# \ifhbox c is true if a box register contains an \hbox.
# \ifvbox c is true if a box register contains a \vbox.
# \ifvoid c is true if a box register is void.

# Kinda rough: We don't really keep track of modes as carefully as TeX does.
# We'll assume that a box is horizontal if there's anything at all,
# but it's not a vbox (!?!?)
# Classify the content of box register $boxnum:
#   ''     when the register holds nothing,
#   'vbox' when it holds a Whatsit whose definition is that of \vbox,
#   'hbox' for anything else.
sub classify_box {
  my ($boxnum) = @_;
  my $box = LookupValue('box' . $boxnum->valueOf);
  return '' unless $box;
  my $is_vbox = $box->isa('LaTeXML::Core::Whatsit')
    && ($box->getDefinition eq LookupDefinition(T_CS('\vbox')));
  return ($is_vbox ? 'vbox' : 'hbox'); }

DefConditional('\ifvoid Number', sub { !classify_box($_[1]); });
DefConditional('\ifhbox Number', sub { classify_box($_[1]) eq 'hbox'; });
DefConditional('\ifvbox Number', sub { classify_box($_[1]) eq 'vbox'; });

#======================================================================
# Mode testing
#----------------------------------------------------------------------
# \ifhmode c is true if TeX is in horizontal or restricted horizontal mode.
# \ifinner c is true if TeX is in internal vertical, restricted horizontal, or nondisplay math mode.
# \ifmmode c is true if TeX is in math or display math mode.
# \ifvmode c is true if TeX is in vertical or internal vertical mode.

# NOTE: We don't KNOW if we're in vertical, horizontal or inner mode!!!!!!!
# so these three are always false.
DefConditionalI('\ifvmode', undef, sub { 0; });
DefConditionalI('\ifhmode', undef, sub { 0; });
DefConditionalI('\ifinner', undef, sub { 0; });

DefConditionalI('\ifmmode', undef, sub { LookupValue('IN_MATH'); });

#======================================================================
# I/O testing
#----------------------------------------------------------------------
# \ifeof c tests for the end of a file .

# True when the input stream registered under 'input_file:<port>' has its
# at_eof flag set, and also when no stream is registered for that port.
DefConditional('\ifeof Number', sub {
    my ($gullet, $port) = @_;
    my $mouth = LookupValue('input_file:' . ToString($port));
    return ($mouth ? $$mouth{at_eof} : 1); });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Macro.pool.ltxml b/lib/LaTeXML/Engine/TeX_Macro.pool.ltxml
new file mode 100644
index 000000000..04da95db0
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Macro.pool.ltxml
@@ -0,0 +1,282 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Macro                                                           | #
# | Core TeX Implementation for LaTeXML                                 | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# | Public domain software, produced as part of work done by the        | #
# | United States Government & not subject to copyright in the US.      | #
# |---------------------------------------------------------------------| #
# | Bruce Miller                                                #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Macro Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# Basics
#----------------------------------------------------------------------
# \begingroup c starts a group that must be ended by \endgroup.
# \endgroup c ends a group that was begun by \begingroup.
# \relax c is a control sequence which typesets nothing.
# \afterassignment c saves a token and inserts it after the next assignment.
# \aftergroup c saves a token and inserts it after the current group is complete.

DefPrimitive('\begingroup', sub { $_[0]->begingroup; });
DefPrimitive('\endgroup', sub { $_[0]->endgroup; });

# This makes \relax disappear completely after digestion
# (which seems most TeX like).
DefPrimitive('\relax', sub { (); });
### However, this keeps a box, so it can appear in UnTeX
### DefPrimitive('\relax',undef);
## But if you do that, you've got to watch out since it usually
### shouldn't be a box; See the isRelax code in handleScripts, below

# NON-STANDARD: Internal token produced by Gullet in response to \dont_expand;
# Acts like \relax, but isn't equal to it.
DefPrimitiveI('\special_relax', undef, sub { (); });

# \afterassignment saves ONE token (globally!) to execute after the next assignment
DefPrimitive('\afterassignment Token', sub { AssignValue(afterAssignment => $_[1], 'global'); });
# \aftergroup saves ALL tokens (from repeated calls) to be executed IN ORDER after the next egroup or }
DefPrimitive('\aftergroup Token', sub { PushValue(afterGroup => $_[1]); });

#======================================================================
# CSName
#----------------------------------------------------------------------
# \csname c forms a control sequence name from the characters making up a collection of tokens.
# \endcsname c is used with \csname to make a control sequence name.

# The CSName parameter type delegates to the gullet's readCSName.
DefParameterType('CSName', sub { $_[0]->readCSName; });

# \csname ... \endcsname expands to the constructed control sequence;
# if that control sequence has no meaning yet, it is first given the meaning of \relax.
DefMacro('\csname CSName', sub {
    my ($gullet, $cs) = @_;
    if (!defined LookupMeaning($cs)) {
      $STATE->assignMeaning($cs, $STATE->lookupMeaning(T_CS('\relax'))); }
    return $cs; });

# A stray \endcsname (one not consumed by \csname) is an error; it produces nothing.
DefPrimitive('\endcsname', sub {
    my ($stomach) = @_;
    my $context = $stomach->getGullet->showUnexpected;
    Error('unexpected', '\endcsname', $stomach, "Extra \\endcsname", $context);
    return; });

#======================================================================
# Definition flags
#----------------------------------------------------------------------
# \global c is an assignment prefix which makes the assignment transcend its group.
# \long c is a prefix for definitions which require multi-paragraph arguments.
# \outer c is a prefix for a definition which restricts where the definition may be used.
# \globaldefs pi if positive, all assignments are global; if negative, \global is ignored.

# See Stomach.pm
# Each prefix only records itself in the State (setPrefix) and produces nothing.
DefPrimitiveI('\global', undef, sub { $STATE->setPrefix('global'); return; }, isPrefix => 1);
DefPrimitiveI('\long', undef, sub { $STATE->setPrefix('long'); return; }, isPrefix => 1);
DefPrimitiveI('\outer', undef, sub { $STATE->setPrefix('outer'); return; }, isPrefix => 1);

DefRegister('\globaldefs' => Number(0));

#======================================================================
# Definitions
#----------------------------------------------------------------------
# \def c defines a macro.
# \edef c is similar to \def, except control sequences in the replacement
# text are expanded when the definition is made.
# \gdef d is equivalent to `\global\def'.
# \xdef d is equivalent to `\global\edef'.

# Convert the parameter text of a \def-style definition into a
# LaTeXML::Core::Parameters object.
#   $cs     : the control sequence being defined (used only in the error message).
#   $params : the tokens between the control sequence and the opening brace.
# The token list is scanned left to right and produces, in order:
#   Match:<text>   literal text that comes before any parameter marker;
#   Until:<text>   a parameter followed by delimiting text;
#   UntilBrace     a parameter followed by a single trailing parameter character;
#   Plain          a parameter with nothing special after it;
#   RequireBrace   a parameter character that is the last token at the point
#                  where a parameter is expected.
# Parameters must be numbered consecutively from 1; otherwise this is Fatal.
# Returns undef when the parameter text is empty.
sub parseDefParameters {
  my ($cs, $params) = @_;
  my @tokens = $params->packParameters->unlist;
  # Now, recognize parameters and delimiters.
  my @params = ();
  my $n = 0;    # number of the most recently seen parameter
  while (@tokens) {
    my $t = shift(@tokens);
    my $cc = $$t[1];    # catcode, read directly from the token's second slot
    if ($cc == CC_PARAM || $cc == CC_ARG) {
      if ($cc == CC_PARAM) {
        if (!@tokens) {    # Special case: lone # NOT following a numbered parameter
                           # Note that we require a { to appear next, but do NOT read it!
          push(@params, LaTeXML::Core::Parameter->new('RequireBrace', 'RequireBrace'));
          last; }
        else {
          # the token after the parameter character carries the parameter number
          $n++; $t = shift(@tokens); } }
      else {    # CC_ARG case, keep looking at this token
        $n++; }
      # The token's first slot must be the expected parameter number.
      Fatal('expected', "#$n", $STATE->getStomach,
        "Parameters for '" . ToString($cs) . "' not in order in " . ToString($params))
        unless (defined $t) && ($n == int($$t[0]));
      # Check for delimiting text following the parameter #n
      my @delim = ();
      my $pc = -1;    # catcode of the previous delimiter token (-1: none yet)
    INNER_DELIM: while (@tokens) {
        my $inner_cc = $tokens[0]->getCatcode;
        last INNER_DELIM if $inner_cc == CC_PARAM || $inner_cc == CC_ARG;
        my $d = shift(@tokens);
        push(@delim, $d) unless $pc == CC_SPACE && $inner_cc == CC_SPACE;    # BUT collapse whitespace!
        $pc = $inner_cc; }
      # Found text that marks the end of the parameter
      if (@delim) {
        my $expected = Tokens(@delim);
        push(@params, LaTeXML::Core::Parameter->new('Until',
            'Until:' . ToString($expected),
            extra => [$expected])); }
      # Special case: trailing sole # => delimited by next opening brace.
      elsif ((scalar(@tokens) == 1) && ($tokens[0]->getCatcode == CC_PARAM)) {
        shift(@tokens);
        push(@params, LaTeXML::Core::Parameter->new('UntilBrace', 'UntilBrace')); }
      # Nothing? Just a plain parameter.
      else {
        push(@params, LaTeXML::Core::Parameter->new('Plain', '{}')); } }
    else {
      # Initial delimiting text is required.
      my @lit = ($t);
      my $lit_cc;
      # gather everything up to (not including) the next parameter marker
      while (@tokens && ($lit_cc = $tokens[0]->getCatcode) &&
        ($lit_cc != CC_PARAM && $lit_cc != CC_ARG)) {
        push(@lit, shift(@tokens)); }
      my $expected = Tokens(@lit);
      push(@params, LaTeXML::Core::Parameter->new('Match',
          'Match:' . ToString($expected),
          extra => [$expected],
          novalue => 1)); }
  }
  return (@params ? LaTeXML::Core::Parameters->new(@params) : undef); }

# Shared implementation of \def, \gdef, \edef and \xdef.
#   $globally : true to install the definition globally.
#   $gullet   : passed as the "where" of error messages.
#               NOTE(review): the callers below are DefPrimitive handlers that
#               forward their own @_, so this is presumably the stomach rather
#               than a gullet -- confirm.
#   $cs       : control sequence to define.
#   $params   : raw parameter text (parsed by parseDefParameters).
#   $body     : replacement text, already read by the DefPlain or DefExpanded
#               parameter type.
# Reports an error and defines nothing when $cs or $params is missing.
sub do_def {
  my ($globally, $gullet, $cs, $params, $body) = @_;
  if (!$cs) {
    Error('expected', 'Token', $gullet, "Expected definition token");
    return; }
  elsif (!$params) {
    Error('misdefined', $cs, $gullet, "Expected definition parameter list");
    return; }
  $params = parseDefParameters($cs, $params);
  # nopackParameters=>1 : leave preparing the ##, #1-#9 tokens to the Def parameter types
  # to avoid carrying around the masks around and keep core code simple
  $STATE->installDefinition(LaTeXML::Core::Definition::Expandable->new($cs, $params, $body,
      nopackParameters => 1),
    ($globally ? 'global' : undef));
  AfterAssignment();
  return; }

# The four definition primitives differ only in scope (first argument of do_def)
# and in the parameter type that reads the body (DefPlain vs. DefExpanded).
DefPrimitive('\def SkipSpaces Token UntilBrace DefPlain', sub { do_def(0, @_); }, locked => 1);
DefPrimitive('\gdef SkipSpaces Token UntilBrace DefPlain', sub { do_def(1, @_); }, locked => 1);
DefPrimitive('\edef SkipSpaces Token UntilBrace DefExpanded', sub { do_def(0, @_); }, locked => 1);
DefPrimitive('\xdef SkipSpaces Token UntilBrace DefExpanded', sub { do_def(1, @_); }, locked => 1);

#======================================================================
# Copying definitions
#----------------------------------------------------------------------
# \let c gives a control sequence a token's current meaning.
# \futurelet c `\futurelet <cs> <token1> <token2>' is equivalent to `\let <cs> = <token2> <token1> <token2>'.
DefPrimitive('\let SkipSpaces Token SkipSpaces SkipMatch:= Skip1Space Token', sub {
    my ($stomach, $token1, $token2) = @_;
    Let($token1, $token2);
    return; });

DefPrimitive('\futurelet Token Token Token', sub {
    my ($stomach, $cs, $token1, $token2) = @_;
    $stomach->getGullet->unread($token1, $token2);    # NOT expandable, but puts tokens back
    Let($cs, $token2);
    return; });

#======================================================================
# Expansion control
#----------------------------------------------------------------------
# \expandafter c `\expandafter <token1> <token2>' is equivalent to `<token1> expansion of <token2>'.
# \noexpand c prevents the expansion of the following token.

# The token \expandafter itself; used below to recognize chains of \expandafter.
use constant T_expandafter => T_CS('\expandafter');

# \expandafter <tok> <xtok>
#   Expands <xtok> exactly once and returns <tok> followed by that expansion.
#   A chain such as \expandafter<a>\expandafter<b><x> is handled iteratively:
#   every token that is being "jumped over" is collected in @skipped, and only
#   the final non-\expandafter token is expanded.
# Arguments (as delivered by the 'Token Token' parameter list):
#   $gullet : the gullet tokens are read from
#   $tok    : the token to be jumped over
#   $xtok   : the token to be expanded once
# Returns the list of tokens that replaces the macro call.
DefMacro('\expandafter Token Token', sub {
    no warnings 'recursion';
    my ($gullet, $tok, $xtok) = @_;
    my @skipped = ($tok);
    # Guard with defined(): readToken yields nothing once the input is exhausted,
    # and calling a method on that would die.
    while (defined($xtok) && $xtok->defined_as(T_expandafter)) {
      push(@skipped, $gullet->readToken);
      $xtok = $gullet->readToken; }
    # Input ran out before a token to expand was found:
    # there is nothing to expand, so just hand back whatever was actually read.
    if (!defined $xtok) {
      return grep { defined $_ } @skipped; }
    if (defined(my $defn = $STATE->lookupExpandable($xtok))) {
      my @x = ();
      {
        local $LaTeXML::CURRENT_TOKEN = $xtok;
        @x = $defn->invoke($gullet, 1);    # Expand $xtok ONCE ONLY!
      }
      return (@skipped, @x); }
    elsif (!$STATE->lookupMeaning($xtok)) {
      # Undefined token is an error, as expansion is expected.
      # BUT The unknown token is NOT consumed, (see TeX B book, item 367)
      # since probably in a real TeX run it would have been defined.
      $STATE->generateErrorStub($gullet, $xtok);
      return (@skipped, $xtok); }
    else {
      # Defined, but not expandable: put everything back unchanged.
      return (@skipped, $xtok); } });

# If next token is expandable, prefix it with the internal marker \dont_expand
# That token is never defined, explicitly handled in Gullet & should never escape the Gullet
DefMacroI('\noexpand', undef, sub {
    my $token = $_[0]->readToken;
    # Missing token likely the result of "{\noexpand}" for which TeX would be unperturbed
    return ($token
      ?
((($$token[1] == CC_CS) || ($$token[1] == CC_ACTIVE)) && $STATE->isDontExpandable($token) + ? (T_CS('\dont_expand'), $token) + : $token) + : ()); }); + +# NON-STANDARD: +DefPrimitiveI('\dont_expand', undef, sub { + Error('misdefined', '\dont_expand', $_[0], + "The token \\dont_expand should never reach Stomach!"); }); + +#====================================================================== +# \the +#---------------------------------------------------------------------- +# \the c returns character tokens for an internal quantity's or parameter's current value. + +# \the +DefMacro('\the Register', sub { + my ($gullet, $variable) = @_; + return () unless $variable; + my ($defn, @args) = @$variable; + if (!$defn || $defn eq 'missing') { + Error('expected', "", $gullet, "a register was expected to be here"); return (); } + my $type = $defn->isRegister; + if (!$type) { + my $cs = ToString($defn->getCS); + if ($cs eq '\font') { # what to do here? + return T_CS('\tenrm'); } + Error('unexpected', "\\the$cs", $gullet, "You can't use $cs after \\the"); return (); } + my $value = $defn->valueOf(@args); + ## In all cases, these should be OTHER, except for space. (!?) + my @tokens = ($type eq 'Tokens' ? ($value ? $value->unlist : ()) : Explode(ToString($value))); + return @tokens; }); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_Marks.pool.ltxml b/lib/LaTeXML/Engine/TeX_Marks.pool.ltxml new file mode 100644 index 000000000..978221d83 --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Marks.pool.ltxml @@ -0,0 +1,37 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Marks | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Marks Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#====================================================================== +# Marks +#---------------------------------------------------------------------- +# \mark c specifies text which should be marked. +# \topmark c is the value of \botmark on the previous page. +# \botmark c is the mark text most recently encountered on a page. +# \firstmark c is the mark text first encountered on a page. + +DefPrimitive('\mark{}', undef); + +DefMacroI('\topmark', undef, Tokens()); +DefMacroI('\botmark', undef, Tokens()); +DefMacroI('\firstmark', undef, Tokens()); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_Math.pool.ltxml b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml new file mode 100644 index 000000000..42e919c60 --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml @@ -0,0 +1,1145 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Math | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; +#use Unicode::Normalize; +#use LaTeXML::Util::Pathname; +#use List::Util qw(min max); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Math Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#====================================================================== +# NOT YET IMPLEMENTED !?!?! +#---------------------------------------------------------------------- +# \radical c makes a radical atom from the delimiter (27-bit number) and the math field. +# \muskipdef c creates a symbolic name for a \muskip register. +# \muskip iq assigns to a \muskip register. +# \nonscript c ignores immediately following glue or kern in script and scriptscript styles. + +#====================================================================== +# The next two sections are the basic LaTeXML Infrastructure for math. +# There are several internal control sequences which need to be renamed! +#====================================================================== + +# Decide whether we're going into or out of math, inline or display. +Tag('ltx:XMText', autoOpen => 1, autoClose => 1); +# This really should be T_MATH +# and it should (or not) check for a second $ only if not in restricted horizontal mode! +# (and then all the \@dollar@in@(text|math|normal)mode defns would not be needed. 
+DefPrimitiveI('\@dollar@in@normalmode', undef, sub { + my ($stomach) = @_; + my $gullet = $stomach->getGullet; + my $mode = LookupValue('MODE'); + my $op = '\@@BEGININLINEMATH'; + if ($mode eq 'display_math') { + if ($gullet->ifNext(T_MATH)) { + $gullet->readToken; + $op = '\@@ENDDISPLAYMATH'; } + else { + # Avoid a Fatal, but we're likely in trouble. + # Should we switch to text mode? (LaTeX normally wouldn't) + # Did we miss something and would should have already been in text mode? Possibly... + # OR, were we in a lenient package that allowed inline math mixed in with display? + Error('expected', '$', $stomach, + "Missing \$ closing display math.", + "Ignoring; expect to be in wrong math/text mode."); + $op = undef; } } + elsif ($mode eq 'inline_math') { + $op = '\@@ENDINLINEMATH'; } + # elsif(!LookupValue('Alignment') && $gullet->ifNext(T_MATH)){ + elsif ($gullet->ifNext(T_MATH)) { + $gullet->readToken; + $op = '\@@BEGINDISPLAYMATH'; } + $stomach->invokeToken(T_CS($op)) if $op; }); +# Let this be the default, conventional $ +Let(T_MATH, T_CS('\@dollar@in@normalmode')); + +# Effectively these are the math hooks, redefine these to do what you want with math? +DefConstructorI('\@@BEGINDISPLAYMATH', undef, + "" + . "" + . "" + . "#body" + . "" + . "" + . "", + reversion => Tokens(T_MATH, T_MATH), + beforeDigest => sub { + $_[0]->beginMode('display_math'); + if (my @everymath_toks = $STATE->lookupDefinition(T_CS('\everymath'))->valueOf->unlist()) { + $_[0]->getGullet->unread(@everymath_toks); } + if (my @everydisplay_toks = $STATE->lookupDefinition(T_CS('\everydisplay'))->valueOf->unlist()) { + $_[0]->getGullet->unread(@everydisplay_toks); } + return; }, captureBody => 1); +DefConstructorI('\@@ENDDISPLAYMATH', undef, "", + reversion => Tokens(T_MATH, T_MATH), + beforeDigest => sub { $_[0]->endMode('display_math'); }); + +DefConstructorI('\@@BEGININLINEMATH', undef, + "" + . "" + . "#body" + . "" + . 
"", + reversion => Tokens(T_MATH), + beforeDigest => sub { + $_[0]->beginMode('inline_math'); + if (my @everymath_toks = $STATE->lookupDefinition(T_CS('\everymath'))->valueOf->unlist()) { + $_[0]->getGullet->unread(@everymath_toks); } + return; }, captureBody => 1); +DefConstructorI('\@@ENDINLINEMATH', undef, "", + reversion => Tokens(T_MATH), + beforeDigest => sub { $_[0]->endMode('inline_math'); }); + +# Add the TeX code from the object that created this node, +# unless it has already been recorded on another node. +sub add_TeX { + my ($document, $node, $thing) = @_; + if ($thing) { + local $LaTeXML::DUAL_BRANCH = 'presentation'; + my $tex = UnTeX($thing); + $LaTeXML::DUAL_BRANCH = 'content'; + my $ctex = UnTeX($thing); + $document->setAttribute($node, tex => $tex); + $document->setAttribute($node, 'content-tex' => $ctex) if $ctex ne $tex; } + return; } + +# Same as add_TeX, but add the code from the body of the object. +sub add_body_TeX { + my ($document, $node, $thing) = @_; + if ($thing) { + if (defined(my $body = $thing->getProperty('body'))) { + local $LaTeXML::DUAL_BRANCH = 'presentation'; + my $tex = UnTeX($body); + $LaTeXML::DUAL_BRANCH = 'content'; + my $ctex = UnTeX($body); + $document->setAttribute($node, tex => $tex); + $document->setAttribute($node, 'content-tex' => $ctex) if $ctex ne $tex; } } + return; } + +Tag('ltx:Math', afterClose => \&add_body_TeX); +Tag('ltx:Math', afterClose => \&cleanup_Math); + +# Cleanup ltx:Math elements; particularly if they aren't "really" math. +# But record the oddity with class=ltx_markedasmath +sub cleanup_Math { + my ($document, $mathnode) = @_; + # If the Math ONLY contains XMath/XMText, it apparently isn't math at all!?! + # Single token PUNCTs can also be taken out of math. + if (!$document->findnodes('ltx:XMath/ltx:*' + . '[local-name() != "XMText" and local-name() != "XMHint" ' + . 'and not(' + . 'local-name() = "XMTok" and (@role="PUNCT" or @role="PERIOD") ' + . 
'and not(preceding-sibling::*) and not(following-sibling::*) )]', $mathnode)) { + # So unwrap down to the contents of the XMText's. + my @texts = (); + foreach my $xmnode (map { $_->childNodes } $mathnode->childNodes) { + if ($document->getNodeQName($xmnode) eq 'ltx:XMHint') { + if (my $width = $xmnode->getAttribute('width')) { + if (my $space = DimensionToSpaces(Glue($width))) { + push(@texts, $space); } } } + else { # is XMText + foreach my $child ($xmnode->childNodes) { + my $t = $child->nodeType; + if ($t == XML_COMMENT_NODE) { } + elsif ($t != XML_ELEMENT_NODE) { # Make sure we've got an element + push(@texts, ['ltx:text', { class => 'ltx_markedasmath' }, $child]); } + else { + $document->addClass($child, 'ltx_markedasmath'); + push(@texts, $child); } } } } + # and replace the whole Math with the pieces + $document->replaceTree([undef, undef, @texts], $mathnode); } + else { # Cleanup any remaining XMTexts + cleanup_XMText_outer($document, $mathnode); } + return; } + +# Here's for an inverse case: when an XMText isn't "really" just text +# if it only contains an Math ORR, a tabular with only Math in the cells? +# First case: pull it back into the math, but in an XMWrap to isolate it for parsing. +# Should we just pull any mixed text math up or only a single Math? +# For the tabular case, convert it to an XMArray. + +# Note that normally, we'd do afterClose on ltx:XMText, +# but since the ltx:XMText closes before the outer ltx:Math, +# we would keep cleanup_Math from recognizing the trivial case of +# a single ltx:tabular in an equation (perverse, but people do that). +# So, we put this one on ltx:Math also, and scan for any contained XMText to fixup. 
+ +sub cleanup_XMText_outer { + my ($document, $mathnode) = @_; + foreach my $textnode ($document->findnodes('descendant::ltx:XMText', $mathnode)) { + cleanup_XMText($document, $textnode); } + return; } + +sub cleanup_XMText { + my ($document, $textnode) = @_; + # We're really only interested in reducing nested math, right? + # But actually also collapsing ltx:XMText/ltx:text + # Apply "outer" simplifications: remove ltx:text or ltx:p wrappings. + my $model = $document->getModel; + # A single "simple" element, with a single child + my %simple_element = ('ltx:text' => 1, 'ltx:p' => 1, 'ltx:inline-block' => 1); + my @preserved = (qw(yoffset xoffset)); + my @children; + while ((@children = $textnode->childNodes) && (scalar(@children) == 1) + && $document->findnodes('ltx:text' + . ' | ltx:inline-block[count(*)=1]' + . ' | ltx:p', + $textnode)) { + my $child = $children[0]; + $document->setNodeFont($textnode, $document->getNodeFont($child)); + foreach my $attr ($child->attributes) { # Copy the child's attributes (should Merge!!) + $textnode->setAttribute($attr->nodeName => $attr->getValue) unless $attr->nodeName eq 'xml:id'; } + $document->unwrapNodes($child); } + + # Now apply a simplifying rule for nested Math + # If the XMText contains a single Math, pull it's content up in + if ((scalar(@children) == 1) && $document->findnodes('ltx:Math', $textnode)) { + # Replace XMText by XMWrap/* (this should preserve the parse?) + $textnode = $document->renameNode($textnode, 'ltx:XMWrap'); + $document->replaceNode($children[0], map { $_->childNodes } $children[0]->childNodes); } + # # # RISKY!!!! If SOME nodes are math... + # # # pull the whole sequence up, unwrap the math and putting the rest back in XMText. + # # # Even with the XMWrap, this seems to wreak havoc on parsing and structure? + # # if($document->findnodes('ltx:Math',$textnode)){ + # # # Replace XMText by XMWrap/* (this should preserve the parse?) 
+ # # $textnode=$document->renameNode($textnode,'ltx:XMWrap'); + # # foreach my $child (@children){ + # # if($model->getNodeQName($child) eq 'ltx:Math'){ + # # $document->replaceNode($child,map($_->childNodes,$child->childNodes)); } + # # else { + # # $document->wrapNodes('ltx:XMText',$child); }}} + # If a single tabular that ONLY(?) contains Math, turn into an XMArray + # Well, a tabular REALLY shouldn't be in math; + # How much math should determine the switch? + # [will alignment attributes be lost?] + elsif ((scalar(@children) == 1) && ($model->getNodeQName($children[0]) eq 'ltx:tabular') +## Should we ALWAYS do this, or just for some minimal amount of math??? +## && !$document->findnodes('ltx:tabular/ltx:tr/ltx:td/text()' +## .' | ltx:tabular/ltx:tbody/ltx:tr/ltx:td/text()' +## .' | ltx:tabular/ltx:tr/ltx:td[not(ltx:Math)]' +## .' | ltx:tabular/ltx:tbody/ltx:tr/ltx:td[not(ltx:Math)]', +## $textnode) + ) { + # First step is remove any ltx:tbody from the tabular! + foreach my $tb ($document->findnodes('ltx:tabular/ltx:tbody', $textnode)) { + $document->unwrapNodes($tb); } + # Now, we can start replacing tabular=>XMArray, tr=>XMRow, td=>XMCell + my $table = $document->renameNode($children[0], 'ltx:XMArray'); + foreach my $row ($table->childNodes) { + $row = $document->renameNode($row, 'ltx:XMRow'); + foreach my $cell ($row->childNodes) { + $cell = $document->renameNode($cell, 'ltx:XMCell'); + foreach my $m ($cell->childNodes) { + if ($model->getNodeQName($m) eq 'ltx:Math') { # Math cell, unwrap the Math/XMath layer + $document->replaceNode($m, map { $_->childNodes } $m->childNodes); } + else { # Otherwise, wrap whatever it is in an XMText + $document->wrapNodes('ltx:XMText', $m); } + } } } + # And now we don't need the XMText any more. + foreach my $attr ($textnode->attributes) { # Copy the child's attributes (should Merge!!) 
+ $table->setAttribute($attr->nodeName => $attr->getValue); } + my $newtable = $document->unwrapNodes($textnode); + if (my $id = $textnode->getAttribute('xml:id')) { + $document->unRecordID($id); + $document->recordID($id, $newtable); } } + return; } + +#====================================================================== +# Scripts are a bit of a strange beast, with respect to when the arguments +# are processed, and what kind of object should be created. +# +# While scripts look like they take a normal TeX argument, they really +# take the next BOX (AFTER expansion & digestion)! Thus, while +# a^\frac{b}{c} and a^\mathcal{B} +# DO work in TeX, other things like +# a^\sqrt{3} or a^\acute{b} +# DO NOT! (Hint: consider the expansions) +# Note that with +# \def\xyz{xyz} +# a^\xyz => a^{x}yz +# So, we try to mimic, but note that our boxes don't correspond 100% to TeX's +# +# Normally, sub/super scripts should be turned into a sort of postfix operator: +# The parser will attach the script to the appropriate preceding object. +# However, there are a few special cases involving empty boxes {}. +# If the argument is an empty box $x^{}$, the whole script should just disappear. +# If the PRECEDING box is {} (in ${}^{p}$, a sort of `floating' script should be created. +# This may combine, in the parser, with the following object to generate +# a prescript. + +# Remember a "safe" way to test a script Whatsit. +# Returns [ (FLOATING|POST) , (SUBSCRIPT|SUPERSCRIPT) ] or nothing +sub IsScript { + my ($object) = @_; + if (ref $object eq 'LaTeXML::Core::List') { + $object = [$object->unlist]->[-1]; } + if ((ref $object eq 'LaTeXML::Core::Whatsit') # careful w/alias in getCSName! 
+ && ($object->getDefinition->getCS->getCSName =~ /^\\@@(FLOATING|POST)(SUBSCRIPT|SUPERSCRIPT)$/)) { + return [$1, $2]; } + return; } + +sub scriptHandler { + no warnings 'recursion'; + my ($stomach, $op) = @_; + my $gullet = $stomach->getGullet; + $gullet->skipSpaces; + my $font = LookupValue('font'); + my $style = $font->getMathstyle; + my @putback = (); + my $nscripts = 0; + + if (defined $style) { + my $cs = '\@@FLOATING' . $op; + my ($prevscript, $prevspace, $base); + # Check preceding boxes to determine possible attachment (floating vs post), + # Note that this analysis has to be done now (or sometime like it) before grouping lists go away; + # and whether there are conflicting preceding scripts, which is an error + # Parsing is too late! + while (my $prev = pop(@LaTeXML::LIST)) { + if (($prev->getProperty('isSpace')) + || ($prev->getProperty('isEmpty')) # EXPLICITLY empty, rather than {} + || (ref $prev eq 'LaTeXML::Core::Comment')) { + $prevspace = 1; # a space avoids double-scripts + unshift(@putback, $prev); # put back? assuming it will add rpadding to previous??? + next; } + elsif (IsEmpty($prev)) { # If empty, the script floats, can't conflict, but don't put back + last; } + elsif (my $prevop = IsScript($prev)) { + unshift(@putback, $prev); + if ($$prevop[1] eq $op) { # Whoops, duplicated; better use FLOATING + Error('unexpected', "double-" . lc($$prevop[1]), $stomach, "Double " . lc($$prevop[1])) + unless $prevspace; + $cs = '\@@FLOATING' . $op; + last; } + else { # Else, is OK (so far) assume POST (it will stack previous script) + $prevscript = $prev; # we'll overlap the width of the previous. + $cs = '\@@POST' . $op; } + # if we hit a FLOATING script, terminate, as the floating empty group avoids double scripts + last if ($$prevop[0] eq 'FLOATING'); + last if ++$nscripts > 1; } + else { + # We found something "normal", so assume we'll attach to it, and we're done. + $base = $prev; + unshift(@putback, $prev); + $cs = '\@@POST' . 
$op; + last; } } + push(@LaTeXML::LIST, @putback); + + MergeFont(scripted => 1); + # Now, get following boxes (may have to process several tokens!) + my @stuff = (); + while (my $tok = $gullet->readXToken(0)) { + @stuff = $stomach->invokeToken($tok); + last if @stuff; } + if (!@stuff) { + Error('expected', '{', $stomach, "Missing sub/superscript argument", $gullet->showUnexpected); + push(@stuff, Box()); } + my $script = shift(@stuff); # ONLY the first box is the script! + unshift(@stuff, + LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS($cs)), [$script], + locator => $gullet->getLocator, + font => $script->getFont, isMath => 1, + level => $stomach->getBoxingLevel, + scriptlevel => $stomach->getScriptLevel, + base => $base, # for sizing/positioning + prevscript => $prevscript)) + unless IsEmpty($script); + AssignValue(font => $font); # revert + return @stuff; } + else { # Non math use of _ ?? + my $c = (($op eq 'SUPERSCRIPT') ? '^' : '_'); + Error('unexpected', $c, $stomach, "Script $c can only appear in math mode"); + return Box($c, undef, undef, (($op eq 'SUPERSCRIPT') ? T_SUPER : T_SUB)); +} } + +DefPrimitiveI(T_SUPER, undef, sub { scriptHandler($_[0], 'SUPERSCRIPT'); }); +DefPrimitiveI(T_SUB, undef, sub { scriptHandler($_[0], 'SUBSCRIPT'); }); + +# The `argument' to a sub or superscript will typically be processed as a box, +# and either has braces, or is something that results in a single box. +# When we revert these, we DON'T want to wrap extra braces around, because they'll accumulate; +# at the least they're ugly; in some applications they affect "round trip" processing. +# OTOH, direct use of \@@POSTSUPERSCRIPT, etal, MAY need to have extra braces around them. +# So, when reverting, we're going to a bit of extra trouble to make sure we have ONE set +# of braces, but no extras!! 
# Revert a script argument to tokens wrapped in exactly ONE pair of braces.
#   $script : the digested script argument; must support ->revert.
# Returns the reverted tokens unchanged when they already form a single
# brace group spanning the whole list, otherwise T_BEGIN, tokens, T_END.
# An empty reversion yields an empty brace pair.
sub revertScript {
  my ($script) = @_;
  # We need to handle lists of lists, see arXiv:2210.11051
  my @tokens = Tokens($script->revert)->unlist;
  my @rest   = @tokens;
  my $depth  = 0;
  # Only look at the first token if there is one;
  # an empty reversion would otherwise die on a method call on undef.
  if (@rest && $rest[0]->defined_as(T_BEGIN)) {
    $depth++; shift(@rest); }
  # Consume tokens until the group opened by the leading brace is closed.
  while (@rest && $depth) {
    my $token = shift(@rest);
    if    ($token->defined_as(T_BEGIN)) { $depth++; }
    elsif ($token->defined_as(T_END))   { $depth--; } }
  # Anything left over means the leading group did not cover everything.
  return (@tokens && !@rest ? @tokens : (T_BEGIN, @tokens, T_END)); }

# Compute the 'advance' of this script.
# can we do this before parsing? we can do the advance or something.... Hmmmm.
# * Need to know scriptpos (mid or post) to determine position.
# * need to know sub/super
# Arguments:
#   $script : the script box; its size is scaled by 0.8 below
#   $base   : the box the script attaches to, or undef (nominal font size is used)
#   $prev   : a preceding script whose width this one overlaps, or undef
#   $op     : 'SUPERSCRIPT'; any other value is treated as a subscript
#   $pos    : 'mid' or 'post'; defaults to the base's scriptpos property, else 'post'
# Returns (width, height, depth), each wrapped by Dimension().
sub scriptSizer {
  my ($script, $base, $prev, $op, $pos) = @_;

  # NOTE: Currently, the mathstyle is NOT reflected in the font of the script!!!!
  # Or is it now ?????
  # [unless it's different from the 'expected' style!!!]
  my ($ws, $hs, $ds) = map { $_->valueOf } $script->getSize;
  $ws *= 0.8; $hs *= 0.8; $ds *= 0.8;    # HACK!@!!
  my ($wb, $hb, $db) = map { $_->valueOf } ($base ? $base->getSize
    : LookupValue('font')->getNominalSize);
  my ($w, $h, $d) = (0, 0, 0);
  # Fishing for the scriptpos on the base (if any)
  $pos = $base->getProperty('scriptpos') if !defined $pos && defined $base;
  $pos = 'post'                          if !defined $pos;
  # NOTE(review): max() is not imported in the visible part of this file
  # (the List::Util import near the top is commented out) -- confirm it is
  # provided elsewhere for this package.
  if ($pos eq 'mid') {
    $w = max(0, $ws - $wb);    # as if max width of base & script
    if ($op eq 'SUPERSCRIPT') {
      $h = $hb + $ds + $hs; }
    else {
      $d = $db + $hs + $ds; } }
  else {
    # NOTE(review): $ws is a raw number (via valueOf) while getWidth's result
    # is used as returned -- confirm the two are in the same representation.
    my $wp = ($prev && $prev->getWidth) || 0;    # as if max of width & prev script's width
    $w = max(0, $ws - $wp);
    if ($op eq 'SUPERSCRIPT') {
      $h = $hb + $hs / 2; }
    else {
      $d = $hs / 2 + $ds; } }
  $w = Dimension($w); $h = Dimension($h); $d = Dimension($d);
  return ($w, $h, $d); }

# NOTE: The When reverting these, the
DefConstructor('\@@POSTSUPERSCRIPT InScriptStyle',
  ""
    . "#1"
    .
"", + reversion => sub { (T_SUPER, revertScript($_[1])); }, + sizer => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'), + $_[0]->getProperty('prevscript'), 'SUPERSCRIPT', 'post'); }); +DefConstructor('\@@POSTSUBSCRIPT InScriptStyle', + "" + . "#1" + . "", + reversion => sub { (T_SUB, revertScript($_[1])); }, + sizer => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'), + $_[0]->getProperty('prevscript'), + 'SUBSCRIPT', 'post'); }); +DefConstructor('\@@FLOATINGSUPERSCRIPT InScriptStyle', + "" + . "#1" + . "", + reversion => sub { (T_BEGIN, T_END, T_SUPER, revertScript($_[1])); }, + sizer => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUPERSCRIPT', 'post'); }); +DefConstructor('\@@FLOATINGSUBSCRIPT InScriptStyle', + "" + . "#1" + . "", + reversion => sub { (T_BEGIN, T_END, T_SUB, revertScript($_[1])); }, + sizer => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUBSCRIPT', 'post'); }); + +DefMacroI('\active@math@prime', undef, sub { + my ($gullet) = @_; + my @sup = (T_CS('\prime')); + # Collect up all ', convering to \prime + while ($gullet->ifNext(T_OTHER('\''))) { + $gullet->readToken; + push(@sup, T_CS('\prime')); } + # Combine with any following superscript! + # However, this is semantically screwed up! + # We really need to set up separate superscripts, but at same level! + if ($gullet->ifNext(T_SUPER)) { + $gullet->readToken; + push(@sup, $gullet->readArg->unlist); } + (T_SUPER, T_BEGIN, @sup, T_END); }, + locked => 1); # Only in math! +AssignMathcode("'" => 0x8000); +Let("'", '\active@math@prime'); + +# Experiment: When we detect a math element containing solely a floating superscript in the +# *Frontmatter* of a document, assume it is a note mark, and normalize it down to +# plain text. +DefRewrite(xpath => 'descendant::ltx:Math[child::ltx:XMath[child::ltx:XMApp[' . + '(@role="FLOATSUPERSCRIPT" or @role="FLOATSUBSCRIPT") and ' . + 'not(preceding-sibling::*) and not(following-sibling::*) ' . 
+ 'and not(./*/*[not(self::ltx:XMTok)]) ]]]', + replace => sub { + my ($document, $math) = @_; + # We can assume the grandchild of the XMath node is the XMArg, + # which we need to normalize to scripted Unicode. + if (my @xmath = element_nodes($math)) { + if (my @xmapp = element_nodes($xmath[0])) { + if (my @xmarg = element_nodes($xmapp[0])) { + if (my $role = $xmapp[0]->getAttribute('role')) { + my $text = $xmarg[0]->textContent; + local $LaTeXML::BOX = $document->getNodeBox($xmarg[0]); + if ($role eq 'FLOATSUPERSCRIPT') { + $document->insertElement('ltx:sup', $text); + return; } + elsif ($role eq 'FLOATSUBSCRIPT') { + $document->insertElement('ltx:sub', $text); + return; } + } } } } + # should never happen, but just in case: + Info("rewrite", "footnotemark", "Failed to find floating node in: " . $math->toString(1)); + $document->getNode->appendChild($math); + return; }); + +#====================================================================== +# General +#---------------------------------------------------------------------- +# \everydisplay pt holds tokens inserted at the start of every switch to display math mode. +# \everymath pt holds tokens inserted at the start of every switch to math mode. +DefRegister('\everymath', Tokens()); +DefRegister('\everydisplay', Tokens()); + +#====================================================================== +# Creating mathematical tokens/characters +#---------------------------------------------------------------------- +# \mathchar c specifies a math character by giving its class, family, and font position. +# \delimiter c specifies a delimiter. + +# \mathchardef d provides an alternate way to define a control sequence that returns a math character. +# \mathaccent c makes an accent atom from the mathchar and the following item. +# \fam pi if 0-15, specifies the font family of class 7 (variable) math symbols. +# \delcode iq is -1 or the delimiter code for a character. 
# \mathcode iq holds the math character (15-bit number) for each of the 256 characters with which TeX works.

# Grammatical role associated with each math class number (used as the array index);
# an undef entry means the class by itself does not determine a role.
our @mathclassrole = (undef, 'BIGOP', 'BINOP', 'RELOP', 'OPEN', 'CLOSE', 'PUNCT', undef);

# Is this "fontinfo" stuff sufficient to maintain a math font "family" ??
# What we're really after is a connection to a font encoding mapping.
#
# Split a math character number into class, family and character position,
# and return the pair (role, glyph):
#   role  : taken from @mathclassrole for the class; when that is undefined,
#           from the 'role' of the character's math_token_attributes_ entry, if any.
#   glyph : the character decoded through the family font's encoding when one
#           is known, otherwise simply chr(position).
sub decodeMathChar {
  my ($n) = @_;
  my $per_class = 16 * 256;
  # Peel the fields off, most significant first.
  my $class     = int($n / $per_class);
  my $remainder = $n % $per_class;
  my $fam       = int($remainder / 256);
  my $position  = $remainder % 256;
  # First font found for this family, trying text, script, scriptscript in turn.
  my $font;
  foreach my $size (qw(textfont scriptfont scriptscriptfont)) {
    $font = LookupValue($size . '_' . $fam);
    last if $font; }
  my $char = chr($position);
  # If no specific class, Lookup properties from a DefMath?
  my $charinfo = LookupValue('math_token_attributes_' . $char);
  my $fontinfo = lookupFontinfo($font);
  my $role     = $mathclassrole[$class];
  if ((!defined $role) && $charinfo) {
    $role = $$charinfo{role}; }
  return ($role,
    ($fontinfo && $$fontinfo{encoding} ? FontDecode($position, $$fontinfo{encoding}) : $char)); }

# \mathchar <number>: a box holding the decoded glyph, tagged with its role;
# it reverts to \mathchar <number> \relax.
DefPrimitive('\mathchar Number', sub {
    my ($stomach, $code) = @_;
    my ($role, $glyph) = decodeMathChar($code->valueOf);
    my $reversion = Tokens(T_CS('\mathchar'), $code->revert, T_CS('\relax'));
    return Box($glyph, undef, undef, $reversion, role => $role); });

# \delimiter <number>: glyph, role and font properties are filled in after
# digestion from the upper part of the delimiter code.
DefConstructor('\delimiter Number',
  "?#glyph(?#isMath(#glyph)(#glyph))",
  sizer       => '#glyph',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    # Ignore 3 rightmost digits and treat as \mathchar
    my ($role, $glyph) = decodeMathChar($whatsit->getArg(1)->valueOf >> 12);
    if ($glyph) {
      $whatsit->setProperty(glyph => $glyph); }
    if (defined $role) {
      $whatsit->setProperty(role => $role); }
    if ($glyph) {
      $whatsit->setProperty(font => LookupValue('font')->specialize($glyph)); }
    return; });

# Almost like a register, but different...
+DefPrimitive('\mathchardef Token SkipSpaces SkipMatch:=', sub { + my ($stomach, $newcs) = @_; + $STATE->assignMeaning($newcs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssignment + my $value = $stomach->getGullet->readNumber(); + my ($role, $glyph) = decodeMathChar($value->valueOf); + $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, $value, + $glyph, role => $role)); + AfterAssignment(); + return; }); + +DefConstructor('\mathaccent Number Digested', + "#glyph#2", + sizer => '#2', # Close enough? + afterDigest => sub { + my ($stomach, $whatsit) = @_; + my $n = $whatsit->getArg(1)->valueOf; + my ($role, $glyph) = decodeMathChar($n); + $whatsit->setProperty(glyph => $glyph) if $glyph; + $whatsit->setProperty(font => LookupValue('font')->specialize($glyph)) if $glyph; + return; }); + +# # Only used for active math characters, so far +DefRegister('\mathcode Number', Number(0), + getter => sub { + my $ch = $_[0]->valueOf; + my $code = $STATE->lookupMathcode(chr($ch)); + Number(defined $code ? $code : $ch); }, # defaults to the char's code itself(?) + setter => sub { $STATE->assignMathcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); +# Not used anywhere (yet) +DefRegister('\delcode Number', Number(0), + getter => sub { my $code = $STATE->lookupDelcode(chr($_[0]->valueOf)); + Number(defined $code ? $code : 0); }, + setter => sub { $STATE->assignDelcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); + +DefRegister('\fam' => Number(-1)); + +#====================================================================== +# TeX-level grammatical roles +#---------------------------------------------------------------------- +# \mathbin c assigns class 2 (binary operation) to the following character or subformula. +# \mathclose c assigns class 5 (closing) to the following character or subformula. +# \mathinner c makes an inner atom holding the math field. +# \mathop c assigns class 1 (large operator) to following character or subformula. 
+# \mathopen c assigns class 4 (opening) to following character or subformula. +# \mathord c assigns class 0 (ordinary) to following character or subformula. +# \mathpunct c assigns class 6 (punctuation) to following character or subformula. +# \mathrel c assigns class 3 (relation) to following character or subformula. + +# Is XMWrap the right thing to wrap with (instead of XMArg)? +# We can't really assume that the stuff inside is sensible math. +# NOTE that \mathord and \mathbin aren't really right here. +# We need a finer granularity than TeX does: an ORD could be several things, +# a BIN could be a MULOP or ADDOP. +# AND, rarely, they're empty.... Is it wrong to drop them? +DefConstructor('\mathord{}', "?#1(#1)()", bounded => 1); +DefConstructor('\mathop{}', "?#1(#1)()", + bounded => 1, properties => { scriptpos => \&doScriptpos }); +DefConstructor('\mathbin{}', "?#1(#1)()", bounded => 1); +DefConstructor('\mathrel{}', "?#1(#1)()", bounded => 1); +DefConstructor('\mathopen{}', "?#1(#1)()", bounded => 1); +DefConstructor('\mathclose{}', "?#1(#1)()", bounded => 1); +DefConstructor('\mathpunct{}', "?#1(#1)()", bounded => 1); +DefConstructor('\mathinner{}', "?#1(#1)()", bounded => 1); + +#====================================================================== +# Delimiters +#---------------------------------------------------------------------- +# \left c makes TeX calculate the size of the delimiter needed at the left of a subformula. +# \right c makes TeX calculate the size of the delimiter needed at the right of a subformula. + +# This duplicates in slightly different way what DefMath has put together. +# [duplication seems like a bad idea!] 
# Table of the delimiters accepted after \left and \right.
# Key:    the delimiter as written in TeX (a character or a control sequence).
# Value:  char  => the character to emit,
#         lrole => role used when the delimiter follows \left,
#         rrole => role used when the delimiter follows \right,
#         name  => optional symbolic name.
our %DELIMITER_MAP =
  ('(' => { char => "(", lrole => 'OPEN', rrole => 'CLOSE' },
  ')'            => { char => ")",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '['            => { char => "[",        lrole => 'OPEN',    rrole => 'CLOSE' },
  ']'            => { char => "]",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '\{'           => { char => "{",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '\}'           => { char => "}",        lrole => 'OPEN',    rrole => 'CLOSE' },
  '\lfloor'      => { char => "\x{230A}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'lfloor' },
  '\rfloor'      => { char => "\x{230B}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rfloor' },
  '\lceil'       => { char => "\x{2308}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'lceil' },
  '\rceil'       => { char => "\x{2309}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rceil' },
  '\langle'      => { char => "\x{27E8}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'langle' },
  '\rangle'      => { char => "\x{27E9}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rangle' },
  '<'            => { char => "\x{27E8}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'langle' },
  '>'            => { char => "\x{27E9}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'rangle' },
  '/'            => { char => "/",        lrole => 'MULOP',   rrole => 'MULOP' },
  '\backslash'   => { char => UTF(0x5C),  lrole => 'MULOP',   rrole => 'MULOP', name => 'backslash' },
  '|'            => { char => "|",        lrole => 'VERTBAR', rrole => 'VERTBAR' },
  '\|'           => { char => "\x{2225}", lrole => 'VERTBAR', rrole => 'VERTBAR' },
  '\uparrow'     => { char => "\x{2191}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'uparrow' },        # ??
  '\Uparrow'     => { char => "\x{21D1}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'Uparrow' },        # ??
  '\downarrow'   => { char => "\x{2193}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'downarrow' },      # ??
  '\Downarrow'   => { char => "\x{21D3}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'Downarrow' },      # ??
  '\updownarrow' => { char => "\x{2195}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'updownarrow' },    # ??
  '\Updownarrow' => { char => "\x{21D5}", lrole => 'OPEN',    rrole => 'CLOSE', name => 'Updownarrow' },    # ??
  );

# With new treatment of Simple Symbols as just Box's with assigned attributes,
# we're not getting whatsits, and so we're not looking them up the same way!!!
# TEMPORARILY (?) hack the Delimiter map
# Every entry is also registered under its output character, so a lookup by the
# already-translated character finds the same record.
foreach my $entry (values %DELIMITER_MAP) {
  $DELIMITER_MAP{ $$entry{char} } = $entry; }

# Return the %DELIMITER_MAP record for $delim (TeX spelling or output character),
# or undef when it is not a known delimiter.
sub lookup_delimiter {
  my ($delim) = @_;
  return $DELIMITER_MAP{$delim}; }

# This is a little messier than you'd think.
# These effectively create a group between the \left,\right.
# And this also gives us a single list of things to parse separately.
# Since \left,\right are TeX, primitives and must be paired up,
# we use a bit of macro trickery to simulate.
# [The \@hidden@bgroup/egroup keep from putting a {} into the UnTeX]
# HOWEVER, an additional complication is that it is a common mistake to omit the balancing \right!
# Using an \egroup (or hidden) makes it hard to recover, so use a special egroup
DefMacro('\left XToken', '\@left #1\@hidden@bgroup');
# Like \@hidden@egroup, but softer about missing \left
# Reports an error instead of closing a group when the innermost frame is a mode
# switch or a \begingroup-style group, i.e. when no \left opened it.
DefConstructor('\right@hidden@egroup', '',
  afterDigest => sub {
    my ($stomach) = @_;
    if ($STATE->isValueBound('MODE', 0)    # Last stack frame was a mode switch!?!?!
      || $STATE->lookupValue('groupNonBoxing')) {    # or group was opened with \begingroup
      Error('unexpected', '\right', undef, "Unbalanced \\right, no balancing \\left."); }
    else {
      $stomach->egroup; } },
  reversion => '');

DefMacro('\right XToken', '\right@hidden@egroup\@right #1');

# \@left <delimiter>: the digested form of \left.
#   '.'                  => marked as a hint (null delimiter);
#   known delimiter      => role (lrole), char, name and stretchy set from %DELIMITER_MAP;
#   argument of role OPEN => argument itself is marked stretchy;
#   anything else        => warning.
# NOTE(review): the template looks like its XML markup was lost in this copy of the
# file; it is reproduced unchanged -- confirm against the upstream source.
DefConstructor('\@left Token',
  "?#char(#char)"
    . "(?#hint()(#1))",
  afterDigest => sub { my ($stomach, $whatsit) = @_;
    my $arg   = $whatsit->getArg(1);
    my $delim = ToString($arg);
    if ($delim eq '.') {
      $whatsit->setProperty(hint => 1); }
    elsif (my $entry = $DELIMITER_MAP{$delim}) {
      $whatsit->setProperties(role => $$entry{lrole},
        char     => $$entry{char},
        name     => $$entry{name},
        stretchy => 'true');
      $whatsit->setFont($arg->getFont()); }
    elsif (($arg->getProperty('role') || '') eq 'OPEN') {
      $arg->setProperty(stretchy => 'true'); }
    else {
      Warn('unexpected', $delim, $stomach,
        "Missing delimiter; '.' inserted"); }
    return; },
  alias => '\left');
# \@right <delimiter>: mirror image of \@left, using rrole and role CLOSE.
DefConstructor('\@right Token',
  "?#char(#char)"
    . "(?#hint()(#1))",
  afterDigest => sub { my ($stomach, $whatsit) = @_;
    my $arg   = $whatsit->getArg(1);
    my $delim = ToString($arg);
    if ($delim eq '.') {
      $whatsit->setProperty(hint => 1); }
    elsif (my $entry = $DELIMITER_MAP{$delim}) {
      $whatsit->setProperties(role => $$entry{rrole},
        char     => $$entry{char},
        name     => $$entry{name},
        stretchy => 'true');
      $whatsit->setFont($arg->getFont()); }
    elsif (($arg->getProperty('role') || '') eq 'CLOSE') {
      $arg->setProperty(stretchy => 'true'); }
    else {
      # Same wording as \@left (the stray closing parenthesis has been removed).
      Warn('unexpected', $delim, $stomach,
        "Missing delimiter; '.' inserted"); }
    return; },
  alias => '\right');

#======================================================================
# Limit placement
#----------------------------------------------------------------------
# \limits c displays limits above and below large operators (class 1).
# \nolimits c displays limits to the right of large operators (class 1).
# \displaylimits c restores normal conventions for using limits with operators.
# \limits, \nolimits and \displaylimits produce nothing themselves (empty template,
# isEmpty property); after digestion they rewrite the 'scriptpos' of the preceding
# material via mergeLimits.
DefConstructorI('\limits', undef, '',
  afterDigest => sub { mergeLimits('mid'); },
  properties  => { isEmpty => 1 });
DefConstructorI('\nolimits', undef, '',
  afterDigest => sub { mergeLimits('post'); },
  properties  => { isEmpty => 1 });
# 'mid' when the whatsit's recorded mathstyle is 'display', otherwise 'post'.
DefConstructorI('\displaylimits', undef, '',
  afterDigest => sub {
    mergeLimits((($_[1]->getProperty('mathstyle') || '') eq 'display' ? 'mid' : 'post')); },
  properties => { isEmpty => 1 });

# Walk backwards through @LaTeXML::LIST, setting each box's 'scriptpos' to
# $pos followed by a level number, and stop after the first box that is neither
# empty nor a script.
# The level is the trailing number of the box's previous scriptpos when it has one,
# otherwise the stomach's current script level.
sub mergeLimits {
  my ($pos) = @_;
  for (my $i = scalar(@LaTeXML::LIST) - 1 ; $i >= 0 ; $i--) {
    my $box  = $LaTeXML::LIST[$i];
    my $prev = $box->getProperty('scriptpos') || '';
    # Lazy prefix, so that ALL trailing digits are captured; a greedy \w* would
    # swallow the leading digits of a multi-digit level.
    my $level = ($prev =~ /^\w*?(\d+)$/ ? $1 : $STATE->getStomach->getScriptLevel || '');
    $box->setProperty(scriptpos => $pos . $level);
    last unless IsEmpty($box) || IsScript($box); }
  return; }

#======================================================================
# Math script fonts
#----------------------------------------------------------------------
# \textfont iq specifies the text font for a family.
# \scriptfont iq specifies the script font for a family.
# \scriptscriptfont iq specifies the scriptscript font for a family.

# Doubtful that we can do anything useful with these.
# These look essentially like Registers, although Knuth doesn't call them that.
# NOTE: These should just point to a CS token, right????
# (although it SHOULD be one defined to be a font switch??)
# NOTE: These should NOT be global(?)
# Each register is indexed by a family number and stored under the State value
# '<name>_<family>'.
DefRegister('\textfont Number' => T_CS('\tenrm'),
  getter => sub {
    my ($fam) = @_;
    LookupValue('textfont_' . $fam->valueOf); },
  setter => sub {
    my ($font, $scope, $fam) = @_;
    AssignValue('textfont_' . $fam->valueOf => $font, $scope); });
DefRegister('\scriptfont Number' => T_CS('\sevenrm'),
  getter => sub {
    my ($fam) = @_;
    LookupValue('scriptfont_' . $fam->valueOf); },
  setter => sub {
    my ($font, $scope, $fam) = @_;
    AssignValue('scriptfont_' . $fam->valueOf => $font, $scope); });
DefRegister('\scriptscriptfont Number' => T_CS('\fiverm'),
  getter => sub {
    my ($fam) = @_;
    LookupValue('scriptscriptfont_' . $fam->valueOf); },
  setter => sub {
    my ($font, $scope, $fam) = @_;
    AssignValue('scriptscriptfont_' . $fam->valueOf => $font, $scope); });

#======================================================================
# Math script styles
#----------------------------------------------------------------------
# \displaystyle c selects display style: D or D'.
# \scriptscriptstyle c selects scriptscript style: SS or SS'.
# \scriptstyle c selects script style: S or S'.
# \textstyle c selects text style: T or T'.

# Also record that this explicitly sets the mathstyle (support for \over, etal)
# Each primitive merges the new mathstyle into the current font and returns an
# otherwise empty Box flagged with explicit_mathstyle.
DefPrimitiveI('\displaystyle', undef, sub {
    MergeFont(mathstyle => 'display');
    Box(undef, undef, undef, T_CS('\displaystyle'), explicit_mathstyle => 1); });
DefPrimitiveI('\textstyle', undef, sub {
    MergeFont(mathstyle => 'text');
    Box(undef, undef, undef, T_CS('\textstyle'), explicit_mathstyle => 1); });
DefPrimitiveI('\scriptstyle', undef, sub {
    MergeFont(mathstyle => 'script');
    Box(undef, undef, undef, T_CS('\scriptstyle'), explicit_mathstyle => 1); });
DefPrimitiveI('\scriptscriptstyle', undef, sub {
    MergeFont(mathstyle => 'scriptscript');
    Box(undef, undef, undef, T_CS('\scriptscriptstyle'), explicit_mathstyle => 1); });

#======================================================================
#
#----------------------------------------------------------------------
# \mathchoice c specifies specific subformulas for the 4 main styles.
# \vcenter c centers material with respect to the axis.

# Note that in TeX, all 4 args get digested(!)
# and the choice is made when absorbing!
# The mathstyle is captured as a property from the current font at digestion time;
# anything other than display/text/script selects the scriptscript branch.
DefConstructor('\mathchoice Digested Digested Digested Digested', sub {
    my ($document, $d, $t, $s, $ss, %props) = @_;
    my $style  = $props{mathstyle};
    my $choice = ($style eq 'display' ? $d
      : ($style eq 'text' ? $t
        : ($style eq 'script' ? $s
          : $ss)));
    $document->absorb($choice); },
  properties => { mathstyle => sub { LookupValue('font')->getMathstyle; } });

# THIS IS WRONG!!!!
Let('\vcenter', '\vbox');

#======================================================================
#
#----------------------------------------------------------------------
# \overline c puts a line over the following character or subformula.
# \underline c puts a line under the following character or subformula.

DefMath('\overline Digested', UTF(0xAF), operator_role => 'OVERACCENT');    # MACRON
DefMath('\math@underline{}', UTF(0xAF), operator_role => 'UNDERACCENT',
  name => 'underline', alias => '\underline');
# NOTE(review): this template looks like its XML markup was lost in this copy of the
# file; it is reproduced unchanged -- confirm against the upstream source.
DefConstructor('\text@underline{}', "#1");
DefMath('\math@overrightarrow{}', "\x{2192}", operator_role => 'OVERACCENT',
  name => 'overrightarrow', alias => '\overrightarrow');
DefMath('\math@overleftarrow{}', "\x{2190}", operator_role => 'OVERACCENT',
  name => 'overleftarrow', alias => '\overleftarrow');

# Careful: Use \protect so that it doesn't expand too early in alignments, etc.
# [Really shouldn't use \protect, since this is a TeX primitive and \protect is LaTeX]
DefMacro('\underline{}', '\protect\ifmmode\math@underline{#1}\else\text@underline{#1}\fi');

#======================================================================
# fraction-like things
#----------------------------------------------------------------------
# \above d is equivalent to `\abovewithdelims..'.
# \abovewithdelims c is a generalized fraction command.
# \atop d is equivalent to `\atopwithdelims..'.
# \atopwithdelims d is a generalized fraction command with an invisible fraction bar.
# \over d is equivalent to `\overwithdelims..'.
# \overwithdelims d is a generalized fraction command with preset fraction bar thickness.
# After digesting the \choose (or whatever), grab the previous and following material
# and store as args in the whatsit.
# Bump the mathstyle recorded in already-digested boxes and whatsits.
# The hard part is knowing when NOT to bump it:
# after \displaystyle & friends, or inside constructors that fix their own style.
# Note also that any side adjustments digestion made based on the old mathstyle
# are NOT revisited here.
# There is no clean API for finding the displayable Boxes inside something,
# and grouping is not well tracked either...

# ARGH!!!!!!!!! RETHINK!!!!!!
#
# adjustMathstyle($outerstyle, \%adjusted, @boxes)
#   $outerstyle : mathstyle of the enclosing context
#   \%adjusted  : items already visited, so nothing is adjusted twice
#                 (whatsits are reached through both their args AND their properties)
#   @boxes      : candidates; undef, plain values and unblessed references are skipped
# Processing of the current list stops entirely at the first box flagged
# 'explicit_mathstyle'; boxes flagged 'own_mathstyle' are merely skipped.
sub adjustMathstyle {
  my ($outerstyle, $adjusted, @boxes) = @_;
  my $builtin_ref = qr/(?:SCALAR|HASH|ARRAY|CODE|REF|GLOB|LVALUE)/;
ITEM:
  for my $item (@boxes) {
    next ITEM if !defined $item;
    next ITEM if $adjusted->{$item};    # already handled via another path
    $adjusted->{$item} = 1;
    my $class = ref $item;
    next ITEM if !$class;
    next ITEM if $class =~ $builtin_ref;
    next ITEM if !$class->isaBox;
    return    if $item->getProperty('explicit_mathstyle');
    next ITEM if $item->getProperty('own_mathstyle');

    if ($class eq 'LaTeXML::Core::Whatsit') {
      my $innerstyle = adjustMathStyle_internal($outerstyle, $item) || $outerstyle;
      # descend into everything the whatsit holds: arguments first, then properties
      adjustMathstyle($innerstyle, $adjusted, $item->getArgs);
      adjustMathstyle($innerstyle, $adjusted, values %{ $item->getPropertiesRef }); }
    elsif ($class eq 'LaTeXML::Core::List') {
      adjustMathstyle($outerstyle, $adjusted, $item->unlist); }
    elsif ($class eq 'LaTeXML::Core::Box') {
      adjustMathStyle_internal($outerstyle, $item); } }
  return; }

# Heuristic;
# the aim is to adjust the style AS IF the numerator had been digested in the next mathstyle.
# That is not the same as simply shifting the mathstyle by one step!
# In effect we are trying to guess WHY the box ended up with the style it has...?
# $mathstyle_adjust_map{$outerstyle}{$origstyle} gives the style a box should get
# when it was digested in $origstyle but belongs in a fraction whose surrounding
# style is $outerstyle.
our %mathstyle_adjust_map = (
  display => { display => 'text', text => 'script', script => 'script', scriptscript => 'scriptscript' },
  text => { display => 'text', text => 'script', script => 'scriptscript', scriptscript => 'scriptscript' },
  script => { display => 'display', text => 'text', script => 'scriptscript', scriptscript => 'scriptscript' },
  scriptscript => { display => 'display', text => 'text', script => 'scriptscript', scriptscript => 'scriptscript' });

# Adjust a single box: look up the new style from the box font's mathstyle
# (both styles default to 'display' when missing) and merge it into the font.
# Returns the new style only when the box also carried a 'mathstyle' property
# (which is then updated); otherwise returns nothing.
sub adjustMathStyle_internal {
  my ($outerstyle, $box) = @_;
  $outerstyle = 'display' unless $outerstyle;
  if (my $font = $box->getFont) {
    my $origstyle = $font->getMathstyle || 'display';
    my $newstyle  = $mathstyle_adjust_map{$outerstyle}{$origstyle};
    $box->setFont($font->merge(mathstyle => $newstyle));
    if (my $recstyle = $box->getProperty('mathstyle')) {    # And adjust here, if recorded.
      $box->setProperty(mathstyle => $newstyle);
      return $newstyle; } }
  return; }

# Size of a fraction as (width, height, depth):
# width is the larger of the two widths, depth is half the denominator's total
# height, and height is the numerator's total height plus that depth.
sub fracSizer {
  my ($numerator, $denominator) = @_;
  my $w = $numerator->getWidth->larger($denominator->getWidth);
  my $d = $denominator->getTotalHeight->multiply(0.5);
  my $h = $numerator->getTotalHeight->add($d);
  return ($w, $h, $d); }

# \lx@generalized@over{reversion}{keyvals}{top}{bottom}
# keyvals: role,meaning, left,right, thickness
# NOTE(review): the template below consists largely of empty strings in this copy,
# which suggests its XML markup was stripped -- confirm against the upstream source.
DefConstructor('\lx@generalized@over Undigested RequiredKeyVals',
  "?#needXMDual("
    . ""
    . ""
    . ""
    . ""
    . ""
    . ""
    . ""
    . "#left)()"
    . ""
    . ""
    . "#top"
    . "#bottom"
    . ""
    . "?#needXMDual(#right"
    . ""
    . ")()",
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $kv = $whatsit->getArg(2);
    # Really, we want the mathstyle that was in effect BEFORE the group starting the numerator!
    # (there could be a \displaystyle INSIDE the numerator, but that's not the one we want)
    # Of course the group that started the numerator may be the start of the Math, itself!
    # AND, the numerator, which was already digested, needs it's mathstyle ADJUSTED
    my $font = ($STATE->isValueBound('MODE', 0)    # Last stack frame was a mode switch!?!?!
      ? $STATE->lookupValue('font')                # then just use whatever font we've got
      : ($STATE->isValueBound('font', 0)           # else if font was set in numerator
          && $STATE->valueInFrame('font', 1))
        || $STATE->lookupValue('font')             # then just use whatever font we've got
    );
    my $style     = $font->getMathstyle;
    my $role      = ToString($kv->getValue('role'));
    my $meaning   = ToString($kv->getValue('meaning'));
    my $thickness = ToString($kv->getValue('thickness'));
    # Defaults: role FRACOP; meaning 'divide' unless the bar is explicitly 0pt thick.
    $role    = 'FRACOP' unless $role;
    $meaning = 'divide' if (!$meaning) && ($thickness ne '0pt');
    # Unfortunately, the numerator's already digested! We have to adjust it's mathstyle
    my @top = $stomach->regurgitate;
    # really have to pass +/-1, +/-2 etc..!
    adjustMathstyle($style, {}, @top);
    MergeFont(fraction => 1);
    my @bot     = $stomach->digestNextBody();
    my $closing = pop(@bot);    # We'll leave whatever closed the list (endmath, endgroup...)
    $whatsit->setProperties(
      top       => List(@top, mode => 'math'),
      bottom    => List(@bot, mode => 'math'),
      role      => $role,
      meaning   => $meaning,
      thickness => $thickness,
      mathstyle => $style);
    # Delimited forms additionally request an XMDual and reserve three argument ids.
    if ($kv->getValue('left') || $kv->getValue('right')) {
      $whatsit->setProperties(needXMDual => 1,
        xmkey0 => LaTeXML::Package::getXMArgID(),
        xmkey1 => LaTeXML::Package::getXMArgID(),
        xmkey2 => LaTeXML::Package::getXMArgID()); }
    return $closing; },    # and leave the closing bit, whatever it is.
  # The key/value pairs of the second argument become properties of the whatsit.
  properties => sub { %{ $_[2]->getKeyVals }; },
  sizer      => sub { fracSizer($_[0]->getProperty('top'), $_[0]->getProperty('bottom')); },
  # Reverts to: numerator, the original command (first argument), denominator.
  reversion => sub {
    my ($whatsit) = @_;
    (Revert($whatsit->getProperty('top')),
      $whatsit->getArg(1)->unlist,
      Revert($whatsit->getProperty('bottom'))); });

# The TeX fraction primitives, all expressed through \lx@generalized@over.
DefMacro('\above Dimension',
  '\lx@generalized@over{\above #1}{meaning=divide,thickness=#1}');
DefMacro('\abovewithdelims Token Token Dimension',
'\lx@generalized@over{\abovewithdelims #1 #2 #3}{left={\@left#1},right={\@right#2},meaning=divide,thickness=#3}');
DefMacro('\atop',
  '\lx@generalized@over{\atop}{thickness=0pt}');
DefMacro('\atopwithdelims Token Token',
  '\lx@generalized@over{\atopwithdelims #1 #2}{thickness=0pt,left={\@left#1},right={\@right#2}}');
DefMacro('\over',
  '\lx@generalized@over{\over}{meaning=divide}');
DefMacro('\overwithdelims Token Token',
  '\lx@generalized@over{\overwithdelims #1 #2}{left={\@left#1},right={\@right#2},meaning=divide}');
# My thinking was that this is a "fraction" providing the dimension is > 0!

#======================================================================
#
#----------------------------------------------------------------------
# \mkern c adds a math kern item to the current math list.
# \mskip c adds math glue to the current math list.
# \thinmuskip pm is ``thin'' math glue inserted into formulas.
# \medmuskip pm is ``medium'' math glue inserted into formulas.
# \thickmuskip pm is ``thick'' math glue inserted into formulas.
# \abovedisplayskip pg is normal glue placed before a displayed equation.
# \abovedisplayshortskip pg is alternate glue placed before a displayed equation.
# \belowdisplayskip pg is normal glue placed after a displayed equation.
# \belowdisplayshortskip pg is alternate glue placed after a displayed equation.
# \mkern <muglue>: yields a space Box whose text is the space-equivalent of the
# amount, whose width is the amount itself, and which reverts to the original command.
DefPrimitive('\mkern MuGlue', sub {
    my (undef, $amount) = @_;
    my $spaces    = DimensionToSpaces($amount);
    my $reversion = Invocation(T_CS('\mkern'), $amount);
    return Box($spaces, undef, undef, $reversion,
      width   => $amount,
      isSpace => 1); });

# \mskip <muglue>: handled exactly like \mkern, apart from the command it reverts to.
DefPrimitive('\mskip MuGlue', sub {
    my (undef, $amount) = @_;
    my $spaces    = DimensionToSpaces($amount);
    my $reversion = Invocation(T_CS('\mskip'), $amount);
    return Box($spaces, undef, undef, $reversion,
      width   => $amount,
      isSpace => 1); });

# Math-glue registers (TeXBook p.274)
DefRegister('\thinmuskip',  MuGlue("3mu"));
DefRegister('\medmuskip',   MuGlue("4mu plus 2mu minus 4mu"));
DefRegister('\thickmuskip', MuGlue("5mu plus 5mu"));

# Glue placed around displayed equations
DefRegister('\abovedisplayskip',      Glue('12pt plus 3pt minus 9pt'));
DefRegister('\abovedisplayshortskip', Glue('0pt plus 3pt'));
DefRegister('\belowdisplayskip',      Glue('12pt plus 3pt minus 9pt'));
DefRegister('\belowdisplayshortskip', Glue('0pt plus 3pt'));

#======================================================================
#
#----------------------------------------------------------------------
# \binoppenalty pi is the penalty for a line break after a binary operation.
# \postdisplaypenalty pi is the penalty added immediately after a math display.
# \predisplaypenalty pi is the penalty added immediately before a math display.
# \relpenalty pi is the penalty for a line break after a relation.
# \displaywidowpenalty pi is the penalty added after the penultimate line immediately preceding a display.
# \skewchar iq is -1 or the character used to fine-tune the positioning of math accents.
# \defaultskewchar pi is -1 or the \skewchar value for a font when it is loaded.
# \delimitershortfall pd is the second parameter used to compute the size of delimiters required by \left and \right.
# \displayindent pd is the amount to shift a line holding a displayed equation.
# \displaywidth pd is the width of the line holding a displayed equation.
# \mathsurround pd is extra space added when switching in and out of math mode.
# \nulldelimiterspace pd is the width of a null or missing delimiter.
# \predisplaysize pd is the effective width of the line preceding a displayed equation.
# \scriptspace pd is extra space added after a subscript or a superscript.
# \delimiterfactor pi is the first parameter used to compute the size of delimiters required by \left and \right.

# Line-breaking penalties in and around math.
# \relpenalty is declared exactly once: a second declaration with the value 700
# used to follow and silently replaced the 500 given here (plain TeX uses
# \binoppenalty=700 and \relpenalty=500).
DefRegister('\binoppenalty'        => Number(700));
DefRegister('\relpenalty'          => Number(500));
DefRegister('\displaywidowpenalty' => Number(50));
DefRegister('\predisplaypenalty'   => Number(10000));
DefRegister('\postdisplaypenalty'  => Number(0));

# \skewchar <font>: stored in the font's info record (as found by lookupFontinfo).
# Reading yields Number(0) when the font is unknown or no (true) value was stored;
# assigning to an unknown font is silently ignored.
DefRegister('\skewchar FontToken' => Number(0),
  getter => sub {
    my ($font) = @_;
    my $info = lookupFontinfo($font);
    return ($info && $$info{skewchar}) || Number(0); },
  setter => sub {
    my ($value, $scope, $font) = @_;
    if (my $info = lookupFontinfo($font)) {
      $$info{skewchar} = $value; } }
);
DefRegister('\defaultskewchar' => Number(-1));

# Dimen registers; TeXBook p. 274
DefRegister('\delimitershortfall' => Dimension('5pt'));
DefRegister('\nulldelimiterspace' => Dimension('1.2pt'));
DefRegister('\scriptspace'        => Dimension('0.5pt'));
DefRegister('\mathsurround'       => Dimension(0));
DefRegister('\predisplaysize'     => Dimension(0));
DefRegister('\displaywidth'       => Dimension(0));
DefRegister('\displayindent'      => Dimension(0));
DefRegister('\delimiterfactor'    => Number(0));

#======================================================================
# Equation numbers
#----------------------------------------------------------------------
# \eqno c puts an equation number at the right-hand margin.
# \leqno c puts an equation number at the left-hand margin.

# \eqno & \leqno are really bizarre.
# They should seemingly digest until $ (or while still in math mode),
# and use that stuff as the reference number.
# However, since people abuse this, and we're really not quite TeX,
# we really can't do it Right.
# Even a \begin{array} ends up expanding into a $ !!!
#
# \eqno collects (expanded) tokens as the equation tag until it meets something
# that looks like the end of the formula, then returns \@@eqno{<tag>} followed by
# that terminating token.  Balanced {...} groups are copied whole.
# If the input runs out first, an error is reported and the collected tokens
# are returned unchanged.
DefMacroI('\eqno', undef, sub {
    my ($gullet) = @_;
    my $locator = $gullet->getLocator;
    my @stuff   = ();
    # This is risky!!!
    while (my $t = $gullet->readXToken(0)) {
      if ($t->defined_as(T_BEGIN)) {
        push(@stuff, $t, $gullet->readBalanced, T_END); }
      # What do I need to explicitly list here!?!?!? UGGH!
      elsif ($t->defined_as(T_MATH)
        || $t->defined_as(T_CS('\]'))
        # UGH from 2022: also don't jump over rows
        || $t->defined_as(T_CS('\cr'))
        # see arXiv:math/0001062, for one example
        || $t->defined_as(T_CS('\hidden@cr'))
        || $t->defined_as(T_CS('\@@ENDDISPLAYMATH'))
        || $t->defined_as(T_CS('\begingroup'))    # Totally wrong, but to catch expanded environments
        || (ToString($t) =~ /^\\(?:begin|end)\{/)    # any sort of environ begin or end???
                                                     # This seems needed within AmSTeX environs
      ) {
        return (Invocation(T_CS('\@@eqno'), Tokens(@stuff)), $t); }
      else {
        push(@stuff, $t); } }
    # Message wording corrected: "Fell off the end" (was "Fell of the end").
    Error('unexpected', '\eqno', $gullet, "Fell off the end reading tag for \\eqno!",
      "started " . ToString($locator));
    return Tokens(@stuff); });

Let('\leqno', '\eqno');
# Revert to nothing, since it really doesn't belong in the TeX string(?)
# NOTE(review): this template looks like its XML markup was lost in this copy of the
# file; it is reproduced unchanged -- confirm against the upstream source.
DefConstructor('\@@eqno{}',
  "^ #1",
  reversion => '');

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Page.pool.ltxml b/lib/LaTeXML/Engine/TeX_Page.pool.ltxml
new file mode 100644
index 000000000..a1cce22f6
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Page.pool.ltxml
@@ -0,0 +1,54 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Page | #
# | Core TeX Implementation for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Page Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# Parameters for page layout
#----------------------------------------------------------------------
# \hoffset pd is a value added to the default 1-inch left margin.
# \voffset pd is a value added to the default 1-inch top margin.
# \topskip pg is special glue added before the first box on each page.
# \pagedepth iq is the actual depth of the last box on the main page.
# \pagetotal iq is the accumulated height of the current page.
# \maxdepth pd is the maximum depth of boxes on the main page.
# \vsize pd is the desired height of the current page.
# \pagegoal iq is the desired height of the current page.
# \pageshrink iq is the amount of finite shrinkability in the current page.
# \pagestretch iq is the amount of finite stretchability in the current page.
# \pagefilllstretch iq is the amount of third-order infinite stretchability in the current page.
# \pagefillstretch iq is the amount of second-order infinite stretchability in the current page.
# \pagefilstretch iq is the amount of first-order infinite stretchability in the current page.
# Page-layout parameters, declared as plain registers with their initial values.
# Nothing in this section computes the page-state quantities (\pagetotal, \pagegoal,
# the stretch/shrink totals); they simply start at zero.
DefRegister('\hoffset'          => Dimension(0));
DefRegister('\voffset'          => Dimension(0));
DefRegister('\topskip'          => Glue('10pt'));
DefRegister('\pagedepth'        => Dimension(0));
DefRegister('\pagetotal'        => Dimension(0));
DefRegister('\maxdepth'         => Dimension('4pt'));
DefRegister('\vsize'            => Dimension('8.9in'));
DefRegister('\pagegoal'         => Dimension(0));
DefRegister('\pageshrink'       => Dimension(0));
DefRegister('\pagestretch'      => Dimension(0));
DefRegister('\pagefilstretch'   => Dimension(0));
DefRegister('\pagefillstretch'  => Dimension(0));
DefRegister('\pagefilllstretch' => Dimension(0));

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Paragraph.pool.ltxml b/lib/LaTeXML/Engine/TeX_Paragraph.pool.ltxml
new file mode 100644
index 000000000..6807ba5a9
--- /dev/null
+++ b/lib/LaTeXML/Engine/TeX_Paragraph.pool.ltxml
@@ -0,0 +1,228 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | TeX_Paragraph | #
# | Core TeX Implementation for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Paragraph Family of primitive control sequences
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# Spacing tweaks
#----------------------------------------------------------------------
# \ignorespaces c makes TeX read and expand tokens but do nothing until a nonspace token is reached.
# \noboundary c if present, breaks ligatures and kerns.
# \vadjust c inserts a vertical list between two lines in a paragraph.

# \ignorespaces has no body of its own: the SkipSpaces parameter does all the work.
DefPrimitive('\ignorespaces SkipSpaces', undef);
# \noboundary is accepted and ignored.
DefPrimitiveI('\noboundary', undef, undef);
# Note: \vadjust ignores in vertical mode...
# The material is not processed here; its tokens are appended to the 'vAdjust' value.
DefPrimitive('\vadjust {}', sub { PushValue('vAdjust', $_[1]->unlist); });

#======================================================================
# Basic Paragraph
#----------------------------------------------------------------------
# \everypar pt holds tokens added at the beginning of every paragraph.
# \indent c begins a new paragraph indented by \parindent.
# \noindent c begins a new paragraph that is not indented.
# \par c is an explicit command to end a paragraph.

DefRegister('\everypar', Tokens());
# These determine whether the _next_ paragraph gets indented!
# thus it needs \par to check whether such indentation has been set.
# \indent: mark the current paragraph as indented, or start an indented one.
#   - inside an ltx:para: set its class to ltx_indent;
#   - where an ltx:para may be started: open one, indented unless the previous
#     sibling is a (toc)title;
#   - anywhere else (or with no current node): do nothing.
DefConstructorI('\indent', undef, sub {
    my ($document) = @_;
    my $node = $document->getElement;
    if (!$node) { }
    elsif ($document->getNodeQName($node) eq 'ltx:para') {
      $node->setAttribute(class => "ltx_indent"); }
    elsif ($document->canContainSomehow($node, "ltx:para")) {
      # Used in a position where a paragraph can be started, start
      # However, perversely ignore indent on 1st para after sectioning titles
      my $prev     = $node->lastChild;
      my $noindent = $prev && ($document->getNodeQName($prev) =~ /^ltx:(?:toc)?title$/);
      $document->openElement("ltx:para", ($noindent ? () : (class => "ltx_indent"))); }
    # Otherwise ignore.
    return; });
# \noindent: same idea as \indent, but always using class ltx_noindent.
DefConstructorI('\noindent', undef, sub {
    my ($document) = @_;
    my $node = $document->getElement;
    if (!$node) { }
    elsif ($document->getNodeQName($node) eq 'ltx:para') {
      $node->setAttribute(class => "ltx_noindent"); }
    elsif ($document->canContainSomehow($node, "ltx:para")) {
      # Used in a position where a paragraph can be started, start
      $document->openElement("ltx:para", class => "ltx_noindent"); }
    # Otherwise ignore.
    return; });

# ltx:para represents a Logical Paragraph, whereas ltx:p is a `physical paragraph'.
# A para can contain both p and displayed equations and such.

# Remember; \par _closes_, not opens, paragraphs!
# Here, we want to close both an open p and para (if either are open).
# NOTE Also that the whole inPreamble bit is, I think, overused.
# For example, \par should be a NOOP in vertical mode, and that would generally make it
# ignored in the preamble.
DefConstructorI('\lx@normal@par', undef, sub {
    my ($document, %props) = @_;
    if ($props{inPreamble}) { }
    else {
      $document->maybeCloseElement('ltx:p');
      my $node  = $document->getElement;
      my $qname = ($node && $document->getNodeQName($node)) || '';
      if ($qname eq 'ltx:para' && !$node->getAttribute("class")) {    # Only set on the para about to close, if unknown!
        if (my $c = $props{class}) {
          $document->setAttribute($node, class => $c); } }
      $document->maybeCloseElement('ltx:para'); } },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    if (LookupValue('inPreamble')) {
      $whatsit->setProperty(inPreamble => 1); }
    else {
      # Check if flags were set by prior \par:
      if (my $c = LookupValue("next_para_class")) {
        $whatsit->setProperty(class => $c);
        AssignValue(next_para_class => undef); }
      # Fish out flags for next ltx:para, to be used when the next \par closes:
      if (!LookupRegister('\parindent')->valueOf) {
        # respect \parindent if no overrides are given
        AssignValue(next_para_class => "ltx_noindent"); }
      # Vertical adjustments
      # Pending \vadjust material is reset globally and digested now; its result is
      # what this hook returns.
      if (my $vadj = LookupValue('vAdjust')) {
        AssignValue(vAdjust => [], 'global');
        Digest(Tokens(@$vadj)); }
      else {
        return; } } },
  properties => { alignmentSkippable => 1 },
  alias      => '\par');

Let('\par', '\lx@normal@par');

Tag('ltx:para', autoClose => 1, autoOpen => 1, afterClose => \&pruneEmpty);

# afterClose hook for ltx:para: remove the node when it contains no element children.
# When the removed node had no preceding element sibling, or that sibling is not
# an ltx:para, the parent is given the class 'ltx_pruned_first'.
sub pruneEmpty {
  my ($document, $node) = @_;
  # In some cases we could have e.g. a \noindent followed by a {table},
  # in which case we end up with an empty ltx:para which we can prune.
  if (!scalar(element_nodes($node))) {
    my $prev = element_prev($node);
    if (!$prev || ($document->getNodeQName($prev) ne 'ltx:para')) {    # If $node WAS the 1st child
      $document->addClass($node->parentNode, 'ltx_pruned_first'); }
    $node->unlinkNode; }
  return; }

# afterClose hook for ltx:p: strip plain spaces from both ends of the node's content.
sub trimNodeWhitespace {
  my ($document, $node) = @_;
  trimNodeLeftWhitespace($document, $node);
  trimNodeRightWhitespace($document, $node);
  return; }

# Remove leading plain spaces from the first text found by descending through
# first children.  Only U+0020 is removed.
sub trimNodeLeftWhitespace {
  my ($document, $node) = @_;
  if (my (@children) = $node->childNodes) {
    my $child = $children[0];
    my $type  = $child->nodeType;
    if ($type == XML_TEXT_NODE) {
      my $string = $child->data;
      #      if($string =~ s/^\s+//){
      # with some trepidation, I don't think we want to trim nbsp!
      if ($string =~ s/^ +//) {
        $child->setData($string); } }
    elsif ($type == XML_ELEMENT_NODE) {
      trimNodeLeftWhitespace($document, $child); } }
  return; }

# Remove trailing plain spaces from the last text found by descending through
# last children.  Mirrors trimNodeLeftWhitespace: only U+0020 is removed, so that
# a trailing nbsp survives (the former \s+ pattern stripped it as well).
sub trimNodeRightWhitespace {
  my ($document, $node) = @_;
  if (my (@children) = $node->childNodes) {
    my $child = $children[-1];
    my $type  = $child->nodeType;
    if ($type == XML_TEXT_NODE) {
      my $string = $child->data;
      #      if($string =~ s/\s+$//){
      # as on the left-hand side: we don't want to trim nbsp!
      if ($string =~ s/ +$//) {
        $child->setData($string); } }
    elsif ($type == XML_ELEMENT_NODE) {
      trimNodeRightWhitespace($document, $child); } }
  return; }

Tag('ltx:p', autoClose => 1, autoOpen => 1, afterClose => \&trimNodeWhitespace);

#======================================================================
# Paragraph Shape
#----------------------------------------------------------------------
# \parshape iq specifies an arbitrary paragraph shape.
# \parshape !?!??
# Reads the count n and then consumes (and discards) n pairs of dimensions.
DefPrimitive('\parshape SkipSpaces SkipMatch:= Number', sub {
    my ($stomach, $n) = @_;
    $n = $n->valueOf;
    my $gullet = $stomach->getGullet;
    for (my $i = 0 ; $i < $n ; $i++) {
      $gullet->readDimension; $gullet->readDimension; }
    # we _could_ conceivably store this somewhere for some attempt at stylistic purpose...
    return; });

#======================================================================
# Paragraph Shape
#----------------------------------------------------------------------
# \prevgraf iq is the number of lines in the paragraph most recently completed or partially completed.
# \spacefactor iq controls interword spacing.
# \emergencystretch pd is glue used in the third pass made for bad paragraphs.
# \hangindent pd is the amount of hanging indentation.
# \hsize pd is the width of normal lines in a paragraph.
# \lineskiplimit pd is the cutoff used to select between \baselineskip and \lineskip.
# \parindent pd is the width of indentation at the beginning of a paragraph.
# \baselineskip pg is glue added between lines to keep their baselines consistently spaced.
+# \leftskip pg is glue added at the left of every line in a paragraph. +# \rightskip pg is glue added at the right of every line in a paragraph. +# \lineskip pg is alternate interline glue used if the \baselineskip glue is not feasible. +# \parskip pg is extra glue put between paragraphs. +# \parfillskip pg is glue which finishes the last line of a paragraph. +# \spaceskip pg is alternate interword glue. +# \xspaceskip pg is alternate intersentence glue. +# \adjdemerits pi holds the demerits for visually incompatible adjacent lines. +# \doublehyphendemerits pi holds the demerits added if two consecutive lines end with discretionary breaks. +# \finalhyphendemerits pi holds the demerits added if the penultimate line in a paragraph ends with a discretionary break. +# \hangafter pi is the number of lines before hanging indentation changes. +# \looseness pi tells TeX to try and increase or decrease the number of lines in a paragraph. + +# \tolerance pi is the acceptable \badness of lines after hyphenation. +# \pretolerance pi is the acceptable \badness of lines in a paragraph before hyphenation is attempted. 
+ +DefRegister('\spacefactor' => Number(0)); +DefRegister('\prevgraf' => Number(0)); +DefRegister('\emergencystretch' => Dimension(0)); +DefRegister('\hangindent' => Dimension(0)); +DefRegister('\hsize' => Dimension('6.5in')); +DefRegister('\lineskip' => Glue('1pt')); +DefRegister('\lineskiplimit' => Dimension(0)); +DefRegister('\parindent' => Dimension('20pt')); +DefRegister('\baselineskip' => Glue('12pt')); +DefRegister('\leftskip' => Glue(0)); +DefRegister('\rightskip' => Glue(0)); +DefRegister('\parskip' => Glue('0pt plus 1pt')); +DefRegister('\parfillskip' => Glue('0pt plus 1fil')); +DefRegister('\spaceskip' => Glue(0)); +DefRegister('\xspaceskip' => Glue(0)); +DefRegister('\adjdemerits' => Number(10000)); +DefRegister('\doublehyphendemerits' => Number(10000)); +DefRegister('\finalhyphendemerits' => Number(5000)); +DefRegister('\hangafter' => Number(0)); +DefRegister('\looseness' => Number(0)); +DefRegister('\tolerance' => Number(200)); +DefRegister('\pretolerance' => Number(100)); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_Penalties.pool.ltxml b/lib/LaTeXML/Engine/TeX_Penalties.pool.ltxml new file mode 100644 index 000000000..0ed49df5b --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Penalties.pool.ltxml @@ -0,0 +1,57 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Penalties | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Penalties Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#====================================================================== +# Adding/removing penalties +#---------------------------------------------------------------------- +# \penalty c adds a penalty to the current list. +# \unpenalty c removes a penalty from the current list. +# \lastpenalty iq is 0 or the last penalty on the current list. + +DefPrimitive('\penalty Number', undef); +DefPrimitiveI('\unpenalty', undef, undef); +DefRegister('\lastpenalty' => Number(0), readonly => 1); + +#====================================================================== +# values for various penalties +#---------------------------------------------------------------------- +# \brokenpenalty pi is the penalty added after a line ending with an hyphenated word. +# \clubpenalty pi is the penalty added after the first line in a paragraph. +# \exhyphenpenalty pi is the penalty for a line break after an explicit hyphen. +# \floatingpenalty pi is the penalty for insertions that are split between pages. +# \hyphenpenalty pi is the penalty for a line break after a discretionary hyphen. +# \interlinepenalty pi is the penalty added between lines in a paragraph. +# \linepenalty pi is an amount added to the \badness calculated for every line in a paragraph. +# \outputpenalty pi holds the penalty from the current page break. +# \widowpenalty pi is the penalty added after the penultimate line in a paragraph. 
+ +DefRegister('\brokenpenalty' => Number(100)); +DefRegister('\clubpenalty' => Number(150)); +DefRegister('\exhyphenpenalty' => Number(50)); +DefRegister('\floatingpenalty' => Number(0)); +DefRegister('\hyphenpenalty' => Number(50)); +DefRegister('\interlinepenalty' => Number(0)); +DefRegister('\linepenalty' => Number(10)); +DefRegister('\outputpenalty' => Number(0)); +DefRegister('\widowpenalty' => Number(150)); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Engine/TeX_Registers.pool.ltxml b/lib/LaTeXML/Engine/TeX_Registers.pool.ltxml new file mode 100644 index 000000000..0f875559f --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Registers.pool.ltxml @@ -0,0 +1,106 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Registers | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. | # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Registers Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#====================================================================== +# Accessing Registers +#---------------------------------------------------------------------- +# \count iq assigns an integer to a \count register. +# \dimen iq assigns a to a \dimen register. +# \skip iq assigns to a \skip register. 
# \toks iq assigns a token list to a \toks register.

# The numbered register families; each takes the register number as its argument.
DefRegister('\count Number'  => Number(0));
DefRegister('\dimen Number'  => Dimension(0));
DefRegister('\skip Number'   => Glue(0));
DefRegister('\muskip Number' => MuGlue(0));
DefRegister('\toks Number'   => Tokens());

#======================================================================
# Defining Registers, shorthands
#----------------------------------------------------------------------
# \countdef c creates a symbolic name for a \count register.
# \dimendef c creates a symbolic name for a \dimen register.
# \skipdef  c creates a symbolic name for a \skip register.
# \toksdef  c creates a symbolic name for a \toks register.

# Note that before \countdef, \dimendef, \skipdef, \muskipdef, \chardef, \mathchardef, \toksdef
# read the register number, they let the token to \relax in case the def is immediately followed
# by a use or assignment; See TeX Program \s 1224
# See below for \chardef & \mathchardef
# Note that these define a "shorthand" for eg. \count123, but are NOT macros!

# Shared worker for the \...def primitives.
#   $stomach : used to reach the gullet, from which the register number is read
#   $cs      : the token being defined
#   $type    : the register family cs (eg. '\count'); prefix of the shorthand's address
#   $init    : initial value handed to DefRegisterI
sub shorthandDef {
  my ($stomach, $cs, $type, $init) = @_;
  # First make $cs mean \relax; a direct assignMeaning, i.e. a Let w/o AfterAssign
  my $relax = $STATE->lookupMeaning(T_CS('\relax'));
  $STATE->assignMeaning($cs, $relax);
  # Only now read the number, and alias $cs to the register at <type><number>.
  my $index = $stomach->getGullet->readNumber()->valueOf;
  DefRegisterI($cs, undef, $init, address => $type . $index);
  AfterAssignment();
  return; }

DefPrimitive('\countdef SkipSpaces Token SkipSpaces SkipMatch:=', sub {
    my ($stomach, $cs) = @_;
    shorthandDef($stomach, $cs, '\count', Number(0)); });

DefPrimitive('\dimendef SkipSpaces Token SkipSpaces SkipMatch:=', sub {
    my ($stomach, $cs) = @_;
    shorthandDef($stomach, $cs, '\dimen', Dimension(0)); });

DefPrimitive('\skipdef SkipSpaces Token SkipSpaces SkipMatch:=', sub {
    my ($stomach, $cs) = @_;
    shorthandDef($stomach, $cs, '\skip', Glue(0)); });

DefPrimitive('\muskipdef SkipSpaces Token SkipSpaces SkipMatch:=', sub {
    my ($stomach, $cs) = @_;
    shorthandDef($stomach, $cs, '\muskip', MuGlue(0)); });

DefPrimitive('\toksdef SkipSpaces Token SkipSpaces SkipMatch:=', sub {
    my ($stomach, $cs) = @_;
    shorthandDef($stomach, $cs, '\toks', Tokens()); });

#======================================================================
# Numeric Registers
#----------------------------------------------------------------------
# \advance c increases or decreases a numeric variable.
# \multiply c multiplies a register by an integer.
# \divide c divides a register by an integer.

# \advance <variable> [by] <value>
# Adds a value to the variable. The value is read from the gullet with the
# type taken from the variable's definition ($defn->isRegister).
# Nothing is done when no usable variable was parsed.
DefPrimitive('\advance Variable SkipKeyword:by', sub {
    my ($stomach, $var) = @_;
    return () unless $var;
    my ($defn, @args) = @$var;
    return () if !$defn || $defn eq "missing";
    local $LaTeXML::CURRENT_TOKEN = $defn;
    $defn->setValue($defn->valueOf(@args)->add($stomach->getGullet->readValue($defn->isRegister)),
      undef, @args); });

# \multiply <variable> [by] <number>
# Multiplies the variable's current value by the number.
# Like \advance, does nothing when the variable's definition is absent or is
# the placeholder string "missing" (there would be no object to call valueOf on).
DefPrimitive('\multiply Variable SkipKeyword:by Number', sub {
    my ($stomach, $var, $scale) = @_;
    return () unless $var;
    my ($defn, @args) = @$var;
    return () if !$defn || $defn eq "missing";
    $defn->setValue($defn->valueOf(@args)->multiply($scale->valueOf), undef, @args); });

# \divide <variable> [by] <number>
# Divides the variable's current value by the number.
# A zero divisor is reported as an Error and replaced by 1.
# Like \advance, does nothing when the variable's definition is absent or "missing".
DefPrimitive('\divide Variable SkipKeyword:by Number', sub {
    my ($stomach, $var, $scale) = @_;
    return () unless $var;
    my ($defn, @args) = @$var;
    return () if !$defn || $defn eq "missing";
    my $denom = $scale->valueOf;
    if ($denom == 0) {
      Error('misdefined', $scale, $stomach, "Illegal \\divide by 0; assuming 1");
      $denom = 1; }
    $defn->setValue($defn->valueOf(@args)->divide($denom), undef, @args); });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
diff --git a/lib/LaTeXML/Engine/TeX_Tables.pool.ltxml b/lib/LaTeXML/Engine/TeX_Tables.pool.ltxml new file mode 100644 index 000000000..232df01ca --- /dev/null +++ b/lib/LaTeXML/Engine/TeX_Tables.pool.ltxml @@ -0,0 +1,815 @@ +# -*- mode: Perl -*- +# /=====================================================================\ # +# | TeX_Tables | # +# | Core TeX Implementation for LaTeXML | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US.
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Package::Pool; +use strict; +use warnings; +use LaTeXML::Package; +#use Unicode::Normalize; +#use LaTeXML::Util::Pathname; +#use List::Util qw(min max); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Tables Family of primitive control sequences +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#---------------------------------------------------------------------- +# This is where ALL alignments start & finish +# This creates the object representing the entire alignment! +DefConstructor('\@start@alignment', + "#alignment", + reversion => sub { Revert($_[0]->getProperty('alignment')); }, + sizer => '#alignment', + # beforeDigest => sub { $_[0]->bgroup; }, + afterDigest => sub { + my ($stomach, $whatsit) = @_; + $stomach->bgroup; + if (my $alignment = LookupValue('Alignment')) { + $whatsit->setProperty(alignment => $alignment); + $alignment->setBody($whatsit); + digestAlignmentBody($stomach, $whatsit); } + $stomach->egroup; + return; }); + +# Seems odd to need both end markers here... +DefMacroI('\@finish@alignment', undef, + '\hidden@crcr\@close@alignment'); +DefPrimitive('\@close@alignment', sub { }); + +# & gives an error except within the right context +# (which should redefine it!) +DefConstructorI('&', undef, sub { Error('unexpected', '&', $_[0], "Stray alignment \"&\""); }); + +Tag('ltx:td', afterClose => \&trimNodeWhitespace); + +#---------------------------------------------------------------------- +# Primitive column types; +# This is really LaTeX, but the mechanisms are used behind-the-scenes here, too. 
# Each handler below adds to the template currently being built, $LaTeXML::BUILD_TEMPLATE.

# | : a \vrule between columns
DefColumnType('|', sub {
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->addBetweenColumn(T_CS('\vrule'), T_CS('\relax'));
    return; });

# l : content followed by \hfil
DefColumnType('l', sub {
    my $fill_after = Tokens(T_CS('\hfil'));
    $LaTeXML::BUILD_TEMPLATE->addColumn(after => $fill_after);
    return; });

# c : \hfil on both sides of the content
DefColumnType('c', sub {
    my $fill_before = Tokens(T_CS('\hfil'));
    my $fill_after  = Tokens(T_CS('\hfil'));
    $LaTeXML::BUILD_TEMPLATE->addColumn(before => $fill_before, after => $fill_after);
    return; });

# r : \hfil followed by content
DefColumnType('r', sub {
    my $fill_before = Tokens(T_CS('\hfil'));
    $LaTeXML::BUILD_TEMPLATE->addColumn(before => $fill_before);
    return; });

# p{width} : content is wrapped as \vtop{\hbox to<width>\relax{ ... }},
# attached at the top and justified.
DefColumnType('p{Dimension}', sub {
    my ($gullet, $width) = @_;
    my $open = Tokens(T_CS('\vtop'), T_BEGIN, T_CS('\hbox'),
      T_LETTER('t'), T_LETTER('o'), $width->revert, T_CS('\relax'),
      T_BEGIN);
    my $close = Tokens(T_END, T_END);
    $LaTeXML::BUILD_TEMPLATE->addColumn(
      before  => $open,
      after   => $close,
      vattach => 'top',
      align   => 'justify',
    );
    return; });

# *{n}{pattern} : returns the pattern's tokens repeated n times
DefColumnType('*{Number}{}', sub {
    my ($gullet, $n, $pattern) = @_;
    my @expanded = ();
    for (1 .. $n->valueOf) {
      push(@expanded, $pattern->unlist); }
    return @expanded; });

# @{filler} : put filler between columns; intercolumn is disabled both before and after adding it
DefColumnType('@{}', sub {
    my ($gullet, $filler) = @_;
    my $template = $LaTeXML::BUILD_TEMPLATE;
    $template->disableIntercolumn;
    $template->addBetweenColumn($filler->unlist);
    $template->disableIntercolumn;
    return; });

#======================================================================
# Table Line endings
#----------------------------------------------------------------------
# \cr       c  is a visible command which ends one row in a table.
# \crcr     c  is an alternate to \cr.
# \everycr  pt holds tokens inserted after every \cr or nonredundant \crcr.
# \tabskip  pg is optional glue put between columns in a table.

# The visible row terminators; each constructs a newline.
DefConstructorI('\cr',   undef, "\n");
DefConstructorI('\crcr', undef, "\n");
# These are useful for reversion of higher-level macros that use alignment
# internally, but don't use explicit &,\cr in the user markup
# (their alias is the empty string).
DefConstructorI('\hidden@cr',    undef, "\n", alias => '');
DefConstructorI('\hidden@crcr',  undef, "\n", alias => '');
DefConstructorI('\hidden@align', undef, "",   alias => '');

DefRegister('\everycr' => Tokens());
DefRegister('\tabskip' => Glue(0));

#======================================================================
# Alignment exceptions
#----------------------------------------------------------------------
# \noalign c inserts vertical mode material after a \cr in a table.
# \omit    c is used in the body of a table to change an entry's template from the one in the preamble.
# \span    c combines adjacent entries in a table into a single entry.

# Handled directly in alignments, but must be defined as non-macros.
# When one of them is executed as an ordinary primitive it is misplaced:
# an Error is reported, a group is opened, and the alignment character plus
# all three control sequences are rebound to \relax.
my $neutralize_alignment_tokens = sub {
  for my $cs (T_ALIGN, T_CS('\noalign'), T_CS('\omit'), T_CS('\span')) {
    Let($cs, T_CS('\relax')); }
  return; };

DefPrimitiveI('\noalign', undef, sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    Error('unexpected', '\noalign', $stomach, "\\noalign cannot be used here");
    $neutralize_alignment_tokens->();
    return; });
# Note: \omit reports its Error before opening the group; the other two open the group first.
DefPrimitiveI('\omit', undef, sub {
    my ($stomach) = @_;
    Error('unexpected', '\omit', $stomach, "\\omit cannot be used here");
    $stomach->bgroup;
    $neutralize_alignment_tokens->();
    return; });
DefPrimitiveI('\span', undef, sub {
    my ($stomach) = @_;
    $stomach->bgroup;
    Error('unexpected', '\span', $stomach, "\\span cannot be used here");
    $neutralize_alignment_tokens->();
    return; });

#======================================================================
# Horizontal alignments
#----------------------------------------------------------------------
#
# \halign c begins the horizontal alignment of material (i.e., makes a table containing rows).

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Now, for \halign itself
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# See \@@LTX@noalign for some \noalign cases
# See \@multicolumn for cases of \span,\omit
# See alignmentBindings for default bindings
# But also see others for different handling of (eg) open@row, etc.
# Probably we have to handle these cases by more generic default code
# and appropriate tweaks of alignment data????

# Algorithm:
#   open@alignment
#   Loop while read_column
#======================================================================
# The constructor itself only emits the 'alignment' property; all the work is
# done in afterDigest: parse the template, create the Alignment bindings,
# then digest the body (which stores the alignment on the whatsit).
DefConstructor('\halign BoxSpecification',
  "#alignment",
  # Reassemble as \halign <spec> { <template> \cr <alignment> }
  reversion => sub {
    my ($whatsit, $spec) = @_;
    my $template  = $whatsit->getProperty('template');
    my $alignment = $whatsit->getProperty('alignment');
    Tokens(T_CS('\halign'), Revert($spec), T_BEGIN, Revert($template), T_CS('\cr'),
      Revert($alignment), T_END); },
  bounded => 1,
  #  sizer => '#1',
  sizer       => sub { $_[0]->getProperty('alignment')->getSize; },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    $stomach->bgroup;    # This will be closed by the \halign's closing } (or will it?)
    my $template = parseHAlignTemplate($stomach->getGullet, $whatsit);
    my $spec     = $whatsit->getArg(1);
    # The 'to' key of the box specification (if any) becomes the width attribute.
    alignmentBindings($template, undef,
      attributes => { width => orNull(GetKeyVal($spec, 'to')) });
    digestAlignmentBody($stomach, $whatsit);
    $stomach->egroup;
    $LaTeXML::ALIGN_STATE--;    # Balance the opening { OUTSIDE of the masking of ALIGN_STATE
    return; });

# Parse an \halign style alignment template from Gullet
#   $gullet  : the tokens are read from here, starting at the opening brace
#   $whatsit : receives the resulting template as its 'template' property
# Returns the LaTeXML::Core::Alignment::Template.
# Tokens are read unexpanded (except the one following \span) until a \cr or \crcr.
# Each column, ended by & or \cr/\crcr, records the tokens before and after its #
# (leading spaces dropped) along with the \tabskip in effect for it;
# a \tabskip assignment within the template applies from the next column on.
sub parseHAlignTemplate {
  my ($gullet, $whatsit) = @_;
  my $t = $gullet->readNonSpace;
  Error('expected', '\bgroup', $gullet, "Missing \\halign box") unless $t->defined_as(T_BEGIN);
  my $before      = 1;     # true if we're before a # in current column
  my @pre         = ();    # tokens before the # of the current column
  my @post        = ();    # tokens after the # of the current column
  my @cols        = ();    # column specs collected so far
  my $repeated    = 0;     # set once a & is seen while still before a #
  my @nonreps     = ();    # the columns collected before that &
  my $tabskip     = LookupRegister('\tabskip');
  my $nexttabskip = $tabskip;
  my @tokens      = ();    # the template's tokens, as read
  ## Only expand certain things; See TeX book p.238
  local $LaTeXML::ALIGN_STATE = 1000000;
  while ($t = $gullet->readToken) {
    my $cc = $t->getCatcode;
    if ($t->equals(T_CS('\tabskip'))) {    # Read the tabskip assignment
      $gullet->readKeyword('=');
      $nexttabskip = $gullet->readGlue; }
    elsif ($t->equals(T_CS('\span'))) {    # ex-span-ded next token.
      $gullet->unread($gullet->readXToken(0)); }
    elsif ($cc == CC_PARAM) {              # Found the template's column slot
      $before = 0;
      push(@tokens, $t); }
    elsif (($cc == CC_ALIGN)
      || $t->equals(T_CS('\cr')) || $t->equals(T_CS('\crcr'))) {    # End the column
      if ($before) {                                                # Leading & ?
        $repeated = 1;
        @nonreps  = @cols; @cols = (); }    # A & while we're before a column means Repeated columns
      else {                                # Finished column spec; add it
        ## How should we be handling tabskip? An attribute on the cell or spacing?
        push(@cols, {
            tabskip => $tabskip,
            before  => Tokens(beforeCellUnlist(Tokens(@pre))),
            after   => Tokens(afterCellUnlist(Tokens(@post))) });
        $tabskip = $nexttabskip;
        @pre     = @post = (); $before = 1; }
      last unless $cc == CC_ALIGN;          # \cr or \crcr ends the whole template
      push(@tokens, $t); }
    elsif ($before) {    # Other random tokens go into the column's pre-template
      push(@pre, $t) if @pre || ($cc != CC_SPACE);
      push(@tokens, $t); }
    else {               # Or the post-template
      push(@post, $t) if @post || ($cc != CC_SPACE);
      push(@tokens, $t); } }
  # Now create & return the template object
  my $template = LaTeXML::Core::Alignment::Template->new(
    ($repeated
      ? (columns => [@nonreps], repeated => [@cols])
      : (columns => [@cols])),
    tokens => [@tokens]);
  $whatsit->setProperty(template => $template);
  return $template; }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# And the general alignment processing.
# If the Template is appropriately constructed, either by \halign or various \begin{tabular}
# the body of the alignment is processed the same way.

# Create the LaTeXML::Core::Alignment for $template and store it as the value 'Alignment'.
#   $template   : the alignment template
#   $mode       : mode name; defaults to the current 'MODE' value.
#                 A mode ending in "math" selects ltx:XMArray/ltx:XMRow/ltx:XMCell
#                 as container/row/cell elements, otherwise ltx:tabular/ltx:tr/ltx:td.
#   %properties : passed on as the alignment's properties
# Also rebinds the math-shift character to the math- or text-mode variant of \@dollar@in@...
sub alignmentBindings {
  my ($template, $mode, %properties) = @_;
  $mode = LookupValue('MODE') unless $mode;
  my $ismath    = $mode =~ /math$/;
  my $container = ($ismath ? 'ltx:XMArray' : 'ltx:tabular');
  my $rowtype   = ($ismath ? 'ltx:XMRow'   : 'ltx:tr');
  my $coltype   = ($ismath ? 'ltx:XMCell'  : 'ltx:td');
  # The open* callbacks pass any further arguments through to openElement.
  my $alignment = LaTeXML::Core::Alignment->new(
    template       => $template,
    openContainer  => sub { $_[0]->openElement($container, @_[1 .. $#_]); },
    closeContainer => sub { $_[0]->closeElement($container); },
    openRow        => sub { $_[0]->openElement($rowtype, @_[1 .. $#_]); },
    closeRow       => sub { $_[0]->closeElement($rowtype); },
    openColumn     => sub { $_[0]->openElement($coltype, @_[1 .. $#_]); },
    closeColumn    => sub { $_[0]->closeElement($coltype); },
    isMath         => $ismath,
    properties     => {%properties});
  AssignValue(Alignment => $alignment);
  Debug("Halign $alignment: New " . $template->show) if $LaTeXML::DEBUG{halign};
  Let(T_MATH, ($ismath ? '\@dollar@in@mathmode' : '\@dollar@in@textmode'));
  return; }

# Row & column hooks; defined here with no expansion given.
DefMacroI('\@row@before',    undef, undef);
DefMacroI('\@row@after',     undef, undef);
DefMacroI('\@column@before', undef, undef);
DefMacroI('\@column@after',  undef, undef);

# Revert $arg with $LaTeXML::DUAL_BRANCH locally set to 'presentation'.
sub pRevert {
  my ($arg) = @_;
  local $LaTeXML::DUAL_BRANCH = 'presentation';
  return Revert($arg); }

# Revert $arg with $LaTeXML::DUAL_BRANCH locally set to 'content'.
sub cRevert {
  my ($arg) = @_;
  local $LaTeXML::DUAL_BRANCH = 'content';
  return Revert($arg); }

use constant T_close_alignment => T_CS('\@close@alignment');

# Digest the body of the alignment stored as the value 'Alignment', cell by cell.
#   $stomach : the stomach doing the digestion
#   $whatsit : receives the alignment as its 'alignment' property, and becomes the alignment's body
# Reports an Error and returns if there is no current alignment.
# Cells are obtained from digestAlignmentColumn until it returns no cell, or a cell
# with no ending type whose ending token is absent, a closing brace or \@close@alignment.
# Along the way the presentation and content reversions of the cells
# (trimmed of the column templates) and of the visible &,\cr tokens are accumulated,
# and finally stored on the alignment.
sub digestAlignmentBody {
  my ($stomach, $whatsit) = @_;
  my $gullet = $stomach->getGullet;
  local $LaTeXML::ALIGN_STATE = 0;
  # Now read & digest the body.
  # Note that the body MUST end with a \cr, and that we've made Special Arrangements
  # with \alignment@cr to recognize the end of the \halign
  my $alignment = LookupValue('Alignment');
  local $LaTeXML::READING_ALIGNMENT = $alignment;
  if (!$alignment) {
    Error('missing', 'alignment', $stomach, "There is no open alignment structure here");
    return; }
  $whatsit->setProperty(alignment => $alignment);
  $alignment->setBody($whatsit);
  Debug("Halign $alignment: BODY Processing...") if $LaTeXML::DEBUG{halign};
  my $lastwascr  = undef;    # true when the previous cell was ended by a \cr or \crcr
  my @reversion  = ();       # presentation reversion
  my @creversion = ();       # content reversion
  while (1) {
    my ($cell, $next, $type, $hidden) = digestAlignmentColumn($stomach, $alignment, $lastwascr);
    Debug("Halign $alignment: BODY got CELL"
        . "[" . $alignment->currentRowNumber . "," . $alignment->currentColumnNumber . "]"
        . ToString($cell) . " ended at " . Stringify($next)) if $LaTeXML::DEBUG{halign};
    if (!$cell) {
      Debug("Halign $alignment: BODY DONE!") if $LaTeXML::DEBUG{halign};
      last; }
    if ($cell) {
      push(@reversion,  trimColumnTemplate($alignment, pRevert($cell)));
      push(@creversion, trimColumnTemplate($alignment, cRevert($cell))); }
    extractAlignmentColumn($alignment, $cell);
    $lastwascr = undef;
    if (!$type && (!$next
        || $next->defined_as(T_END)                    # End of alignment
        || $next->defined_as(T_close_alignment))) {    # End of alignment
      $alignment->endRow();
      last; }
    elsif ($type eq 'align') {
      $alignment->endColumn();
      if (!$hidden) {
        push(@reversion,  $next);                      # and record the &
        push(@creversion, $next); } }                  # and record the &
    elsif ($type eq 'insert') {
      $alignment->endColumn(); }
    elsif (($type eq 'cr') || ($type eq 'crcr')) {
      $alignment->endRow();
      if (!$hidden) {
        push(@reversion,  $next);
        push(@creversion, $next); }
      elsif ($type eq 'cr') {
        # A hidden 'cr': an argument is read & digested here, and its reversion recorded.
        my $arg = $stomach->digest($gullet->readArg());
        push(@reversion,  pRevert($arg));
        push(@creversion, cRevert($arg)); }
      elsif ($type eq 'crcr') { }
      $lastwascr = 1; }    # Note, in case next is \crcr
    elsif ($next) {
      Error('unexpected', $next, $stomach, "Column ended with " . Stringify($next)); } }
  $alignment->endRow();
  $alignment->setReversion(Tokens(@reversion));
  $alignment->setContentReversion(Tokens(@creversion));
  Debug("Halign $alignment: BODY DONE!\n"
      . "=> " . join(',', map { Stringify($_); } @reversion)) if $LaTeXML::DEBUG{halign};
  return; }

use constant T_crcr           => T_CS('\crcr');
use constant T_hidden_crcr    => T_CS('\hidden@crcr');
use constant T_omit           => T_CS('\omit');
use constant T_noalign        => T_CS('\noalign');
use constant T_hidden_noalign => T_CS('\hidden@noalign');

# Read & digest an alignment column's data,
# accommodating the current template and any special cs's
# Returns the column's digested boxes, the ending token, and its alignment type.
#   $stomach   : the stomach; its gullet supplies the tokens
#   $alignment : the alignment being read
#   $lastwascr : true if the previous cell ended with a \cr or \crcr;
#                a leading \crcr (or \hidden@crcr) is then skipped
# Returns a list (cell, token, type, hidden):
#   * normally the digested cell (a List), the token that ended it, and the type
#     and hidden flag which the gullet's isColumnEnd reported for that token;
#   * for a leading \noalign: its digested argument, a \cr token, 'cr' and undef;
#   * (undef, token, undef, undef) when input ran out, or the scan stopped at
#     a closing brace or \@close@alignment.
sub digestAlignmentColumn {
  my ($stomach, $alignment, $lastwascr) = @_;
  my $gullet = $stomach->getGullet;
  my $ismath = $STATE->lookupValue('IN_MATH');
  local @LaTeXML::LIST = ();    # accumulates the digested boxes of this cell
  # Scan for leading \omit, skipping over (& saving) \hline.
  Debug("Halign $alignment: COLUMN starting scan "
      . "(" . ($ismath ? " math" : " text") . ")") if $LaTeXML::DEBUG{halign};
  my $token;
  my $spanning = 0;             # true after a 'span' column end: continue into the next column
  while (1) {    # Outer loop; collects 1 column (possibly multiple spans) return from within!
    ## Scan till we get something NOT \omit, \noalign
    while ($token = $gullet->readXToken(0)) {
      if ($token->equals(T_SPACE)          # Skip leading space.
        || $token->equals(T_CS('\par'))    # Skip or blank line(?)
        || ($lastwascr &&                  # Or \crcr following a \cr
          ($token->defined_as(T_crcr) || $token->defined_as(T_hidden_crcr)))) {
      }
      elsif ($token->defined_as(T_omit)) {    # \omit removes template for this column.
        Debug("Halign $alignment: OMIT at " . Stringify($token)) if $LaTeXML::DEBUG{halign};
        $alignment->startRow() unless $$alignment{in_row};
        $alignment->omitNextColumn; }
      elsif ($token->defined_as(T_noalign)) {    # \puts something in vertical list
        Debug("Halign $alignment: noalign at " . Stringify($token)) if $LaTeXML::DEBUG{halign};
        # End any row in progress, then digest the argument within
        # what is started as a column, followed by lastColumn.
        $alignment->endRow() if $$alignment{in_row};
        $alignment->startColumn(1);
        $alignment->lastColumn;
        my $r = $stomach->digest($gullet->readArg);
        $alignment->endRow();
        # NOTE: `return' takes the whole comma list, so this returns 4 values, the last being undef.
        return ($r, T_CS('\cr'), 'cr'), undef; }    # Pretend this is a whole row???
      elsif ($token->defined_as(T_hidden_noalign)) {    # \puts something in vertical list
        Debug("Halign $alignment: COLUMN invisible noalign") if $LaTeXML::DEBUG{halign};
        push(@LaTeXML::LIST, $stomach->invokeToken($token)); }
      else {
        last; } }
    Debug("Halign $alignment: COLUMN end scan at " . Stringify($token)) if $LaTeXML::DEBUG{halign};
    # Out of input, or at the close of the alignment: there is no cell to report.
    if (!$token || $token->defined_as(T_END) || $token->defined_as(T_close_alignment)) {
      return (undef, $token, undef, undef); }
    # Next column, unless spanning (then combine columns)
    if ($spanning) {
      $spanning = 0;
      $alignment->nextColumn; }
    else {
      $alignment->startColumn(); }
    # Push before template, Marker and put the token back
    Debug("Halign $alignment: COLUMN preload at "
        . Stringify(Tokens($alignment->getColumnBefore, T_MARKER('before-column'), $token)))
      if $LaTeXML::DEBUG{halign};
    $gullet->unread($alignment->getColumnBefore, T_MARKER('before-column'), $token);
    # Now digest tokens until the gullet recognizes one as ending the column.
    while ($token = $gullet->readXToken(0)) {
      my ($atoken, $type, $hidden) = $gullet->isColumnEnd($token);
      if ($atoken) {
        if ($type eq 'span') {    # next column, but continue accumulating
          Debug("Halign $alignment: COLUMN span") if $LaTeXML::DEBUG{halign};
          $spanning = 1;
          last; }
        else {
          Debug("Halign $alignment: COLUMN ended with " . Stringify($token) . "\n"
              . " => " . ToString(List(@LaTeXML::LIST))) if $LaTeXML::DEBUG{halign};
          return (List(@LaTeXML::LIST, mode => ($ismath ? 'math' : 'text')),
            $token, $type, $hidden); } }
      elsif ($token->defined_as(T_hidden_noalign)) {    # \puts something in vertical list
        Debug("Halign $alignment: COLUMN invisible noalign") if $LaTeXML::DEBUG{halign};
        push(@LaTeXML::LIST, $stomach->invokeToken($token)); }
      else {    # Else, we're getting some actual content for the column
        Debug("Halign $alignment: COLUMN invoking " . Stringify($token)) if $LaTeXML::DEBUG{halign};
        push(@LaTeXML::LIST, $stomach->invokeToken($token));
        Debug("Halign $alignment: COLUMN " . Stringify($token) . " ==> " . Stringify(List(@LaTeXML::LIST)))
          if $LaTeXML::DEBUG{halign};
      } } }
  return; }

# This attempts to trim off the column template parts from contents of the full column,
# leaving only the author supplied part for a sensible reversion.
# It's not nearly clever enough, given that macros can be in the template,
# but works surprisingly well so far.
# A better alternative might be based on sneaking some Marker tokens/boxes through
# but they would likely interfere with the macros themselves.
#
# Arguments: $alignment (the alignment being processed) and @tokens (the column's tokens).
# Returns a Tokens object holding @tokens with matched template tokens removed from both ends;
# when the current row is flagged {pseudorow}, @tokens is returned untrimmed.
sub trimColumnTemplate {
  my ($alignment, @tokens) = @_;
  return Tokens(@tokens) if $alignment->currentRow->{pseudorow};
  my @pre  = $alignment->getColumnBefore->unlist;
  my @post = $alignment->getColumnAfter->unlist;
  Debug("Halign $alignment: COLUMN Compare:\n"
      . " Column: " . ToString(Tokens(@tokens)) . "\n"
      . " Before: " . ToString(Tokens(@pre)) . "\n"
      . " After : " . ToString(Tokens(@post)) . "\n") if $LaTeXML::DEBUG{halign};
  # Every "before" template token is consumed in turn; a column token is dropped
  # only when it equals the template token currently being examined.
  while (scalar(@pre) && scalar(@tokens)) {
    my $t = shift(@pre);
    if ($t->equals($tokens[0])) {
      shift(@tokens); } }
  # Same procedure for the "after" template, working inward from the right end.
  while (scalar(@post) && scalar(@tokens)) {
    my $t = pop(@post);
    if ($t->equals($tokens[-1])) {
      pop(@tokens); } }
  Debug(" Trimmed: " . ToString(Tokens(@tokens))) if $LaTeXML::DEBUG{halign};
  return Tokens(@tokens); }

# Given the boxes for an alignment cell,
# extract & remove the various fills and rules from the ends to annotate the cell structure
#
# Arguments: $alignment (returns an empty list when false) and $boxes (a List of digested boxes).
# Side effects: updates the column spec of the starting column (align, border, boxes,
# lspaces, rspaces, colspan, possibly thead) and marks the spanned columns as skipped.
# Returns the rebuilt List of boxes with fills, vertical rules and edge spaces removed.
sub extractAlignmentColumn {
  my ($alignment, $boxes) = @_;
  return () unless $alignment;    # ??
      # Note: $n0,$n1 is a VERY round-about way of tracking the column spanning!
  my $ismath  = $STATE->lookupValue('IN_MATH');    # NOTE(review): not referenced again in this sub
  my $n0      = (LookupValue('alignmentStartColumn') || 0) + 1;
  my $n1      = $alignment->currentColumnNumber;
  my $colspec = $alignment->getColumn($n0);
  my $align   = $$colspec{align} || 'left';
  my $border  = '';
  # Peel off any boxes from both sides until we get the "meat" of the column.
  # from this we can establish borders, alignment and emptiness.
  # But we, of course, immediately put them back...
  my @boxes     = $boxes->unlist;
  my @saveleft  = ();
  my @saveright = ();
  my (@lspaces, @rspaces);
  # A tabskip recorded on the column is digested as an \hskip and counted as left spacing.
  if (my $skip = $$colspec{tabskip}) {
    push(@lspaces, Digest(Tokens(T_CS('\hskip'), $skip->revert, T_CS('\relax')))); }
  # Left edge: unwrap nested Lists, stop at the first fill (=> right aligned) or at real content.
  while (@boxes) {
    if (ref $boxes[0] eq 'LaTeXML::Core::List') {
      unshift(@boxes, shift(@boxes)->unlist); }
    elsif ($boxes[0]->getProperty('isFill')) {
      $align = 'right';
      shift(@boxes);
      last; }
    elsif ($boxes[0]->getProperty('isVerticalRule')) {
      $border .= 'l';
      if (my $prev = $alignment->getColumn($n0 - 1)) {    # space before | ? move to previous column
        $$prev{rspaces} = List(($$prev{rspaces} || ()), @lspaces) if @lspaces; }
      @lspaces = ();                                      # then discard
      shift(@boxes); }
    elsif ($boxes[0]->getProperty('isSpace')) {
      push(@lspaces, shift(@boxes)); }
    elsif ($boxes[0]->getProperty('isHorizontalRule')
      || $boxes[0]->getProperty('alignmentSkippable')
      || (ref $boxes[0] eq 'LaTeXML::Core::Comment')) {
      push(@saveleft, shift(@boxes)); }
    else {
      last; } }
  # Right edge: mirror image; a trailing fill turns 'right' into 'center'.
  # Saved boxes are unshifted so that they keep their original order.
  while (@boxes) {
    if (ref $boxes[-1] eq 'LaTeXML::Core::List') {
      push(@boxes, pop(@boxes)->unlist); }
    elsif ($boxes[-1]->getProperty('isFill')) {
      if ($align eq 'right') { $align = 'center'; }
      pop(@boxes);
      last; }
    elsif ($boxes[-1]->getProperty('isVerticalRule')) {
      $border .= 'r';
      @rspaces = ();    # discard spacing after rule!!! (should save for next column?)
      pop(@boxes); }
    elsif ($boxes[-1]->getProperty('isSpace')) {
      unshift(@rspaces, pop(@boxes)); }
    elsif ($boxes[-1]->getProperty('isHorizontalRule')
      || $boxes[-1]->getProperty('alignmentSkippable')
      || (ref $boxes[-1] eq 'LaTeXML::Core::Comment')) {
      unshift(@saveright, pop(@boxes)); }
    else {
      last; } }
  delete $$colspec{width} unless $align eq 'justify';
  # Replacing boxes with the fil padding & vertical rules stripped off
  @boxes = (@saveleft, @boxes, @saveright);
  $boxes = List(@boxes, mode => ($boxes->isMath ? 'math' : 'text'));
  # record relevant info in the Alignment.
  $$colspec{align}   = $align;
  $$colspec{border}  = $border = ($$colspec{border} || '') . $border;
  $$colspec{boxes}   = $boxes;
  $$colspec{lspaces} = List(@lspaces) if @lspaces;
  $$colspec{rspaces} = List(@rspaces) if @rspaces;
  $$colspec{colspan} = $n1 - $n0 + 1;

  if ($$alignment{in_tabular_head} || $$alignment{in_tabular_foot}) {
    $$colspec{thead}{column} = 1; }
  # Columns $n0+1 .. $n1 were absorbed by the span: flag them as skipped.
  for (my $i = $n0 + 1 ; $i <= $n1 ; $i++) {
    my $c = $alignment->getColumn($i);
    $$c{skipped} = 1 if $c; }
  Debug("Halign $alignment: INSTALL column " . join(',', map { $_ . "=" . ToString($$colspec{$_}); } sort keys %$colspec)) if $LaTeXML::DEBUG{halign};
  return $boxes; }
# As if: \@alignment@newline OptionalMatch:* [Dimension]
# Read arguments for \\, namely * and/or [Dimension]
# BUT optionally do it while skipping spaces (latex style) or not (ams style)
#
# Arguments:
#   $gullet     : the gullet tokens are read from
#   $skipspaces : when true, spaces are skipped before looking for * and again before [
# Returns ($star, $optional): $star is 1 when a * was read (undef otherwise);
# $optional is what readUntil returned for the material up to ] (undef otherwise).
# A look-ahead token that is neither * nor [ is put back with unread.
sub readNewlineArgs {
  my ($gullet, $skipspaces) = @_;
  # NOTE(review): presumably the large ALIGN_STATE keeps & and \cr from being treated
  # as column/row ends during this look-ahead -- confirm against the gullet code.
  local $LaTeXML::ALIGN_STATE = 1000000;
  $gullet->skipSpaces if $skipspaces;
  my $next = $gullet->readToken;
  my ($star, $optional);
  if ($next && $next->equals(T_OTHER('*'))) {
    $star = 1;
    $gullet->skipSpaces if $skipspaces;
    $next = $gullet->readToken; }
  if ($next && $next->equals(T_OTHER('['))) {
    $optional = $gullet->readUntil(T_OTHER(']'));
    $next     = undef; }    # the [ was consumed; nothing to put back
  $gullet->unread($next) if $next;
  return ($star, $optional); }

# VERY tricky (and mostly Wrong).
# The issue is for \\ to look ahead for * and [],
# Eventually we'll expand into \cr (which should be preceded by the RHS of the template)
# BUT it should NOT trigger the template if it bumps into a &
# which happens when the 1st column of an alignment is empty.
# In proper LaTeX this is inhibited by a curious construct
#    {\ifnum0=`}\fi
# and possibly by proper tracking of a Master Counter !?!?!?
# But we're not there (yet)

# This is the internal macro for \\[dim] used by LaTeX for various arrays, tabular, etc
# It expands to \hidden@cr{<marker>}, where the marker carries the optional
# dimension when one was given.  The * is read but otherwise ignored.
DefMacroI('\@alignment@newline', undef, sub {
    my ($gullet) = @_;
    my (undef, $optional) = readNewlineArgs($gullet, 1);    # 1 => skip spaces, LaTeX style
    return (T_CS('\hidden@cr'), T_BEGIN,
      ($optional
        ? (T_CS('\@alignment@newline@markertall'), T_BEGIN, $optional, T_END)
        : T_CS('\@alignment@newline@marker')),
      T_END); });
# AMS kindly avoids that, by using a special version of \\
# Same expansion as \@alignment@newline, but readNewlineArgs is called without
# the skip-spaces flag.
DefMacroI('\@alignment@newline@noskip', undef, sub {
    my ($gullet) = @_;
    my ($star, $height) = readNewlineArgs($gullet);
    my @marker;
    if ($height) {
      @marker = (T_CS('\@alignment@newline@markertall'), T_BEGIN, $height, T_END); }
    else {
      @marker = (T_CS('\@alignment@newline@marker')); }
    return (T_CS('\hidden@cr'), T_BEGIN, @marker, T_END); });

# Markers standing for \\ : they build nothing, but revert to \\ (plus a newline),
# and (eventually will) add vertical space to the row!
DefConstructor('\@alignment@newline@marker', '',
  reversion => Tokens(T_CS("\\\\"), T_CR));
# The "tall" marker also records its dimension as the current row's padding.
DefConstructor('\@alignment@newline@markertall {Dimension}', '',
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    my $alignment = LookupValue('Alignment');
    $alignment->currentRow->{padding} = $whatsit->getArg(1) if $alignment;
    return; },
  reversion => sub {
    my ($whatsit, $height) = @_;
    return Tokens(T_CS("\\\\"), T_OTHER('['), Revert($height), T_OTHER(']'), T_CR); });

DefMacroI('\tabularnewline', undef, '\cr');    # ???

# \lx@intercol is our replacement for LaTeX's \@acol which places intercolumn space in tabular
# (but NOT used by TeX's \halign!)
DefMacro('\lx@intercol', '');
# Candidates for binding \lx@intercol for LaTeX tabular or math arrays
# These provide "padding" of half tabcolsep, since added before & after columns
# [these could be \hskip\tabcolsep, but the expansion confounds trimColumnSpec]
# Text version: width is the value of \tabcolsep when that is a register, else 0.
DefConstructor('\lx@text@intercol', sub {
    my ($document, %props) = @_;
    return $document->absorb(DimensionToSpaces($props{width})); },
  reversion  => '\lx@intercol',
  properties => sub {
    my $register = $STATE->lookupDefinition(T_CS('\tabcolsep'));
    my $width    = ($register && $register->isRegister) ? $register->valueOf : Dimension(0);
    return (width => $width, isSpace => 1); });
# Math version: same, but measured by \arraycolsep, and nothing is constructed.
DefConstructor('\lx@math@intercol', "",    # mspace ???
  reversion  => '\lx@intercol',
  properties => sub {
    my $register = $STATE->lookupDefinition(T_CS('\arraycolsep'));
    my $width    = ($register && $register->isRegister) ? $register->valueOf : Dimension(0);
    return (width => $width, isSpace => 1); });

#======================================================================
# Various decorations within alignments, rules, headers, etc

# Like \noalign, takes an arg; handled within alignment processing.
# But doesn't create a pseudo-row (??? Or does it?; is it still needed?)
DefConstructor('\hidden@noalign{}', '#1',
  reversion  => '',
  properties => sub {
    my ($stomach, $content) = @_;
    # Sometimes, we're smuggling stuff that needs to be carried into the XML:
    # count the boxes of the argument that ask to be preserved.
    my $preserved = scalar(grep { $_->getProperty('alignmentPreserve'); } $content->unlist);
    return (alignmentSkippable => 1, alignmentPreserve => $preserved); });

# \hline hides its work inside \noalign; the constructor builds nothing itself
# but asks the current alignment (if any) for a line at 't'.
DefMacro('\hline', '\noalign{\@@alignment@hline}');
DefConstructorI('\@@alignment@hline', undef, '',
  afterDigest => sub {
    my $alignment = LookupValue('Alignment');
    $alignment->addLine('t') if $alignment;
    return; },
  properties => { isHorizontalRule => 1 },
  sizer      => 0, alias => '\hline');

# Toggle the in_tabular_head flag on the current alignment; expand to nothing.
DefMacroI('\@tabular@begin@heading', undef, sub {
    my $alignment = LookupValue('Alignment');
    $alignment->{in_tabular_head} = 1;
    return; });
DefMacroI('\@tabular@end@heading', undef, sub {
    my $alignment = LookupValue('Alignment');
    $alignment->{in_tabular_head} = 0;
    return; });

#======================================================================
# Math mode in alignment
# Special forms for $ appearing within alignments.
# Note that $ within a math alignment (eg array environment),
# switches to text mode!  There's no $$ for display math.

# This is the "normal" case: $ appearing with an alignment that is in text mode.
# It's just like regular $, except it doesn't look for $$ (no display math):
# ends inline math when IN_MATH is set, begins it otherwise.
DefPrimitiveI('\@dollar@in@textmode', undef, sub {
    my ($stomach) = @_;
    no warnings 'recursion';
    my $switch = (LookupValue('IN_MATH') ? '\@@ENDINLINEMATH' : '\@@BEGININLINEMATH');
    return $stomach->invokeToken(T_CS($switch)); });
# This should switch to text mode (because it's balancing the hidden $
# wrapping each alignment cell!!!!!!)
# However, it should be like a normal $ if it's inside something like \mbox
# that itself makes a text box!!!!!!
# Thus, we need to know at what boxing level we started the last math or text.
# This is all complicated by the need to know _how_ we got into or out of math mode!
# Gawd, this is awful!
# NOTE: Probably the most "Right" thing to do would be to process
# alignments in text mode only (like TeX), sneaking $'s in where needed,
# but then afterwards, morph them into math arrays?
# This would be complicated by the need to hide these $ from untex.
#
# State: the value 'MATH_ALIGN_$_BEGUN' holds (boxing level + 1) as seen by the opening $;
# a later $ whose boxing level equals that value is treated as the closing one.
DefPrimitiveI('\@dollar@in@mathmode', undef, sub {
    my ($stomach) = @_;
    my $level = $stomach->getBoxingLevel;
    if ((LookupValue('MATH_ALIGN_$_BEGUN') || 0) == $level) {    # If we're begun making _something_ with $.
      my @l = ();
      if (LookupValue('IN_MATH')) {                              # But we're somehow in math?
        @l = $stomach->invokeToken(T_CS('\@@ENDINLINEMATH')); }
      else {
        @l = $stomach->invokeToken(T_CS('\@@ENDINLINETEXT')); }
      AssignValue('MATH_ALIGN_$_BEGUN' => 0);                    # Reset this AFTER finishing the something
      @l; }                                                      # the digested result is the return value
    else {
      AssignValue('MATH_ALIGN_$_BEGUN' => $level + 1);           # Note that we've begun something
      if (LookupValue('IN_MATH')) {                              # If we're "still" in math
        $stomach->invokeToken(T_CS('\@@BEGININLINETEXT')); }
      else {
        $stomach->invokeToken(T_CS('\@@BEGININLINEMATH')); } } });

# Begin/end of a text run opened by $ inside a math alignment:
# text mode is entered before digestion and the body is captured up to the matching end.
DefConstructorI('\@@BEGININLINETEXT', undef,
  ""
    . "#body"
    . "",
  alias => T_MATH, beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1);
DefConstructorI('\@@ENDINLINETEXT', undef, "", alias => T_MATH,
  beforeDigest => sub { $_[0]->endMode('text'); });

# Globally sets EQUATIONROW_NUMBER to 0.
DefPrimitiveI('\@LTX@nonumber', undef, sub { AssignValue(EQUATIONROW_NUMBER => 0, 'global'); });

# \hidewidth expands to nothing here.
DefMacroI('\hidewidth', undef, Tokens());

#======================================================================
# Multicolumn support
# \multispan{n} expands to \omit followed by n-1 copies of \span\omit.
DefMacro('\multispan{Number}', sub {
    my ($gullet, $span) = @_;
    $span = $span->valueOf;
    (T_CS('\omit'), map { (T_CS('\span'), T_CS('\omit')) } 1 .. $span - 1); });

# Computed registers: number of template columns, and the current column number,
# of the current alignment; both read as 0 outside of an alignment.
# NOTE(review): the default value is a Dimension while the getters return Number -- confirm
# which register type is intended.
DefRegisterI('\@alignment@ncolumns', undef, Dimension(0),
  getter => sub {
    if (my $alignment = LookupValue('Alignment')) {
      Number(scalar($alignment->getTemplate->columns)); }
    else { Number(0); } });
DefRegisterI('\@alignment@column', undef, Dimension(0),
  getter => sub {
    if (my $alignment = LookupValue('Alignment')) {
      Number($alignment->currentColumnNumber); }
    else { Number(0); } });

# \@multicolumn{n}{template}{content}: span n columns, then wrap the content in the
# before/after parts of the first column of the given template (if it has one).
DefMacro('\@multicolumn {Number} AlignmentTemplate {}', sub {
    my ($gullet, $span, $template, $tokens) = @_;
    my $column = $template->column(1);
    $span = $span->valueOf;
    # First part, like \multispan
    (T_CS('\omit'), (map { (T_CS('\span'), T_CS('\omit')) } 1 .. $span - 1),
      # Next part, just put the template in-line, since it's only used once.
      ($column ? beforeCellUnlist($$column{before}) : ()),
      $tokens->unlist,
      ($column ? afterCellUnlist($$column{after}) : ())); });

# True when an Alignment is currently bound.
DefConditionalI('\if@in@alignment', undef, sub { LookupValue('Alignment'); });

# Hands the template and the optional mode argument to alignmentBindings.
DefPrimitive('\@alignment@bindings AlignmentTemplate []', sub {
    my ($stomach, $template, $mode) = @_;
    alignmentBindings($template, $mode); });
# Map a vertical positioning argument onto an attachment keyword.
# $pos may be undef/empty or anything ToString accepts.
# Returns 'top' for "t", 'bottom' for "b", and 'middle' for everything else.
sub translateAttachment {
  my ($pos) = @_;
  $pos = ($pos ? ToString($pos) : '');
  return ($pos eq 't' ? 'top' : ($pos eq 'b' ? 'bottom' : 'middle')); }    # undef meaning 'baseline'

# This trims trailing whitespace from the current digested list,
# for use within latex tabular-style columns.
# But note that \halign does NOT remove this trailing space!
#
# Works backwards from the end of @LaTeXML::LIST:
#  * skippable, fill and empty boxes are set aside (and put back afterwards);
#  * a nested List is unwrapped in place so that its members are examined too;
#  * a plain Box whose string is only whitespace is dropped;
#  * anything else ends the scan.
DefPrimitiveI('\lx@column@trimright', undef, sub {
    my @save = ();
    my $s;
    while (my $box = $LaTeXML::LIST[-1]) {
      if ($box->getProperty('alignmentSkippable')
        || $box->getProperty('isFill')
        || IsEmpty($box)) {
        # unshift (not push): boxes are met last-to-first, so this keeps @save
        # in the original left-to-right order for when it is put back below.
        unshift(@save, pop(@LaTeXML::LIST)); }
      elsif (ref $box eq 'LaTeXML::Core::List') {    # Unwrap and continue
        pop(@LaTeXML::LIST);
        push(@LaTeXML::LIST, $box->unlist); }
      elsif ((ref $box eq 'LaTeXML::Core::Box')
        && defined($s = $box->getString) && ($s =~ /^\s*$/)) {
        pop(@LaTeXML::LIST); }                       # remove any box containing only spaces
      else {
        last; } }
    push(@LaTeXML::LIST, @save);
    return; });
# Unlist the "before" part of a column template, moving a math shift ($)
# behind any \hfil tokens that directly follow it.
# Returns a list of tokens (empty when $tokens is false).
sub beforeCellUnlist {
  my ($tokens) = @_;
  return () unless $tokens;
  my @pending   = $tokens->unlist;
  my @reordered = ();
  while (my $tok = shift(@pending)) {
    my $math_then_fil = $tok->defined_as(T_MATH) && @pending && $pending[0]->defined_as(T_hfil);
    if ($math_then_fil) {
      # emit the \hfil first, and look at the $ again on the next round
      push(@reordered, shift(@pending));
      unshift(@pending, $tok); }
    else {
      push(@reordered, $tok); } }
  return @reordered; }

# Mirror image for the "after" part of a column template: scanning from the right,
# a math shift ($) is moved in front of any \hfil tokens that directly precede it.
# Returns a list of tokens (empty when $tokens is false).
sub afterCellUnlist {
  my ($tokens) = @_;
  return () unless $tokens;
  my @pending   = $tokens->unlist;
  my @reordered = ();
  while (my $tok = pop(@pending)) {
    my $fil_then_math = $tok->defined_as(T_MATH) && @pending && $pending[-1]->defined_as(T_hfil);
    if ($fil_then_math) {
      # emit the \hfil first, and look at the $ again on the next round
      unshift(@reordered, pop(@pending));
      push(@pending, $tok); }
    else {
      unshift(@reordered, $tok); } }
  return @reordered; }
# Remember, we're assigning a NUMBER (codepoint) to a CHARACTER!
# For each of A..Z and its lowercase partner (codepoint + 0x20), globally set
# the lowercase code to the lowercase codepoint and the uppercase code to the uppercase one.
for my $upper ('A' .. 'Z') {
  my $upper_code = ord($upper);
  my $lower_code = $upper_code + 0x20;
  for my $char ($upper, chr($lower_code)) {
    $STATE->assignLCcode($char, $lower_code, 'global');
    $STATE->assignUCcode($char, $upper_code, 'global'); } }
# Try to give them some sense in math...
# Each escaped special character is a protected macro that picks a math or a text
# variant with \ifmmode; every variant is aliased back to the escaped form.
DefMacroI('\#',    undef, '\ifmmode\lx@math@hash\else\lx@text@hash\fi',             protected => 1);
DefMacroI('\&',    undef, '\ifmmode\lx@math@amp\else\lx@text@amp\fi',               protected => 1);
DefMacroI('\%',    undef, '\ifmmode\lx@math@percent\else\lx@text@percent\fi',       protected => 1);
DefMacroI("\\\$", undef, '\ifmmode\lx@math@dollar\else\lx@text@dollar\fi',         protected => 1);
DefMacroI('\_',    undef, '\ifmmode\lx@math@underscore\else\lx@text@underscore\fi', protected => 1);
# Text variants: plain characters.
DefPrimitiveI('\lx@text@hash',       undef, '#',   alias => '\#');
DefPrimitiveI('\lx@text@amp',        undef, '&',   alias => '\&');
DefPrimitiveI('\lx@text@percent',    undef, '%',   alias => '\%');
DefPrimitiveI('\lx@text@dollar',     undef, "\$", alias => "\\\$");
DefPrimitiveI('\lx@text@underscore', undef, '_',   alias => '\_');
# Math variants: some carry a role and meaning.
DefMathI('\lx@math@hash',    undef, '#', alias => '\#');
DefMathI('\lx@math@amp',     undef, '&', role  => 'ADDOP',   meaning => 'and',     alias => '\&');
DefMathI('\lx@math@percent', undef, '%', role  => 'POSTFIX', meaning => 'percent', alias => '\%');
DefMathI('\lx@math@dollar', undef, "\$", role => 'OPERATOR', meaning => 'currency-dollar',
  alias => "\\\$");
DefMathI('\lx@math@underscore', undef, '_', alias => '\_');

# Discretionary times; just treat as invisible ?
DefMathI('\*', undef, "\x{2062}", role => 'MULOP', name => '', meaning => 'times');    # INVISIBLE TIMES (or MULTIPLICATION SIGN = 00D7)
# Font test for ligatures: true unless the font's family is 'typewriter'.
sub nonTypewriter {
  my ($font) = @_;
  my $family = $font->getFamily;
  return $family ne 'typewriter'; }

# Font test for ligatures: as nonTypewriter, and additionally the font's encoding
# (taken to be 'OT1' when none is reported) must be exactly OT1 or T1.
sub nonTypewriterT1 {
  my ($font) = @_;
  return nonTypewriter($font) && (($font->getEncoding || 'OT1') =~ /^(OT1|T1)$/); }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Alignment code
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#======================================================================
# Low-level bits that appear within alignments or \halign

# "Initialized" alignment; presets spacing, but since we're ignoring it anyway...
Let('\ialign', '\halign');

# Overlapping alignments ???
# The argument is wrapped in \@start@alignment ... \@finish@alignment and digested
# in text mode, with single-column 'l' alignment bindings.
DefMacro('\oalign{}',
  '\@@oalign{\@start@alignment#1\@finish@alignment}');
DefConstructor('\@@oalign{}',
  '#1',
  reversion    => '\oalign{#1}', bounded => 1, mode => 'text',
  beforeDigest => sub { alignmentBindings('l'); });

# This is actually different; the lines should lie ontop of each other.
# How should this be represented?
DefMacro('\ooalign{}',
  '\@@ooalign{\@start@alignment#1\@finish@alignment}');
DefConstructor('\@@ooalign{}',
  '#1',
  reversion    => '\ooalign{#1}', bounded => 1, mode => 'text',
  beforeDigest => sub { alignmentBindings('l'); });

# \buildrel <over part> \over {<base>}: #1 is read up to \over, #2 is the braced base;
# the scriptpos property is "mid" followed by the current script level.
DefConstructor('\buildrel Until:\over {}',
  ""
    . ""
    . "#2"
    . "#1"
    . "",
  properties => { scriptpos => sub { "mid" . $_[0]->getScriptLevel; } });

#======================================================================
# TeX Book, Appendix B, p. 344
#======================================================================
RawTeX('\outer\def^^L{\par}');
# NOTE: this is a single-quoted Perl string, where \\ collapses to one backslash.
# The TeX text wanted is  \do\\\do\{  (i.e. \do applied to \\, then \do applied to \{ ),
# so the control symbol \\ has to be spelled with FOUR backslashes here;
# spelled with two, the \do in front of \{ would be swallowed.
DefMacro('\dospecials', '\do\ \do\\\\\do\{\do\}\do\$\do\&\do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~');

#======================================================================
# TeX Book, Appendix B, p. 345

# Handy numeric constants, defined in TeX itself.
RawTeX(<<'EoTeX');
  \chardef\active=13
  \chardef\@ne=1
  \chardef\tw@=2
  \chardef\thr@@=3
  \chardef\sixt@@n=16
  \chardef\@cclv=255
  \mathchardef\@cclvi=256
  \mathchardef\@m=1000
  \mathchardef\@M=10000
  \mathchardef\@MM=20000
  \countdef\m@ne=21\relax
  \m@ne=-1
EoTeX

#======================================================================
# TeX Book, Appendix B, p. 346

# Scratch registers and the allocation counters.
# Note that \m@ne is re-pointed from \count21 (above) to \count22 at the end,
# \count21 becoming \allocationnumber.
RawTeX(<<'EoTeX');
  \countdef\count@=255
  \toksdef\toks@=0
  \skipdef\skip@=0
  \dimendef\dimen@=0
  \dimendef\dimen@i=1
  \dimendef\dimen@ii=2
\count10=22 % allocates \count registers 23, 24, ...
\count11=9 % allocates \dimen registers 10, 11, ...
\count12=9 % allocates \skip registers 10, 11, ...
\count13=9 % allocates \muskip registers 10, 11, ...
\count14=9 % allocates \box registers 10, 11, ...
\count15=9 % allocates \toks registers 10, 11, ...
\count16=-1 % allocates input streams 0, 1, ...
\count17=-1 % allocates output streams 0, 1, ...
\count18=3 % allocates math families 4, 5, ...
\count19=0 % allocates \language codes 1, 2, ...
\count20=255 % allocates insertions 254, 253, ...
\countdef\insc@unt=20
\countdef\allocationnumber=21
\countdef\m@ne=22 \m@ne=-1
EoTeX
# Various \count's are set; should we?
# \wlog{text}: expand the argument and send its string form to NoteLog; digests to nothing.
DefPrimitive('\wlog{}', sub {
    NoteLog(ToString(Expand($_[1])));
    return; },
  locked => 1);
# From plain.tex
# \newcount & friends define the given token as a register of the matching type,
# initialised to zero, allocated from the named register family.
DefPrimitive('\newcount DefToken', sub {
    DefRegisterI($_[1], undef, Number(0), allocate => '\count'); });
DefPrimitive('\newdimen DefToken', sub {
    DefRegisterI($_[1], undef, Dimension(0), allocate => '\dimen'); });
DefPrimitive('\newskip DefToken', sub {
    DefRegisterI($_[1], undef, Glue(0), allocate => '\skip'); });
DefPrimitive('\newmuskip DefToken', sub {
    DefRegisterI($_[1], undef, MuGlue(0), allocate => '\muskip'); });
# Boxes are numbered from a private counter (allocated_boxes); the new box starts out
# as an empty List stored under "box<n>", and the token becomes a readonly register holding n.
AssignValue(allocated_boxes => 0);
DefPrimitive('\newbox DefToken', sub {
    my $n = LookupValue('allocated_boxes');
    AssignValue(allocated_boxes => $n + 1, 'global');
    AssignValue("box$n", List());
    DefRegisterI($_[1], undef, Number($n), readonly => 1); });
# \newhelp stores the text as a value keyed by the string form of the token.
DefPrimitive('\newhelp DefToken {}', sub { AssignValue(ToString($_[1]) => $_[2]); });
DefPrimitive('\newtoks DefToken',    sub { DefRegisterI($_[1], undef, Tokens()); });
# the next 4 actually work by doing a \chardef instead of \countdef, etc.
# which means they actually work quite differently
# \alloc@@{type}: keeps a per-type counter under the value 'allocation @<type>';
# the current count goes (globally) into \allocationnumber and the counter is bumped.
DefPrimitive('\alloc@@ {}', sub {
    my ($stomach, $type) = @_;
    my $c = 'allocation @' . ToString($type);
    my $n = LookupValue($c) || '0';
    $n = $n->valueOf if ref $n;
    AssignValue($c => $n + 1, 'global');
    AssignRegister('\allocationnumber' => Number($n), 'global'); });
DefMacro('\newread DefToken',     '\alloc@@{read}\global\chardef#1=\allocationnumber');
DefMacro('\newwrite DefToken',    '\alloc@@{write}\global\chardef#1=\allocationnumber');
DefMacro('\newfam DefToken',      '\alloc@@{fam}\global\chardef#1=\allocationnumber');
DefMacro('\newlanguage DefToken', '\alloc@@{language}\global\chardef#1=\allocationnumber');

# Allocation helper written in TeX; the lines starting with % are TeX comments,
# so the line breaks inside this string matter.
DefMacro('\e@alloc{}{}{}{}{}{}',
  '\global\advance#3\@ne
% \e@ch@ck{#3}{#4}{#5}#1%
 \allocationnumber#3\relax
 \global#2#6\allocationnumber
% \wlog{\string#6=\string#1\the\allocationnumber}
');
DefMacro('\alloc@{}{}{}{}', '\e@alloc#2#3{\count1#1}#4\float@count');
# NOTE(review): \newread and \newwrite were already defined just above (via \alloc@@);
# these later definitions presumably take over -- confirm that is intended.
DefMacro('\newread', '\e@alloc\read \chardef{\count16}\m@ne\sixt@@n');
DefMacro('\newwrite', '\e@alloc\write
  {\ifnum\allocationnumber=18
    \advance\count17\@ne
    \allocationnumber\count17 %
  \fi
  \global\chardef}%
  {\count17}%
  \m@ne
  {128}');

# This implementation is quite wrong
DefPrimitive('\newinsert Token', sub { DefRegisterI($_[1], undef, Number(0)); });
# \alloc@, \ch@ck
# Magnification steps: \magstephalf is the literal 1095, and \magstep{n} expands
# to the n-th entry of @mags.  Anything other than a single digit 0..5 selects entry 0.
DefMacroI('\magstephalf', undef, '1095');
our @mags = (1000, 1200, 1440, 1728, 2074, 2488);
DefMacro('\magstep{}', sub {
    my ($gullet, $arg) = @_;
    my $text  = ToString($arg);
    my $index = 0;
    if ($text =~ /^\d$/) {
      my $digit = int($text);
      $index = $digit if ($digit >= 0) && ($digit < 6); }
    return Explode($mags[$index]); });
# The fonts preloaded by plain TeX, the four extra math families,
# and the assignment of fonts to math families 0-3.
# (\fivei is the 5pt math italic, cmmi5, as in plain.tex; it is what
#  \scriptscriptfont1 is set to below.)
RawTeX(<<'EoTeX');
  \font\tenrm=cmr10
  \font\sevenrm=cmr7
  \font\fiverm=cmr5
  \font\teni=cmmi10
  \font\seveni=cmmi7
  \font\fivei=cmmi5
  \font\tensy=cmsy10
  \font\sevensy=cmsy7
  \font\fivesy=cmsy5
  \font\tenex=cmex10
  \font\tenbf=cmbx10
  \font\sevenbf=cmbx7
  \font\fivebf=cmbx5
  \font\tensl=cmsl10
  \font\tentt=cmtt10
  \font\tenit=cmti10
  \newfam\itfam
  \newfam\slfam
  \newfam\bffam
  \newfam\ttfam
\textfont0=\tenrm\scriptfont0=\sevenrm\scriptscriptfont0=\fiverm
\textfont1=\teni\scriptfont1=\seveni\scriptscriptfont1=\fivei
\textfont2=\tensy\scriptfont2=\sevensy\scriptscriptfont2=\fivesy
\textfont3=\tenex
EoTeX
# Note: \newfam in math should be font switching(?)
# Slanted and small-caps switches: like the other old-style font commands they set
# family, series and shape all at once (serif, medium, and the named shape).
DefPrimitiveI('\sl', undef, undef,
  font => { shape => 'slanted', family => 'serif', series => 'medium' });
DefPrimitiveI('\sc', undef, undef,
  font => { shape => 'smallcaps', family => 'serif', series => 'medium' });

DefPrimitiveI('\cal', undef, undef,
  font => { family => 'caligraphic', series => 'medium', shape => 'upright' });

# Ideally, we should set these sizes from class files
# Size switches only change the font size; the numbers are relative to a nominal size of 10.
AssignValue(NOMINAL_FONT_SIZE => 10);
DefPrimitiveI('\tiny',         undef, undef, font => { size => 5 });
DefPrimitiveI('\scriptsize',   undef, undef, font => { size => 7 });
DefPrimitiveI('\footnotesize', undef, undef, font => { size => 8 });
DefPrimitiveI('\small',        undef, undef, font => { size => 9 });
DefPrimitiveI('\normalsize',   undef, undef, font => { size => 10 });
DefPrimitiveI('\large',        undef, undef, font => { size => 12 });
DefPrimitiveI('\Large',        undef, undef, font => { size => 14.4 });
DefPrimitiveI('\LARGE',        undef, undef, font => { size => 17.28 });
DefPrimitiveI('\huge',         undef, undef, font => { size => 20.74 });
DefPrimitiveI('\Huge',         undef, undef, font => { size => 29.8 });

# Math italic: only the family is changed, and math mode is required.
DefPrimitiveI('\mit', undef, undef, requireMath => 1, font => { family => 'italic' });
+
+Let("\\\t", "\\\r"); # \<tab> == \<return> (and hence \<space>), also
+
+#======================================================================
+# TeX Book, Appendix B, p. 352
+
+# \obeyspaces: make the space character active (catcode 13) and bind the
+# active space to \space.
+DefPrimitiveI('\obeyspaces', undef, sub {
+    AssignCatcode(" " => 13);
+    Let(T_ACTIVE(" "), '\space');
+    return });
+# Curiously enough, " " (a space) is ALREADY defined to be the same as "\space"
+# EVEN before it is made active. (see p.380)
+Let(T_ACTIVE(" "), '\space');
+
+# \obeylines: make the carriage return active and bind it to \@break.
+DefPrimitiveI('\obeylines', undef, sub {
+    AssignCatcode("\r" => 13);
+    Let(T_ACTIVE("\r"), '\@break'); # More appropriate than \par, I think?
+    return });
+
+# NOTE(review): the construction template is an empty string as it appears
+# here; only the isBreak property is visible -- confirm against upstream.
+DefConstructor('\@break', "", properties => { isBreak => 1 });
+
+# Plain TeX's \loop ... \repeat construct, defined in raw TeX.
+RawTeX(<<'EoTeX');
+\def\loop#1\repeat{\def\body{#1}\iterate}
+\def\iterate{\body \let\next=\iterate \else\let\next=\relax\fi \next}
+\let\repeat=\fi
+EoTeX
+
+# Fixed-width horizontal spaces: each produces a Box holding a Unicode space
+# character, marked isSpace and carrying an explicit width.
+DefPrimitiveI('\enskip', undef, sub {
+    Box("\x{2002}", undef, undef, T_CS('\enskip'),
+      name => 'enskip', width => Dimension('0.5em'), isSpace => 1); });
+
+# NOTE(review): \enspace is given name 'enskip', same as \enskip -- confirm intended.
+DefPrimitiveI('\enspace', undef, sub {
+    Box("\x{2002}", undef, undef, T_CS('\enspace'),
+      name => 'enskip', width => Dimension('0.5em'), isSpace => 1); });
+
+DefPrimitiveI('\quad', undef, sub {
+    Box("\x{2003}", undef, undef, T_CS('\quad'),
+      name => 'quad', width => Dimension('1em'), isSpace => 1); });
+
+# Conceivably should be treated as punctuation! (but maybe even \quad should !?!)
+DefPrimitiveI('\qquad', undef, sub {
+    Box("\x{2003}\x{2003}", undef, undef, T_CS('\qquad'),
+      name => 'qquad', width => Dimension('2em'), isSpace => 1, asHint => 1); });
+
+DefPrimitiveI('\thinspace', undef, sub {
+    Box("\x{2009}", undef, undef, T_CS('\thinspace'),
+      name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); });
+
+# Negative thin space: an empty-string Box with a negative width.
+DefPrimitiveI('\negthinspace', undef, sub {
+    Box("", undef, undef, T_CS('\negthinspace'),
+      name => 'negthinspace', width => Dimension('-0.16667em'), isSpace => 1); });
+
+# DefConstructor('\hglue Glue', "?#isMath()(\x{2003})",
+#   properties => sub { (isSpace => 1, width => $_[1]); });
+
+# \hglue: convert the glue to a string via DimensionToSpaces; produces nothing
+# when that conversion returns undef.
+DefPrimitive('\hglue Glue', sub {
+    my ($stomach, $length) = @_;
+    my $s = DimensionToSpaces($length);
+    return unless defined $s;
+    Box($s, undef, undef, Invocation(T_CS('\hglue'), $length),
+      name => 'hglue', width => $length, isSpace => 1); });
+
+# Vertical glue and interline-skip controls are accepted with no replacement.
+DefPrimitive('\vglue Glue', undef);
+DefPrimitiveI('\topglue', undef, undef);
+DefPrimitiveI('\nointerlineskip', undef, undef);
+DefPrimitiveI('\offinterlineskip', undef, undef);
+
+DefMacroI('\smallskip', undef, '\vskip\smallskipamount');
+DefMacroI('\medskip', undef, '\vskip\medskipamount');
+DefMacroI('\bigskip', undef, '\vskip\bigskipamount');
+
+#======================================================================
+# TeX Book, Appendix B, p. 353
+
+DefPrimitiveI('\break', undef, undef);
+DefPrimitiveI('\nobreak', undef, undef);
+DefPrimitiveI('\allowbreak', undef, undef);
+
+# Non-breaking space (U+00A0); the Box records the active ~ as its token.
+DefPrimitiveI('\nobreakspace', undef, sub {
+    Box(UTF(0xA0), undef, undef, T_ACTIVE("~"),
+      width => Dimension('0.333em'), isSpace => 1); });
+
+DefMacro("~", '\nobreakspace{}');
+
+DefMacroI('\slash', undef, '/');
+DefPrimitiveI('\filbreak', undef, undef);
+DefMacroI('\goodbreak', undef, '\par');
+DefMacroI('\eject', undef, '\par\LTX@newpage');
+Let('\newpage', '\eject');
+# NOTE(review): the template is just "^" as it appears here -- confirm against upstream.
+DefConstructorI('\LTX@newpage', undef, "^");
+
+DefMacroI('\supereject', undef, '\par\LTX@newpage');
+DefPrimitiveI('\removelastskip', undef, undef);
+DefMacroI('\smallbreak', undef, '\par');
+DefMacroI('\medbreak', undef, '\par');
+DefMacroI('\bigbreak', undef, '\par');
+
+# \leftline, \rightline, \centerline: wrap the undigested argument in an \hbox
+# and hand it to a bounded constructor that calls alignLine with the
+# corresponding alignment keyword.
+DefMacroI('\line', undef, '\hbox to \hsize');
+DefMacro('\leftline Undigested', '\ltx@leftline{\hbox{#1}}');
+DefMacro('\rightline Undigested', '\ltx@rightline{\hbox{#1}}');
+DefMacro('\centerline Undigested', '\ltx@centerline{\hbox{#1}}');
+DefConstructor('\ltx@leftline{}', sub {
+    alignLine($_[0], $_[1], 'left'); },
+  alias => '\leftline',
+  bounded => 1);
+DefConstructor('\ltx@rightline{}', sub {
+    alignLine($_[0], $_[1], 'right'); },
+  alias => '\rightline',
+  bounded => 1);
+DefConstructor('\ltx@centerline{}', sub {
+    alignLine($_[0], $_[1], 'center'); },
+  alias => '\centerline',
+  bounded => 1);
+
+# alignLine($document, $line, $alignment)
+# Insert $line into $document with class "ltx_align_<alignment>":
+#  - as an ltx:p if one can be opened at the current point;
+#  - otherwise as an ltx:text followed by an ltx:break, if ltx:text can be opened;
+#  - otherwise absorb $line as-is, with no alignment class.
+# Returns nothing.
+sub alignLine {
+  my ($document, $line, $alignment) = @_;
+  if ($document->isOpenable('ltx:p')) {
+    $document->insertElement('ltx:p', $line, class => 'ltx_align_' . $alignment); }
+  elsif ($document->isOpenable('ltx:text')) {
+    $document->insertElement('ltx:text', $line, class => 'ltx_align_' . $alignment);
+    $document->insertElement('ltx:break'); }
+  else {
+    $document->absorb($line); }
+  return; }
+
+# These should be 0 width, but perhaps also shifted?
+DefMacro('\llap{}', '\hbox to 0pt{\hss#1}');
+DefMacro('\rlap{}', '\hbox to 0pt{#1\hss}');
+
+DefMacroI('\m@th', undef, '\mathsurround=0pt ');
+
+# \strutbox
+# \strut expands to nothing; \strutbox is still allocated as a box register.
+DefMacroI('\strut', undef, Tokens());
+RawTeX('\newbox\strutbox');
+
+#======================================================================
+# TeX Book, Appendix B. p. 354
+
+# TODO: Not yet done!!
+# tabbing stuff!!!
+
+DefMacroI('\settabs', undef, undef);
+
+#======================================================================
+# TeX Book, Appendix B. p. 355
+
+# TODO: \item, \itemitem not done!
+# This could probably be adopted from LaTeX, if they could auto-open
+# and close!
+DefMacro('\hang', '\hangindent\parindent');
+DefMacro('\item', '\par\hang\textindent');
+DefMacro('\itemitem', '\par\indent \hangindent2\parindent \textindent');
+DefMacro('\textindent{}', '\indent\llap{#1\enspace}\ignorespaces');
+DefMacro('\narrower', '\advance\leftskip by\parindent'
+    . '\advance\rightskip by\parindent');
+
+# If folks start using plain TeX macros, and never load LaTeX.pool,
+# they might benefit from a ltx-plain.css?
+# \beginsection reads up to the next \par and passes the text, in \bf, on to
+# \@beginsection.
+DefMacro('\beginsection Until:\par', '\@beginsection{{\bf #1}}');
+DefConstructor('\@beginsection {}',
+  "#1");
+
+# POSSIBLY #1 is a name or reference number and #2 is the theorem TITLE
+# If so, how do we know when the theorem ends?
+# \proclaim takes delimited parameters "#1. #2\par"; #1 is set in \bf and #2 in \sl.
+DefMacroI('\proclaim', parseDefParameters('\proclaim', Tokenize('#1. #2\par')),
+  '\@proclaim{{\bf #1}}{{\sl #2}}');
+# The first argument is stored as the 'title' property, along with its font as
+# 'titlefont'; after construction, any open ltx:theorem is closed if possible.
+DefConstructor('\@proclaim{}{}',
+  ""
+    . "#title"
+    . "#2",
+  afterConstruct => sub { $_[0]->maybeCloseElement('ltx:theorem'); },
+  properties => sub {
+    my $title = $_[1];
+    (title => $title, titlefont => $title->getFont); });
+
+#======================================================================
+# TeX Book, Appendix B. p. 356
+
+DefPrimitiveI('\raggedright', undef, undef);
+DefPrimitiveI('\raggedleft', undef, undef); # this is actually LaTeX
+DefPrimitiveI('\ttraggedright', undef, undef);
+DefPrimitiveI('\leavevmode', undef, undef);
+DefMacro('\mathhexbox{}{}{}', '\leavevmode\hbox{$\m@th \mathchar"#1#2#3$}');
+
+#======================================================================
+# TeX Book, Appendix B. p. 358
+
+#----------------------------------------------------------------------
+# Actually from LaTeX; Table 3.3, Greek, p.41
+#----------------------------------------------------------------------
+# Each Greek letter maps to a single Unicode code point; no role or meaning is assigned.
+DefMathI('\alpha', undef, "\x{03B1}");
+DefMathI('\beta', undef, "\x{03B2}");
+DefMathI('\gamma', undef, "\x{03B3}");
+DefMathI('\delta', undef, "\x{03B4}");
+DefMathI('\epsilon', undef, "\x{03F5}");
+DefMathI('\varepsilon', undef, "\x{03B5}");
+DefMathI('\zeta', undef, "\x{03B6}");
+DefMathI('\eta', undef, "\x{03B7}");
+DefMathI('\theta', undef, "\x{03B8}");
+DefMathI('\vartheta', undef, "\x{03D1}");
+DefMathI('\iota', undef, "\x{03B9}");
+DefMathI('\kappa', undef, "\x{03BA}");
+DefMathI('\lambda', undef, "\x{03BB}");
+DefMathI('\mu', undef, "\x{03BC}");
+DefMathI('\nu', undef, "\x{03BD}");
+DefMathI('\xi', undef, "\x{03BE}");
+DefMathI('\pi', undef, "\x{03C0}");
+DefMathI('\varpi', undef, "\x{03D6}");
+DefMathI('\rho', undef, "\x{03C1}");
+DefMathI('\varrho', undef, "\x{03F1}");
+DefMathI('\sigma', undef, "\x{03C3}");
+DefMathI('\varsigma', undef, "\x{03C2}");
+DefMathI('\tau', undef, "\x{03C4}");
+DefMathI('\upsilon', undef, "\x{03C5}");
+DefMathI('\phi', undef, "\x{03D5}");
+DefMathI('\varphi', undef, "\x{03C6}");
+DefMathI('\chi', undef, "\x{03C7}");
+DefMathI('\psi', undef, "\x{03C8}");
+DefMathI('\omega', undef, "\x{03C9}");
+DefMathI('\Gamma', undef, "\x{0393}");
+DefMathI('\Delta', undef, "\x{0394}");
+DefMathI('\Theta', undef, "\x{0398}");
+DefMathI('\Lambda', undef, "\x{039B}");
+DefMathI('\Xi', undef, "\x{039E}");
+DefMathI('\Pi', undef, "\x{03A0}");
+DefMathI('\Sigma', undef, "\x{03A3}");
+DefMathI('\Upsilon', undef, "\x{03A5}");
+DefMathI('\Phi', undef, "\x{03A6}");
+DefMathI('\Psi', undef, "\x{03A8}");
+DefMathI('\Omega', undef, "\x{03A9}");
+
+#----------------------------------------------------------------------
+# Actually from LaTeX; Table 3.2. Non-English Symbols, p.39
+
+# Each command below yields a single character, given either as a "\x{...}"
+# literal or through UTF(code point).
+# The following shouldn't appear in math.
+DefPrimitiveI('\OE', undef, "\x{0152}"); # LATIN CAPITAL LIGATURE OE
+DefPrimitiveI('\oe', undef, "\x{0153}"); # LATIN SMALL LIGATURE OE
+DefPrimitiveI('\AE', undef, UTF(0xC6)); # LATIN CAPITAL LETTER AE
+DefPrimitiveI('\ae', undef, UTF(0xE6)); # LATIN SMALL LETTER AE
+DefPrimitiveI('\AA', undef, UTF(0xC5)); # LATIN CAPITAL LETTER A WITH RING ABOVE
+DefPrimitiveI('\aa', undef, UTF(0xE5)); # LATIN SMALL LETTER A WITH RING ABOVE
+DefPrimitiveI('\O', undef, UTF(0xD8)); # LATIN CAPITAL LETTER O WITH STROKE
+DefPrimitiveI('\o', undef, UTF(0xF8)); # LATIN SMALL LETTER O WITH STROKE
+DefPrimitiveI('\L', undef, "\x{0141}"); # LATIN CAPITAL LETTER L WITH STROKE
+DefPrimitiveI('\l', undef, "\x{0142}"); # LATIN SMALL LETTER L WITH STROKE
+DefPrimitiveI('\ss', undef, UTF(0xDF)); # LATIN SMALL LETTER SHARP S
+
+# apparently the rest can appear in math.
+# \S and \P are macros expanding to internal primitives, which carry the
+# user-level names as their aliases.
+DefPrimitiveI('\lx@sectionsign', undef, UTF(0xa7), alias => '\S'); # SECTION SIGN
+DefPrimitiveI('\lx@paragraphsign', undef, UTF(0xB6), alias => '\P'); # PILCROW SIGN
+DefMacroI('\S', undef, '\lx@sectionsign');
+DefMacroI('\P', undef, '\lx@paragraphsign');
+DefPrimitiveI('\dag', undef, "\x{2020}"); # DAGGER
+DefPrimitiveI('\ddag', undef, "\x{2021}"); # DOUBLE DAGGER
+DefPrimitiveI('\copyright', undef, UTF(0xA9)); # COPYRIGHT SIGN
+DefPrimitiveI('\pounds', undef, UTF(0xA3)); # POUND SIGN
+
+#======================================================================
+# Specific accents (see TeX-Character)
+#----------------------------------------------------------------------
+
+# Arguments to DefAccent, per the trailing comments: the accent command, its
+# combining character, and a standalone (non-combining) form; below => 1 marks
+# accents placed under the base.
+DefAccent('\`', "\x{0300}", UTF(0x60)); # COMBINING GRAVE ACCENT & GRAVE ACCENT
+DefAccent("\\'", "\x{0301}", UTF(0xB4)); # COMBINING ACUTE ACCENT & ACUTE ACCENT
+DefAccent('\^', "\x{0302}", UTF(0x5E)); # COMBINING CIRCUMFLEX ACCENT & CIRCUMFLEX ACCENT
+DefAccent('\"', "\x{0308}", UTF(0xA8)); # COMBINING DIAERESIS & DIAERESIS
+DefAccent('\~', "\x{0303}", "~"); # COMBINING TILDE
+DefAccent('\=', "\x{0304}", UTF(0xAF)); # COMBINING MACRON & MACRON
+DefAccent('\.', "\x{0307}", "\x{02D9}"); # COMBINING DOT ABOVE & DOT ABOVE
+DefAccent('\u', "\x{0306}", "\x{02D8}"); # COMBINING BREVE & BREVE
+DefAccent('\v', "\x{030C}", "\x{02C7}"); # COMBINING CARON & CARON
+DefAccent('\@ringaccent', "\x{030A}", "o"); # COMBINING RING ABOVE & non-combining
+DefAccent('\r', "\x{030A}", "o"); # COMBINING RING ABOVE & non-combining
+DefAccent('\H', "\x{030B}", "\x{02DD}"); # COMBINING DOUBLE ACUTE ACCENT & non-combining
+DefAccent('\c', "\x{0327}", UTF(0xB8), below => 1); # COMBINING CEDILLA & CEDILLA
+# NOTE: The next two get defined for math as well; see below.
+DefAccent('\@text@daccent', "\x{0323}", '.', below => 1); # COMBINING DOT BELOW & DOT (?)
+DefAccent('\@text@baccent', "\x{0331}", UTF(0xAF), below => 1); # COMBINING MACRON BELOW & MACRON
+DefAccent('\t', "\x{0361}", "-"); # COMBINING DOUBLE INVERTED BREVE & ???? What????
+# this one's actually defined in mathscinet.sty, but just stick it here!
+DefAccent('\lfhook', "\x{0326}", ",", below => 1); # COMBINING COMMA BELOW
+# I doubt that latter covers multiple chars...?
+#DefAccent('\bar',"\x{0304}", ?); # COMBINING MACRON or is this the longer overbar?
+
+# Note that these two apparently work in Math? BUT the argument is treated as text!!!
+# \d and \b pick the math or the text variant depending on \ifmmode.
+DefMacro('\d{}', '\ifmmode\@math@daccent{#1}\else\@text@daccent{#1}\fi');
+DefMacro('\b{}', '\ifmmode\@math@baccent{#1}\else\@text@baccent{#1}\fi');
+
+# Math variant of \d: the argument is processed in text mode. After digestion,
+# a math argument has its body stored as property 'matharg'; any other
+# argument is stored as 'textarg'. The template uses whichever is set.
+# NOTE(review): parts of the template are empty strings as they appear here
+# -- confirm against upstream.
+DefConstructor('\@math@daccent {}',
+  "\x{22c5}"
+    . "?#textarg(#textarg)(#matharg)"
+    . "",
+  mode => 'text', alias => '\d',
+  afterDigest => sub {
+    my ($stomach, $whatsit) = @_;
+    my $arg = $whatsit->getArg(1);
+    if ($arg->isMath) {
+      $whatsit->setProperty(matharg => $arg->getBody); }
+    else {
+      $whatsit->setProperty(textarg => $arg); }
+    return; });
+
+# Math variant of \b: same argument handling as \@math@daccent, with U+00AF
+# (macron) as the accent character.
+DefConstructor('\@math@baccent {}',
+  "" . UTF(0xAF) . ""
+    . "?#textarg(#textarg)(#matharg)"
+    . "",
+  mode => 'text', alias => '\b',
+  afterDigest => sub {
+    my ($stomach, $whatsit) = @_;
+    my $arg = $whatsit->getArg(1);
+    if ($arg->isMath) {
+      $whatsit->setProperty(matharg => $arg->getBody); }
+    else {
+      $whatsit->setProperty(textarg => $arg); }
+    return; });
+
+#======================================================================
+# TeX Book, Appendix B. p. 357
+
+DefMathI('\to', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW??? a bit more explicitly relation-like?
+
+# The rule/leader "fill" commands are all accepted with no replacement.
+foreach my $op ('\hrulefill', '\dotfill', '\rightarrowfill', '\leftarrowfill',
+  '\upbracefill', '\downbracefill') {
+  DefPrimitiveI($op, undef, undef); }
+
+#Let('\bye', '\end');
+
+Let('\sp', T_SUPER);
+Let('\sb', T_SUB);
+
+# \, expands (protected) to \lx@thinmuskip in math mode and to \lx@thinspace
+# otherwise. Both produce a U+2009 Box recorded as \, ; the math form takes
+# its width from the \thinmuskip register, the text form uses 0.16667em.
+DefPrimitiveI('\lx@thinmuskip', undef, sub {
+    Box("\x{2009}", undef, undef, T_CS('\,'),
+      name => 'thinspace', isSpace => 1,
+      width => LookupRegister('\thinmuskip')); });
+DefPrimitiveI('\lx@thinspace', undef, sub {
+    Box("\x{2009}", undef, undef, T_CS('\,'),
+      name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); });
+DefMacroI('\,', undef, '\ifmmode\lx@thinmuskip\else\lx@thinspace\fi', protected => 1);
+
+# \! : width is the negated \thinmuskip.
+DefPrimitiveI('\!', undef, sub {
+    Box("\x{200B}", undef, undef, T_CS('\!'), # zero width space
+      name => 'negthinspace', isSpace => 1,
+      width => LookupRegister('\thinmuskip')->negate); });
+DefPrimitiveI('\>', undef, sub {
+    Box("\x{2005}", undef, undef, T_CS('\>'),
+      name => 'medspace', isSpace => 1,
+      width => LookupRegister('\medmuskip')); });
+
+DefPrimitiveI('\;', undef, sub {
+    Box("\x{2004}", undef, undef, T_CS('\;'),
+      name => 'thickspace', isSpace => 1,
+      width => LookupRegister('\thickmuskip')); });
+
+Let('\:', '\>');
+
+# Backslash-tab as a 1em non-breaking space (U+00A0).
+DefPrimitiveI("\\\t", undef, sub {
+    Box(UTF(0xA0), undef, undef, T_CS("\\\t"),
+      isSpace => 1, width => Dimension('1em')); });
+
+#----------------------------------------------------------------------
+# Actually from LaTeX; Table 3.7. Miscellaneous Symbols, p.43
+#----------------------------------------------------------------------
+# Some should be differential operators, qualifiers, ...
+# Symbols without a role/meaning keyword are declared with their glyph only.
+DefMathI('\aleph', undef, "\x{2135}");
+DefMathI('\hbar', undef, "\x{210F}", role => 'ID', meaning => 'Planck-constant-over-2-pi');
+DefMathI('\imath', undef, "\x{0131}");
+DefMathI('\jmath', undef, "\x{0237}");
+DefMathI('\ell', undef, "\x{2113}");
+DefMathI('\wp', undef, "\x{2118}", meaning => 'Weierstrass-p');
+DefMathI('\Re', undef, "\x{211C}", role => 'OPFUNCTION', meaning => 'real-part');
+DefMathI('\Im', undef, "\x{2111}", role => 'OPFUNCTION', meaning => 'imaginary-part');
+DefMathI('\mho', undef, "\x{2127}");
+
+DefMathI('\prime', undef, "\x{2032}", role => 'SUPOP', locked => 1);
+DefMathI('\emptyset', undef, "\x{2205}", role => 'ID', meaning => 'empty-set');
+DefMathI('\nabla', undef, "\x{2207}", role => 'OPERATOR');
+DefMathI('\surd', undef, "\x{221A}", role => 'OPERATOR', meaning => 'square-root');
+DefMathI('\top', undef, "\x{22A4}", role => 'ADDOP', meaning => 'top');
+DefMathI('\bot', undef, "\x{22A5}", role => 'ADDOP', meaning => 'bottom');
+DefMathI('\|', undef, "\x{2225}", role => 'VERTBAR', name => '||');
+# should get meaning => 'parallel-to' when used as infix, but NOT when for OPEN|CLOSE
+DefMathI('\angle', undef, "\x{2220}");
+
+# NOTE: This is probably the wrong role.
+# Also, should probably carry info about Binding for OpenMath
+DefMathI('\forall', undef, "\x{2200}", role => 'BIGOP', meaning => 'for-all');
+DefMathI('\exists', undef, "\x{2203}", role => 'BIGOP', meaning => 'exists');
+DefMathI('\neg', undef, UTF(0xAC), role => 'BIGOP', meaning => 'not');
+DefMathI('\lnot', undef, UTF(0xAC), role => 'BIGOP', meaning => 'not');
+DefMathI('\flat', undef, "\x{266D}");
+DefMathI('\natural', undef, "\x{266E}");
+DefMathI('\sharp', undef, "\x{266F}");
+DefMathI('\backslash', undef, UTF(0x5C), role => 'MULOP');
+DefMathI('\partial', undef, "\x{2202}", role => 'DIFFOP', meaning => 'partial-differential');
+
+DefMathI('\infty', undef, "\x{221E}", role => 'ID', meaning => 'infinity');
+DefMathI('\Box', undef, "\x{25A1}");
+DefMathI('\Diamond', undef, "\x{25C7}");
+DefMathI('\triangle', undef, "\x{25B3}");
+DefMathI('\clubsuit', undef, "\x{2663}");
+DefMathI('\diamondsuit', undef, "\x{2662}");
+DefMathI('\heartsuit', undef, "\x{2661}");
+DefMathI('\spadesuit', undef, "\x{2660}");
+
+#----------------------------------------------------------------------
+# \smallint: an integral at fixed size 9 and 'text' math style; its script
+# placement is still decided by doScriptpos.
+DefMath('\smallint', "\x{222B}", meaning => 'integral', role => 'INTOP',
+  font => { size => 9 }, scriptpos => \&doScriptpos, mathstyle => 'text'); # INTEGRAL
+
+#----------------------------------------------------------------------
+# Actually LaTeX; Table 3.8. Variable-sized Symbols, p.44.
+#----------------------------------------------------------------------
+# Script placement for big operators: 'mid' when the current font's math
+# style is 'display', 'post' otherwise.
+sub doScriptpos {
+  return (LookupValue('font')->getMathstyle eq 'display' ? 'mid' : 'post'); }
+
+# Math style for variable-sized operators: 'display' when the current font's
+# math style is 'display', 'text' otherwise.
+sub doVariablesizeOp {
+  return (LookupValue('font')->getMathstyle eq 'display' ? 'display' : 'text'); }
+
+# The SUMOP operators below pass both helpers as callbacks; \int and \oint
+# pass only mathstyle (no scriptpos).
+DefMathI('\sum', undef, "\x{2211}",
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'sum',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\prod', undef, "\x{220F}",
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'product',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\coprod', undef, "\x{2210}",
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'coproduct',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\int', undef, "\x{222B}",
+  role => 'INTOP',
+  meaning => 'integral',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\oint', undef, "\x{222E}",
+  role => 'INTOP',
+  meaning => 'contour-integral',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigcap', undef, "\x{22C2}", # versus \x{2229}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'intersection',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigcup', undef, "\x{22C3}", # versus \x{222A}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'union',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigsqcup', undef, "\x{2A06}", # versus \x{2294}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'square-union',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigvee', undef, "\x{22C1}", # versus \x{2228}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'or',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigwedge', undef, "\x{22C0}", # versus \x{2227}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'and',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigodot', undef, "\x{2A00}", # versus \x{2299}
+  role => 'SUMOP', #meaning=> ?
+  scriptpos => \&doScriptpos,
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigotimes', undef, "\x{2A02}", # versus \x{2297}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'tensor-product',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\bigoplus', undef, "\x{2A01}", # versus \x{2295}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'direct-sum',
+  mathstyle => \&doVariablesizeOp);
+DefMathI('\biguplus', undef, "\x{2A04}", # versus \x{228e}
+  role => 'SUMOP',
+  scriptpos => \&doScriptpos,
+  meaning => 'symmetric-difference',
+  mathstyle => \&doVariablesizeOp);
+
+#----------------------------------------------------------------------
+# Actually from LaTeX; Table 3.4. Binary Operation Symbols, p.42
+#----------------------------------------------------------------------
+DefMathI('\pm', undef, UTF(0xB1), role => 'ADDOP', meaning => 'plus-or-minus');
+DefMathI('\mp', undef, "\x{2213}", role => 'ADDOP', meaning => 'minus-or-plus');
+DefMathI('\times', undef, UTF(0xD7), role => 'MULOP', meaning => 'times');
+DefMathI('\div', undef, UTF(0xF7), role => 'MULOP', meaning => 'divide');
+DefMathI('\ast', undef, "\x{2217}", role => 'MULOP');
+DefMathI('\star', undef, "\x{22C6}", role => 'MULOP');
+DefMathI('\circ', undef, "\x{2218}", role => 'MULOP', meaning => 'compose');
+DefMathI('\bullet', undef, "\x{2219}", role => 'MULOP');
+DefMathI('\cdot', undef, "\x{22C5}", role => 'MULOP');
+## , meaning=>'inner-product'); that's pushing it a bit far...
+
+# Need to classify set operations more carefully....
+DefMathI('\cap', undef, "\x{2229}", role => 'ADDOP', meaning => 'intersection');
+DefMathI('\cup', undef, "\x{222A}", role => 'ADDOP', meaning => 'union');
+DefMathI('\uplus', undef, "\x{228E}", role => 'ADDOP');
+DefMathI('\sqcap', undef, "\x{2293}", role => 'ADDOP', meaning => 'square-intersection');
+DefMathI('\sqcup', undef, "\x{2294}", role => 'ADDOP', meaning => 'square-union');
+DefMathI('\vee', undef, "\x{2228}", role => 'ADDOP', meaning => 'or');
+DefMathI('\lor', undef, "\x{2228}", role => 'ADDOP', meaning => 'or');
+DefMathI('\wedge', undef, "\x{2227}", role => 'ADDOP', meaning => 'and');
+DefMathI('\land', undef, "\x{2227}", role => 'ADDOP', meaning => 'and');
+DefMathI('\setminus', undef, "\x{2216}", role => 'ADDOP', meaning => 'set-minus');
+DefMathI('\wr', undef, "\x{2240}", role => 'MULOP');
+
+# Should this block be ADDOP or something else?
+DefMathI('\diamond', undef, "\x{22C4}", role => 'ADDOP');
+DefMathI('\bigtriangleup', undef, "\x{25B3}", role => 'ADDOP');
+DefMathI('\bigtriangledown', undef, "\x{25BD}", role => 'ADDOP');
+DefMathI('\triangleleft', undef, "\x{25C1}", role => 'ADDOP');
+DefMathI('\triangleright', undef, "\x{25B7}", role => 'ADDOP');
+DefMathI('\lhd', undef, "\x{22B2}", role => 'ADDOP', meaning => 'subgroup-of');
+DefMathI('\rhd', undef, "\x{22B3}", role => 'ADDOP', meaning => 'contains-as-subgroup');
+DefMathI('\unlhd', undef, "\x{22B4}", role => 'ADDOP', meaning => 'subgroup-of-or-equals');
+DefMathI('\unrhd', undef, "\x{22B5}", role => 'ADDOP', meaning => 'contains-as-subgroup-or-equals');
+
+DefMathI('\oplus', undef, "\x{2295}", role => 'ADDOP', meaning => 'direct-sum');
+DefMathI('\ominus', undef, "\x{2296}", role => 'ADDOP', meaning => 'symmetric-difference');
+DefMathI('\otimes', undef, "\x{2297}", role => 'MULOP', meaning => 'tensor-product');
+DefMathI('\oslash', undef, "\x{2298}", role => 'MULOP');
+DefMathI('\odot', undef, "\x{2299}", role => 'MULOP', meaning => 'direct-product');
+DefMathI('\bigcirc', undef, "\x{25CB}", role => 'MULOP');
+DefMathI('\dagger', undef, "\x{2020}", role => 'MULOP');
+DefMathI('\ddagger', undef, "\x{2021}", role => 'MULOP');
+DefMathI('\amalg', undef, "\x{2210}", role => 'MULOP', meaning => 'coproduct');
+
+#----------------------------------------------------------------------
+# LaTeX; Table 3.5. Relation Symbols, p.43
+#----------------------------------------------------------------------
+# The glyphs declared here are also the keys looked up in %NOTS when a
+# relation is preceded by \not.
+DefMathI('\leq', undef, "\x{2264}", role => 'RELOP', meaning => 'less-than-or-equals');
+DefMathI('\prec', undef, "\x{227A}", role => 'RELOP', meaning => 'precedes');
+DefMathI('\preceq', undef, "\x{2AAF}", role => 'RELOP', meaning => 'precedes-or-equals');
+DefMathI('\ll', undef, "\x{226A}", role => 'RELOP', meaning => 'much-less-than');
+DefMathI('\subset', undef, "\x{2282}", role => 'RELOP', meaning => 'subset-of');
+DefMathI('\subseteq', undef, "\x{2286}", role => 'RELOP', meaning => 'subset-of-or-equals');
+DefMathI('\sqsubset', undef, "\x{228F}", role => 'RELOP', meaning => 'square-image-of');
+DefMathI('\sqsubseteq', undef, "\x{2291}", role => 'RELOP', meaning => 'square-image-of-or-equals');
+DefMathI('\in', undef, "\x{2208}", role => 'RELOP', meaning => 'element-of');
+DefMathI('\vdash', undef, "\x{22A2}", role => 'METARELOP', meaning => 'proves');
+
+DefMathI('\geq', undef, "\x{2265}", role => 'RELOP', meaning => 'greater-than-or-equals');
+DefMathI('\succ', undef, "\x{227B}", role => 'RELOP', meaning => 'succeeds');
+DefMathI('\succeq', undef, "\x{2AB0}", role => 'RELOP', meaning => 'succeeds-or-equals');
+DefMathI('\gg', undef, "\x{226B}", role => 'RELOP', meaning => 'much-greater-than');
+DefMathI('\supset', undef, "\x{2283}", role => 'RELOP', meaning => 'superset-of');
+DefMathI('\supseteq', undef, "\x{2287}", role => 'RELOP', meaning => 'superset-of-or-equals');
+DefMathI('\sqsupset', undef, "\x{2290}", role => 'RELOP', meaning => 'square-original-of');
+DefMathI('\sqsupseteq', undef, "\x{2292}", role => 'RELOP', meaning => 'square-original-of-or-equals');
+DefMathI('\ni', undef, "\x{220B}", role => 'RELOP', meaning => 'contains');
+# NOTE(review): U+22A3 is LEFT TACK; the meaning 'does-not-prove' looks
+# questionable -- confirm.
+DefMathI('\dashv', undef, "\x{22A3}", role => 'METARELOP', meaning => 'does-not-prove');
+
+# I have the impression that "identical" is a stronger notion than "equivalence".
+# Note that the unicode here is called "Identical To",
+# and that the notion of "equivalent to" usually involves the tilde operator.
+DefMathI('\equiv', undef, "\x{2261}", role => 'RELOP', meaning => 'equivalent-to');
+DefMathI('\sim', undef, "\x{223C}", role => 'RELOP', meaning => 'similar-to');
+DefMathI('\simeq', undef, "\x{2243}", role => 'RELOP', meaning => 'similar-to-or-equals');
+DefMathI('\asymp', undef, "\x{224D}", role => 'RELOP', meaning => 'asymptotically-equals');
+DefMathI('\approx', undef, "\x{2248}", role => 'RELOP', meaning => 'approximately-equals');
+DefMathI('\cong', undef, "\x{2245}", role => 'RELOP', meaning => 'approximately-equals');
+DefMathI('\neq', undef, "\x{2260}", role => 'RELOP', meaning => 'not-equals');
+DefMathI('\doteq', undef, "\x{2250}", role => 'RELOP', meaning => 'approaches-limit');
+DefMathI('\notin', undef, "\x{2209}", role => 'RELOP', meaning => 'not-element-of');
+
+DefMathI('\models', undef, "\x{22A7}", role => 'RELOP', meaning => 'models');
+DefMathI('\perp', undef, "\x{27C2}", role => 'RELOP', meaning => 'perpendicular-to');
+DefMathI('\mid', undef, "\x{2223}", role => 'VERTBAR'); # DIVIDES (RELOP?) ?? well, sometimes...
+DefMathI('\parallel', undef, "\x{2225}", role => 'VERTBAR', meaning => 'parallel-to');
+DefMathI('\bowtie', undef, "\x{22C8}", role => 'RELOP'); # BOWTIE
+DefMathI('\Join', undef, "\x{2A1D}", role => 'RELOP', meaning => 'join');
+DefMathI('\smile', undef, "\x{2323}", role => 'RELOP'); # SMILE
+DefMathI('\frown', undef, "\x{2322}", role => 'RELOP'); # FROWN
+DefMathI('\propto', undef, "\x{221D}", role => 'RELOP', meaning => 'proportional-to');
+
+# TeX defines these as alternate names...
+Let('\le', '\leq');
+Let('\ge', '\geq');
+Let('\ne', '\neq');
+# And it defines some others as alternate names, but they seem to
+# potentially imply slightly different meanings??? Leave them out for now..
+
+#----------------------------------------------------------------------
+# Not; (Is fullwidth solidus appropriate for when \not appears in isolation?)
+DefMathI('\not', undef, "\x{FF0F}", role => 'OPFUNCTION', meaning => 'not');
+# Match negations of many operators
+# %NOTS maps a single-character operator to its precomposed negated form.
+# Operators not listed here fall back to the base character followed by
+# U+0338 (see the rewrite below).
+# Each key must be the glyph of the NON-negated operator: the sqsupseteq entry
+# is keyed by U+2292 (SQUARE ORIGINAL OF OR EQUAL TO), not U+2290, which is
+# the glyph of \sqsupset.
+our %NOTS = ('=' => "\x{2260}", '<' => "\x{226E}", '>' => "\x{226F}",
+  "\x{2208}" => "\x{2209}", #\in=>\notin
+  "\x{2264}" => "\x{2270}", "\x{2265}" => "\x{2271}", # Less eq, greater eq.
+  "\x{227A}" => "\x{2280}", "\x{227B}" => "\x{2281}", # prec, succ
+  "\x{2AAF}" => "\x{22E0}", "\x{2AB0}" => "\x{22E1}", # preceq, succeq
+  "\x{2282}" => "\x{2284}", "\x{2283}" => "\x{2285}", # subset, supset
+  "\x{2286}" => "\x{2288}", "\x{2287}" => "\x{2289}", # subseteq, supseteq
+  "\x{2291}" => "\x{22E2}", "\x{2292}" => "\x{22E3}", # sqsubseteq, sqsupseteq
+  "\x{2261}" => "\x{2262}", # equiv
+  "\x{224D}" => "\x{226D}", "\x{2248}" => "\x{2249}", # asymp, approx
+  "\x{22B2}" => "\x{22EA}", "\x{22B3}" => "\x{22EB}", # lhd, rhd
+  "\x{22B4}" => "\x{22EC}", "\x{22B5}" => "\x{22ED}", # unlhd, unrhd
+  "\x{2203}" => "\x{2204}", # Exists
+);
+
+# For a \not operator that is followed by anything, concoct an appropriate not or cancelation.
+# The rewrite selects the \not token (an XMTok with text U+FF0F and
+# meaning='not') together with its following sibling, then either:
+#  - wraps the sibling in an ltx:XMApp headed by an ENCLOSE token
+#    (updiagonalstrike), when the sibling is not a single-character XMTok; or
+#  - replaces the sibling's character by its negated form and prefixes its
+#    meaning and name attributes with "not-".
+DefRewrite(select => ["descendant-or-self::ltx:XMTok[text()='\x{FF0F}' and \@meaning='not']"
+      . "[ following-sibling::*]", 2],
+  replace => sub {
+    my ($doc, $not, $thing) = @_;
+    # $text is a false value when $thing is not an XMTok, which then fails the
+    # length check below.
+    my $text = ($doc->getModel->getNodeQName($thing) eq 'ltx:XMTok')
+      && $thing->textContent;
+
+    if ((!defined $text) || (length($text) != 1)) { # Not simple char token.
+      my $box = $doc->getNodeBox($not);
+      $doc->openElement('ltx:XMApp', _box => $box); # Wrap with a cancel op
+      my $strike = $doc->insertMathToken(undef, role => 'ENCLOSE', enclose => 'updiagonalstrike',
+        meaning => 'not', _box => $box);
+      # Move the xml:id of the \not token, if any, onto the new strike token.
+      if (my $id = $not->getAttribute('xml:id')) {
+        $not->removeAttribute('xml:id');
+        $doc->unRecordID($id);
+        $doc->setAttribute($strike, 'xml:id' => $id); }
+      $doc->getNode->appendChild($thing);
+      $doc->closeElement('ltx:XMApp'); }
+    else {
+      # For simple tokens, we'll modify the relevant content & attributes
+      # [children removed, id's presumably ignorable]
+      map { $_->unbindNode() } $thing->childNodes;
+      # Prefer the precomposed negation; otherwise append U+0338.
+      my $new = defined $NOTS{$text} ? $NOTS{$text} : $text . "\x{0338}";
+      $thing->appendText($new);
+      if (my $meaning = $thing->getAttribute('meaning')) {
+        $doc->setAttribute($thing, meaning => "not-$meaning"); }
+      if (my $name = $thing->getAttribute('name') || $text) {
+        $doc->setAttribute($thing, name => "not-$name"); }
+      # and put the node back in
+      $doc->getNode->appendChild($thing);
+      # Since the element is disappearing, if it had an id that was referenced...!?!?
+      if (my $id = $not->getAttribute('xml:id')) {
+        foreach my $n ($doc->findnodes("descendant-or-self::ltx:XMRef[\@idref='$id']")) {
+          $doc->removeNode($n); } }    # ? Hopefully this is safe.
+} });
+
+#----------------------------------------------------------------------
+# \joinrel
+DefMathI('\relbar', undef, "-", role => 'RELOP'); # ???
+DefMathI('\Relbar', undef, "=", role => 'RELOP'); # ???
+
+# \joinrel is \mathrel{\mkern-3\mu}
+# Ah, but the Effect is to join 2 "relations" into one!
+# Takes the last item already on the current list as the left operand,
+# digests expanded tokens until something is produced, and uses the first
+# produced item as the right operand. Both go into a \@@joinrel Whatsit,
+# which is returned after any further items that same invocation produced.
+# With no left operand, or nothing to the right, it produces nothing.
+DefPrimitiveI('\joinrel', undef, sub {
+    my ($stomach, $op) = @_;
+    my $gullet = $stomach->getGullet;
+    $gullet->skipSpaces;
+    my $left = $LaTeXML::LIST[-1];
+    if (!$left) { # Nothing there?...
+      return (); } # I guess this becomes a no-op???
+    else {
+      pop(@LaTeXML::LIST);
+      my @stuff = ();
+      while (my $tok = $gullet->readXToken(0)) {
+        @stuff = $stomach->invokeToken($tok);
+        last if @stuff; }
+      return () unless @stuff; # no-op ????
+      my $right = shift(@stuff);
+      (@stuff,
+        LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS('\@@joinrel')), [$left, $right],
+          locator => $gullet->getLocator,
+          font => $right->getFont, isMath => 1)); } });
+
+# Absorbs both operands, then replaces the last two child elements of the
+# current node by one ltx:XMTok holding their concatenated text. The role is
+# the shared role if both agree, else ARROW if either is ARROW, else RELOP.
+# NOTE(review): the grep condition is true when EITHER of the two nodes is an
+# ltx:XMTok, whereas the comment below says both -- confirm which is intended.
+DefConstructor('\@@joinrel{}{}', sub {
+    my ($document, $left, $right) = @_;
+    $document->absorb($left);
+    $document->absorb($right);
+    # Now if last 2 items are XMTok, replace by a single token with joined content (& attr?)
+    my $node = $document->getNode;
+    my @nodes = $document->getChildElements($node);
+    if (scalar(@nodes) >= 2) {
+      my @rels = ($nodes[-2], $nodes[-1]);
+      if (grep { $document->getNodeQName($_) eq 'ltx:XMTok' } @rels) {
+        my %roles = ();
+        map { $roles{ $_->getAttribute('role') } = 1 } @rels;
+        my $role = (scalar(keys %roles) == 1 ? [keys %roles]->[0] : ($roles{ARROW} ? 'ARROW' : 'RELOP'));
+        map { $node->removeChild($_) } @rels;
+        $document->insertElement('ltx:XMTok', [map { $_->textContent } @rels], role => $role);
+      } } },
+  reversion => '#1\joinrel #2');
+
+#----------------------------------------------------------------------
+# LaTeX; Table 3.6. Arrow Symbols, p.43
+#----------------------------------------------------------------------
+# Arrows get treated somewhat like relations (or meta-relations),
+# but it's hard to associate any particular "meaning" to them.
+
+# One-directional arrows get role ARROW; the bidirectional arrows, \iff and
+# \rightleftharpoons get role METARELOP.
+DefMathI('\leftarrow', undef, "\x{2190}", role => 'ARROW'); # LEFTWARDS ARROW
+DefMathI('\Leftarrow', undef, "\x{21D0}", role => 'ARROW'); # LEFTWARDS DOUBLE ARROW
+DefMathI('\rightarrow', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW
+DefMathI('\Rightarrow', undef, "\x{21D2}", role => 'ARROW'); # RIGHTWARDS DOUBLE ARROW
+DefMathI('\leftrightarrow', undef, "\x{2194}", role => 'METARELOP'); # LEFT RIGHT ARROW
+DefMathI('\Leftrightarrow', undef, "\x{21D4}", role => 'METARELOP'); # LEFT RIGHT DOUBLE ARROW
+DefMathI('\iff', undef, "\x{21D4}", role => 'METARELOP', meaning => 'iff'); # LEFT RIGHT DOUBLE ARROW
+DefMathI('\mapsto', undef, "\x{21A6}", role => 'ARROW', meaning => 'maps-to');
+DefMathI('\hookleftarrow', undef, "\x{21A9}", role => 'ARROW'); # LEFTWARDS ARROW WITH HOOK
+DefMathI('\leftharpoonup', undef, "\x{21BC}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB UPWARDS
+DefMathI('\leftharpoondown', undef, "\x{21BD}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB DOWNWARDS
+DefMathI('\rightleftharpoons', undef, "\x{21CC}", role => 'METARELOP'); # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON
+DefMathI('\longleftarrow', undef, "\x{27F5}", role => 'ARROW'); # LONG LEFTWARDS ARROW
+DefMathI('\Longleftarrow', undef, "\x{27F8}", role => 'ARROW'); # LONG LEFTWARDS DOUBLE ARROW
+DefMathI('\longrightarrow', undef, "\x{27F6}", role => 'ARROW'); # LONG RIGHTWARDS ARROW
+DefMathI('\Longrightarrow', undef, "\x{27F9}", role => 'ARROW'); # LONG RIGHTWARDS DOUBLE ARROW
+DefMathI('\longleftrightarrow', undef, "\x{27F7}", role => 'METARELOP'); # LONG LEFT RIGHT ARROW
+DefMathI('\Longleftrightarrow', undef, "\x{27FA}", role => 'METARELOP'); # LONG LEFT RIGHT DOUBLE ARROW
+DefMathI('\longmapsto', undef, "\x{27FC}", role => 'ARROW'); # LONG RIGHTWARDS ARROW FROM BAR
+DefMathI('\hookrightarrow', undef, "\x{21AA}", role => 'ARROW'); # RIGHTWARDS ARROW WITH HOOK
+DefMathI('\rightharpoonup', undef, "\x{21C0}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB UPWARDS
+DefMathI('\rightharpoondown', undef, "\x{21C1}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB DOWNWARDS +DefMathI('\leadsto', undef, "\x{219D}", role => 'ARROW', meaning => 'leads-to'); + +DefMathI('\uparrow', undef, "\x{2191}", role => 'ARROW'); # UPWARDS ARROW +DefMathI('\Uparrow', undef, "\x{21D1}", role => 'ARROW'); # UPWARDS DOUBLE ARROW +DefMathI('\downarrow', undef, "\x{2193}", role => 'ARROW'); # DOWNWARDS ARROW +DefMathI('\Downarrow', undef, "\x{21D3}", role => 'ARROW'); # DOWNWARDS DOUBLE ARROW +DefMathI('\updownarrow', undef, "\x{2195}", role => 'ARROW'); # UP DOWN ARROW +DefMathI('\Updownarrow', undef, "\x{21D5}", role => 'ARROW'); # UP DOWN DOUBLE ARROW +DefMathI('\nearrow', undef, "\x{2197}", role => 'ARROW'); # NORTH EAST ARROW +DefMathI('\searrow', undef, "\x{2198}", role => 'ARROW'); # SOUTH EAST ARROW +DefMathI('\swarrow', undef, "\x{2199}", role => 'ARROW'); # SOUTH WEST ARROW +DefMathI('\nwarrow', undef, "\x{2196}", role => 'ARROW'); # NORTH WEST ARROW + +# \mapstochar (3237), \lhook(312C), \rhook(312D) +# These are really wrong; I can't find the right Unicode Glyphs. +# These are only fragments intended to be assembled into meaningful(?) symbols. +DefMathI('\mapstochar', undef, "\x{2E20}"); # TeX 3237 +DefMathI('\lhook', undef, "\x{2E26}"); # TeX 312C +DefMathI('\rhook', undef, "\x{2E27}"); # TeX 312D + +#====================================================================== +# TeX Book, Appendix B. p. 359 + +# Ah, since \ldots can appear in text and math.... +DefMacroI('\ldots', undef, '\lx@ldots'); +DefConstructorI('\lx@ldots', undef, + "?#isMath(\x{2026})(\x{2026})", + sizer => "\x{2026}", + reversion => '\ldots', + properties => sub { + (LookupValue('IN_MATH') + ? (font => LookupValue('font')->merge(family => 'serif', + series => 'medium', shape => 'upright')->specialize("\x{2026}")) + : ()); }); # Since not DefMath! 
+ # And so can \vdots +DefConstructorI('\vdots', undef, + "?#isMath(\x{22EE})(\x{22EE})", + sizer => "\x{22EE}", + properties => sub { + (LookupValue('IN_MATH') + ? (font => LookupValue('font')->merge(family => 'serif', + series => 'medium', shape => 'upright')->specialize("\x{22EE}")) + : ()); }); # Since not DefMath! + # But not these! +DefMathI('\cdots', undef, "\x{22EF}", role => 'ID'); # MIDLINE HORIZONTAL ELLIPSIS + +DefMathI('\ddots', undef, "\x{22F1}", role => 'ID'); # DOWN RIGHT DIAGONAL ELLIPSIS +DefMathI('\colon', undef, ':', role => 'METARELOP'); # Seems like good default role + # Note that amsmath redefines \dots to be `smart'. + # Aha, also can be in text... +DefConstructorI('\dots', undef, + "?#isMath(\x{2026})(\x{2026})", + sizer => "\x{2026}", + properties => sub { + (LookupValue('IN_MATH') + ? (font => LookupValue('font')->merge(family => 'serif', + series => 'medium', shape => 'upright')->specialize("\x{2026}")) + : ()); }); # Since not DefMath! + +# And while we're at it... + +# Pretest for XMath to keep from interpreting math that the DOM may not allow!! +##DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'\cdot\cdot\cdot',replace=>'\cdots'); + +DefMathLigature("\x{22C5}\x{22C5}\x{22C5}" => "\x{22EF}", role => 'ID', name => 'cdots'); + +DefLigature(qr{\.\.\.}, "\x{2026}", fontTest => sub { $_[0]->getFamily ne 'typewriter'; }); # ldots + +#DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'...',replace=>'\ldots'); +DefMathLigature("..." => "\x{2026}", role => 'ID', name => 'ldots'); + +#---------------------------------------------------------------------- +# Math Accents. +#---------------------------------------------------------------------- +# LaTeX; Table 3.11. Math Mode Accents, p.50. +# Are these all TeX (or LaTeX)? +# Note that most of these should NOT be stretchy, by default! 
+DefMath('\hat Digested', UTF(0x5E), + operator_role => 'OVERACCENT', operator_stretchy => 'false'); +DefMath('\check Digested', "\x{02C7}", + operator_role => 'OVERACCENT', operator_stretchy => 'false'); # CARON +DefMath('\breve Digested', "\x{02D8}", operator_role => 'OVERACCENT'); # BREVE +DefMath('\acute Digested', UTF(0xB4), operator_role => 'OVERACCENT'); # ACUTE ACCENT +DefMath('\grave Digested', UTF(0x60), operator_role => 'OVERACCENT'); # GRAVE ACCENT +DefMath('\tilde Digested', UTF(0x7E), + operator_role => 'OVERACCENT', operator_stretchy => 'false'); # TILDE +DefMath('\bar Digested', UTF(0xAF), + operator_role => 'OVERACCENT', operator_stretchy => 'false'); # MACRON +DefMath('\vec Digested', "\x{2192}", + operator_role => 'OVERACCENT', operator_stretchy => 'false'); # RIGHTWARDS ARROW +DefMath('\dot Digested', "\x{02D9}", operator_role => 'OVERACCENT'); # DOT ABOVE +DefMath('\ddot Digested', UTF(0xA8), operator_role => 'OVERACCENT'); # DIAERESIS +DefMath('\widehat Digested', UTF(0x5E), operator_role => 'OVERACCENT'); # CIRCUMFLEX ACCENT [plain? also amsfonts] +DefMath('\widetilde Digested', UTF(0x7E), operator_role => 'OVERACCENT'); # TILDE [plain? also amsfonts] +# These aren't handled as simple accents by TeX, so no Digested +DefMath('\overbrace {}', "\x{23DE}", operator_role => 'OVERACCENT', # TOP CURLY BRACKET + scriptpos => 'mid', robust => 1); +DefMath('\underbrace {}', "\x{23DF}", operator_role => 'UNDERACCENT', # BOTTOM CURLY BRACKET + scriptpos => 'mid', robust => 1); + +# NOTE that all the above accents REQUIRE math mode +# EXCEPT underline, overrightarrow and overleftarrow! + +Let('\underbar', '\underline'); # Will anyone notice? + +DefMacro('\overrightarrow{}', '\protect\ifmmode\math@overrightarrow{#1}\else$\math@overrightarrow{#1}$\fi'); +DefMacro('\overleftarrow{}', '\protect\ifmmode\math@overleftarrow{#1}\else$\math@overleftarrow{#1}$\fi'); + +DefMacro('\skew{}{}{}', '{#2{#3\mkern#1mu}\mkern-#1mu}{}'); # ignore the subtle spacing for now? 
+#---------------------------------------------------------------------- +# LaTeX; Table 3.10. Delimiters, p.47 +#---------------------------------------------------------------------- +# The meaning of OPEN/CLOSE tends to depend upon the pairing, +# rather than the individual tokens. +# This meaning is handled in MathParser (for now) + +DefMacroI('\{', undef, '\ifmmode\lx@math@lbrace\else\lx@text@lbrace\fi', protected => 1); +DefMacroI('\}', undef, '\ifmmode\lx@math@rbrace\else\lx@text@rbrace\fi', protected => 1); +DefMathI('\lx@math@lbrace', undef, '{', role => 'OPEN', stretchy => 'false', alias => '\{'); +DefMathI('\lx@math@rbrace', undef, '}', role => 'CLOSE', stretchy => 'false', alias => '\}'); +DefPrimitiveI('\lx@text@lbrace', undef, '{', alias => '\{', + # font => { specialize => "{" }); + font => { shape => 'upright' }, bounded => 1); # Since not DefMath! +DefPrimitiveI('\lx@text@rbrace', undef, '}', alias => '\}', + # font => { specialize => "}" }); # Since not DefMath! + font => { shape => 'upright' }, bounded => 1); # Since not DefMath! +Let('\lbrace', '\{'); +Let('\lbrack', T_OTHER('[')); +Let('\rbrace', '\}'); +Let('\rbrack', T_OTHER(']')); +DefMathI('\lceil', undef, "\x{2308}", role => 'OPEN', stretchy => 'false'); # LEFT CEILING +DefMathI('\rceil', undef, "\x{2309}", role => 'CLOSE', stretchy => 'false'); # RIGHT CEILING +DefMathI('\lfloor', undef, "\x{230A}", role => 'OPEN', stretchy => 'false'); # LEFT FLOOR +DefMathI('\rfloor', undef, "\x{230B}", role => 'CLOSE', stretchy => 'false'); # RIGHT FLOOR + # Note: We use 27E8,27E9, which are "mathematical", rather than 2329,232A +DefMathI('\langle', undef, "\x{27E8}", role => 'OPEN', stretchy => 'false'); # MATHEMATICAL LEFT ANGLE BRACKET +DefMathI('\rangle', undef, "\x{27E9}", role => 'CLOSE', stretchy => 'false'); # MATHEMATICAL RIGHT ANGLE BRACKET + +# Not sure these should be defined here, or latex, or even latex compat mode.
+DefMathI('\lgroup', undef, "(", font => { series => 'bold' }, role => 'OPEN', stretchy => 'false'); +DefMathI('\rgroup', undef, ")", font => { series => 'bold' }, role => 'CLOSE', stretchy => 'false'); +DefMathI('\bracevert', undef, "|", font => { series => 'bold' }, role => 'VERTBAR'); + +## DefMath('\lmoustache',"???", font=>{series=>'bold'}, role=>'OPEN'); +## DefMath('\rmoustache',"???", font=>{series=>'bold'}, role=>'OPEN'); + +# TeX marks some symbols as delimiters which can be used with \left,\right, +# but many of which have different grammatical roles otherwise, eg. arrows, <, >. +# Short of setting up TeX's complicated encoding machinery, I need an explicit +# mapping. Unfortunately, this doesn't (yet) support people declaring their own delimiters! + +# These originally had Token as parameter, rather than {}..... Why? +# Note that in TeX, \big{((} will only enlarge the 1st paren!!! +DefConstructor('\big {}', '#1', bounded => 1, font => { size => 'big' }); +DefConstructor('\Big {}', '#1', bounded => 1, font => { size => 'Big' }); +DefConstructor('\bigg {}', '#1', bounded => 1, font => { size => 'bigg' }); +DefConstructor('\Bigg {}', '#1', bounded => 1, font => { size => 'Bigg' }); + +sub addDelimiterRole { + my ($document, $role) = @_; + my $current = $document->getNode; + my $delim = $document->getLastChildElement($current) || $current; + my $delim_role = (($delim && ($delim->nodeType == XML_ELEMENT_NODE) && $delim->getAttribute('role')) || ''); + # if there is some delimiter-like role on the "delimiter", switch it, otherwise, leave it alone! + if ($delim && ($delim_role =~ /^(OPEN|MIDDLE|CLOSE|VERTBAR|)$/)) { + ## Maybe we shouldn't switch VERTBAR ? + ## The catch is that occasionally people use a single \Bigl (or whatever) + ## where they should have used a \Big + $document->setAttribute($delim, role => $role); } + return; } + +# The "m" versions are defined in e-Tex and other places.
+DefConstructor('\bigl {}', '#1', bounded => 1, font => { size => 'big' }, + afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); +DefConstructor('\bigm {}', '#1', bounded => 1, font => { size => 'big' }, + afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); +DefConstructor('\bigr {}', '#1', bounded => 1, font => { size => 'big' }, + afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); + +DefConstructor('\Bigl {}', '#1', bounded => 1, font => { size => 'Big' }, + afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); +DefConstructor('\Bigm {}', '#1', bounded => 1, font => { size => 'Big' }, + afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); +DefConstructor('\Bigr {}', '#1', bounded => 1, font => { size => 'Big' }, + afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); + +DefConstructor('\biggl {}', '#1', bounded => 1, font => { size => 'bigg' }, + afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); +DefConstructor('\biggm {}', '#1', bounded => 1, font => { size => 'bigg' }, + afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); +DefConstructor('\biggr {}', '#1', bounded => 1, font => { size => 'bigg' }, + afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); + +DefConstructor('\Biggl {}', '#1', bounded => 1, font => { size => 'Bigg' }, + afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); +DefConstructor('\Biggm {}', '#1', bounded => 1, font => { size => 'Bigg' }, + afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); +DefConstructor('\Biggr {}', '#1', bounded => 1, font => { size => 'Bigg' }, + afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); + +Let('\vert', T_OTHER('|')); +Let('\Vert', '\|'); + +#====================================================================== +# TeX Book, Appendix B. p. 360 + +# \choose, et al, already handled above. + +DefMacro('\mathpalette{}{}', + '\mathchoice{#1\displaystyle{#2}}{#1\textstyle{#2}}' + .
'{#1\scriptstyle{#2}}{#1\scriptscriptstyle{#2}}'); + +DefConstructor('\phantom{}', + "?#isMath()" + . "(#1)", # !?!?!?! + properties => { isSpace => 1 }, + afterDigest => sub { + my $whatsit = $_[1]; + my ($w, $h, $d) = $whatsit->getArg(1)->getSize; + $whatsit->setProperties(width => $w, height => $h, depth => $d); + return; }); + +DefConstructor('\hphantom{}', + "?#isMath()" + . "(#1)", # !?!?!?! + properties => { isSpace => 1 }, + afterDigest => sub { + my $whatsit = $_[1]; + my ($w, $h, $d) = $whatsit->getArg(1)->getSize; + $whatsit->setProperties(width => $w, height => $h, depth => $d); + return; }); + +DefConstructor('\vphantom{}', + "?#isMath()" + . "(#1)", # !?!?!?! + properties => { isSpace => 1 }, + afterDigest => sub { + my $whatsit = $_[1]; + my ($w, $h, $d) = $whatsit->getArg(1)->getSize; + $whatsit->setProperties(width => $w, height => $h, depth => $d); + return; }); + +DefConstructor('\mathstrut', "?#isMath()()", + properties => { isSpace => 1 }); +DefConstructor('\smash{}', "#1"); # well, what? + +#====================================================================== +# TeX Book, Appendix B. p. 361 + +# This is actually LaTeX's definition, but let's just do it this way. +DefConstructor('\sqrt OptionalInScriptStyle Digested', + "?#1(" + . "#1#2" + . ")" + . "(" + . "#2)"); + +DefParameterType('ScriptStyleUntil', sub { + my ($gullet, $until) = @_; + $gullet->readUntil($until); }, + beforeDigest => sub { + $_[0]->bgroup; + MergeFont(mathstyle => 'script'); }, + afterDigest => sub { + $_[0]->egroup; }, + reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); + +DefConstructor('\root ScriptStyleUntil:\of {}', + "" + . "#1#2" + . "", + reversion => '\root #1 \of {#2}'); + +#---------------------------------------------------------------------- +# LaTeX; Table 3.9. Log-like Functions, p.44. +#---------------------------------------------------------------------- +# NOTE: Classifying some as TRIGFUNCTION might clarify 'pi' ambiguities ? 
+DefMathI('\arccos', undef, "arccos", role => 'OPFUNCTION', meaning => 'inverse-cosine'); +DefMathI('\arcsin', undef, "arcsin", role => 'OPFUNCTION', meaning => 'inverse-sine'); +DefMathI('\arctan', undef, "arctan", role => 'OPFUNCTION', meaning => 'inverse-tangent'); +DefMathI('\arg', undef, "arg", role => 'OPFUNCTION', meaning => 'argument'); + +DefMathI('\cos', undef, "cos", role => 'TRIGFUNCTION', meaning => 'cosine'); +DefMathI('\cosh', undef, "cosh", role => 'TRIGFUNCTION', meaning => 'hyperbolic-cosine'); +DefMathI('\cot', undef, "cot", role => 'TRIGFUNCTION', meaning => 'cotangent'); +DefMathI('\coth', undef, "coth", role => 'TRIGFUNCTION', meaning => 'hyperbolic-cotangent'); + +DefMathI('\csc', undef, "csc", role => 'TRIGFUNCTION', meaning => 'cosecant'); +DefMathI('\deg', undef, "deg", role => 'OPFUNCTION', meaning => 'degree'); +DefMathI('\det', undef, "det", role => 'LIMITOP', meaning => 'determinant', + scriptpos => \&doScriptpos); +DefMathI('\dim', undef, "dim", role => 'LIMITOP', meaning => 'dimension'); + +DefMathI('\exp', undef, "exp", role => 'OPFUNCTION', meaning => 'exponential'); +DefMathI('\gcd', undef, "gcd", role => 'OPFUNCTION', meaning => 'gcd', + scriptpos => \&doScriptpos); +DefMathI('\hom', undef, "hom", role => 'OPFUNCTION'); +DefMathI('\inf', undef, "inf", role => 'LIMITOP', meaning => 'infimum', + scriptpos => \&doScriptpos); + +DefMathI('\ker', undef, "ker", role => 'OPFUNCTION', meaning => 'kernel'); +DefMathI('\lg', undef, "lg", role => 'OPFUNCTION'); +DefMathI('\lim', undef, "lim", role => 'LIMITOP', meaning => 'limit', + scriptpos => \&doScriptpos); +DefMathI('\liminf', undef, "lim inf", role => 'LIMITOP', meaning => 'limit-infimum', + scriptpos => \&doScriptpos); + +DefMathI('\limsup', undef, "lim sup", role => 'LIMITOP', meaning => 'limit-supremum', + scriptpos => \&doScriptpos); +DefMathI('\ln', undef, "ln", role => 'OPFUNCTION', meaning => 'natural-logarithm'); +DefMathI('\log', undef, "log", role => 'OPFUNCTION', meaning => 
'logarithm'); +DefMathI('\max', undef, "max", role => 'OPFUNCTION', meaning => 'maximum', + scriptpos => \&doScriptpos); + +DefMathI('\min', undef, "min", role => 'OPFUNCTION', meaning => 'minimum', + scriptpos => \&doScriptpos); +DefMathI('\Pr', undef, "Pr", role => 'OPFUNCTION', scriptpos => \&doScriptpos); +DefMathI('\sec', undef, "sec", role => 'TRIGFUNCTION', meaning => 'secant'); +DefMathI('\sin', undef, "sin", role => 'TRIGFUNCTION', meaning => 'sine'); + +DefMathI('\sinh', undef, "sinh", role => 'TRIGFUNCTION', meaning => 'hyperbolic-sine'); +DefMathI('\sup', undef, "sup", role => 'LIMITOP', meaning => 'supremum', + scriptpos => \&doScriptpos); +DefMathI('\tan', undef, "tan", role => 'TRIGFUNCTION', meaning => 'tangent'); +DefMathI('\tanh', undef, "tanh", role => 'TRIGFUNCTION', meaning => 'hyperbolic-tangent'); + +#---------------------------------------------------------------------- +# Modulo + +DefMath('\pmod{}', '\;\;(\mathop{{\rm mod}} #1)', role => 'MODIFIER'); # , meaning=>'modulo'); +DefMath('\bmod', 'mod', role => 'MODIFIEROP', meaning => 'modulo'); + +#====================================================================== +# TeX Book, Appendix B. p. 362 + +DefMacro('\matrix{}', + '\lx@gen@plain@matrix{name=matrix,datameaning=matrix}{#1}'); + +DefMacro('\bordermatrix{}', # Semantics? + '\lx@hack@bordermatrix{\lx@gen@plain@matrix{name=bordermatrix}{#1}}'); +# HACK the newly created border matrix to add columns for the (spanned) parentheses!!! +# Assume (for now) that there's no XMDual structure here. +# What is the semantics, anyway? +DefConstructor('\lx@hack@bordermatrix{}', sub { + my ($document, $matrix) = @_; + $document->absorb($matrix); + my $marray = $document->getNode->lastChild; + my @rows = $document->findnodes('ltx:XMRow', $marray); + my ($h, $d) = (10.0 * $UNITY, 0); # 10pts. + # Contrived, since $matrix may be a List or... 
+ my ($alignment) = grep { $_ } map { $_->getProperty('alignment') } $matrix->unlist; + if ($alignment) { + my $arrayh = $alignment->getHeight->ptValue; + my ($row0, $row1) = $alignment->rows; # What's row 0 ? + $h = $$row1{y}->valueOf; + $d = $h - $arrayh; } + my $md = Dimension(-$d); + $h = Dimension($h); $d = Dimension($d); + + foreach my $row (@rows) { # Add empty cells for 2nd & last column + $document->openElementAt($row, 'ltx:XMCell'); + $document->openElementAt($row, 'ltx:XMCell'); + $row->insertAfter($row->lastChild, $row->firstChild); # Move to 2nd pos! + } + my @cols = element_nodes($rows[1]); + my $col1 = $cols[1]; + my $coln = $cols[-1]; + my $n = scalar(@rows) - 1; + $col1->setAttribute(rowspan => $n); + $coln->setAttribute(rowspan => $n); + my $pfont = $STATE->lookupValue('font')->specialize('('); + $document->appendTree($col1, + ['ltx:XMWrap', { depth => $d }, + ['ltx:XMTok', { role => 'OPEN', height => 0, depth => $d, yoffset => $md, font => $pfont }, '('], + ['ltx:XMTok', { height => $h, yoffset => $md, font => $pfont }, ' ']]); # Effectively, a strut + $document->appendTree($coln, + ['ltx:XMWrap', {}, + ['ltx:XMTok', { role => 'CLOSE', height => 0, depth => $d, yoffset => $md, font => $pfont }, ')'], + ['ltx:XMTok', { height => $h, yoffset => $md, font => $pfont }, ' ']]); + return; }, + reversion => '#1'); + +DefMacro('\pmatrix{}', + '\lx@gen@plain@matrix{name=pmatrix,datameaning=matrix,left=\@left(,right=\@right)}{#1}'); + +# Note that 2nd column in \cases is in text mode! +DefMacro('\cases{}', + '\lx@gen@plain@cases{meaning=cases,left=\@left\{,conditionmode=text,style=\textstyle}{#1}'); + +DefPrimitive('\openup Dimension', undef); + +# What should this do? (needs to work with alignments..)
+# see https://www.tug.org/TUGboat/tb07-1/tb14beet.pdf +# use in arXiv:hep-th/0001208 +DefMacro('\displaylines{}', '\halign{\hbox to\displaywidth{$\hfil\displaystyle##\hfil$}\crcr#1\crcr}'); + +DefMacro('\eqalign{}', + '\@@eqalign{\@start@alignment#1\@finish@alignment}'); +DefConstructor('\@@eqalign{}', + '#1', + reversion => '\eqalign{#1}', bounded => 1, + beforeDigest => sub { alignmentBindings('rl', 'math', + attributes => { vattach => 'baseline' }); }); + +DefMacro('\eqalignno{}', + '\@@eqalignno{\@start@alignment#1\@finish@alignment}'); +DefConstructor('\@@eqalignno{}', + '#1', + reversion => '\eqalignno{#1}', bounded => 1, + beforeDigest => sub { alignmentBindings('rll', 'math', + attributes => { vattach => 'baseline' }); }); + +DefMacro('\leqalignno{}', + '\@@leqalignno{\@start@alignment#1\@finish@alignment}'); +DefConstructor('\@@leqalignno{}', + '#1', + reversion => '\leqalignno{#1}', bounded => 1, + beforeDigest => sub { alignmentBindings('rll', 'math', + attributes => { vattach => 'baseline' }); }); + +DefRegister('\pageno' => Number(0)); +DefRegister('\headline' => Tokens()); +DefRegister('\footline' => Tokens()); +DefMacroI('\folio', undef, "1"); # What else? + +DefPrimitiveI('\nopagenumbers', undef, undef); +DefMacroI('\advancepageno', undef, '\advance\pageno1\relax'); + +#====================================================================== +# TeX Book, Appendix B. p. 363 + +DefPrimitive('\raggedbottom', undef); +DefPrimitive('\normalbottom', undef); + +# if the mark is not simple, we add it to the content of the note +# otherwise, to the attribute. 
+DefConstructor('\footnote{}{}', + "^?#prenote(#prenote )()#2", + mode => 'text', bounded => 1, + beforeDigest => sub { reenterTextMode(1); neutralizeFont(); }, + afterDigest => sub { + my ($stomach, $whatsit) = @_; + my $mark = $whatsit->getArg(1); + my $change = 0; + foreach my $token (Revert($mark)) { + unless ($token->getCatcode == CC_LETTER || $token->getCatcode == CC_SPACE || + $token->getCatcode == CC_OTHER) { + $change = 1; last; } } + $whatsit->setProperty(($change ? 'prenote' : 'mark') => $mark); + return; }); +# Until we can do the "v" properly: +DefMacro('\vfootnote', '\footnote'); +DefMacro('\fo@t', '\ifcat\bgroup\noexpand\next \let\next\f@@t \else\let\next\f@t\fi \next'); +DefMacro('\f@@t', '\bgroup\aftergroup\@foot\let\next'); +DefMacro('\f@t{}', '#1\@foot'); +DefMacro('\@foot', '\strut\egroup'); + +DefPrimitiveI('\footstrut', undef, undef); +DefRegister('\footins' => Number(0)); + +DefPrimitiveI('\topinsert', undef, undef); +DefPrimitiveI('\midinsert', undef, undef); +DefPrimitiveI('\pageinsert', undef, undef); +DefPrimitiveI('\endinsert', undef, undef); +# \topins ? + +#====================================================================== +# TeX Book, Appendix B. p. 364 + +# Let's hope nobody is messing with the output routine... + +DefPrimitiveI('\footnoterule', undef, undef); + +#====================================================================== +# End of TeX Book definitions. +#====================================================================== + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Stuff that probably doesn't belong here (LaTeX? or nowhere?) +#DefMacro('\vspace{}', '\vskip#1\relax'); + +#====================================================================== +# In principle, is a nice markup for emphasized. +# Unfortunately, TeX really just treats it as a font switch. +# Something like: \em et.al. \rm more stuff +# works in TeX, but in our case, since there is no explicit {}, +# the stays open! Ugh! 
+# This could still be made to work, but merge font would +# need to look at any open , and then somehow close it! +DefPrimitiveI('\em', undef, undef, + beforeDigest => sub { + my $font = LookupValue('font'); + my $shape = $font->getShape; + AssignValue(font => $font->merge(shape => ($shape eq 'italic' ? 'normal' : 'italic')), + 'local'); }); + +# Change math font while still in text! +DefPrimitiveI('\boldmath', undef, undef, + beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 1), 'local'); }, + forbidMath => 1); +DefPrimitiveI('\unboldmath', undef, undef, + beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 0), 'local'); }, + forbidMath => 1); + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +1; diff --git a/lib/LaTeXML/Package.pm b/lib/LaTeXML/Package.pm index 746f35e31..da02014d8 100644 --- a/lib/LaTeXML/Package.pm +++ b/lib/LaTeXML/Package.pm @@ -1979,7 +1979,7 @@ my %definition_name = ( # [CONSTANT] 'ldf' => 'language definitions', 'def' => 'definitions', 'dfu' => 'definitions'); my $findfile_options = { # [CONSTANT] - type => 1, notex => 1, noltxml => 1, searchpaths_only => 1 }; + type => 1, notex => 1, noltxml => 1, searchpaths_only => 1, installation_subdir => 1 }; sub FindFile { my ($file, %options) = @_; @@ -2033,7 +2033,8 @@ sub FindFile_aux { # If we're looking for ltxml, look within our paths & installation first (faster than kpse) if (!$options{noltxml} - && ($path = pathname_find("$file.ltxml", paths => $ltxml_paths, installation_subdir => 'Package'))) { + && ($path = pathname_find("$file.ltxml", paths => $ltxml_paths, + installation_subdir => $options{installation_subdir} || 'Package'))) { return $path; } # Else if we're interpreting rawtex, and can find the file as is, take it. 
elsif (!$options{notex} && ($interpreting || $interpretable) @@ -2457,7 +2458,7 @@ sub AddToMacro { my $inputdefinitions_options = { # [CONSTANT] options => 1, withoptions => 1, handleoptions => 1, type => 1, as_class => 1, noltxml => 1, notex => 1, noerror => 1, after => 1, - at_letter => 1, searchpaths_only => 1, reloadable => 1 }; + at_letter => 1, searchpaths_only => 1, reloadable => 1, installation_subdir => 1 }; # options=>[options...] # withoptions=>boolean : pass options from calling class/package # after=>code or tokens or string as $name.$type-h@@k macro. (executed after the package is loaded) @@ -2494,7 +2495,9 @@ sub InputDefinitions { "Option clash for file $filename with options '$curroptions'", "previously loaded with '$prevoptions'") unless $curroptions eq $prevoptions; } } if (my $file = FindFile($filename, type => $options{type}, - notex => $options{notex}, noltxml => $options{noltxml}, searchpaths_only => $options{searchpaths_only})) { + notex => $options{notex}, noltxml => $options{noltxml}, searchpaths_only => $options{searchpaths_only}, + installation_subdir => $options{installation_subdir} + )) { my $pushpop = LookupDefinition(T_CS('\@pushfilename')) && LookupDefinition(T_CS('\@popfilename')); if ($options{handleoptions}) { @@ -2638,7 +2641,8 @@ sub LoadClass { sub LoadPool { my ($pool) = @_; $pool = ToString($pool) if ref $pool; - if (my $success = InputDefinitions($pool, type => 'pool', notex => 1, noerror => 1)) { + if (my $success = InputDefinitions($pool, type => 'pool', notex => 1, noerror => 1, + installation_subdir => 'Engine')) { return $success; } else { Error('missing_file', "$pool.pool.ltxml", $STATE->getStomach->getGullet, diff --git a/lib/LaTeXML/Package/TeX.pool.ltxml b/lib/LaTeXML/Package/TeX.pool.ltxml deleted file mode 100644 index ad8254a9a..000000000 --- a/lib/LaTeXML/Package/TeX.pool.ltxml +++ /dev/null @@ -1,7811 +0,0 @@ -# -*- mode: Perl -*- -# /=====================================================================\ # -# 
| TeX | # -# | Core TeX Implementation for LaTeXML | # -# |=====================================================================| # -# | Part of LaTeXML: | # -# | Public domain software, produced as part of work done by the | # -# | United States Government & not subject to copyright in the US. | # -# |---------------------------------------------------------------------| # -# | Bruce Miller #_# | # -# | http://dlmf.nist.gov/LaTeXML/ (o o) | # -# \=========================================================ooo==U==ooo=/ # -package LaTeXML::Package::Pool; -use strict; -use warnings; -use LaTeXML::Package; -use Unicode::Normalize; -use LaTeXML::Util::Pathname; -use List::Util qw(min max); - -# NOTE that these define the namespaces we'll (probably) use -# along with the prefixes to be used in "code" -# The generated XML will use the prefixes defined by RegisterDocumentNamespace(...) (if ever) -# or those prefixes defined by the Schema (typically RelaxNGSchema(..) -RegisterNamespace(ltx => "http://dlmf.nist.gov/LaTeXML"); -RegisterNamespace(svg => "http://www.w3.org/2000/svg"); -RegisterNamespace(xlink => "http://www.w3.org/1999/xlink"); # Needed for SVG -# Not directly used, but let's stake out the ground -RegisterNamespace(m => "http://www.w3.org/1998/Math/MathML"); -RegisterNamespace(xhtml => "http://www.w3.org/1999/xhtml"); -# Namespace for arbitrary data attributes (mapped to data-xxx in html5) -RegisterNamespace(data => "http://dlmf.nist.gov/LaTeXML/data"); - -DefMacroI("\\\@empty", undef, Tokens()); - -#====================================================================== -# Core ID functionality. -#====================================================================== -# DOCUMENTID is the ID of the document -# AND prefixes IDs on all other elements. -if (my $docid = LookupValue('DOCUMENTID')) { - # Wrap in T_OTHER so funny chars don't screw up (no space!) 
- DefMacroI('\thedocument@ID', undef, T_OTHER($docid)); } -else { - Let('\thedocument@ID', '\@empty'); } -NewCounter('@XMARG', 'document', idprefix => 'XM'); - -#====================================================================== - -Tag('ltx:document', afterOpen => \&ProcessPendingResources); -RequireResource('LaTeXML.css'); -#====================================================================== -# The default "initial context" for XML+RDFa specifies some default -# terms and prefixes, but no default vocabulary. -# Ought to have a default for @vocab, but settable? -# can we detect use of simple "term"s in attributes so we know whether we need @vocab? -# Ought to have a default set of prefixes from RDFa Core, -# but allow prefixes to be added. -# Probably ought to scan rdf attributes for all uses of prefixes, -# and include them in @prefix -# The following prefixes are listed in http://www.w3.org/2011/rdfa-context/rdfa-1.1 -{ - my %rdf_prefixes = ( - "cc" => "http://creativecommons.org/ns#", - "ctag" => "http://commontag.org/ns#", - "dc" => "http://purl.org/dc/terms/", - "dcterms" => "http://purl.org/dc/terms/", - "ical" => "http://www.w3.org/2002/12/cal/icaltzd#", - "foaf" => "http://xmlns.com/foaf/0.1/", - "gr" => "http://purl.org/goodrelations/v1#", - "grddl" => "http://www.w3.org/2003/g/data-view#", - "ma" => "http://www.w3.org/ns/ma-ont#", - "og" => "http://ogp.me/ns#", - "owl" => "http://www.w3.org/2002/07/owl#", - "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "rdfa" => "http://www.w3.org/ns/rdfa#", - "rdfs" => "http://www.w3.org/2000/01/rdf-schema#", - "rev" => "http://purl.org/stuff/rev#", - "rif" => "http://www.w3.org/2007/rif#", - "rr" => "http://www.w3.org/ns/r2rml#", - "schema" => "http://schema.org/", - "sioc" => "http://rdfs.org/sioc/ns#", - "skos" => "http://www.w3.org/2004/02/skos/core#", - "skosxl" => "http://www.w3.org/2008/05/skos-xl#", - "v" => "http://rdf.data-vocabulary.org/#", - "vcard" => "http://www.w3.org/2006/vcard/ns#", - 
"void" => "http://rdfs.org/ns/void#", - "xhv" => "http://www.w3.org/1999/xhtml/vocab#", - "xml" => "http://www.w3.org/XML/1998/namespace", - "xsd" => "http://www.w3.org/2001/XMLSchema#", - "wdr" => "http://www.w3.org/2007/05/powder#", - "wdrs" => "http://www.w3.org/2007/05/powder-s#", - ); - - foreach my $p (keys %rdf_prefixes) { - AssignMapping('RDFa_prefixes', $p => $rdf_prefixes{$p}); } -} - -#********************************************************************** -# CORE TeX; Built-in commands. -#********************************************************************** - -#====================================================================== -# Define parsers for standard parameter types. - -DefParameterType('Plain', sub { - my ($gullet, $inner) = @_; - my $value = $gullet->readArg(); - if ($inner) { - ($value) = $inner->reparseArgument($gullet, $value); } - $value; }, - reversion => sub { - my ($arg, $inner) = @_; - (T_BEGIN, - ($inner ? $inner->revertArguments($arg) : Revert($arg)), - T_END); }); - -DefParameterType('DefPlain', sub { - my ($gullet, $inner) = @_; - my $value = $gullet->readBalanced(0, 1, 1); - if ($inner) { - ($value) = $inner->reparseArgument($gullet, $value); } - return $value; }, - reversion => sub { - my ($arg, $inner) = @_; - (T_BEGIN, - ($inner ? $inner->revertArguments($arg) : Revert($arg)), - T_END); }); - -DefParameterType('Optional', sub { - my ($gullet, $default, $inner) = @_; - my $value = $gullet->readOptional; - if (!$value && $default) { - $value = $default; } - elsif ($inner) { - ($value) = $inner->reparseArgument($gullet, $value); } - $value; }, - optional => 1, - reversion => sub { - my ($arg, $default, $inner) = @_; - my @rev_arg = $arg ? ( - $inner ? 
$inner->revertArguments($arg) : Revert($arg)) - : (); - if (@rev_arg) { - return (T_OTHER('['), @rev_arg, T_OTHER(']')); } - else { return (); } }); - -# This is a peculiar type of argument of the form -# = { -# however, does get expanded while searching for the initial { -# which IS required in contrast to a general argument; ie a single token is not correct. -DefParameterType('GeneralText', sub { - my ($gullet) = @_; - $gullet->unread($gullet->readXToken); # Force expansion to skip before required { - - return $gullet->readBalanced(0, 0, 1); }); - -DefParameterType('Until', sub { - my ($gullet, $until) = @_; - $gullet->readUntil($until); }, - reversion => sub { - my ($arg, $until) = @_; - (Revert($arg), Revert($until)); }); - -# Skip any spaces, but don't contribute an argument. -DefParameterType('SkipSpaces', sub { $_[0]->skipSpaces; 1; }, novalue => 1); - -DefParameterType('Skip1Space', sub { $_[0]->skip1Space; 1; }, novalue => 1); - -# Read the next token -DefParameterType('Token', sub { $_[0]->readToken; }); - -# Read the next token, after expanding any expandable ones. 
-DefParameterType('XToken', sub { $_[0]->readXToken; }); - -# Read a number -DefParameterType('Number', sub { $_[0]->readNumber; }); - -# Read a floating point number -DefParameterType('Float', sub { $_[0]->readFloat; }); - -sub ReadFloat { - my ($gullet) = @_; - $gullet->skipSpaces; - return ($gullet->readFloat || Float(0)); } - -# Read a dimension -DefParameterType('Dimension', sub { $_[0]->readDimension; }); - -# Read a Glue (aka skip) -DefParameterType('Glue', sub { $_[0]->readGlue; }); - -# Read a MuDimension (math) -DefParameterType('MuDimension', sub { $_[0]->readMuDimension; }); - -# Read a MuGlue (math) -DefParameterType('MuGlue', sub { $_[0]->readMuGlue; }); - -# Read until the next (balanced) open brace { -# used for the last TeX-style delimited argument -DefParameterType('UntilBrace', sub { - my ($gullet) = @_; - $gullet->readUntilBrace; }); - -# Yet another special case: Require a { but do not read it!!! -DefParameterType('RequireBrace', sub { - my ($gullet) = @_; - if (my $tok = $gullet->readToken) { - $gullet->unread($tok); - if ($tok->getCatcode != CC_BEGIN) { - Error('expected', '{', $gullet, "Expected a { here; Got " . Stringify($tok)); } - $tok; } }, - novalue => 1); - -DefParameterType('XUntil', sub { - my ($gullet, $until) = @_; - ($until) = $until->unlist; # Make sure it's a single token!!! - my ($token, @tokens) = (); - while ($token = $gullet->readXToken(0)) { - if ($token->equals($until)) { - last; } - elsif ($token->getCatcode == CC_BEGIN) { - push(@tokens, $token, $gullet->readBalanced, T_END); } - elsif (my $defn = LookupDefinition($token)) { - push(@tokens, Invocation($token, - ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ()))); } - else { - push(@tokens, $token); } } - Tokens(@tokens); }); - -# This reads a braced tokens list, expanding as it goes, -# but expanding \the-like commands only once. 
-DefParameterType('Expanded', sub { - my ($gullet) = @_; - $gullet->readBalanced(1, 0, 1); }, - reversion => sub { - my ($arg) = @_; - (T_BEGIN, Revert($arg), T_END); }); - -# This reads an expanded definition body, -# a braced tokens list, expanding as it goes, -# but expanding \the-like commands only once, -# and also packing # parameters -DefParameterType('DefExpanded', sub { - my ($gullet) = @_; - return $gullet->readBalanced(1, 1, 1); }, - reversion => sub { - my ($arg) = @_; - (T_BEGIN, Revert($arg), T_END); }); - -# Read a matching keyword, eg. Match:= -DefParameterType('Match', sub { shift->readMatch(@_); }); - -# Read a keyword; eg. Keyword:to -# (like Match, but ignores catcodes) -DefParameterType('Keyword', sub { shift->readKeyword(@_); }); - -# Read balanced material (?) -DefParameterType('Balanced', sub { $_[0]->readBalanced; }); - -# Read a Semiverbatim argument; ie w/ most catcodes neutralized. -DefParameterType('Semiverbatim', sub { $_[0]->readArg; }, semiverbatim => 1, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -# Read a LaTeX-style optional argument (ie. in []), but the contents read as Semiverbatim. -DefParameterType('OptionalSemiverbatim', sub { $_[0]->readOptional; }, - semiverbatim => 1, optional => 1, - reversion => sub { ($_[0] ? (T_OTHER('['), Revert($_[0]), T_OTHER(']')) : ()); }); - -# Be careful here: if % appears before the initial {, it's still a comment! -# Also, note that non-typewriter fonts will mess up some chars on digestion! 
-DefParameterType('Verbatim', sub { - my ($gullet) = @_; - $gullet->readUntil(T_BEGIN); - StartSemiverbatim('%', '\\'); - my $arg = $gullet->readBalanced(); - EndSemiverbatim(); - return $arg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(family => 'typewriter'); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -# Read Verbatim, but allows expanding command sequences -DefParameterType('HyperVerbatim', sub { - my ($gullet) = @_; - $gullet->readUntil(T_BEGIN); - StartSemiverbatim('%'); - DefMacroI('\%', undef, T_OTHER('%'), scope => 'local'); - DefMacroI('\#', undef, T_OTHER('#'), scope => 'local'); - DefMacroI('\&', undef, T_OTHER('&'), scope => 'local'); - DefMacroI('\textunderscore', undef, T_OTHER('_'), scope => 'local'); - Let('\_', '\textunderscore'); - DefMacroI('\hyper@tilde', undef, T_OTHER('~'), scope => 'local'); - Let('\~', '\hyper@tilde'); - Let('\textasciitilde', '\hyper@tilde'); - Let('\\\\', '\@backslashchar'); - my $arg = $gullet->readBalanced(1); - EndSemiverbatim(); - return $arg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(family => 'typewriter'); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -# Read an argument that will not be digested. -DefParameterType('Undigested', sub { $_[0]->readArg; }, undigested => 1, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -# Read a LaTeX-style optional argument (ie. in []), but it will not be digested. -DefParameterType('OptionalUndigested', sub { $_[0]->readOptional; }, - undigested => 1, optional => 1, - reversion => sub { ($_[0] ? (T_OTHER('['), Revert($_[0]), T_OTHER(']')) : ()); }); - -# Read a keyword value (KeyVals), that will not be digested. 
-DefParameterType('UndigestedKey', sub { $_[0]->readArg; }, undigested => 1); -DefParameterType('UndigestedDefKey', sub { - $_[0]->readArg->packParameters; }, undigested => 1); - -# Read a token as used when defining it, ie. it may be enclosed in braces. -DefParameterType('DefToken', sub { - my ($gullet) = @_; - my $token = $gullet->readToken; - while ($token && ($token->getCatcode == CC_BEGIN)) { - my $cc; - my @toks = grep { ($cc = $$_[1]) && ($cc != CC_SPACE) && ($cc != CC_COMMENT); } - $gullet->readBalanced->unlist; - $token = shift(@toks); - $gullet->unread(@toks); } - $token; }, - undigested => 1); - -# Stub register for misdefinitions, to avoid a cascade of Errors. -DefRegisterI('\lx@DUMMY@REGISTER', undef, Tokens()); - -# Read a variable, ie. a token (after expansion) that is a writable register. -DefParameterType('Variable', sub { - my ($gullet) = @_; - my $token = $gullet->readXToken; - my $defn = $token && LookupDefinition($token); - if ((defined $defn) && $defn->isRegister && !$defn->isReadonly) { - [$defn, ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ())]; } - else { - DefRegisterI($token, undef, Dimension(0)); # Don't really know what KIND of variable! - if ($token && ($token->getCatcode == CC_CS)) { - Error('expected', '', $gullet, - "A was supposed to be here", "Got " . Stringify($token), - "Defining it now."); - DefRegisterI($token, undef, Dimension(0)); # Dimension, or what? - return [LookupDefinition($token)]; } - else { - Error('expected', '', $gullet, - "A was supposed to be here", "Got " . Stringify($token), - "But it is not even definable."); - return [LookupDefinition(T_CS('\lx@DUMMY@REGISTER'))]; } } }, - reversion => sub { - my ($var) = @_; - my ($defn, @args) = @$var; - my $params = $defn->getParameters; - return Tokens($defn->getCS, ($params ? 
$params->revertArguments(@args) : ())); }); - -# Same, but not necessarily writable -DefParameterType('Register', sub { - my ($gullet) = @_; - my $token = $gullet->readXToken; - my $defn = $token && LookupDefinition($token); - if ((defined $defn) && $defn->isRegister) { - [$defn, ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ())]; } - else { - if ($token && ($token->getCatcode == CC_CS)) { - if ($token->getString eq '\font') { - # \font is a bit of a register-like exception - return [$defn]; } - Error('expected', '', $gullet, - "A was supposed to be here", "Got " . Stringify($token), - "Defining it now."); - DefRegisterI($token, undef, Dimension(0)); # Dimension, or what? - return [LookupDefinition($token)]; } - else { - Error('expected', '', $gullet, - "A was supposed to be here", "Got " . Stringify($token), - "But it is not even definable."); - return [LookupDefinition(T_CS('\lx@DUMMY@REGISTER'))]; } } }, - reversion => sub { - my ($var) = @_; - my ($defn, @args) = @$var; - my $params = $defn->getParameters; - return Tokens($defn->getCS, ($params ? $params->revertArguments(@args) : ())); }); - -DefParameterType('TeXFileName', sub { - my ($gullet) = @_; - my ($token, $cc, @tokens) = (); - $gullet->skipSpaces; - while (($token = $gullet->readXToken(0)) - && (($cc = $token->getCatcode) != CC_SPACE) && ($cc != CC_EOL) && ($cc != CC_COMMENT) && ($cc != CC_CS)) { - push(@tokens, $token); } - $gullet->unread($token) unless ($cc == CC_SPACE) || ($cc == CC_EOL) || ($cc == CC_COMMENT); - # Strip outer "" ??? 
- if ((scalar(@tokens) > 1) && ($tokens[0]->equals(T_OTHER('"'))) && ($tokens[-1]->equals(T_OTHER('"')))) { - shift(@tokens); pop(@tokens); } - Tokens(@tokens); }); - -# A LaTeX style directory List -DefParameterType('DirectoryList', sub { - my ($gullet) = @_; - my $arg_string = ToString($gullet->readArg); - my @dirs = (); - for my $dir (split(/,|\\par|\n+/, $arg_string)) { - $dir =~ s/^\s+//; - $dir =~ s/\s+$//; - next unless $dir; - while ($dir =~ s/^\s*\{([^\}]*)\}//) { - push @dirs, $1 if $1; } - push @dirs, $dir if $dir; } - LaTeXML::Core::Array->new(open => T_BEGIN, close => T_END, itemopen => T_BEGIN, itemclose => T_END, - type => LaTeXML::Package::parseParameters(ToString("Semiverbatim"), "CommaList")->[0], - values => [@dirs]); }); - -# This reads a Box as needed by \raise, \lower, \moveleft, \moveright. -# Hopefully there are no issues with the box being digested -# as part of the reader??? -DefParameterType('MoveableBox', sub { - my ($gullet) = @_; - $gullet->skipSpaces; - my ($box, @stuff) = $STATE->getStomach->invokeToken($gullet->readXToken); - Error('expected', '', $gullet, - "A was supposed to be here", "Got " . Stringify($box)) - unless $box; -#### && $box->isa('LaTeXML::Core::Whatsit') -#### && ($box->getDefinition->getCSName =~ /^(\\hbox|\\vbox||\\vtop)$/); - $box; }); - -# Read a parenthesis delimited argument. -# Note that this does NOT balance () within the argument. -DefParameterType('BalancedParen', sub { - my ($gullet) = @_; - my $tok = $gullet->readXToken; - if (ref $tok && ToString($tok) eq '(') { - $gullet->readUntil(T_OTHER(')')); - } else { - $gullet->unread($tok) if ref $tok; - undef; } }, - reversion => sub { - (T_OTHER('('), Revert($_[0]), T_OTHER(')')); }); - -# Read a digested argument, digesting as it is being read. -# The usual macro parameter (generally written as {}) gets tokenized and digested -# in separate stages, w/o recognizing any special macros or catcode changes within (eg. \url). 
-# Rarely, you need a parameter that gets digested AS IT'S READ until ending }. -# Note that this also recognizes args as \bgroup ... \engroup -# It is useful when the content would usually need to have been \protect'd -# in order to correctly deal with catcodes. -# BEWARE: This is NOT a shorthand for a simple digested {}! -DefParameterType('Digested', sub { - no warnings 'recursion'; - my ($gullet) = @_; - $gullet->skipSpaces; - my $ismath = $STATE->lookupValue('IN_MATH'); - my @list = (); - my $token; - do { $token = $gullet->readXToken(0); - } while (defined $token && (($token->getCatcode == CC_SPACE) || $token->equals(T_CS('\relax')))); - if (!defined $token) { } - elsif ($token->getCatcode == CC_BEGIN) { - Digest($token); - push(@list, $STATE->getStomach->digestNextBody()); pop(@list); } # content w/o the braces - else { - push(@list, $STATE->getStomach->invokeToken($token)); } - @list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list; - List(@list, mode => ($ismath ? 'math' : 'text')); }, - undigested => 1, # since _already_ digested. - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -# A variation: Digest until we encounter a given token! -DefParameterType('DigestUntil', sub { - my ($gullet, $until) = @_; - ($until) = $until->unlist; # Make sure it's a single token!!! - $gullet->skipSpaces; - my $ismath = $STATE->lookupValue('IN_MATH'); - my @list = $STATE->getStomach->digestNextBody($until); - @list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list; - List(@list, mode => ($ismath ? 'math' : 'text')); }, - undigested => 1, # since _already_ digested. - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -# Reads until the current group has ended. -# This is useful for environment-like constructs, -# particularly alignments (which may or may not be actual environments), -# but which need special treatment of some of their content -# as the expansion is carried out. 
-DefParameterType('DigestedBody', sub { - my ($gullet) = @_; - my $ismath = $STATE->lookupValue('IN_MATH'); - my @list = $STATE->getStomach->digestNextBody(); - # In most (all?) cases, we're really looking for a single Whatsit here... - @list = grep { ref $_ ne 'LaTeXML::Core::Comment' } @list; - List(@list, mode => ($ismath ? 'math' : 'text')); }, - undigested => 1); - -# In addition to the standard TeX Dimension, there are various LaTeX constructs -# (particularly, the LaTeX picture environment, and the various pstricks packages) -# that take a different sort of length. They differ in two ways. -# (1) They do not accept a comma as decimal separator -# (they generally use it to separate coordinates), and -# (2) They accept a plain float which is scaled against a Dimension register. -# Actually, there are two subcases: -# (a) picture accepts a float, which is scaled against \unitlength -# (b) pstricks accepts a float, and optionally a unit, -# If the unit is omitted, it is relative to \psxunit or \psyunit. -# How to capture these ? -## DefParameterType('Length', sub { -## my($gullet,$unit)=@_; - -# CommaList expects something like {balancedstuff,...} -DefParameterType('CommaList', sub { - my ($gullet, $type) = @_; - my $typedef = $type && LaTeXML::Package::parseParameters(ToString($type), "CommaList")->[0]; - my @items = (); - if ($gullet->ifNext(T_BEGIN)) { - $gullet->readToken; - my @tokens = (); - my $comma = T_OTHER(','); - while (my $token = $gullet->readToken) { - my $cc = $token->getCatcode; - if ($cc == CC_END) { - push(@items, Tokens(@tokens)); - last; } - elsif ($token->equals($comma)) { - push(@items, Tokens(@tokens)); @tokens = (); } - elsif ($cc == CC_BEGIN) { - push(@tokens, $token, $gullet->readBalanced, T_END); } - else { - push(@tokens, $token); } } - if ($typedef) { - @items = map { [$typedef->reparseArgument($gullet, $_)]->[0] } @items; } } - else { - # If no brace, just read one item or token, but still make Array! - push(@items, ($typedef ? 
$typedef->readArguments($gullet, "CommaList") - : ($gullet->readToken))); } - LaTeXML::Core::Array->new(open => T_BEGIN, close => T_END, type => $typedef, - values => [@items]); }); - -### Support for Key / Value arguments. -## The very basic form is -## RequiredKeyVals: $keyset -## OptionalKeyVals: $keyset -## to parse Key-Value pairs from a given keyset (see the 'keyval' package -## documentation for more information). These types of KeyVal -## parameters will return a LaTeXML::Core::KeyVals object, which can then be -## used to access the values of the individual items. -## The difference between the two forms is that RequiredKeyVals expects a set of -## key-value pairs wrapped in T_BEGIN T_END, where as OptionalKeyVals optionally -## expects a set of KeyValue pairs wrapped in T_OTHER('[') T_OTHER(']') -## -## Several extension of the keyval package exist, the most common one we support -## is the xkeyval package. This introduces further variations on the keyval -## arguments parsing, in particular it allows to read keys from more than one -## keyset at once. These can be specified by giving comma-seperated values in -## the keyset argument. By default, a key will only be set in the **first** -## keyset it occurs in. By using -## RequiredKeyVals+: $keysets -## OptionalKeyVals+: $keysets -## the key will be set in all keysets instead. -## -## All keys to be parsed with these arguments should be declared using -## DefKeyVal in LaTeXML::Package. By default, an error is thrown if an unknown -## key is encountered. To surpress this behaviour, and instead store all -## undefined keys, use -## RequiredKeyVals*: $keysets -## OptionalKeyVals*: $keysets -## instead. 
The '*' and '+' modifiers can be combined by using: -## RequiredKeyVals*+: $keysets -## OptionalKeyVals*+: $keysets -## -## Furthermore, the xkeyval package supports giving prefixes to keys, -## RequiredKeyVals[*][+]: $prefix|$keysets -## OptionalKeyVals[*][+]: $prefix|$keysets -## -## Finally, it is possible to specify specific keys to skip when digesting the -## object. This can be achieved using comma-seperated key values in -## RequiredKeyVals[*][+]: $prefix|$keysets|$skip -## OptionalKeyVals[*][+]: $prefix|$keysets|$skip - -# function to handle all the -sub KeyVals_aux { - my ($gullet, $until, $spec, %options) = @_; - my ($star, $plus, $prefix, $keysets, $skip) = @{$spec}; - - # support both "keysets" and "prefix|keysets" - unless (defined($keysets)) { - $keysets = $prefix; - $prefix = undef; - - # to emulate old behaviour, throw no errors - # when we have a single keyset and no prefix (or no keyset at all) - $star = 1 if (!defined($keysets) || index(',', $keysets) == -1); } - - # create a new set of Key-Value arguments - my $keyvals = LaTeXML::Core::KeyVals->new( - $prefix, $keysets, - setAll => $plus, setInternals => 1, - skip => $skip, skipMissing => $star); - - # and read it from the gullet - $keyvals->readFrom($gullet, $until) if defined($until); - - # we still want to make use of the hash - return $keyvals; } - -sub RequiredKeyVals { - my ($star, $plus, $gullet, @keyspec) = @_; - my $until; - - if ($gullet->ifNext(T_BEGIN)) { - $until = T_END; } - else { - Error('expected', '{', $gullet, "Missing keyval arguments"); } - - return (KeyVals_aux($gullet, $until, [$star, $plus, @keyspec])); } - -DefParameterType('RequiredKeyVals', sub { RequiredKeyVals(0, 0, @_); }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -DefParameterType('RequiredKeyVals*', sub { RequiredKeyVals(1, 0, @_); }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -DefParameterType('RequiredKeyVals+', sub { RequiredKeyVals(0, 1, @_); }, - reversion => sub { (T_BEGIN, 
Revert($_[0]), T_END); }); -DefParameterType('RequiredKeyVals*+', sub { RequiredKeyVals(1, 1, @_); }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -sub OptionalKeyVals { - my ($star, $plus, $gullet, @keyspec) = @_; - if ($gullet->ifNext(T_OTHER('['))) { - return (KeyVals_aux($gullet, T_OTHER(']'), [$star, $plus, @keyspec])); } - else { return (undef); } } - -sub revert_optional_keyvals { - my @r = grep { defined $_ } ($_[0] && Revert($_[0])); - return @r ? (T_OTHER('['), @r, T_OTHER(']')) : (); } - -DefParameterType('OptionalKeyVals', sub { OptionalKeyVals(0, 0, @_); }, - optional => 1, reversion => \&revert_optional_keyvals); -DefParameterType('OptionalKeyVals*', sub { OptionalKeyVals(1, 0, @_); }, - optional => 1, reversion => \&revert_optional_keyvals); -DefParameterType('OptionalKeyVals+', sub { OptionalKeyVals(0, 1, @_); }, - optional => 1, reversion => \&revert_optional_keyvals); -DefParameterType('OptionalKeyVals*+', sub { OptionalKeyVals(1, 1, @_); }, - optional => 1, reversion => \&revert_optional_keyvals); - -# Not sure that this is the most elegant solution, but... -# What I'd really like are some sort of parameter modifiers, mathstyle, font... until...? 
-DefParameterType('DisplayStyle', sub { - $_[0]->readArg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(mathstyle => 'display'); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -DefParameterType('TextStyle', sub { - $_[0]->readArg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(mathstyle => 'text'); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -DefParameterType('ScriptStyle', sub { - $_[0]->readArg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(mathstyle => 'script'); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -DefParameterType('ScriptscriptStyle', sub { - $_[0]->readArg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(mathstyle => 'scriptscript'); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -# Perverse naming convention: not script style, but in the style of a script relative to current. -DefParameterType('InScriptStyle', sub { - $_[0]->readArg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(scripted => 1); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -# NOTE: the various parameter features don't combine easily!! -# I need a ScriptStyleUntil for \root!!! -# I also need to redo fractions using these new types.... -DefParameterType('OptionalInScriptStyle', sub { - $_[0]->readOptional; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(scripted => 1); }, - afterDigest => sub { - $_[0]->egroup; }, - optional => 1, - reversion => sub { ($_[0] ? 
(T_OTHER('['), Revert($_[0]), T_OTHER(']')) : ()); }); -DefParameterType('InFractionStyle', sub { - $_[0]->readArg; }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(fraction => 1); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); -#********************************************************************** -# LaTeX has a very particular notion of "Undefined", -# so let's get that squared away at the outset; it's useful for TeX, too! -# Naturally, it uses \csname to check, which ends up DEFINING the possibly undefined macro as \relax -DefMacro('\@ifundefined{}{}{}', sub { - my ($gullet, $name, $if, $else) = @_; - my $cs = T_CS('\\' . ToString(Expand($name))); - if (IsDefined($cs)) { - return $else->unlist; } - else { - $STATE->assignMeaning($cs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssign - return $if->unlist; } }, - locked => 1); - -sub isDefinable { - my ($token) = @_; - return unless $token; - my $meaning = LookupMeaning($token); - my $name = $token->getString; $name =~ s/^\\//; - return (((!defined $meaning) || ($meaning eq LookupMeaning(T_CS('\relax'))) - || LookupValue('2.09_COMPATIBILITY')) # Let redefinitions happen in compatibility mode. - && (($name ne 'relax') && ($name !~ /^end/))); } - -#********************************************************************** -# Expandable Primitives -# See The TeXBook, Ch. 20, Definitions (also called Macros) pp. 
212--215 -#********************************************************************** - -#====================================================================== -# Should complain if we aren't actually evaluating an \if - -# The following special cases are built-in to Definition -DefConditional('\else', undef); -DefConditional('\or', undef); -DefConditional('\fi', undef); -DefConditional('\ifcase Number', undef); - -sub compare { - my ($u, $rel, $v) = @_; - $u = $u->valueOf if ref $u; - $v = $v->valueOf if ref $v; - if ($rel->equals(T_OTHER('<')) || $rel->equals(T_CS('\@@<'))) { - return $u < $v; } - elsif ($rel->equals(T_OTHER('='))) { - return $u == $v; } - elsif ($rel->equals(T_OTHER('>')) || $rel->equals(T_CS('\@@>'))) { - return $u > $v; } - else { - Error('expected', '', $STATE->getStomach->getGullet, - "Expected a relational token for comparision", "Got " . Stringify($rel)); - return; } } - -DefConditional('\ifnum Number Token Number', sub { compare($_[1], $_[2], $_[3]); }); -DefConditional('\ifdim Dimension Token Dimension', sub { compare($_[1], $_[2], $_[3]); }); -DefConditional('\ifodd Number', sub { $_[1]->valueOf % 2; }); - -# NOTE: We don't KNOW if we're in vertical, horizontal or inner mode!!!!!!! -DefConditionalI('\ifvmode', undef, sub { 0; }); -DefConditionalI('\ifhmode', undef, sub { 0; }); -DefConditionalI('\ifinner', undef, sub { 0; }); - -DefConditionalI('\ifmmode', undef, sub { LookupValue('IN_MATH'); }); - -DefParameterType('ExpandedIfToken', sub { - my ($gullet) = @_; - my $token = $gullet->readXToken(0, 1); - if (!$token) { - Error('expected', 'ExpandedIfToken', $gullet, - "conditional expected a token argument, readXToken came back empty. 
Falling back to \\\@empty"); - $token = T_CS('\@empty'); } - return $token; }); - -DefConditional('\if ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCharcode == $_[2]->getCharcode; }); -DefConditional('\ifcat ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCatcode == $_[2]->getCatcode; }); -DefConditional('\ifx Token Token', sub { XEquals($_[1], $_[2]); }); - -# Kinda rough: We don't really keep track of modes as carefully as TeX does. -# We'll assume that a box is horizontal if there's anything at all, -# but it's not a vbox (!?!?) -sub classify_box { - my ($boxnum) = @_; - my $box = LookupValue('box' . $boxnum->valueOf); - if (!$box) { - return ''; } - elsif ($box->isa('LaTeXML::Core::Whatsit') && ($box->getDefinition eq LookupDefinition(T_CS('\vbox')))) { - return 'vbox'; } - else { - return 'hbox'; } } - -DefConditional('\ifvoid Number', sub { !classify_box($_[1]); }); -DefConditional('\ifhbox Number', sub { classify_box($_[1]) eq 'hbox'; }); -DefConditional('\ifvbox Number', sub { classify_box($_[1]) eq 'vbox'; }); - -DefConditionalI('\iftrue', undef, sub { 1; }); -DefConditionalI('\iffalse', undef, sub { 0; }); - -#====================================================================== -# This makes \relax disappear completely after digestion -# (which seems most TeX like). -DefPrimitive('\relax', sub { (); }); -### However, this keeps a box, so it can appear in UnTeX -### DefPrimitive('\relax',undef); -## But if you do that, you've got to watch out since it usually -### shouldn't be a box; See the isRelax code in handleScripts, below -# Internal token produced by Gullet in response to \dont_expand; -# Acts like \relax, but isn't equal to it. -DefPrimitiveI('\special_relax', undef, sub { (); }); - -DefMacro('\number Number', sub { Explode($_[1]->valueOf); }); -# define it here (only approxmiately), since it's already useful. 
-Let('\protect', '\relax'); - -#====================================================================== - -DefMacro('\romannumeral Number', sub { roman($_[1]->valueOf); }); -# Hmm... I wonder, should getString itself be dealing with escapechar? -sub escapechar { - my $code = LookupRegister('\escapechar')->valueOf; - return (($code >= 0) && ($code <= 255) ? chr($code) : ''); } - -# 1) Knuth, The TeXBook, page 40, paragraph 1, Chapter 7: How TEX Reads What You Type. -# suggests all characters except spaces are returned in category code Other, i.e. Explode() -DefMacro('\string Token', sub { - my $s = $_[1]->toString; - if ($s =~ s/^\\//) { - $s = escapechar() . $s; } - Explode($s); }); - -DefMacroI('\jobname', undef, Tokens()); # Set to the filename by initialization - -DefMacroI('\fontname', undef, sub { Explode("fontname not implemented"); }); - -our @CATCODE_MEANING = ( - "the escape character", "begin-group character", - "end-group character", "math shift character", - "alignment tab character", "end-of-line character", - "macro parameter character", "superscript character", - "subscript character", "ignored character", - "blank space", "the letter", - "the character", "active character", - "comment character", "invalid character", - undef, "latexml marker character", - "macro parameter character"); - -# Not sure about this yet... -# NOTE: Lots of back-and-forth mangle with definition vs cs; don't do that! -DefMacro('\meaning Token', sub { - my ($gullet, $tok) = @_; - my $meaning = 'undefined'; - if (my $definition = ($tok->defined_as(T_ALIGN) ? $tok : LookupMeaning($tok))) { - my $type = ref $definition; - $type =~ s/^LaTeXML:://; - # Pre-step: We can't extract the bodies of definitions which are defined via Perl subroutines. - # So do the next best thing -- represent them as their tokens. 
- if ($type =~ /(primitive|conditional|constructor)$/i) { - $definition = $definition->getCSorAlias; - $type = ref $definition; - $type =~ s/^LaTeXML:://; - if (my $fontinfo = LookupValue('fontinfo_' . ToString($definition))) { - $meaning = 'select font ' . ($$fontinfo{fontname} || 'fontname'); - $meaning .= ' at ' . $$fontinfo{at} if $$fontinfo{at}; - $type = 'font'; } } - # The actual tests start here - if ($type =~ /token$/i) { - my $cc = $definition->getCatcode; - my $char = $definition->toString; - my $meaning_cc = $CATCODE_MEANING[$cc] || ''; - $meaning_cc .= ' ' if $meaning_cc; # append space separator if defined - $meaning = $meaning_cc . $char; } - elsif ($type =~ /register$/i) { - $meaning = $definition->getAddress; } - elsif ($type =~ /expandable$/i) { -# short-circuit some troublesome discrepancies with TeX, which end up macros on LaTeXML's end, but \meaning expects as primitives in the CTAN ecosystem. - my $cs = ToString($definition->getCSorAlias); - # These exceptions could be extended further, as we add more .sty/.cls support - return Explode($cs) if $cs =~ /^\\(?:(?:un)?expanded|detokenize)$/; - my $expansion = $definition->getExpansion; - my $ltxps = $definition->getParameters; - my $arg_index = 0; - my @spec_parts = (); - my @params = $ltxps ? $ltxps->getParameters : (); - my $p_trailer = ''; - - for my $param (@params) { - my $p_spec = $$param{spec}; - if ($p_spec eq 'RequireBrace') { - # tex's \meaning prints out the required braces for "\def\a#{}" variants - $p_trailer = '{'; - $p_spec = '{'; } - elsif ($p_spec eq 'UntilBrace') { # should only ever be used in the last argument? - $p_trailer = '{'; - $p_spec = "#" . (++$arg_index) . '{'; } - elsif ($p_spec =~ s/^Match://) { } # just match, don't increment arg index - elsif ($p_spec =~ s/^\w?Until(\w*)://) { # implied argument at this slot - $p_spec = "#" . (++$arg_index) . 
$p_spec; } - else { # regular parameter, increment - next if $$param{novalue}; # skip the latexml-only requirement params, but only here, since Match also have "novalue" set. - $p_spec = "#" . (++$arg_index); } - push @spec_parts, $p_spec; } - my $spec = join("", @spec_parts); - $spec =~ s/\{\}//g; - $spec =~ s/Token//g; - my $prefixes = join('', - ($definition->isProtected ? '\protected' : ()), - ($definition->isLong ? '\long' : ()), - ($definition->isOuter ? '\outer' : ()), - ); - my $expansion_str = ''; - if (ref $expansion eq 'LaTeXML::Core::Tokens') { - $expansion_str = writableTokens($expansion); } - else { - $expansion_str = ToString($expansion); } - $meaning = ($prefixes ? $prefixes . ' ' : '') . - "macro:$spec->$expansion_str$p_trailer"; } - elsif ($type =~ /chardef$/i) { # from \chardef or \mathchardef - my $prefix = ($$definition{mathglyph} ? '\mathchar' : '\char'); - $meaning = $prefix . '"' . $definition->valueOf->valueOf; } } - # One catch: make sure all \s in the meaning string are normalized to a simple space ' ' - $meaning =~ s/\s/ /g; - return Explode($meaning); }); - -DefParameterType('CSName', sub { $_[0]->readCSName; }); - -DefMacro('\csname CSName', sub { - my ($gullet, $token) = @_; - $STATE->assignMeaning($token, $STATE->lookupMeaning(T_CS('\relax'))) unless defined LookupMeaning($token); - $token; }); - -DefPrimitive('\endcsname', sub { - my ($stomach) = @_; - Error('unexpected', '\endcsname', $_[0], "Extra \\endcsname", - $stomach->getGullet->showUnexpected); - return; }); - -DefMacro('\expandafter Token Token', sub { - no warnings 'recursion'; - my ($gullet, $tok, $xtok) = @_; - my $defn; - if (defined($defn = $STATE->lookupExpandable($xtok))) { - my @x = (); - { - local $LaTeXML::CURRENT_TOKEN = $xtok; - @x = $defn->invoke($gullet, 1); # Expand $xtok ONCE ONLY! - } - ($tok, @x); } - elsif (!$STATE->lookupMeaning($xtok)) { - # Undefined token is an error, as expansion is expected. 
- # BUT The unknown token is NOT consumed, (see TeX B book, item 367) - # since probably in a real TeX run it would have been defined. - $STATE->generateErrorStub($gullet, $xtok); - ($tok, $xtok); } - else { - ($tok, $xtok); } }); - -use constant T_expandafter => T_CS('\expandafter'); -DefMacro('\expandafter Token Token', sub { - no warnings 'recursion'; - my ($gullet, $tok, $xtok) = @_; - my $defn; - my @skipped = ($tok); - while ($xtok->defined_as(T_expandafter)) { - push(@skipped, $gullet->readToken); - $xtok = $gullet->readToken; } - if (defined($defn = $STATE->lookupExpandable($xtok))) { - my @x = (); - { - local $LaTeXML::CURRENT_TOKEN = $xtok; - @x = $defn->invoke($gullet, 1); # Expand $xtok ONCE ONLY! - } - (@skipped, @x); } - elsif (!$STATE->lookupMeaning($xtok)) { - # Undefined token is an error, as expansion is expected. - # BUT The unknown token is NOT consumed, (see TeX B book, item 367) - # since probably in a real TeX run it would have been defined. - $STATE->generateErrorStub($gullet, $xtok); - (@skipped, $xtok); } - else { - (@skipped, $xtok); } }); - -# If next token is expandable, prefix it with the internal marker \dont_expand -# That token is never defined, explicitly handled in Gullet & should never escape the Gullet -DefMacroI('\noexpand', undef, sub { - my $token = $_[0]->readToken; - # Missing token likely the result of "{\noexpand}" for which TeX would be unperturbed - return ($token - ? ((($$token[1] == CC_CS) || ($$token[1] == CC_ACTIVE)) && $STATE->isDontExpandable($token) - ? 
(T_CS('\dont_expand'), $token) - : $token) - : ()); }); - -DefPrimitiveI('\dont_expand', undef, sub { - Error('misdefined', '\dont_expand', $_[0], - "The token \\dont_expand should never reach Stomach!"); }); - -DefMacroI('\topmark', undef, Tokens()); -DefMacroI('\firstmark', undef, Tokens()); -DefMacroI('\botmark', undef, Tokens()); -DefMacroI('\splitfirstmark', undef, Tokens()); -DefMacroI('\splitbotmark', undef, Tokens()); - -DefMacro('\input TeXFileName', sub { - my $filename = $_[1]; - my @t = $filename->unlist; - # If given a LaTeX-style argument, strip braces - if (@t && $t[0] && $t[0]->getCatcode == CC_BEGIN && $t[-1]->getCatcode == CC_END) { - $filename = Tokens(@t[1 .. $#t - 1]); - # and load LaTeX.pool if not already - if (!LookupValue('LaTeX.pool_loaded')) { - LoadPool("LaTeX"); } } - Input($filename, reloadable => 1); }); - -# Note that TeX doesn't actually close the mouth; -# it just flushes it so that it will close the next time it's read! -DefMacroI('\endinput', undef, sub { $_[0]->flushMouth; }); - -# \the -DefMacro('\the Register', sub { - my ($gullet, $variable) = @_; - return () unless $variable; - my ($defn, @args) = @$variable; - if (!$defn || $defn eq 'missing') { - Error('expected', "", $gullet, "a register was expected to be here"); return (); } - my $type = $defn->isRegister; - if (!$type) { - my $cs = ToString($defn->getCS); - if ($cs eq '\font') { # what to do here? - return T_CS('\tenrm'); } - Error('unexpected', "\\the$cs", $gullet, "You can't use $cs after \\the"); return (); } - my $value = $defn->valueOf(@args); - ## In all cases, these should be OTHER, except for space. (!?) - my @tokens = ($type eq 'Tokens' ? ($value ? $value->unlist : ()) : Explode(ToString($value))); - return @tokens; }); - -#********************************************************************** -# Primitives -# See The TeXBook, Chapter 24, Summary of Vertical Mode -# and Chapter 25, Summary of Horizontal Mode. 
-# Parsing of basic types (pp.268--271) is (mostly) handled in Gullet.pm -#********************************************************************** - -#====================================================================== -# Registers & Parameters -# See Chapter 24, Summary of Vertical Mode -# Define a whole mess of useless registers here ... -# Values are from Appendix B, pp. 348-349 (for whatever its worth) -#====================================================================== - -#====================================================================== -# Integer registers; TeXBook p. 272-273 - -DefRegister('\tracingmacros', Number(0), - getter => sub { Number((LookupValue('TRACING') || 0) & TRACE_MACROS); }, - setter => sub { my $p = (LookupValue('TRACING') || 0); - AssignValue(TRACING => ($_[0]->valueOf ? $p | TRACE_MACROS : $p & ~TRACE_MACROS)); }); -DefRegister('\tracingcommands', Number(0), - getter => sub { Number((LookupValue('TRACING') || 0) & TRACE_COMMANDS); }, - setter => sub { my $p = (LookupValue('TRACING') || 0); - AssignValue(TRACING => ($_[0]->valueOf ? 
$p | TRACE_COMMANDS : $p & ~TRACE_COMMANDS)); }); -{ - my %iparms = ( - pretolerance => 100, tolerance => 200, hbadness => 1000, vbadness => 1000, - linepenalty => 10, hyphenpenalty => 50, exhyphenpenalty => 50, - binoppenalty => 700, relpenalty => 500, - clubpenalty => 150, widowpenalty => 150, displaywidowpenalty => 50, - brokenpenalty => 100, predisplaypenalty => 10000, - postdisplaypenalty => 0, interlinepenalty => 0, - floatingpenalty => 0, outputpenalty => 0, - doublehyphendemerits => 10000, finalhyphendemerits => 5000, adjdemerits => 10000, - looseness => 0, pausing => 0, - holdinginserts => 0, tracingonline => 0, tracingstats => 0, - tracingparagraphs => 0, tracingpages => 0, tracingoutput => 0, - tracinglostchars => 1, - tracingrestores => 0, language => 0, uchyph => 1, lefthyphenmin => 0, - righthyphenmin => 0, globaldefs => 0, defaulthyphenchar => ord('-'), defaultskewchar => -1, - escapechar => ord('\\'), endlinechar => ord("\r"), newlinechar => -1, maxdeadcycles => 0, hangafter => 0, - fam => -1, mag => 1000, magnification => 1000, delimiterfactor => 0, - time => 0, day => 0, month => 0, year => 0, - showboxbreadth => 5, showboxdepth => 3, errorcontextlines => 5); - - foreach my $p (keys %iparms) { - DefRegister("\\$p", Number($iparms{$p})); } -} - -# Most of these are ignored, but... -DefMacro('\tracingall', - '\tracingonline=1 \tracingcommands=2 \tracingstats=2' - . ' \tracingpages=1 \tracingoutput=1 \tracinglostchars=1' - . ' \tracingmacros=2 \tracingparagraphs=1 \tracingrestores=1' - . ' \showboxbreadth=\maxdimen \showboxdepth=\maxdimen \errorstopmode'); -DefMacroI('\tracingnone', undef, Tokens()); -DefMacroI('\hideoutput', undef, Tokens()); -# This may mess up Daemon state? -{ my ($sec, $min, $hour, $mday, $mon, $year) = defined $ENV{SOURCE_DATE_EPOCH} ? 
gmtime($ENV{SOURCE_DATE_EPOCH}) : localtime(); - AssignValue('\day' => Number($mday), 'global'); - AssignValue('\month' => Number($mon + 1), 'global'); - AssignValue('\year' => Number(1900 + $year), 'global'); - AssignValue('\time' => Number(60 * $hour + $min), 'global'); } - -our @MonthNames = (qw( January February March April May June - July August September October November December)); - -# Return a string for today's date. -sub today { - return $MonthNames[LookupValue('\month')->valueOf - 1] - . " " . LookupValue('\day')->valueOf - . ', ' . LookupValue('\year')->valueOf; } - -# Read-only Integer registers -{ - my %ro_iparms = (lastpenalty => 0, badness => 0); - foreach my $p (keys %ro_iparms) { - DefRegister("\\$p", Number($ro_iparms{$p}), readonly => 1); } -} - -# Special integer registers (?) -# = \spacefactor | \prevgraf | \deadcycles | \insertpenalties -{ - my %sp_iparms = (spacefactor => 0, prevgraf => 0, deadcycles => 0, insertpenalties => 0); - foreach my $p (keys %sp_iparms) { - DefRegister("\\$p", Number($sp_iparms{$p})); } -} - -#====================================================================== -# Dimen registers; TeXBook p. 274 -{ - my %dparms = ( - hfuzz => '0.1pt', vfuzz => '0.1pt', overfullrule => '5pt', - emergencystretch => 0, - hsize => '6.5in', vsize => '8.9in', - maxdepth => '4pt', splitmaxdepth => '16383.99999pt', boxmaxdepth => '16383.99999pt', - lineskiplimit => 0, - delimitershortfall => '5pt', nulldelimiterspace => '1.2pt', scriptspace => '0.5pt', - mathsurround => 0, - predisplaysize => 0, displaywidth => 0, displayindent => 0, parindent => '20pt', - hangindent => 0, hoffset => 0, voffset => 0,); - - foreach my $p (keys %dparms) { - DefRegister("\\$p", Dimension($dparms{$p})); } -} - -# Special dimension registers (?) 
-# = \prevdepth | \pagegoal | \pagetotal | \pagestretch | \pagefilstretch -# | \pagefillstretch | \pagefilllstretch | pageshrink | \pagedepth -{ - my %sp_dparms = ( - prevdepth => 0, pagegoal => 0, pagetotal => 0, pagestretch => 0, pagefilstretch => 0, - pagefillstretch => 0, pagefilllstretch => 0, pageshrink => 0, pagedepth => 0); - foreach my $p (keys %sp_dparms) { - DefRegister("\\$p", Dimension($sp_dparms{$p})); } -} -#====================================================================== -# Glue registers; TeXBook p.274 -{ - my %gparms = ( - baselineskip => '12pt', lineskip => '1pt', - parskip => '0pt plus 1pt', - abovedisplayskip => '12pt plus 3pt minus 9pt', - abovedisplayshortskip => '0pt plus 3pt', - belowdisplayskip => '12pt plus 3pt minus 9pt', - belowdisplayshortskip => '0pt plus 3pt', - leftskip => 0, rightskip => 0, topskip => '10pt', splittopskip => '10pt', - tabskip => 0, spaceskip => 0, xspaceskip => 0, parfillskip => '0pt plus 1fil'); - - foreach my $p (keys %gparms) { - DefRegister("\\$p", Glue($gparms{$p})); } -} -#====================================================================== -# MuGlue registers; TeXBook p.274 -{ - DefRegister('\thinmuskip' => MuGlue("3mu")); - DefRegister('\medmuskip' => MuGlue("4mu plus 2mu minus 4mu")); - DefRegister('\thickmuskip' => MuGlue("5mu plus 5mu")); -} -#====================================================================== -# Token registers; TeXBook p.275 -{ - my @tparms = qw(output everypar everymath everydisplay everyhbox everyvbox - everyjob everycr everyhelp); - foreach my $p (@tparms) { - DefRegister("\\$p", Tokens()); } -} -#====================================================================== -# Assignment, TeXBook Ch.24, p.275 -#====================================================================== -# = | - -#====================================================================== -# Macros -# See Chapter 24, p.275-276 -# = | -# = -# = \def | \gdef | \edef | \xdef -# = - -sub parseDefParameters { - 
my ($cs, $params) = @_; - my @tokens = $params->packParameters->unlist; - # Now, recognize parameters and delimiters. - my @params = (); - my $n = 0; - while (@tokens) { - my $t = shift(@tokens); - my $cc = $$t[1]; - if ($cc == CC_PARAM || $cc == CC_ARG) { - if ($cc == CC_PARAM) { - if (!@tokens) { # Special case: lone # NOT following a numbered parameter - # Note that we require a { to appear next, but do NOT read it! - push(@params, LaTeXML::Core::Parameter->new('RequireBrace', 'RequireBrace')); - last; } - else { - $n++; $t = shift(@tokens); } } - else { # CC_ARG case, keep looking at this token - $n++; } - Fatal('expected', "#$n", $STATE->getStomach, - "Parameters for '" . ToString($cs) . "' not in order in " . ToString($params)) - unless (defined $t) && ($n == int($$t[0])); - # Check for delimiting text following the parameter #n - my @delim = (); - my $pc = -1; - INNER_DELIM: while (@tokens) { - my $inner_cc = $tokens[0]->getCatcode; - last INNER_DELIM if $inner_cc == CC_PARAM || $inner_cc == CC_ARG; - my $d = shift(@tokens); - push(@delim, $d) unless $pc == CC_SPACE && $inner_cc == CC_SPACE; # BUT collapse whitespace! - $pc = $inner_cc; } - # Found text that marks the end of the parameter - if (@delim) { - my $expected = Tokens(@delim); - push(@params, LaTeXML::Core::Parameter->new('Until', - 'Until:' . ToString($expected), - extra => [$expected])); } - # Special case: trailing sole # => delimited by next opening brace. - elsif ((scalar(@tokens) == 1) && ($tokens[0]->getCatcode == CC_PARAM)) { - shift(@tokens); - push(@params, LaTeXML::Core::Parameter->new('UntilBrace', 'UntilBrace')); } - # Nothing? Just a plain parameter. - else { - push(@params, LaTeXML::Core::Parameter->new('Plain', '{}')); } } - else { - # Initial delimiting text is required. 
- my @lit = ($t); - my $lit_cc; - while (@tokens && ($lit_cc = $tokens[0]->getCatcode) && - ($lit_cc != CC_PARAM && $lit_cc != CC_ARG)) { - push(@lit, shift(@tokens)); } - my $expected = Tokens(@lit); - push(@params, LaTeXML::Core::Parameter->new('Match', - 'Match:' . ToString($expected), - extra => [$expected], - novalue => 1)); } - } - return (@params ? LaTeXML::Core::Parameters->new(@params) : undef); } - -sub do_def { - my ($globally, $gullet, $cs, $params, $body) = @_; - if (!$cs) { - Error('expected', 'Token', $gullet, "Expected definition token"); - return; } - elsif (!$params) { - Error('misdefined', $cs, $gullet, "Expected definition parameter list"); - return; } - $params = parseDefParameters($cs, $params); - # noprep=>1 : leave preparing the ##, #1-#9 tokens to the Def parameter types - # to avoid carrying around the masks around and keep core code simple - $STATE->installDefinition(LaTeXML::Core::Definition::Expandable->new($cs, $params, $body, - nopackParameters => 1), - ($globally ? 
'global' : undef)); - AfterAssignment(); - return; } - -DefPrimitive('\def SkipSpaces Token UntilBrace DefPlain', sub { do_def(0, @_); }, locked => 1); -DefPrimitive('\gdef SkipSpaces Token UntilBrace DefPlain', sub { do_def(1, @_); }, locked => 1); -DefPrimitive('\edef SkipSpaces Token UntilBrace DefExpanded', sub { do_def(0, @_); }, locked => 1); -DefPrimitive('\xdef SkipSpaces Token UntilBrace DefExpanded', sub { do_def(1, @_); }, locked => 1); - -# = \global | \long | \outer -# See Stomach.pm & Stomach.pm -DefPrimitiveI('\global', undef, sub { $STATE->setPrefix('global'); return; }, isPrefix => 1); -DefPrimitiveI('\long', undef, sub { $STATE->setPrefix('long'); return; }, isPrefix => 1); -DefPrimitiveI('\outer', undef, sub { $STATE->setPrefix('outer'); return; }, isPrefix => 1); - -#====================================================================== -# Non-Macro assignments; TeXBook Ch.24, pp 276--277 -# = | \global - -# = | \relax -# = { - -# = | -# | | | -# | | | -# | \read to -# | \setbox<8bit> -# | \font -# | -# = -# | -# | -# | -# | -# | -# = at | scaled | -# = <8bit> - -sub lookupFontinfo { - my ($token) = @_; - my $defn = LookupDefinition($token); - # return LookupValue(($defn ? ToString($defn) : ToString($token)) . '_fontinfo'); } - return LookupValue('fontinfo_' . ($defn ? $defn->getCSName : ToString($token))); } - -# This should eventually actually load the font metrics, -# and tie-in to the FontMetrics data used by Font. -DefPrimitive('\font SkipSpaces Token SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub { - my ($stomach, $cs, $name) = @_; - my $gullet = $stomach->getGullet; - $name = ToString($name); - my ($at, $scaled); - if ($gullet->readKeyword('at')) { $at = $gullet->readDimension; } - if ($gullet->readKeyword('scaled')) { $scaled = $gullet->readNumber; } - my %props = LaTeXML::Common::Font::decodeFontname($name, - $at && $at->ptValue, $scaled && $scaled->valueOf / 1000); - if (!keys %props) { # Failed? 
- Info('unexpected', $name, $stomach, "Unrecognized font name '$name'", - "Font switch macro " . ToString($cs) . " will have no effect"); } - else { - $props{fontname} = $name; } - my $f = ($at ? $at->divide(Dimension('1em'))->valueOf - : ($scaled ? $scaled->valueOf / 1000 - : 1)); - my $fontinfo = \%props; - $$fontinfo{data} = [map { $_->multiply($f); } - Dimension(0), Dimension('0.5em'), Dimension(0), - Dimension(0), Dimension('1ex'), Dimension('1em')]; - $gullet->skipSpaces; - # Store the font info & metrics - AssignValue('fontinfo_' . ToString($cs) => $fontinfo); - # The font $cs should select the font - DefPrimitiveI($cs, undef, undef, font => $fontinfo); - return; }); - -# Not sure what this should be... -DefPrimitiveI('\nullfont', undef, undef, font => { family => 'nullfont' }); - -DefRegister('\count Number' => Number(0)); -DefRegister('\dimen Number' => Dimension(0)); -DefRegister('\skip Number' => Glue(0)); -DefRegister('\muskip Number' => MuGlue(0)); -DefRegister('\toks Number' => Tokens()); - -# = | | \count<8bit> -# = | | \dimen<8bit> -# = | | \skip<8bit> -# = | | \muskip<8bit> - -# = \advance -# | \advance -# | \advance -# | \advance -# | \multiply -# | \divide - -DefPrimitive('\advance Variable SkipKeyword:by', sub { - my ($stomach, $var) = @_; - return () unless $var; - my ($defn, @args) = @$var; - return () if !$defn || $defn eq "missing"; - local $LaTeXML::CURRENT_TOKEN = $defn; - $defn->setValue($defn->valueOf(@args)->add($stomach->getGullet->readValue($defn->isRegister)), - undef, @args); }); - -DefPrimitive('\multiply Variable SkipKeyword:by Number', sub { - my ($stomach, $var, $scale) = @_; - return () unless $var; - my ($defn, @args) = @$var; - $defn->setValue($defn->valueOf(@args)->multiply($scale->valueOf), undef, @args); }); - -DefPrimitive('\divide Variable SkipKeyword:by Number', sub { - my ($stomach, $var, $scale) = @_; - return () unless $var; - my ($defn, @args) = @$var; - my $denom = $scale->valueOf; - if ($denom == 0) { - 
Error('misdefined', $scale, $stomach, "Illegal \\divide by 0; assuming 1"); - $denom = 1; } - $defn->setValue($defn->valueOf(@args)->divide($denom), undef, @args); }); - -# = \futurelet -# | \let -DefPrimitive('\let SkipSpaces Token SkipSpaces SkipMatch:= Skip1Space Token', sub { - my ($stomach, $token1, $token2) = @_; - Let($token1, $token2); - return; }); - -DefPrimitive('\futurelet Token Token Token', sub { - my ($stomach, $cs, $token1, $token2) = @_; - $stomach->getGullet->unread($token1, $token2); # NOT expandable, but puts tokens back - Let($cs, $token2); - return; }); - -# = \chardef<8bit> -# | \mathchardef <15bit> -# | <8bit> -# = \countdef | \dimendef | \skipdef | \muskipdef | toksdef - -# Note that before \countdef, \dimendef, \skipdef, \muskipdef, \chardef, \mathchardef, \toksdef -# read the , they let the token to \relax in case the def is immediately followed -# by a use or assignment; See TeX Program \s 1224 -# See below for \chardef & \mathchardef -# Note that these define a "shorthand" for eg. \count123, but are NOT macros! -sub shorthandDef { - my ($stomach, $cs, $type, $init) = @_; - $STATE->assignMeaning($cs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssign - my $num = $stomach->getGullet->readNumber(); - my $address = $type . 
$num->valueOf; - DefRegisterI($cs, undef, $init, address => $address); - AfterAssignment(); - return; } - -DefPrimitive('\countdef SkipSpaces Token SkipSpaces SkipMatch:=', sub { - shorthandDef($_[0], $_[1], '\count', Number(0)); }); - -DefPrimitive('\dimendef SkipSpaces Token SkipSpaces SkipMatch:=', sub { - shorthandDef($_[0], $_[1], '\dimen', Dimension(0)); }); - -DefPrimitive('\skipdef SkipSpaces Token SkipSpaces SkipMatch:=', sub { - shorthandDef($_[0], $_[1], '\skip', Glue(0)); }); - -DefPrimitive('\muskipdef SkipSpaces Token SkipSpaces SkipMatch:=', sub { - shorthandDef($_[0], $_[1], '\muskip', MuGlue(0)); }); - -DefPrimitive('\toksdef SkipSpaces Token SkipSpaces SkipMatch:=', sub { - shorthandDef($_[0], $_[1], '\toks', Tokens()); }); - -# NOTE: Get all these handled as registers -# = | | \lastpenalty -# | | \count<8bit> | <8bit> -# | | | \parshape | \inputlineno -# | \hyphenchar | \skewchar | \badness - -DefRegister('\lastpenalty', Number(0), readonly => 1); - -# \parshape !?!?? -DefPrimitive('\parshape SkipSpaces SkipMatch:= Number', sub { - my ($stomach, $n) = @_; - $n = $n->valueOf; - my $gullet = $stomach->getGullet; - for (my $i = 0 ; $i < $n ; $i++) { - $gullet->readDimension; $gullet->readDimension; } - # we _could_ conceivably store this somewhere for some attempt at stylistic purpose... - return; }); - -DefRegister('\inputlineno', Number(0), - getter => sub { - my $locator = $STATE->getStomach->getGullet->getLocator(); - Number($locator ? $$locator{fromLine} : 0); }, - readonly => 1); - -DefRegister('\badness', Number(0), readonly => 1); - -# = \catcode | \mathcode | \lccode | \uccode | \sfcode | \delcode - -DefRegister('\catcode Number', Number(0), - getter => sub { my $code = LookupCatcode(chr($_[0]->valueOf)); - Number(defined $code ? 
$code : CC_OTHER); }, - setter => sub { AssignCatcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); -# # Only used for active math characters, so far -DefRegister('\mathcode Number', Number(0), - getter => sub { - my $ch = $_[0]->valueOf; - my $code = $STATE->lookupMathcode(chr($ch)); - Number(defined $code ? $code : $ch); }, # defaults to the char's code itself(?) - setter => sub { $STATE->assignMathcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); -# Not used anywhere (yet) -DefRegister('\sfcode Number', Number(0), - getter => sub { my $code = $STATE->lookupSFcode(chr($_[0]->valueOf)); - Number(defined $code ? $code : 0); }, - setter => sub { $STATE->assignSFcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); -DefRegister('\lccode Number', Number(0), - getter => sub { my $code = $STATE->lookupLCcode(chr($_[0]->valueOf)); - Number(defined $code ? $code : 0); }, - setter => sub { $STATE->assignLCcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); -DefRegister('\uccode Number', Number(0), - getter => sub { my $code = $STATE->lookupUCcode(chr($_[0]->valueOf)); - Number(defined $code ? $code : 0); }, - setter => sub { $STATE->assignUCcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); -# Not used anywhere (yet) -DefRegister('\delcode Number', Number(0), - getter => sub { my $code = $STATE->lookupDelcode(chr($_[0]->valueOf)); - Number(defined $code ? $code : 0); }, - setter => sub { $STATE->assignDelcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); }); - -# Remember, we're assigning a NUMBER (codepoint) to a CHARACTER! -foreach my $letter (ord('A') .. ord('Z')) { - $STATE->assignLCcode(chr($letter), $letter + 0x20, 'global'); - $STATE->assignUCcode(chr($letter), $letter, 'global'); - $STATE->assignLCcode(chr($letter + 0x20), $letter + 0x20, 'global'); - $STATE->assignUCcode(chr($letter + 0x20), $letter, 'global'); } - -# Stub definitions ??? 
-DefMacro('\hyphenation GeneralText', Tokens()); -DefMacro('\patterns{}', Tokens()); - -# = | \font | -# = <4bit> -# = \textfont | \scriptfont | \scriptscriptfont - -# Doubtful that we can do anything useful with these. -# These look essentially like Registers, although Knuth doesn't call them that. -# NOTE: These should just point to a CS token, right???? -# (although it SHOULD be one defined to be a font switch??) -# NOTE: These should NOT be global(?) -DefRegister('\textfont Number' => T_CS('\tenrm'), - getter => sub { - my ($fam) = @_; - LookupValue('textfont_' . $fam->valueOf); }, - setter => sub { - my ($font, $scope, $fam) = @_; - AssignValue('textfont_' . $fam->valueOf => $font, $scope); }); -DefRegister('\scriptfont Number' => T_CS('\sevenrm'), - getter => sub { - my ($fam) = @_; - LookupValue('scriptfont_' . $fam->valueOf); }, - setter => sub { - my ($font, $scope, $fam) = @_; - AssignValue('scriptfont_' . $fam->valueOf => $font, $scope); }); -DefRegister('\scriptscriptfont Number' => T_CS('\fiverm'), - getter => sub { - my ($fam) = @_; - LookupValue('scriptscriptfont_' . $fam->valueOf); }, - setter => sub { - my ($font, $scope, $fam) = @_; - AssignValue('scriptscriptfont_' . $fam->valueOf => $font, $scope); }); - -# = | | \lastkern -# | | \dimen<8bit> | <8bit> | \fontdimen -DefRegister('\lastkern' => Dimension(0), readonly => 1); - -# = \ht | \wd | \dp -DefRegister('\ht Number', Dimension(0), - getter => sub { - my ($n) = @_; - my $stuff = $n && LookupValue('box' . $n->valueOf); - return ($stuff ? $stuff->getHeight : Dimension(0)); }, - setter => sub { - my ($value, $scope, $n) = @_; - my $stuff = $n && LookupValue('box' . $n->valueOf); - $stuff->setHeight($value) if $stuff; - return; }); -DefRegister('\wd Number', Dimension(0), - getter => sub { - my ($n) = @_; - my $stuff = $n && LookupValue('box' . $n->valueOf); - return ($stuff ? 
$stuff->getWidth : Dimension(0)); }, - setter => sub { - my ($value, $scope, $n) = @_; - my $stuff = $n && LookupValue('box' . $n->valueOf); - $stuff->setWidth($value) if $stuff; - return; }); - -DefRegister('\dp Number', Dimension(0), - getter => sub { - my ($n) = @_; - my $stuff = $n && LookupValue('box' . $n->valueOf); - return ($stuff ? $stuff->getDepth : Dimension(0)); }, - setter => sub { - my ($value, $scope, $n) = @_; - my $stuff = $n && LookupValue('box' . $n->valueOf); - $stuff->setDepth($value) if $stuff; - return; }); - -# 2nd arg is = | \font | -# = <4bit number> -# = \textfont | \scriptfont | \scriptscriptfont -DefParameterType('FontToken', sub { - my ($gullet) = @_; - my $token = $gullet->readToken; - if ($token->toString =~ /^\\(text|script|scriptscript)font$/) { - my $type = $1; - if (my $fam = $gullet->readNumber) { - $token = LookupValue($type . 'font_' . $fam->valueOf); } } - elsif ($token->toString eq '\\font') { - $token = LookupValue('textfont_0'); } # ??? I assume shuld get current font? - $token; }); #? 
- -# Access to the font parameters; Curiously, can be used as scratch arrays (eg LaTeX3) -DefRegister('\fontdimen Number FontToken' => Dimension(0), - getter => sub { - my ($p, $font) = @_; - my $info = lookupFontinfo($font); - $p = ToString($p); - my $data = $info && $$info{data}; - return ($data && $$data[$p - 1]) || Dimension(0); }, - setter => sub { - my ($value, $scope, $p, $font) = @_; - my $info = lookupFontinfo($font); - $p = ToString($p); - if (my $data = $info && $$info{data}) { - $$data[$p - 1] = $value; } } -); - -DefRegister('\hyphenchar FontToken' => Number(ord('-')), - getter => sub { - my ($font) = @_; - my $info = lookupFontinfo($font); - return ($info && $$info{hyphenchar}) || Number(ord('-')); }, - setter => sub { - my ($value, $scope, $font) = @_; - if (my $info = lookupFontinfo($font)) { - $$info{hyphenchar} = $value; } } -); -DefRegister('\skewchar FontToken' => Number(0), - getter => sub { - my ($font) = @_; - my $info = lookupFontinfo($font); - return ($info && $$info{skewchar}) || Number(0); }, - setter => sub { - my ($value, $scope, $font) = @_; - if (my $info = lookupFontinfo($font)) { - $$info{skewchar} = $value; } } -); - -# Could be handled by setting dimensions whenever the box itself is set? 
- -# = | \lastskip | | \skip<8bit> - -DefRegister('\lastskip' => Glue(0), readonly => 1); - -# = | \lastskip | | \muskip<8bit> - -# = -# = \parshape -# is 2n - -# = | -# | | -# | -# = \fontdimen -# | \hyphenchar | \skewchar -# = \hyphenation -# | \patterns -# = <8bit> -# = \errorstopmode | \scrollmode | \nonstopmode | \batchmode -# These are no-ops; Basically, LaTeXML runs in scrollmode -DefPrimitiveI('\errorstopmode', undef, undef); -DefPrimitiveI('\scrollmode', undef, undef); -DefPrimitiveI('\nonstopmode', undef, undef); -DefPrimitiveI('\batchmode', undef, undef); - -# = -# | - -DefMacro('\fontencoding{}', '\@@@fontencoding{#1}'); - -DefPrimitive('\@@@fontencoding{}', sub { - my ($stomach, $encoding) = @_; - $encoding = ToString(Expand($encoding)); - if (LoadFontMap($encoding)) { - MergeFont(encoding => $encoding); } - else { - MergeFont(encoding => 'OT1'); } # Default to OT1 encoding if no map found - return; }); - -DefMacroI('\f@encoding', undef, sub { ExplodeText(LookupValue('font')->getEncoding); }); -DefMacroI('\cf@encoding', undef, sub { ExplodeText(LookupValue('font')->getEncoding); }); - -# Used for SemiVerbatim text -DeclareFontMap('ASCII', - [undef, undef, undef, undef, undef, undef, undef, undef, - undef, undef, undef, undef, undef, undef, undef, undef, - undef, undef, undef, undef, undef, undef, undef, undef, - undef, undef, undef, undef, undef, undef, undef, undef, - " ", '!', "\"", '#', '$', '%', '&', "'", - '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', ':', ';', '<', '=', '>', '?', - '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - 'X', 'Y', 'Z', '[', "\\", ']', "^", "_", - "`", 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', - 'x', 'y', 'z', "{", "|", "}", "~", undef]); - -# Note that several entries are used for accents, and in practice will actually -# 
be used in something like an m:mover; thus they needn't (shouldn't?) be "small" -# There are also some questions about which choices are best -# grave & acute accents (entry 0x12 & 0x13) (often typed using 0x60 & 0x27) -# are probably best using U+60(grave accent) & U+B4(acute accent) -# but could be U+2035 (reversed prime) & U+2032 (prime). (particularly for math?) -# [we do use these for \prime, however!] -# or U+02CB (modifier letter grave accent) & U+02CA (modifier letter acute accent) -# Similarly, hat & tilde (entries 0x5E & 0x7E) -# typed using ^ 0x5E circumflex accent) & ~ 0x7E tilde -# are probably best just sticking with U+5E & U+7E -# but could be U+02C6 (modifier letter circumflex accent) U+02DC (small tilde) -# [Note that generally we're using codepoints characterized as "modifier letter" -# only when no other spacing point is available.] -DeclareFontMap('OT1', - ["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}", - "\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{FB00}", "\x{FB01}", "\x{FB02}", "\x{FB03}", "\x{FB04}", - "\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}", - UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8), - UTF(0xA0) . 
"\x{0335}", '!', "\x{201D}", '#', '$', '%', '&', "\x{2019}", - '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', ':', ';', UTF(0xA1), '=', UTF(0xBF), '?', - '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - 'X', 'Y', 'Z', '[', "\x{201C}", ']', "^", "\x{02D9}", - "\x{2018}", 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', - 'x', 'y', 'z', "\x{2013}", "\x{2014}", "\x{02DD}", UTF(0x7E), UTF(0xA8)]); - -DeclareFontMap('OT1', - ["\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}", - "\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{2191}", "\x{2193}", "'", UTF(0xA1), UTF(0xBF), - "\x{0131}", "\x{0237}", UTF(0x60), UTF(0xB4), "\x{02C7}", "\x{02D8}", UTF(0xAF), "\x{02DA}", - UTF(0xB8), UTF(0xDF), UTF(0xE6), "\x{0153}", UTF(0xF8), UTF(0xC6), "\x{152}", UTF(0xD8), - "\x{2423}", '!', "\"", '#', '$', '%', '&', "\x{2019}", - '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', ':', ';', "<", '=', ">", '?', - '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - 'X', 'Y', 'Z', '[', "\\", ']', "^", "_", - "\x{2018}", 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', - 'x', 'y', 'z', "{", "|", "}", "~", UTF(0xA8)], - family => 'typewriter'); - -DeclareFontMap('OML', - [ # \Gamma \Delta \Theta \Lambda \Xi \Pi \Sigma \Upsilon - "\x{0393}", "\x{0394}", "\x{0398}", "\x{039B}", "\x{039E}", "\x{03A0}", "\x{03A3}", "\x{03A5}", - # \Phi \Psi \Omega alpha beta gamma delta epsilon - "\x{03A6}", "\x{03A8}", "\x{03A9}", "\x{03B1}", "\x{03B2}", "\x{03B3}", "\x{03B4}", "\x{03F5}", - # zeta eta theta iota kappa lambda mu nu - "\x{03B6}", "\x{03B7}", "\x{03B8}", "\x{03B9}", "\x{03BA}", "\x{03BB}", 
"\x{03BC}", "\x{03BD}", - # xi pi rho sigma tau upsilon phi chi - "\x{03BE}", "\x{03C0}", "\x{03C1}", "\x{03C3}", "\x{03C4}", "\x{03C5}", "\x{03D5}", "\x{03C7}", - # psi omega varepsilon vartheta varpi varrho varsigma varphi - "\x{03C8}", "\x{03C9}", "\x{03B5}", "\x{03D1}", "\x{03D6}", "\x{03F1}", "\x{03C2}", "\x{03C6}", - # l.harp.up l.harp.dn r.harp.up r.harp.dn lhook rhook rt.tri lf.tri - "\x{21BC}", "\x{21BD}", "\x{21C0}", "\x{21C1}", "\x{2E26}", "\x{2E27}", "\x{25B7}", "\x{25C1}", - # old style numerals! (no separate codepoints ?) - # 0 1 2 3 4 5 6 7 - '0', '1', '2', '3', '4', '5', '6', '7', - # 8 9 . , < / > star - '8', '9', '.', ',', UTF(0x3C), UTF(0x2F), UTF(0x3E), "\x{22C6}", - # partial A B C D E F G - "\x{2202}", 'A', 'B', 'C', 'D', 'E', 'F', 'G', - # H I J K L M N O - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - # P Q R S T U V W - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - # X Y Z flat natural sharp smile frown - 'X', 'Y', 'Z', "\x{266D}", "\x{266E}", "\x{266F}", "\x{2323}", "\x{2322}", - # ell a b c d e f g - "\x{2113}", 'a', 'b', 'c', 'd', 'e', 'f', 'g', - # h i j k l m n o - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - # p q r s t u v w - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', - # x y z dotless i dotless j weier-p arrow acc. inv.breve - 'x', 'y', 'z', "\x{0131}", "j", "\x{2118}", "\x{2192}", UTF(0xA0) . 
"\x{0311}"]); -DeclareFontMap('OMS', - [ #minus dot times ast divide diamond plus-minus minus-plus - "-", "\x{22C5}", UTF(0xD7), "\x{2217}", UTF(0xF7), "\x{22C4}", UTF(0xB1), "\x{2213}", - # oplus ominus otimes oslash odot bigcirc circ bullet - "\x{2295}", "\x{2296}", "\x{2297}", "\x{2298}", "\x{2299}", "\x{25CB}", "\x{2218}", "\x{2219}", - # asymp equiv subseteq supseteq leq geq preceq succeq - "\x{224D}", "\x{2261}", "\x{2286}", "\x{2287}", "\x{2264}", "\x{2265}", "\x{2AAF}", "\x{2AB0}", - # sim approx subset supset ll gg prec succ - "\x{223C}", "\x{2248}", "\x{2282}", "\x{2283}", "\x{226A}", "\x{226B}", "\x{227A}", "\x{227B}", - # leftarrow rightarrow uparrow downarrow leftrightar nearrow searrow simeq - "\x{2190}", "\x{2192}", "\x{2191}", "\x{2193}", "\x{2194}", "\x{2197}", "\x{2198}", "\x{2243}", - # Leftarrow Rightarrow Uparrow Downarrow Leftrightar nwarrow swarrow propto - "\x{21D0}", "\x{21D2}", "\x{21D1}", "\x{21D3}", "\x{21D4}", "\x{2196}", "\x{2199}", "\x{221D}", - # prime infty in ni bigtri.up bigtri.dn slash mapsto - "\x{2032}", "\x{221E}", "\x{2208}", "\x{220B}", "\x{25B3}", "\x{25BD}", "/", "\x{21A6}", - # forall exists not emptyset Re Im top bot - "\x{2200}", "\x{2203}", UTF(0xAC), "\x{2205}", "\x{211C}", "\x{2111}", "\x{22A4}", "\x{22A5}", - # aleph cal A cal B cal C cal D cal E cal F cal G - "\x{2135}", "\x{1D49C}", "\x{212C}", "\x{1D49E}", "\x{1D49F}", "\x{2130}", "\x{2131}", "\x{1D4A2}", - # cal H cal I cal J cal K cal L cal M cal N cal O - "\x{210B}", "\x{2110}", "\x{1D4A5}", "\x{1D4A6}", "\x{2112}", "\x{2133}", "\x{1D4A9}", "\x{1D4AA}", - # cal P cal Q cal R cal S cal T cal U cal V cal W -"\x{1D4AB}", "\x{1D4AC}", "\x{211B}", "\x{1D4AE}", "\x{1D4AF}", "\x{1D4B0}", "\x{1D4B1}", "\x{1D4B2}", - # cal X cal Y cal Z cup cap uplus wedge vee - "\x{1D4B3}", "\x{1D4B4}", "\x{1D4B5}", "\x{222A}", "\x{2229}", "\x{228C}", "\x{2227}", "\x{2228}", - # vdash dashv lfloor rfloor lceil rceil lbrace rbrace - "\x{22A2}", "\x{22A3}", "\x{230A}", "\x{230B}", 
"\x{2308}", "\x{2309}", "{", "}", - # langle rangle | \| updownarrow Updownarrow backslash wr - "\x{27E8}", "\x{27E9}", "|", "\x{2225}", "\x{2195}", "\x{21D5}", UTF(0x5C), "\x{2240}", - # surd amalg nabla int sqcup sqcap sqsubseteq sqsupseteq - "\x{221A}", "\x{2210}", "\x{2207}", "\x{222B}", "\x{2294}", "\x{2293}", "\x{2291}", "\x{2292}", - # section dagger ddagger para clubsuit diam.suit heartsuit spadesuit - UTF(0xA7), "\x{2020}", "\x{2021}", UTF(0xB6), "\x{2663}", "\x{2662}", "\x{2661}", "\x{2660}"]); - -DeclareFontMap('OMX', - [ # ( ) [ ] lfloor rfloor lceil rceil - "(", ")", "[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", - #lbrace rbrace langle rangle | || / \ - "{", "}", "\x{27E8}", "\x{27E9}", "|", "\x{2225}", "/", UTF(0x5C), - "(", ")", "(", ")", "[", "]", "\x{230A}", "\x{230B}", - "\x{2308}", "\x{2309}", "{", "}", "\x{27E8}", "\x{27E9}", "/", UTF(0x5C), - "(", ")", "[", "]", "\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", - "{", "}", "\x{27E8}", "\x{27E9}", "/", UTF(0x5C), "/", UTF(0x5C), - # next two rows are just fragments - # l.up.paren r.up.paren l.up.brak r.up.brak l.bot.brak r.bot.brak l.brak.ext r.brak.ext - "\x{239B}", "\x{239E}", "\x{23A1}", "\x{23A4}", "\x{23A3}", "\x{23A6}", "\x{23A2}", "\x{23A5}", - # l.up.brace r.up.brace l.bot.brace r.bot.brace l.brace.mid r.brace.mid brace.ext v.arrow.ext - "\x{23A7}", "\x{23AB}", "\x{23A9}", "\x{23AD}", "\x{23A8}", "\x{23AC}", "\x{23AA}", "\x{23D0}", - # l.bot.paren r.bot.paren l.paren.ext r.paren.ext - "\x{239D}", "\x{23A0}", "\x{239C}", "\x{239F}", "\x{27E8}", "\x{27E9}", "\x{2294}", "\x{2294}", - "\x{222E}", "\x{222E}", "\x{2299}", "\x{2299}", "\x{2295}", "\x{2295}", "\x{2297}", "\x{2297}", - "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}", - "\x{2211}", "\x{220F}", "\x{222B}", "\x{22C3}", "\x{22C2}", "\x{228C}", "\x{2227}", "\x{2228}", - "\x{2210}", "\x{2210}", UTF(0x5E), UTF(0x5E), UTF(0x5E), UTF(0x7E), UTF(0x7E), UTF(0x7E), - "[", "]", 
"\x{230A}", "\x{230B}", "\x{2308}", "\x{2309}", "{", "}", -# [missing rad frags] double arrow ext. - "\x{23B7}", "\x{23B7}", "\x{23B7}", "\x{23B7}", "\x{23B7}", undef, undef, undef, - # [missing tips for horizontal curly braces] - "\x{2191}", "\x{2193}", undef, undef, undef, undef, "\x{21D1}", "\x{21D3}"]); - -DefPrimitive('\char Number', sub { - Box(FontDecode($_[1]->valueOf), undef, undef, - Tokens(T_CS('\char'), $_[1]->revert, T_CS('\relax'))); }); - -# Almost like a register (and \countdef), but different... -# (including the preassignment to \relax!) -DefPrimitive('\chardef Token SkipSpaces SkipMatch:=', sub { - my ($stomach, $newcs) = @_; - $STATE->assignMeaning($newcs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssignment - my $value = $stomach->getGullet->readNumber(); - $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, $value)); - AfterAssignment(); - return; }); - -our @mathclassrole = (undef, 'BIGOP', 'BINOP', 'RELOP', 'OPEN', 'CLOSE', 'PUNCT', undef); -# Is this "fontinfo" stuff sufficient to maintain a math font "family" ?? -# What we're really after is a connectio nto a font encoding mapping. -sub decodeMathChar { - my ($n) = @_; - my $class = int($n / (16 * 256)); $n = $n % (16 * 256); - my $fam = int($n / 256); $n = $n % 256; - my $font = LookupValue('textfont_' . $fam) - || LookupValue('scriptfont_' . $fam) - || LookupValue('scriptscriptfont_' . $fam); - my $char = chr($n); - # If no specific class, Lookup properties from a DefMath? - my $charinfo = LookupValue('math_token_attributes_' . $char); - my $fontinfo = lookupFontinfo($font); - my $role = $mathclassrole[$class]; - $role = $$charinfo{role} if (!defined $role) && $charinfo; - return ($role, - ($fontinfo && $$fontinfo{encoding} ? 
FontDecode($n, $$fontinfo{encoding}) : $char)); } - -DefPrimitive('\mathchar Number', sub { - my ($stomach, $code) = @_; - my ($role, $glyph) = decodeMathChar($code->valueOf); - Box($glyph, undef, undef, - Tokens(T_CS('\mathchar'), $_[1]->revert, T_CS('\relax')), - role => $role); }); - -DefConstructor('\delimiter Number', - "?#glyph(?#isMath(#glyph)(#glyph))", - sizer => '#glyph', - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $n = $whatsit->getArg(1)->valueOf; - $n = $n >> 12; # Ignore 3 rightmost digits and treat as \mathchar - my ($role, $glyph) = decodeMathChar($n); - $whatsit->setProperty(glyph => $glyph) if $glyph; - $whatsit->setProperty(role => $role) if defined $role; - $whatsit->setProperty(font => LookupValue('font')->specialize($glyph)) if $glyph; - return; }); - -# Almost like a register, but different... -DefPrimitive('\mathchardef Token SkipSpaces SkipMatch:=', sub { - my ($stomach, $newcs) = @_; - $STATE->assignMeaning($newcs, $STATE->lookupMeaning(T_CS('\relax'))); # Let w/o AfterAssignment - my $value = $stomach->getGullet->readNumber(); - my ($role, $glyph) = decodeMathChar($value->valueOf); - $STATE->installDefinition(LaTeXML::Core::Definition::CharDef->new($newcs, $value, - $glyph, role => $role)); - AfterAssignment(); - return; }); - -DefConstructor('\mathaccent Number Digested', - "#glyph#2", - sizer => '#2', # Close enough? - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $n = $whatsit->getArg(1)->valueOf; - my ($role, $glyph) = decodeMathChar($n); - $whatsit->setProperty(glyph => $glyph) if $glyph; - $whatsit->setProperty(font => LookupValue('font')->specialize($glyph)) if $glyph; - return; }); - -# = \box <8bit> | \copy <8bit> | \lastbox | \vsplit <8bit> to -# | \hbox {} -# | \vbox {} -# | \vtop {} -# = to | spread | - -# \setbox=\hbox to {} - -DefPrimitive('\lastbox', sub { # Hopefully, the correct box got seen! 
- return pop(@LaTeXML::LIST); }); - -DefPrimitive('\setbox Number SkipSpaces SkipMatch:=', sub { - my ($stomach) = @_; - no warnings 'recursion'; - my $box = 'box' . $_[1]->valueOf; - # If there is any afterAssignment tokens, move them over so BoxContents parameter will use them - if (my $token = LookupValue('afterAssignment')) { - AssignValue('afterAssignment' => undef, 'global'); - AssignValue('BeforeNextBox' => $token); } - # Save global flag, since we're digesting to get the box content, which resets the flag! - # Should afterDigest be responsible for resetting flags? - my $scope = $STATE->getPrefix('global') && 'global'; - $STATE->clearPrefixes; # before invoke, below; we've saved the only relevant one (global) - my ($stuff, @rest) = $stomach->invokeToken($stomach->getGullet->readXToken); - AssignValue('box' . $_[1]->valueOf => $stuff, $scope); - @rest; }); - -sub adjustBoxColor { - my ($box) = @_; - my $font = LookupValue('font'); - if (my $color = $font && $font->getColor) { - if (!Black->equals($color)) { - adjustBoxColor_rec($color, {}, $box); } } - return; } - -sub adjustBoxColor_rec { - no warnings 'recursion'; - my ($color, $adjusted, @boxes) = @_; - foreach my $box (@boxes) { - next unless defined $box; - next if $$adjusted{$box}; # since we do args AND props, be careful not to adjust twice! - $$adjusted{$box} = 1; - my $r = ref $box; - next unless $r && ($r !~ /(?:SCALAR|HASH|ARRAY|CODE|REF|GLOB|LVALUE)/) && $r->isaBox; - # NASTY access to internal structure; but worth a whole API for this one hack??? - if ($r eq 'LaTeXML::Core::Box') { - adjustBoxColor_internal($color, $box); } - elsif ($r eq 'LaTeXML::Core::List') { - adjustBoxColor_rec($color, $adjusted, $box->unlist); } - elsif ($r eq 'LaTeXML::Core::Whatsit') { - adjustBoxColor_internal($color, $box); - # now recurse on contained boxes (args AND properties!) 
- adjustBoxColor_rec($color, $adjusted, $box->getArgs); - adjustBoxColor_rec($color, $adjusted, values %{ $box->getPropertiesRef }); } - elsif ($r eq 'LaTeXML::Core::Alignment') { - foreach my $row (@{ $$box{rows} }) { - foreach my $col (@{ $$row{columns} }) { - adjustBoxColor_rec($color, $adjusted, $$col{boxes}->unlist) if $$col{boxes}; } } } - } - return; } - -sub adjustBoxColor_internal { - my ($color, $box) = @_; - if (my $font = $box->getFont) { - $box->setFont($font->merge(color => $color)); } - return; } - -DefPrimitive('\box Number', sub { - my $box = 'box' . $_[1]->valueOf; - my $stuff = LookupValue($box); - adjustBoxColor($stuff); - AssignValue($box, undef); - ($stuff ? $stuff : List()); }); - -DefPrimitive('\copy Number', sub { - my $box = 'box' . $_[1]->valueOf; - my $stuff = LookupValue($box); - adjustBoxColor($stuff); - ($stuff ? $stuff : List()); }); - -DefPrimitive('\vsplit Number Match:to Dimension', sub { - # analog to \box for now. - my $box = 'box' . $_[1]->valueOf; - my $stuff = LookupValue($box); - adjustBoxColor($stuff); - ($stuff ? $stuff : List()); }); - -sub revert_spec { - my ($whatsit, $keyword) = @_; - my $value = $whatsit->getProperty($keyword); - return ($value ? (Explode($keyword), Revert($value)) : ()); } - -DefParameterType('BoxSpecification', sub { - my ($gullet) = @_; - if (my $key = $gullet->readKeyword('to', 'spread')) { - my $keyvals = LaTeXML::Core::KeyVals->new(undef, undef, skipMissing => 1); - $keyvals->setValue($key, $gullet->readDimension); - $keyvals; } }, - reversion => sub { - my ($spec) = @_; - if (my $to = $spec && $spec->getValue('to')) { - return Tokens(Tokenize('to'), Revert($to)); } - elsif (my $spread = $spec && $spec->getValue('spread')) { - return Tokens(Tokenize('spread'), Revert($spread)); } - else { - return; } }, - optional => 1, undigested => 1); - -# Risky: I think this needs to be digested as a body to work like TeX (?) -# but parameter think's it's just parsing from gullet... 
# Read and digest the braced contents of a box.
#   $gullet   : the gullet to read tokens from.
#   $everybox : optional Tokens (the caller passes the value of \everyhbox
#               or \everyvbox) to be inserted at the start of the box.
# Returns the first item produced by invoking T_BEGIN on the stomach;
# any further items from that invocation are dropped.
sub readBoxContents {
  no warnings 'recursion';
  my ($gullet, $everybox) = @_;
  # Skip (and discard) tokens until one defined as { or \bgroup, or input runs out.
  while (my $t = $gullet->readToken) {
    last if $t->defined_as(T_BEGIN); }
  # Now, insert some extra tokens, if any, possibly from \afterassignment
  if (my $token = LookupValue('BeforeNextBox')) {
    AssignValue(BeforeNextBox => undef, 'global');
    $gullet->unread($token); }
  # AND, insert any extra tokens passed in, due to everyhbox or everyvbox
  $gullet->unread($everybox->unlist) if $everybox;
  # The opening token was consumed above, so start the group explicitly.
  # (list assignment keeps invokeToken in list context, as before)
  my ($contents) = $STATE->getStomach->invokeToken(T_BEGIN);
  return $contents; }

DefParameterType('HBoxContents', sub {
    readBoxContents($_[0], LookupValue('\everyhbox')); },
  undigested => 1);    # Cause it already is digested!
DefParameterType('VBoxContents', sub {
    readBoxContents($_[0], LookupValue('\everyvbox')); },
  undigested => 1);    # Cause it already is digested!

# This re-binds a number of important control sequences to their default text binding.
# This is useful within common boxing or footnote macros that can appear within
# alignments or special environments that have redefined many of these.
# You'll typically want this within a group or bounded=>1.
# Each entry pushed onto these lists is a pair [token, token-to-Let-it-to].
AssignValue(TEXT_MODE_BINDINGS  => []);
AssignValue(HTEXT_MODE_BINDINGS => []);
AssignValue(VTEXT_MODE_BINDINGS => []);
PushValue(HTEXT_MODE_BINDINGS => [T_MATH, T_CS('\@dollar@in@textmode')]);
PushValue(VTEXT_MODE_BINDINGS => [T_MATH, T_CS('\@dollar@in@normalmode')]);
###PushValue(TEXT_MODE_BINDINGS => [T_CS('\centerline'), T_CS('\relax')]);

# Apply the recorded text-mode bindings via Let():
# first the mode specific list (VTEXT_MODE_BINDINGS when $verticalmode is true,
# otherwise HTEXT_MODE_BINDINGS), then the common TEXT_MODE_BINDINGS.
# Returns nothing.
sub reenterTextMode {
  my ($verticalmode) = @_;
  my $modekey = ($verticalmode ? 'VTEXT_MODE_BINDINGS' : 'HTEXT_MODE_BINDINGS');
  foreach my $binding (@{ LookupValue($modekey) }, @{ LookupValue('TEXT_MODE_BINDINGS') }) {
    Let($$binding[0], $$binding[1]); }
  return; }

# Similarly, for metadata appearing within peculiar environments, fonts, etc
# You'll typically want this within a group or bounded=>1.
-sub neutralizeFont { - AssignValue(font => LaTeXML::Common::Font->textDefault(), 'local'); - AssignValue(mathfont => LaTeXML::Common::Font->mathDefault(), 'local'); - return; } - -sub REF { - my ($thing, $key) = @_; - return $thing && $$thing{$key}; } - -sub inSVG { - my $document = $LaTeXML::DOCUMENT; - my $context = $document->getElement; - return $context && $document->getNodeQName($context) =~ /^svg:/; } - -# Collapse redundant svg:g nodes that have only certain -# non-cummulative attributes -Tag('svg:g', afterClose => \&collapseSVGGroup); -my %collapsible_group_attributes = map { ($_ => 1); } - qw(fill fill-rule fill-opacity - stroke stroke-width stroke-linecap stroke-linejoin stroke-miterlimit - stroke-dasharray stroke-dashoffset stroke-opacity - color); - -# Collapse/remove/unwrap unneeded svg:g's to reduce depth of tree -sub collapseSVGGroup { - my ($document, $node) = @_; - my ($nempty, $nredundant, $nmerged, $npopped, $npushed) = (0, 0, 0, 0, 0); - # Record the attributes on $node, for later use. - my %nodeattr = (); - foreach my $attr ($node->attributes) { - my $key = $attr->getName; - $nodeattr{$key} = $attr->getValue if ($key !~ /^_/); } - return if defined $nodeattr{'clip-path'}; # Needs separate svg:g node (?) - my @children = element_nodes($node); - # Remove empty svg:g children - foreach my $c (@children) { - if (($document->getNodeQName($c) eq 'svg:g') && !scalar(element_nodes($c))) { - $nempty++; - $document->removeNode($c); } } - @children = element_nodes($node) if $nempty; - # Move ahead, all leading children whose svg:g attributes completely mask $node's attributes. 
- # Could do same moving trailing children to back - my $c; - while (scalar(@children) && ($document->getNodeQName($c = $children[0]) eq 'svg:g')) { - my $nmasked = 0; - foreach my $attr ($c->attributes) { - my $key = $attr->getName; - if (($key !~ /^_/) && $collapsible_group_attributes{$key} && defined $nodeattr{$key}) { - $nmasked++; } } - last unless $nmasked == scalar(keys %nodeattr); # child completely masks attr of node - $node->parentNode->insertBefore(shift(@children), $node); # move it outside! - $npopped++; } - # Same story for trailing children, but move behind - while (scalar(@children) && ($document->getNodeQName($c = $children[-1]) eq 'svg:g')) { - my $nmasked = 0; - foreach my $attr ($c->attributes) { - my $key = $attr->getName; - if (($key !~ /^_/) && $collapsible_group_attributes{$key} && defined $nodeattr{$key}) { - $nmasked++; } } - last unless $nmasked == scalar(keys %nodeattr); # child completely masks attr of node - $node->parentNode->insertAfter(pop(@children), $node); - $npushed++; } - # Now remove any redundant svg:g's (same attributes & values) [some left after above] - foreach my $c (@children) { - if ($c && ($document->getNodeQName($c) eq 'svg:g')) { # for every nested svg:g - my $issame = 1; - foreach my $attr ($c->attributes) { - my $key = $attr->getName; -### if (($key !~ /^_/) && ($attr->getValue ne ($nodeattr{$key} || ''))) { - if (($key !~ /^_/) - && (($attr->getValue ne ($nodeattr{$key} || '')) || ($key eq 'transform'))) { - $issame = 0; } } - if ($issame) { # child is completely redundant. - $document->unwrapNodes($c); - $nredundant++; } } } - @children = element_nodes($node) if $nredundant; - # Could check if $node is empty now? 
- # Then if only one left, and it's attributes can be migrated to $node, unwrap it - if ((scalar(@children) == 1) && ($document->getNodeQName($c = $children[0]) eq 'svg:g')) { - my %av = (); - my $mergeable = 1; - foreach my $attr ($c->attributes) { - my $key = $attr->getName; - if (($key =~ /^_/) || $collapsible_group_attributes{$key}) { - $av{$key} = $attr->getValue; } - elsif ($key eq 'transform') { - $av{$key} = ($nodeattr{$key} ? $nodeattr{$key} . ' ' : '') . $attr->getValue; } - else { - $mergeable = 0; } } - if ($mergeable) { - foreach my $key (sort keys %av) { - $nodeattr{$key} = $av{$key}; - $node->setAttribute($key => $av{$key}); } - $nmerged++; - $document->unwrapNodes($c); } } - return; } - -DefConstructor('\hbox BoxSpecification HBoxContents', sub { - # "#2", - no warnings 'recursion'; - my ($document, $spec, $contents, %props) = @_; - my $model = $document->getModel; - my $context = $document->getElement; - my $current = $context; - - # What is the CORRECT (& general) way to ask whether we're in "vertical mode"?? - # my $vmode = $tag eq 'ltx:inline-block'; # ie, explicitly \vbox !?!?!?! - my $issvg = $current && $document->getNodeQName($current) =~ /^svg:/; - my $vmode = $current && $current->getAttribute('_vertical_mode_'); - my $inline = $document->canContain($current, '#PCDATA'); - my $newtag = ($issvg ? 'svg:g' : ($vmode ? ($inline ? 'ltx:inline-block' : 'ltx:p') : 'ltx:text')); - my $node = $document->openElement($newtag, _noautoclose => 1, width => $props{width}); - $document->absorb($contents); - if (!$issvg) { - while (!$document->getElement()->hasAttribute('_beginscope') && $document->maybeCloseElement('svg:g')) { } - $document->maybeCloseElement('svg:svg'); } - if ($issvg) { # ODDLY, svg:g isnt necessarily balanced in tikz? - $document->maybeCloseElement('svg:g'); } - else { - $document->maybeCloseNode($node); } - }, - mode => 'text', bounded => 1, - sizer => '#2', - # Workaround for $ in alignment; an explicit \hbox gives us a normal $. 
- # And also things like \centerline that will end up bumping up to block level! - beforeDigest => sub { reenterTextMode(); }, - - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $spec = $whatsit->getArg(1); - my $box = $whatsit->getArg(2); - if (my $w = GetKeyVal($spec, 'to')) { - $whatsit->setWidth($w); } - elsif (my $s = GetKeyVal($spec, 'spread')) { - $whatsit->setWidth($box->getWidth->add($s)); } - $whatsit->setProperty(content_box => $box); - return; }); - -# Cleanup foreignObjects: remove empty (or only

); and determine size -Tag('svg:foreignObject', autoOpen => 1, autoClose => 1, - afterClose => sub { - my ($document, $node, $whatsit) = @_; - ## NOTE: The revised schema now allows plain text within foreignObject; watch out for mixed - my @fo = $node->childNodes; # What's in the foreignObject? - if (scalar(@fo) == 0) { # Empty? - $document->removeNode($node); # just remove whole thing - return; } - elsif (!grep { $_->nodeType != XML_TEXT_NODE } @fo) { # All text nodes? - $node = $document->renameNode($node, 'svg:text'); - $node->setAttribute(transform => "matrix(1 0 0 -1 0 0)"); - return; } - elsif ((scalar(@fo) == 1) && ($document->getNodeQName($fo[0]) eq 'ltx:p')) { # Single

? - my @p_c = element_nodes($fo[0]); - if (scalar(@p_c) == 0) { # or Empty

? - $document->removeNode($node); - return; } - # Else, single ltx:picture or ltx:text ? - elsif (scalar(@p_c) == 1) { - my $tag = $document->getNodeQName($p_c[0]); - if (($tag eq 'ltx:picture') || ($tag eq 'ltx:text')) { - my @pic_c = element_nodes($p_c[0]); - # With single svg:svg ? - if ((scalar(@pic_c) == 1) && ($document->getNodeQName($pic_c[0]) eq 'svg:svg')) { - $document->replaceNode($node, element_nodes($pic_c[0])); - return; } } } } - # Otherwise, we've still got an svg:foreignObject; - # Make sure we get a size, in case autoOpen'd - if ($whatsit) { - my ($w, $h, $d) = $whatsit->getSize; - my $y = $STATE->lookupDefinition(T_CS('\baselineskip'))->valueOf->pxValue; - my $ht = $h->add($d); - $node->setAttribute(width => $w->pxValue) unless $node->hasAttribute('width'); - $node->setAttribute(height => $ht->pxValue) unless $node->hasAttribute('height'); - $node->setAttribute(transform => "matrix(1 0 0 -1 0 $y)"); - $node->setAttribute(overflow => 'visible'); } }); - -# This attempts to be a generalize vbox construction; -# The idea is to receeive block-like material, possibly wrapped in appropriate -# container which gets attributes. -# The contents are constructed in an ltx:_CaptureBlock_ element, -# designed to accept all reasonable block material from several levels, -# and then determine which container element is most apprpriate for both the conent & context -# from block, logical-block or sectional-block, or the inline- variants. -sub insertBlock { - my ($document, $contents, %blockattr) = @_; - my $model = $document->getModel; - my $context = $document->getElement; # Where we originally start inserting. 
- if (!$context) { - # edge case: if we start the doc with a block, the context is empty - $document->absorb($contents); - return; } - my $context_tag = $document->getNodeQName($context); - my $is_svg = ($context_tag =~ /^svg:/); # svg is slightly tricky - my $ignorable_attr = $is_svg || !scalar(keys %blockattr); # if we do not REQUIRE the attributes - if (($context_tag =~ /^ltx:XM/) && ($context_tag ne 'ltx:XMText')) { # but math always needs this - $context = $document->openElement('ltx:XMText'); - $context_tag = $document->getNodeQName($context); } - my $inline = $is_svg || $document->canContain($context_tag, '#PCDATA'); - my $container = $document->openElement('ltx:_CaptureBlock_', '_vertical_mode_' => 1, %blockattr); - $document->absorb($contents); - my @nodes = $container->childNodes; - my @node_tags = map { $document->getNodeQName($_); } @nodes; - my $nnodes = scalar(@nodes); - $document->closeToNode($container, 1); - $document->closeNode($container); - $document->closeToNode($context, 1); - my $newcontainer; - - if ($nnodes < 1) { # Insertion came up empty? - $document->removeNode($container); } # then remove the new block entirely - elsif ($ignorable_attr # No attributes, contents allowed in context? - && !grep { !$document->canContain($context, $_); } @node_tags) { - $document->unwrapNodes($container); } # No container needed, at all. - elsif (($nnodes == 1) # Single node, allowed in context & accepts attributes - && $document->canContain($context, $nodes[0]) - && ($ignorable_attr || !grep { !$document->canHaveAttribute($nodes[0], $_) } keys %blockattr)) { - map { $document->setAttribute($nodes[0], $_ => $blockattr{$_}) } keys %blockattr; - $document->unwrapNodes($container); } # Add attributes and unwrap the single node - elsif (($nnodes == 1) # Single node, but needs auto-wrapper which accepts attributes? 
- && ($newcontainer = $document->canContainSomehow($context, $nodes[0])) - && ($ignorable_attr || !grep { !$document->canHaveAttribute($newcontainer, $_) } keys %blockattr)) { - $document->renameNode($container, $newcontainer, 1); } # rename the capture to that container - else { # Otherwise, rename the capture - if ($is_svg && grep { $_ =~ /^ltx:/; } @node_tags) { # MAY need foreignObject wrapper - $context = $document->wrapNodes('svg:foreignObject', $container); - $context_tag = $document->getNodeQName($context); } - my @candidates = - ($inline - ? (qw(ltx:inline-block ltx:inline-logical-block ltx:inline-sectional-block)) - : (qw(ltx:block ltx:logical-block ltx:sectional-block ltx:figure))); - my @filtered_candidates = (); # Filtered containers that can contain the content - foreach my $candidate (@candidates) { - push(@filtered_candidates, $candidate) - unless grep { !$document->canContainSomehow($candidate, $_); } @node_tags; } - my @allowed_candidates # and are allowed in the context - = grep { ($document->canContain($context_tag, $_) ? $_ : ()); } @filtered_candidates; - if (my $tag = $allowed_candidates[0] || $filtered_candidates[0]) { - $document->renameNode($container, $tag, 1); } # Rename the capture to the correct container - else { # we didn't know what to do? - Warn('malformed', '_CaptureBlock_', $document, "Did not find a block-like candidate in $context_tag (with attributes (" . join(";", map { "$_=$blockattr{$_}" } keys %blockattr) . 
')'); - $document->renameNode($container, 'ltx:block', 1); - } - } - return @nodes; } - -DefConstructor('\vbox BoxSpecification VBoxContents', sub { - my ($document, $spec, $contents, %props) = @_; - my @block = insertBlock($document, $contents, vattach => 'bottom'); }, - sizer => '#2', - properties => { layout => 'vertical', vattach => 'bottom' }, - mode => 'text', - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $spec = $whatsit->getArg(1); - my $box = $whatsit->getArg(2); - if (my $h = GetKeyVal($spec, 'to')) { - $whatsit->setHeight($h); } - elsif (my $s = GetKeyVal($spec, 'spread')) { - $whatsit->setHeight($box->getHeight->add($s)); } - $whatsit->setProperty(content_box => $box); - return; }); - -DefConstructor('\vtop BoxSpecification VBoxContents', sub { - my ($document, $spec, $contents, %props) = @_; - insertBlock($document, $contents, vattach => 'top'); }, - sizer => '#2', - properties => { layout => 'vertical', vattach => 'baseline' }, - mode => 'text', - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $spec = $whatsit->getArg(1); - my $box = $whatsit->getArg(2); - if (my $h = GetKeyVal($spec, 'to')) { - $whatsit->setHeight($h); } - elsif (my $s = GetKeyVal($spec, 'spread')) { - $whatsit->setHeight($box->getHeight->add($s)); } - $whatsit->setProperty(content_box => $box); - return; }); - -DefParameterType('RuleSpecification', sub { - my ($gullet) = @_; - my $keyvals = LaTeXML::Core::KeyVals->new(undef, undef, skipMissing => 1); - while (my $key = $gullet->readKeyword('width', 'height', 'depth')) { - $keyvals->setValue($key, $gullet->readDimension); } - $keyvals; }, - optional => 1, undigested => 1); - -# \hrule, \vrule are awkward in trying to deal with 3 cases -# * as rules within an alignment/table -# * as separating lines within text -# * as graphical lines within svg -# and each has different requirements for size -DefConstructor('\vrule RuleSpecification', - "?#invisible()(?#isVerticalRule()" - . "(?&inSVG()()" - . 
"()))", - afterConstruct => sub { # NOTE: Only For xy development! - Warn('unexpected', 'vrule', $_[0], "Encountered \\vrule in SVG") if inSVG(); }, - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $dims = $whatsit->getArg(1); - my $width = GetKeyVal($dims, 'width'); # || Dimension('0.4pt'); - my $height = GetKeyVal($dims, 'height'); - my $depth = GetKeyVal($dims, 'depth'); - $whatsit->setProperties( - rwidth => $width, cwidth => $width || Dimension('0.4pt'), - rheight => $height, cheight => ($height), sheight => ($height ? $height->pxValue : 0), - rdepth => $depth, cdepth => ($depth || Dimension(0))); - my $w = ($width ? $width->ptValue : undef); - my $h = ($height ? $height->ptValue : undef); - my $d = ($depth ? $depth->ptValue : undef); - if (my $alignment = LookupValue('Alignment')) { - if (((!defined $h) && (!defined $w)) || ((defined $h) && ($h > 20)) - || ((defined $h) && (defined $w) && ($h > 3 * $w))) { - $whatsit->setProperty(isVerticalRule => 1) } } # Marked as rule within alignment - elsif ((defined $w) && ($w == 0)) { - $whatsit->setProperty(invisible => 1); } - if (my $color = LookupValue('font')->getColor) { - if (!Black->equals($color)) { - $whatsit->setProperty(color => $color); } } - return; }); - -DefConstructor('\hrule RuleSpecification', - "?#isHorizontalRule()" - . "(?&inSVG()()" - . "())", - afterConstruct => sub { # NOTE: Only For xy development! - Warn('unexpected', 'hrule', $_[0], "Encountered \\hrule in SVG") if inSVG(); }, - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $dims = $whatsit->getArg(1); - my $width = GetKeyVal($dims, 'width'); - my $height = GetKeyVal($dims, 'height'); - my $depth = GetKeyVal($dims, 'depth'); - $whatsit->setProperties( - rwidth => $width || '100%', cwidth => $width, swidth => ($width ? $width->pxValue : 0), - rheight => $height || '1px', cheight => ($height || Dimension('0.4pt')), - rdepth => $depth, cdepth => ($depth || Dimension(0))); - my $w = ($width ? 
$width->ptValue : undef); - my $h = ($height ? $height->ptValue : undef); - my $d = ($depth ? $depth->ptValue : undef); - if (my $alignment = LookupValue('Alignment')) { - # What is the intended logic here? - if (((!defined $h) && (!defined $w)) || ((defined $w) && ($w > 20)) - || ((defined $h) && (defined $w) && ($w > 3 * $h))) { - $alignment->addLine('t'); - $whatsit->setProperty(isHorizontalRule => 1) } } # Marked as rule within alignment - if (my $color = LookupValue('font')->getColor) { - if (!Black->equals($color)) { - $whatsit->setProperty(color => $color); } } - return; }); - -#====================================================================== -# Remaining Mode independent primitives in Ch.24, pp.279-280 -# \relax was done as expandable (isn't that right?) -# } -# Note, we don't bother making sure begingroup is ended by endgroup. - -# These define the handler for { } (or anything of catcode BEGIN, END) - -# These are actually TeX primitives, but we treat them as a Whatsit so they -# remain in the constructed tree. -#DefConstructor('{','#body', beforeDigest=>sub{$_[0]->bgroup;}, captureBody=>1); -DefPrimitive('{', sub { - my ($stomach) = @_; - $stomach->bgroup; - my $open = Box(undef, undef, undef, T_BEGIN, isEmpty => 1, alignmentSkippable => 1); - my $ismath = $STATE->lookupValue('IN_MATH'); - my @body = $stomach->digestNextBody(); - List($open, @body, mode => ($ismath ? 'math' : 'text')); }); - -DefPrimitive('}', sub { - my $f = LookupValue('font'); - $_[0]->egroup; - Box(undef, $f, undef, T_END, isEmpty => 1, alignmentSkippable => 1); }); - -# These are for those screwy cases where you need to create a group like box, -# more than just bgroup, egroup, -# BUT you DON'T want extra {, } showing up in any untex-ing. 
# Grouping constructors whose reversion omits the braces:
# \@hidden@bgroup reverts to its captured body only, \@hidden@egroup to nothing.
DefConstructor('\@hidden@bgroup', '#body', beforeDigest => sub { $_[0]->bgroup; }, captureBody => 1,
  reversion => sub { Revert($_[0]->getProperty('body')); });
DefConstructor('\@hidden@egroup', '', afterDigest => sub { $_[0]->egroup; },
  reversion => '');

DefPrimitive('\begingroup', sub { $_[0]->begingroup; });
DefPrimitive('\endgroup',   sub { $_[0]->endgroup; });

# Debugging aids; Ignored!
# \show reports the \meaning of the token (prefixed by "name=" for control sequences)
# followed by the current locator.
DefPrimitive('\show Token', sub {
    my ($stomach, $token) = @_;
    my $stuff  = Invocation(T_CS('\meaning'), $token);
    my $prefix = ($token->getCatcode == CC_CS ? ToString($token) . '=' : '');
    Note("> " . $prefix . writableTokens(Expand($stuff)));
    Note($stomach->getLocator->toString());
    return; });
DefPrimitive('\showbox Number', sub {
    my $n     = $_[1]->valueOf;
    my $stuff = LookupValue('box' . $n);
    Debug("Box $n = " . ToString($stuff));
    undef; });
DefPrimitive('\showlists',     undef);
DefPrimitive('\showthe Token', undef);

# DefPrimitive('\shipout ??
DefPrimitive('\ignorespaces SkipSpaces', undef);

# Digest following tokens, dropping leading boxes that have the isSpace property.
# Stops at the first token whose digestion leaves a non-space box
# (or yields no boxes at all); returns whatever boxes remain.
DefPrimitiveI('\lx@ignorehardspaces', undef, sub {
    my ($stomach) = @_;
    my $gullet = $stomach->getGullet;
    my ($token, @boxes);
    while (($token = $gullet->readXToken) && (@boxes = $stomach->invokeToken($token))) {
      while (@boxes && $boxes[0]->getProperty('isSpace')) {
        shift(@boxes); }
      last if @boxes; }
    return @boxes; });

# \afterassignment saves ONE token (globally!) to execute after the next assignment
DefPrimitive('\afterassignment Token', sub { AssignValue(afterAssignment => $_[1], 'global'); });
# \aftergroup saves ALL tokens (from repeated calls) to be executed IN ORDER after the next egroup or }
DefPrimitive('\aftergroup Token', sub { PushValue(afterGroup => $_[1]); });

# \uppercase, \lowercase
# Map a single token through the uccode table: when the code is defined and non-zero,
# return a new token with that character and the SAME catcode; otherwise the token itself.
sub ucToken {
  my ($token) = @_;
  my $code = $STATE->lookupUCcode($token->getString);
  return ((defined $code) && ($code != 0) ? Token(chr($code), $token->getCatcode) : $token); }

# Same as ucToken, but using the lccode table.
sub lcToken {
  my ($token) = @_;
  my $code = $STATE->lookupLCcode($token->getString);
  return ((defined $code) && ($code != 0) ? Token(chr($code), $token->getCatcode) : $token); }

# Note that these are NOT expandable, even though they "return" tokens!
# (the converted tokens are pushed back onto the gullet to be read again)
DefPrimitive('\uppercase GeneralText', sub {
    my ($stomach, $tokens) = @_;
    $stomach->getGullet->unread(map { ucToken($_) } $tokens->unlist);
    return; });

DefPrimitive('\lowercase GeneralText', sub {
    my ($stomach, $tokens) = @_;
    $stomach->getGullet->unread(map { lcToken($_) } $tokens->unlist);
    return; });

# Converts $tokens to a string in the fashion of \message and others:
# doubles #, converts to string; optionally adds spaces after control sequences
# in the spirit of the B Book, "show_token_list" routine, in 292.
# [This could be a $tokens->unpackParameters, but for the curious space treatment]
sub writableTokens {
  my ($tokens) = @_;
  my @tokens = $tokens->unlist;
  @tokens = map {
    my $cc = $$_[1];    # direct access to the token's catcode slot
    if    ($cc == CC_CS)    { ($_, T_SPACE); }                    # space after control sequences
    elsif ($cc == CC_SPACE) { (T_SPACE); }                        # any space becomes a plain space
    elsif ($cc == CC_PARAM) { ($_, $_); }                         # double the parameter character
    elsif ($cc == CC_ARG)   { (T_PARAM, T_OTHER($$_[0])); }       # argument marker => param char + its string
    else                    { $_; }
  } @tokens;
  return UnTeX(Tokens(@tokens), 1); }

DefPrimitive('\message{}', sub {
    my ($stomach, $stuff) = @_;
    NoteLog(writableTokens(Expand($stuff)));
    return; });

DefRegister('\errhelp' => Tokens());
# Reports the expanded message followed by the expansion of \the\errhelp.
DefPrimitive('\errmessage{}', sub {
    my ($stomach, $stuff) = @_;
    Note(ToString(Expand($stuff)) . ": " . ToString(Expand(Tokens(T_CS('\the'), T_CS('\errhelp')))));
    return; });

# TeX I/O primitives
# Open input streams are kept (globally) as Mouth objects under 'input_file:<port>'.
DefPrimitive('\openin Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub {
    my ($stomach, $port, $filename) = @_;
    $port     = ToString($port);
    $filename = ToString($filename);
    my $key = 'input_file:' . $port;
    # As in TeX, a stream that is already open on this port is closed first.
    # Otherwise a failed \openin would leave the stale mouth in place,
    # and \ifeof would report the state of the PREVIOUS file.
    if (my $previous = LookupValue($key)) {
      $previous->finish;
      AssignValue($key => undef, 'global'); }
    # Rely on FindFile to enforce any access restrictions
    # It's tempting to put noltxml=>1 here, since who would want to read in an .ltxml file's perl?
    # However, \openin is often used by low-level code to check for existence of a file
    # when we SHOULD find an .ltxml version!
    # Hopefully, if they get one, they won't actually try to read its content...
    if (my $path = FindFile($filename)) {
      my $mouth = LaTeXML::Core::Mouth->create($path,
        content => LookupValue($path . '_contents'));
      AssignValue($key => $mouth, 'global'); }
    return; });

DefPrimitive('\closein Number', sub {
    my ($stomach, $port) = @_;
    # close the mouth (if any) and clear the variable
    $port = ToString($port);
    if (my $mouth = LookupValue('input_file:' . $port)) {
      $mouth->finish;
      AssignValue('input_file:' . $port => undef, 'global'); }
    return; });

# \read<port> to <token>: define <token> as a macro (without parameters) whose body is
# the tokens read from the stream, continuing until the mouth reports end of line
# at brace level 0.  Does nothing if the port has no open stream.
DefPrimitive('\read Number SkipKeyword:to SkipSpaces Token', sub {
    my ($stomach, $port, $token) = @_;
    $port = ToString($port);
    if (my $mouth = LookupValue('input_file:' . $port)) {
      # The group confines the following two settings to the duration of the read.
      $stomach->bgroup;
      AssignValue(PRESERVE_NEWLINES => 2);    # Special EOL/EOF treatment for \read
      AssignValue(INCLUDE_COMMENTS  => 0);
      my @tokens = ();
      my ($t, $level) = (undef, 0);
      while ($t = $mouth->readToken) {
        my $cc = $t->getCatcode;
        push(@tokens, $t) unless $cc == CC_MARKER;    # End of line marker
        $level++ if $cc == CC_BEGIN;
        $level-- if $cc == CC_END;
        last     if !$level && $mouth->isEOL; }
      $stomach->egroup;
      DefMacroI($token, undef, Tokens(@tokens), nopackParameters => 1); }
    return; });

# True when the port has no open stream, else the stream's at_eof flag.
DefConditional('\ifeof Number', sub {
    my ($gullet, $port) = @_;
    $port = ToString($port);
    if (my $mouth = LookupValue('input_file:' . $port)) {
      return $$mouth{at_eof}; }
    else {
      return 1; } });

# For output files, we'll write the data to a cached internal copy
# rather than to the actual file system.
-DefPrimitive('\openout Number SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub { - my ($stomach, $port, $filename) = @_; - $port = ToString($port); - $filename = ToString($filename); - AssignValue('output_file:' . $port => $filename, 'global'); - AssignValue($filename . '_contents' => "", 'global'); - return; }); - -DefPrimitive('\closeout Number', sub { - my ($stomach, $port) = @_; - $port = ToString($port); - AssignValue('output_file:' . $port => undef, 'global'); - return; }); - -DefPrimitive('\write Number {}', sub { - my ($stomach, $port, $tokens) = @_; - $port = ToString($port); - if (my $filename = LookupValue('output_file:' . $port)) { - my $handle = $filename . '_contents'; - my $contents = LookupValue($handle); - AssignValue($handle => $contents . UnTeX(Expand($tokens), 1) . "\n", 'global'); } - else { - Note(UnTeX(Expand($tokens))); } - return; }); - -# Since we don't paginate, we're effectively always "shipping out", -# so all operations are \immediate -DefPrimitive('\immediate', undef); - -#====================================================================== -# Remaining semi- Vertical Mode primitives in Ch.24, pp.280--281 - -DefPrimitive('\special {}', sub { - my ($stomach, $arg) = @_; - my $special_str = ToString($arg); - # recognize one special graphics inclusion case - if ($special_str =~ /\bpsfile=(.+?)(?:\s|\})/) { - my $graphic = $1; - RequirePackage('graphicx', searchpaths_only => 1); - my @kv; - for my $prop (qw(voffset hoffset hscale vscale hsize vsize angle)) { - if ($special_str =~ /\b$prop=(.+?)(?:\s|\})/) { - push(@kv, T_OTHER(',')) if @kv; - push(@kv, T_OTHER($prop), T_OTHER("="), T_OTHER($1)); } } - @kv = (T_OTHER("["), @kv, T_OTHER("]")) if @kv; - $stomach->getGullet->unread( - T_CS('\ltx@special@graphics'), @kv, T_BEGIN, T_OTHER($graphic), T_END); } - else { - Info('ignored', 'special', $stomach, 'Unrecognized TeX Special', $arg); } - return; }); -# adapted from graphicx.sty.ltxml -DefKeyVal('SpecialPS', 'angle', ''); 
-DefKeyVal('SpecialPS', 'voffset', ''); -DefKeyVal('SpecialPS', 'hoffset', ''); -DefKeyVal('SpecialPS', 'hsize', ''); -DefKeyVal('SpecialPS', 'vsize', ''); -DefKeyVal('SpecialPS', 'hscale', ''); -DefKeyVal('SpecialPS', 'vscale', ''); -DefConstructor('\ltx@special@graphics OptionalKeyVals:SpecialPS Semiverbatim', - "", - sizer => \&image_graphicx_sizer, - properties => sub { - my ($stomach, $kv, $path) = @_; - $path = ToString($path); $path =~ s/^\s+//; $path =~ s/\s+$//; - $path =~ s/("+)(.+)\g1/$2/; - my $searchpaths = LookupValue('GRAPHICSPATHS'); - my @candidates = pathname_findall($path, types => ['*'], paths => $searchpaths); - if (my $base = LookupValue('SOURCEDIRECTORY')) { - @candidates = map { pathname_relative($_, $base) } @candidates; } - my $options = ''; - if ($kv) { # remap psfile options to includegraphics options: - if (my $hscale = $kv->getValue('hscale')) { - $hscale = $hscale && int(ToString($hscale)) / 100; - $options .= ',' if $options; - $options .= "xscale=$hscale"; } - if (my $vscale = $kv->getValue('vscale')) { - $vscale = $vscale && int(ToString($vscale)) / 100; - $options .= ',' if $options; - $options .= "yscale=$vscale"; } - if (my $hsize = $kv->getValue('hsize')) { - $hsize = ToString($hsize); - $options .= ',' if $options; - $options .= "width=$hsize"; } - if (my $vsize = $kv->getValue('vsize')) { - $vsize = ToString($vsize); - $options .= ',' if $options; - $options .= "height=$vsize"; } - if (my $angle = $kv->getValue('angle')) { - $angle = ToString($angle); - $options .= ',' if $options; - $options .= "angle=$angle"; } - my $voffset = $kv->getValue('voffset') || 0; - $voffset = $voffset && int(ToString($voffset)); - my $hoffset = $kv->getValue('hoffset') || 0; - $hoffset = $hoffset && int(ToString($hoffset)); - if ($voffset || $hoffset) { - my $left = -$hoffset; - my $bottom = -$voffset; - $options .= "," if $options; - $options .= "trim=$left $bottom 0 0,clip=true"; } } - (options => $options, path => $path, candidates => 
join(',', @candidates)); }, - mode => 'text'); -# Since these ultimately generate external resources, it can be useful to have a handle on them. -Tag('ltx:graphics', afterOpen => sub { GenerateID(@_, 'g'); }); - -DefPrimitive('\penalty Number', undef); - -# \kern is heavily used by xy. -# Completely HACK version for the moment -# Note that \kern should add vertical spacing in vertical modes! -DefConstructor('\kern Dimension', sub { - my ($document, $length) = @_; - my $parent = $document->getNode; - if ($document->getNodeQName($parent) eq 'svg:g') { - if (my $x = $length->pxValue) { - # HACK HACK HACK - my $transform = $parent->getAttribute('transform'); - $parent->setAttribute(transform => ($transform ? $transform . ' ' : '') . "translate($x,0)"); - } } - elsif (inSVG()) { - Warn('unexpected', 'kern', $_[0], "Lost kern in SVG " . ToString($length)); } -}); - -DefPrimitiveI('\unpenalty', undef, undef); -DefPrimitiveI('\unkern', undef, undef); -## Worrisome, but... -DefPrimitiveI('\unskip', undef, sub { - my ($stomach) = @_; - my $box; - while (($box = $LaTeXML::LIST[-1]) && IsEmpty($box)) { - pop(@LaTeXML::LIST); } - return; }); - -DefPrimitive('\mark{}', undef); -# \insert<8bit>{} -DefPrimitive('\insert Number', undef); # Just let the insertion get processed(?) -# \vadjust{} -# Note: \vadjust ignores in vertical mode... -DefPrimitive('\vadjust {}', sub { PushValue('vAdjust', $_[1]->unlist); }); - -#====================================================================== -# Remaining Vertical Mode primitives in Ch.24, pp.281--283 -# \vskip, \vfil, \vfill, \vss, \vfilneg -# = \leaders | \cleaders | \xleaders -# = | | -# = \vrule -# = \hrule -# = | -# = width | height | depth - -# Stuff to ignore for now... 
-foreach my $op ('\vfil', '\vfill', '\vss', '\vfilneg', - '\leaders', '\cleaders', '\xleaders') { - DefPrimitiveI($op, undef, undef); } - -# \moveleft, \moveright -DefConstructor('\moveleft Dimension MoveableBox', - "#2", - afterDigest => sub { - $_[1]->setProperty(x => $_[1]->getArg(1)->multiply(-1)); }); -DefConstructor('\moveright Dimension MoveableBox', - "#2", - afterDigest => sub { - $_[1]->setProperty(x => $_[1]->getArg(1)); }); - -# \unvbox<8bit>, \unvcopy<8bit> -DefPrimitive('\unvbox Number', sub { - my $box = 'box' . $_[1]->valueOf; - my $stuff = LookupValue($box); - adjustBoxColor($stuff); - AssignValue($box, undef); - (defined $stuff ? $stuff->unlist : List()); }); - -DefPrimitive('\unvcopy Number', sub { - my $box = 'box' . $_[1]->valueOf; - my $stuff = LookupValue($box); - adjustBoxColor($stuff); - (defined $stuff ? $stuff->unlist : List()); }); - -#====================================================================== -# If this is the right solution... -# then we also should put the desired spacing on a style attribute?!?!?! -DefConstructor('\vskip Glue', sub { - my ($document, $length) = @_; - $length = $length->ptValue; - if ($length > 10) { # Or what!?!?!?! - if ($document->isCloseable('ltx:para')) { - $document->closeElement('ltx:para'); } - elsif ($document->isOpenable('ltx:break')) { - $document->insertElement('ltx:break'); } } - return; }, - properties => sub { (height => $_[1], isSpace => 1, isVerticalSpace => 1, isBreak => 1); }); - -#====================================================================== -# Basic alignment support needed by most environments & commands. -#====================================================================== - -Tag('ltx:td', afterClose => \&trimNodeWhitespace); - -#---------------------------------------------------------------------- -# Primitive column types; -# This is really LaTeX, but the mechanisms are used behind-the-scenes here, too. 
-DefColumnType('|', sub { - $LaTeXML::BUILD_TEMPLATE->addBetweenColumn(T_CS('\vrule'), T_CS('\relax')); return; }); -DefColumnType('l', sub { - $LaTeXML::BUILD_TEMPLATE->addColumn(after => Tokens(T_CS('\hfil'))); return; }); -DefColumnType('c', sub { - $LaTeXML::BUILD_TEMPLATE->addColumn(before => Tokens(T_CS('\hfil')), - after => Tokens(T_CS('\hfil'))); return; }); -DefColumnType('r', sub { - $LaTeXML::BUILD_TEMPLATE->addColumn(before => Tokens(T_CS('\hfil'))); return; }); - -DefColumnType('p{Dimension}', sub { - $LaTeXML::BUILD_TEMPLATE->addColumn( - before => Tokens(T_CS('\vtop'), T_BEGIN, T_CS('\hbox'), - T_LETTER('t'), T_LETTER('o'), $_[1]->revert, T_CS('\relax'), - T_BEGIN), - after => Tokens(T_END, T_END), - vattach => 'top', - align => 'justify', - ); return; }); - -DefColumnType('*{Number}{}', sub { - my ($gullet, $n, $pattern) = @_; - map { $pattern->unlist } 1 .. $n->valueOf; }); - -DefColumnType('@{}', sub { - my ($gullet, $filler) = @_; - $LaTeXML::BUILD_TEMPLATE->disableIntercolumn; - $LaTeXML::BUILD_TEMPLATE->addBetweenColumn($filler->unlist); - $LaTeXML::BUILD_TEMPLATE->disableIntercolumn; - return; }); - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Alignment code -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#---------------------------------------------------------------------- -# This is where ALL alignments start & finish -# This creates the object representing the entire alignment! 
-DefConstructor('\@start@alignment', - "#alignment", - reversion => sub { Revert($_[0]->getProperty('alignment')); }, - sizer => '#alignment', - # beforeDigest => sub { $_[0]->bgroup; }, - afterDigest => sub { - my ($stomach, $whatsit) = @_; - $stomach->bgroup; - if (my $alignment = LookupValue('Alignment')) { - $whatsit->setProperty(alignment => $alignment); - $alignment->setBody($whatsit); - digestAlignmentBody($stomach, $whatsit); } - $stomach->egroup; - return; }); - -# Seems odd to need both end markers here... -DefMacroI('\@finish@alignment', undef, - '\hidden@crcr\@close@alignment'); -DefPrimitive('\@close@alignment', sub { }); - -#====================================================================== -# Low-level bits that appear within alignments or \halign - -DefConstructorI('\cr', undef, "\n"); -DefConstructorI('\crcr', undef, "\n"); -# These are useful for reversion of higher-level macros that use alignment -# internally, but don't use explicit &,\cr in the user markup -DefConstructorI('\hidden@cr', undef, "\n", alias => ''); -DefConstructorI('\hidden@crcr', undef, "\n", alias => ''); -DefConstructorI('\hidden@align', undef, "", alias => ''); - -# Handled directly in alignments, but must be defined as non-macros -DefPrimitiveI('\noalign', undef, sub { - $_[0]->bgroup; - Error('unexpected', '\noalign', $_[0], "\\noalign cannot be used here"); - Let(T_ALIGN, T_CS('\relax')); - Let(T_CS('\noalign'), T_CS('\relax')); - Let(T_CS('\omit'), T_CS('\relax')); - Let(T_CS('\span'), T_CS('\relax')); - return; }); -DefPrimitiveI('\omit', undef, sub { - Error('unexpected', '\omit', $_[0], "\\omit cannot be used here"); - $_[0]->bgroup; - Let(T_ALIGN, T_CS('\relax')); - Let(T_CS('\noalign'), T_CS('\relax')); - Let(T_CS('\omit'), T_CS('\relax')); - Let(T_CS('\span'), T_CS('\relax')); - return; }); -DefPrimitiveI('\span', undef, sub { - $_[0]->bgroup; - Error('unexpected', '\span', $_[0], "\\span cannot be used here"); - Let(T_ALIGN, T_CS('\relax')); - 
Let(T_CS('\noalign'), T_CS('\relax')); - Let(T_CS('\omit'), T_CS('\relax')); - Let(T_CS('\span'), T_CS('\relax')); - return; }); - -######### -# Support for \\[dim] .... TO BE WORKED OUT! -# NOTE that this does NOT skip spaces before * or []!!!!! -# As if: \@alignment@newline OptionalMatch:* [Dimension] -# Read arguments for \\, namely * and/or [Dimension] -# BUT optionally do it while skipping spaces (latex style) or not (ams style) -sub readNewlineArgs { - my ($gullet, $skipspaces) = @_; - my $alignment = $STATE->lookupValue('Alignment'); - local $LaTeXML::ALIGN_STATE = 1000000; - $gullet->skipSpaces if $skipspaces; - my $next = $gullet->readToken; - my ($star, $optional); - if ($next && $next->equals(T_OTHER('*'))) { - $star = 1; - $gullet->skipSpaces if $skipspaces; - $next = $gullet->readToken; } - if ($next && $next->equals(T_OTHER('['))) { - $optional = $gullet->readUntil(T_OTHER(']')); - $next = undef; } - $gullet->unread($next) if $next; - return ($star, $optional); } - -# VERY tricky (and mostly Wrong). -# The issue is for \\ to look ahead for * and [], -# Eventually we'll expand into \cr (which should be preceded by the RHS of the template) -# BUT it should NOT trigger the template if it bumps into a & -# which happens when the 1st column of an alignment is empty. -# In proper LaTeX this is inhibited by a curious construct -# {\ifnum0='} -# and possibly by proper tracking of a Master Counter !?!?!? -# But we're not there (yet) - -# This is the internal macro for \\[dim] used by LaTeX for various arrays, tabular, etc -DefMacroI('\@alignment@newline', undef, sub { - my ($gullet) = @_; - my ($star, $optional) = readNewlineArgs($gullet, 1); - return (T_CS('\hidden@cr'), T_BEGIN, - ($optional - ? (T_CS('\@alignment@newline@markertall'), T_BEGIN, $optional, T_END) - : T_CS('\@alignment@newline@marker')), - T_END); }); -# However, the above will skip spaces --AND a newline! 
-- looking for [], -# which is kinda weird in math, since there may be a reasonable math [ in the 1st column! -# AMS kindly avoids that, by using a special version of \\ -DefMacroI('\@alignment@newline@noskip', undef, sub { - my ($gullet) = @_; - my ($star, $optional) = readNewlineArgs($gullet); - return (T_CS('\hidden@cr'), T_BEGIN, - ($optional - ? (T_CS('\@alignment@newline@markertall'), T_BEGIN, $optional, T_END) - : T_CS('\@alignment@newline@marker')), - T_END); }); - -# These are the markers that produce \\ in the reversion, -# and (eventually will) add vertical space to the row! -DefConstructor('\@alignment@newline@marker', '', - reversion => Tokens(T_CS("\\\\"), T_CR)); -# AND add the spacing to the alignment!!! -DefConstructor('\@alignment@newline@markertall {Dimension}', '', - afterDigest => sub { - if (my $alignment = LookupValue('Alignment')) { - $alignment->currentRow->{padding} = $_[1]->getArg(1); } - return; }, - reversion => sub { - Tokens(T_CS("\\\\"), T_OTHER('['), Revert($_[1]), T_OTHER(']'), T_CR); }); - -DefMacroI('\tabularnewline', undef, '\cr'); # ??? - -# \lx@intercol is our replacement for LaTeX's \@acol which places intercolumn space in tabular -# (but NOT used by TeX's \halign!) -DefMacro('\lx@intercol', ''); -# Candidates for binding \lx@intercol for LaTeX tabular or math arrays -# These provide "padding" of half tabcolsep, since added before & after columns -# [these could be \hskip\tabcolsep, but the expansion confounds trimColumnSpec] -DefConstructor('\lx@text@intercol', sub { - my ($document, %props) = @_; - $document->absorb(DimensionToSpaces($props{width})); }, - reversion => '\lx@intercol', - properties => sub { - my $defn; - my $w = (($defn = $STATE->lookupDefinition(T_CS('\tabcolsep'))) && $defn->isRegister - ? $defn->valueOf : Dimension(0)); - (width => $w, isSpace => 1); }); -DefConstructor('\lx@math@intercol', "", # mspace ??? 
- reversion => '\lx@intercol', - properties => sub { - my $defn; - my $w = (($defn = $STATE->lookupDefinition(T_CS('\arraycolsep'))) && $defn->isRegister - ? $defn->valueOf : Dimension(0)); - (width => $w, isSpace => 1); }); - -#====================================================================== -# Various decorations within alignments, rules, headers, etc - -# Like \noalign, takes an arg; handled within alignment processing. -# But doesn't create a pseudo-row (??? Or does it?; is it still needed?) -DefConstructor('\hidden@noalign{}', '#1', - reversion => '', - properties => sub { - # Sometimes, we're smuggling stuff that needs to be carried into the XML. - my $preserve = grep { $_->getProperty('alignmentPreserve'); } $_[1]->unlist; - (alignmentSkippable => 1, alignmentPreserve => $preserve); }); - -DefMacro('\hline', '\noalign{\@@alignment@hline}'); -DefConstructorI('\@@alignment@hline', undef, '', - afterDigest => sub { - if (my $alignment = LookupValue('Alignment')) { - $alignment->addLine('t'); } - return; }, - properties => { isHorizontalRule => 1 }, - sizer => 0, alias => '\hline'); - -DefMacroI('\@tabular@begin@heading', undef, sub { - my $alignment = LookupValue('Alignment'); - $$alignment{in_tabular_head} = 1; - return; }); -DefMacroI('\@tabular@end@heading', undef, sub { - my $alignment = LookupValue('Alignment'); - $$alignment{in_tabular_head} = 0; - return; }); - -#====================================================================== -# Math mode in alignment -# Special forms for $ appearing within alignments. -# Note that $ within a math alignment (eg array environment), -# switches to text mode! There's no $$ for display math. - -# This is the "normal" case: $ appearing with an alignment that is in text mode. -# It's just like regular $, except it doesn't look for $$ (no display math). -DefPrimitiveI('\@dollar@in@textmode', undef, sub { - no warnings 'recursion'; - $_[0]->invokeToken(T_CS((LookupValue('IN_MATH') ? 
'\@@ENDINLINEMATH' : '\@@BEGININLINEMATH'))); }); - -# This one is for $ appearing within an alignment that's already math. -# This should switch to text mode (because it's balancing the hidden $ -# wrapping each alignment cell!!!!!!) -# However, it should be like a normal $ if it's inside something like \mbox -# that itself makes a text box!!!!!! -# Thus, we need to know at what boxing level we started the last math or text. -# This is all complicated by the need to know _how_ we got into or out of math mode! -# Gawd, this is awful! -# NOTE: Probably the most "Right" thing to do would be to process -# alignments in text mode only (like TeX), sneaking $'s in where needed, -# but then afterwards, morph them into math arrays? -# This would be complicated by the need to hide these $ from untex. -DefPrimitiveI('\@dollar@in@mathmode', undef, sub { - my ($stomach) = @_; - my $level = $stomach->getBoxingLevel; - if ((LookupValue('MATH_ALIGN_$_BEGUN') || 0) == $level) { # If we're begun making _something_ with $. - my @l = (); - if (LookupValue('IN_MATH')) { # But we're somehow in math? - @l = $stomach->invokeToken(T_CS('\@@ENDINLINEMATH')); } - else { - @l = $stomach->invokeToken(T_CS('\@@ENDINLINETEXT')); } - AssignValue('MATH_ALIGN_$_BEGUN' => 0); # Reset this AFTER finishing the something - @l; } - else { - AssignValue('MATH_ALIGN_$_BEGUN' => $level + 1); # Note that we've begun something - if (LookupValue('IN_MATH')) { # If we're "still" in math - $stomach->invokeToken(T_CS('\@@BEGININLINETEXT')); } - else { - $stomach->invokeToken(T_CS('\@@BEGININLINEMATH')); } } }); - -DefConstructorI('\@@BEGININLINETEXT', undef, - "" - . "#body" - . 
"", - alias => T_MATH, beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1); -DefConstructorI('\@@ENDINLINETEXT', undef, "", alias => T_MATH, - beforeDigest => sub { $_[0]->endMode('text'); }); - -DefPrimitiveI('\@LTX@nonumber', undef, sub { AssignValue(EQUATIONROW_NUMBER => 0, 'global'); }); - -DefMacroI('\hidewidth', undef, Tokens()); - -#====================================================================== -# Multicolumn support -DefMacro('\multispan{Number}', sub { - my ($gullet, $span) = @_; - $span = $span->valueOf; - (T_CS('\omit'), map { (T_CS('\span'), T_CS('\omit')) } 1 .. $span - 1); }); - -DefRegisterI('\@alignment@ncolumns', undef, Dimension(0), - getter => sub { - if (my $alignment = LookupValue('Alignment')) { - Number(scalar($alignment->getTemplate->columns)); } - else { Number(0); } }); -DefRegisterI('\@alignment@column', undef, Dimension(0), - getter => sub { - if (my $alignment = LookupValue('Alignment')) { - Number($alignment->currentColumnNumber); } - else { Number(0); } }); - -DefMacro('\@multicolumn {Number} AlignmentTemplate {}', sub { - my ($gullet, $span, $template, $tokens) = @_; - my $column = $template->column(1); - $span = $span->valueOf; - # First part, like \multispan - (T_CS('\omit'), (map { (T_CS('\span'), T_CS('\omit')) } 1 .. $span - 1), - # Next part, just put the template in-line, since it's only used once. - ($column ? beforeCellUnlist($$column{before}) : ()), - $tokens->unlist, - ($column ? afterCellUnlist($$column{after}) : ())); }); - -DefConditionalI('\if@in@alignment', undef, sub { LookupValue('Alignment'); }); - -DefPrimitive('\@alignment@bindings AlignmentTemplate []', sub { - my ($stomach, $template, $mode) = @_; - alignmentBindings($template, $mode); }); - -# Utility, not really TeX, but used by LaTeX, AmSTeX... -# Convert a vertical positioning, optional argument. -# t = "top", b = "bottom"; default is "middle". -# Note that the default for vattach attribute is "baseline". 
-sub translateAttachment { - my ($pos) = @_; - $pos = ($pos ? ToString($pos) : ''); - return ($pos eq 't' ? 'top' : ($pos eq 'b' ? 'bottom' : 'middle')); } # undef meaning 'baseline' - -# This trims trailing whitespace from the current digested list, -# for use within latex tabular-style columns. -# But note that \halign does NOT remove this trailing space! -DefPrimitiveI('\lx@column@trimright', undef, sub { - my $box; - my @save = (); - my $s; - while ($box = $LaTeXML::LIST[-1]) { - if ($box->getProperty('alignmentSkippable') - || $box->getProperty('isFill') - || IsEmpty($box)) { - push(@save, pop(@LaTeXML::LIST)); } - elsif (ref $box eq 'LaTeXML::Core::List') { # Unwrap and continue - pop(@LaTeXML::LIST); - push(@LaTeXML::LIST, $box->unlist); } - elsif ((ref $box eq 'LaTeXML::Core::Box') - && defined($s = $box->getString) && ($s =~ /^\s*$/)) { - pop(@LaTeXML::LIST); } # remove any box containing only spaces - else { - last; } } - push(@LaTeXML::LIST, @save); - return; }); - -use constant T_hfil => T_CS('\hfil'); -# Yet more special case hacking. Sometimes the order of tokens works for -# TeX, but confuses us... In particular the order of $ and \hfil! 
-sub beforeCellUnlist { - my ($tokens) = @_; - return () unless $tokens; - my @toks = $tokens->unlist; - my @new = (); - while (my $t = shift(@toks)) { - if ($t->defined_as(T_MATH) && @toks && $toks[0]->defined_as(T_hfil)) { - push(@new, shift(@toks)); unshift(@toks, $t); } - else { - push(@new, $t); } } - return @new; } - -sub afterCellUnlist { - my ($tokens) = @_; - return () unless $tokens; - my @toks = $tokens->unlist; - my @new = (); - while (my $t = pop(@toks)) { - if ($t->defined_as(T_MATH) && @toks && $toks[-1]->defined_as(T_hfil)) { - unshift(@new, pop(@toks)); push(@toks, $t); } - else { - unshift(@new, $t); } } - return @new; } - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Now, for \halign itself -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# See \@@LTX@noalign for some \noalign cases -# See \@multicolumn for cases of \span,\omit -# See alignmentBindings for default bindings -# But also see others for different handling of (eg) open@row, etc. -# Probably we have to handle these cases by more generic default code -# and appropriate tweaks of alignment data???? - -# Algorithm: -# open@alignment -# Loop while read_column -#====================================================================== -DefConstructor('\halign BoxSpecification', - "#alignment", - reversion => sub { - my ($whatsit, $spec) = @_; - my $template = $whatsit->getProperty('template'); - my $alignment = $whatsit->getProperty('alignment'); - Tokens(T_CS('\halign'), Revert($spec), T_BEGIN, Revert($template), T_CS('\cr'), - Revert($alignment), T_END); }, - bounded => 1, - # sizer => '#1', - sizer => sub { $_[0]->getProperty('alignment')->getSize; }, - afterDigest => sub { - my ($stomach, $whatsit) = @_; - $stomach->bgroup; # This will be closed by the \halign's closing } (or will it?) 
- my $template = parseHAlignTemplate($stomach->getGullet, $whatsit); - my $spec = $whatsit->getArg(1); - alignmentBindings($template, undef, - attributes => { width => orNull(GetKeyVal($spec, 'to')) }); - digestAlignmentBody($stomach, $whatsit); - $stomach->egroup; - $LaTeXML::ALIGN_STATE--; # Balance the opening { OUTSIDE of the masking of ALIGN_STATE - return; }); - -# Parse an \halign style alignment template from Gullet -sub parseHAlignTemplate { - my ($gullet, $whatsit) = @_; - my $t = $gullet->readNonSpace; - Error('expected', '\bgroup', $gullet, "Missing \\halign box") unless $t->defined_as(T_BEGIN); - my $before = 1; # true if we're before a # in current column - my @pre = (); - my @post = (); - my @cols = (); - my $repeated = 0; - my @nonreps = (); - my $tabskip = LookupRegister('\tabskip'); - my $nexttabskip = $tabskip; - my @tokens = (); - ## Only expand certain things; See TeX book p.238 - local $LaTeXML::ALIGN_STATE = 1000000; - while ($t = $gullet->readToken) { - my $cc = $t->getCatcode; - if ($t->equals(T_CS('\tabskip'))) { # Read the tabskip assignment - $gullet->readKeyword('='); - $nexttabskip = $gullet->readGlue; } - elsif ($t->equals(T_CS('\span'))) { # ex-span-ded next token. - $gullet->unread($gullet->readXToken(0)); } - elsif ($cc == CC_PARAM) { # Found the template's column slot - $before = 0; - push(@tokens, $t); } - elsif (($cc == CC_ALIGN) - || $t->equals(T_CS('\cr')) || $t->equals(T_CS('\crcr'))) { # End the column - if ($before) { # Leading & ? - $repeated = 1; - @nonreps = @cols; @cols = (); } # A & while we're before a column means Repeated columns - else { # Finished column spec; add it - ## How should we be handling tabskip? An attribute on the cell or spacing? 
- push(@cols, { - tabskip => $tabskip, - before => Tokens(beforeCellUnlist(Tokens(@pre))), - after => Tokens(afterCellUnlist(Tokens(@post))) }); - $tabskip = $nexttabskip; - @pre = @post = (); $before = 1; } - last unless $cc == CC_ALIGN; - push(@tokens, $t); } - elsif ($before) { # Other random tokens go into the column's pre-template - push(@pre, $t) if @pre || ($cc != CC_SPACE); - push(@tokens, $t); } - else { # Or the post-template - push(@post, $t) if @post || ($cc != CC_SPACE); - push(@tokens, $t); } } - # Now create & return the template object - my $template = LaTeXML::Core::Alignment::Template->new( - ($repeated - ? (columns => [@nonreps], repeated => [@cols]) - : (columns => [@cols])), - tokens => [@tokens]); - $whatsit->setProperty(template => $template); - return $template; } - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# And the general alignment processing. -# If the Template is appropriately constructed, either by \halign or various \begin{tabular} -# the body of the alignment is processed the same way. - -sub alignmentBindings { - my ($template, $mode, %properties) = @_; - $mode = LookupValue('MODE') unless $mode; - my $ismath = $mode =~ /math$/; - my $container = ($ismath ? 'ltx:XMArray' : 'ltx:tabular'); - my $rowtype = ($ismath ? 'ltx:XMRow' : 'ltx:tr'); - my $coltype = ($ismath ? 'ltx:XMCell' : 'ltx:td'); - my $alignment = LaTeXML::Core::Alignment->new( - template => $template, - openContainer => sub { $_[0]->openElement($container, @_[1 .. $#_]); }, - closeContainer => sub { $_[0]->closeElement($container); }, - openRow => sub { $_[0]->openElement($rowtype, @_[1 .. $#_]); }, - closeRow => sub { $_[0]->closeElement($rowtype); }, - openColumn => sub { $_[0]->openElement($coltype, @_[1 .. $#_]); }, - closeColumn => sub { $_[0]->closeElement($coltype); }, - isMath => $ismath, - properties => {%properties}); - AssignValue(Alignment => $alignment); - Debug("Halign $alignment: New " . 
$template->show) if $LaTeXML::DEBUG{halign}; - Let(T_MATH, ($ismath ? '\@dollar@in@mathmode' : '\@dollar@in@textmode')); - return; } - -DefMacroI('\@row@before', undef, undef); -DefMacroI('\@row@after', undef, undef); -DefMacroI('\@column@before', undef, undef); -DefMacroI('\@column@after', undef, undef); - -sub pRevert { - my ($arg) = @_; - local $LaTeXML::DUAL_BRANCH = 'presentation'; - return Revert($arg); } - -sub cRevert { - my ($arg) = @_; - local $LaTeXML::DUAL_BRANCH = 'content'; - return Revert($arg); } - -use constant T_close_alignment => T_CS('\@close@alignment'); - -sub digestAlignmentBody { - my ($stomach, $whatsit) = @_; - my $gullet = $stomach->getGullet; - local $LaTeXML::ALIGN_STATE = 0; - # Now read & digest the body. - # Note that the body MUST end with a \cr, and that we've made Special Arrangments - # with \alignment@cr to recognize the end of the \halign - my $alignment = LookupValue('Alignment'); - local $LaTeXML::READING_ALIGNMENT = $alignment; - if (!$alignment) { - Error('missing', 'alignment', $stomach, "There is no open alignment structure here"); - return; } - $whatsit->setProperty(alignment => $alignment); - $alignment->setBody($whatsit); - Debug("Halign $alignment: BODY Processing...") if $LaTeXML::DEBUG{halign}; - my $lastwascr = undef; - my @reversion = (); - my @creversion = (); - while (1) { - my ($cell, $next, $type, $hidden) = digestAlignmentColumn($stomach, $alignment, $lastwascr); - Debug("Halign $alignment: BODY got CELL" - . "[" . $alignment->currentRowNumber . "," . $alignment->currentColumnNumber . "]" - . ToString($cell) . " ended at " . 
Stringify($next)) if $LaTeXML::DEBUG{halign}; - if (!$cell) { - Debug("Halign $alignment: BODY DONE!") if $LaTeXML::DEBUG{halign}; - last; } - if ($cell) { - push(@reversion, trimColumnTemplate($alignment, pRevert($cell))); - push(@creversion, trimColumnTemplate($alignment, cRevert($cell))); } - extractAlignmentColumn($alignment, $cell); - $lastwascr = undef; - if (!$type && (!$next - || $next->defined_as(T_END) # End of alignment - || $next->defined_as(T_close_alignment))) { # End of alignment - $alignment->endRow(); - last; } - elsif ($type eq 'align') { - $alignment->endColumn(); - if (!$hidden) { - push(@reversion, $next); # and record the & - push(@creversion, $next); } } # and record the & - elsif ($type eq 'insert') { - $alignment->endColumn(); } - elsif (($type eq 'cr') || ($type eq 'crcr')) { - $alignment->endRow(); - if (!$hidden) { - push(@reversion, $next); - push(@creversion, $next); } - elsif ($type eq 'cr') { - my $arg = $stomach->digest($gullet->readArg()); - push(@reversion, pRevert($arg)); - push(@creversion, cRevert($arg)); } - elsif ($type eq 'crcr') { } - $lastwascr = 1; } # Note, in case next is \crcr - elsif ($next) { - Error('unexpected', $next, $stomach, "Column ended with " . Stringify($next)); } } - $alignment->endRow(); - $alignment->setReversion(Tokens(@reversion)); - $alignment->setContentReversion(Tokens(@creversion)); - Debug("Halign $alignment: BODY DONE!\n" - . "=> " . join(',', map { Stringify($_); } @reversion)) if $LaTeXML::DEBUG{halign}; - return; } - -use constant T_crcr => T_CS('\crcr'); -use constant T_hidden_crcr => T_CS('\hidden@crcr'); -use constant T_omit => T_CS('\omit'); -use constant T_noalign => T_CS('\noalign'); -use constant T_hidden_noalign => T_CS('\hidden@noalign'); - -# Read & digest an alignment column's data, -# accommodating the current template and any special cs's -# Returns the column's digested boxes, the ending token, and it's alignment type. 
# Arguments: the stomach doing the digesting, the alignment being filled, and
# $lastwascr (true right after a \cr, judging by how it is used below).
# Returns a 4-element list (cell, token, type, hidden):
#   cell   - List of the boxes digested for this column (undef at end of alignment)
#   token  - the token that ended the column (or ended the alignment)
#   type   - the column-end type reported by $gullet->isColumnEnd, or 'cr' for \noalign
#   hidden - the "hidden" flag reported by $gullet->isColumnEnd
sub digestAlignmentColumn {
  my ($stomach, $alignment, $lastwascr) = @_;
  my $gullet = $stomach->getGullet;
  my $ismath = $STATE->lookupValue('IN_MATH');
  # Boxes digested for this column accumulate here (dynamically scoped).
  local @LaTeXML::LIST = ();
  # Scan for leading \omit, skipping over (& saving) \hline.
  Debug("Halign $alignment: COLUMN starting scan "
      . "(" . ($ismath ? " math" : " text") . ")") if $LaTeXML::DEBUG{halign};
  my $token;
  my $spanning = 0;    # set when the previous pass ended in a 'span' column end
  while (1) {          # Outer loop; collects 1 column (possibly multiple spans) return from within!
    ## Scan till we get something NOT \omit, \noalign
    while ($token = $gullet->readXToken(0)) {
      if ($token->equals(T_SPACE)            # Skip leading space.
        || $token->equals(T_CS('\par'))      # Skip or blank line(?)
        || ($lastwascr &&                    # Or \crcr following a \cr
          ($token->defined_as(T_crcr) || $token->defined_as(T_hidden_crcr)))) {
      }
      elsif ($token->defined_as(T_omit)) {    # \omit removes template for this column.
        Debug("Halign $alignment: OMIT at " . Stringify($token)) if $LaTeXML::DEBUG{halign};
        $alignment->startRow() unless $$alignment{in_row};
        $alignment->omitNextColumn; }
      elsif ($token->defined_as(T_noalign)) {    # \puts something in vertical list
        Debug("Halign $alignment: noalign at " . Stringify($token)) if $LaTeXML::DEBUG{halign};
        $alignment->endRow() if $$alignment{in_row};
        $alignment->startColumn(1);
        $alignment->lastColumn;
        my $r = $stomach->digest($gullet->readArg);
        $alignment->endRow();
        # NB: `return` takes the whole comma list, so this yields the usual
        # four values: ($r, \cr, 'cr', undef).
        return ($r, T_CS('\cr'), 'cr'), undef; }    # Pretend this is a whole row???
      elsif ($token->defined_as(T_hidden_noalign)) {    # \puts something in vertical list
        Debug("Halign $alignment: COLUMN invisible noalign") if $LaTeXML::DEBUG{halign};
        push(@LaTeXML::LIST, $stomach->invokeToken($token)); }
      else {
        last; } }
    Debug("Halign $alignment: COLUMN end scan at " . Stringify($token)) if $LaTeXML::DEBUG{halign};
    # Out of input, or at the token that closes the alignment: no cell to report.
    if (!$token || $token->defined_as(T_END) || $token->defined_as(T_close_alignment)) {
      return (undef, $token, undef, undef); }
    # Next column, unless spanning (then combine columns)
    if ($spanning) {
      $spanning = 0;
      $alignment->nextColumn; }
    else {
      $alignment->startColumn(); }
    # Push before template, Marker and put the token back
    Debug("Halign $alignment: COLUMN preload at "
        . Stringify(Tokens($alignment->getColumnBefore, T_MARKER('before-column'), $token)))
      if $LaTeXML::DEBUG{halign};
    $gullet->unread($alignment->getColumnBefore, T_MARKER('before-column'), $token);
    # Digest tokens until the gullet reports that one of them ends the column.
    while ($token = $gullet->readXToken(0)) {
      my ($atoken, $type, $hidden) = $gullet->isColumnEnd($token);
      if ($atoken) {
        if ($type eq 'span') {    # next column, but continue accumulating
          Debug("Halign $alignment: COLUMN span") if $LaTeXML::DEBUG{halign};
          $spanning = 1;
          last; }
        else {
          Debug("Halign $alignment: COLUMN ended with " . Stringify($token) . "\n"
              . " => " . ToString(List(@LaTeXML::LIST))) if $LaTeXML::DEBUG{halign};
          return (List(@LaTeXML::LIST, mode => ($ismath ? 'math' : 'text')),
            $token, $type, $hidden); } }
      elsif ($token->defined_as(T_hidden_noalign)) {    # \puts something in vertical list
        Debug("Halign $alignment: COLUMN invisible noalign") if $LaTeXML::DEBUG{halign};
        push(@LaTeXML::LIST, $stomach->invokeToken($token)); }
      else {    # Else, we're getting some actual content for the column
        Debug("Halign $alignment: COLUMN invoking " . Stringify($token)) if $LaTeXML::DEBUG{halign};
        push(@LaTeXML::LIST, $stomach->invokeToken($token));
        Debug("Halign $alignment: COLUMN " . Stringify($token) . " ==> " . Stringify(List(@LaTeXML::LIST)))
          if $LaTeXML::DEBUG{halign};
      } } }
  # Not reached: the loop above only exits through its return statements.
  return; }

# This attempts to trim off the column template parts from contents of the full column,
# leaving only the author supplied part for a sensible reversion.
# It's not nearly clever enough, given that macros can be in the template,
# but works surprisingly well so far.
# A better alternative might be based on sneaking some Marker tokens/boxes through
# but they would likely interfere with the macros themselves.
# Arguments: the alignment, followed by the (reverted) tokens of one column.
# Returns a Tokens object holding whatever survives the trimming.
sub trimColumnTemplate {
  my ($alignment, @tokens) = @_;
  # Rows flagged as pseudorow are returned untrimmed.
  return Tokens(@tokens) if $alignment->currentRow->{pseudorow};
  my @pre  = $alignment->getColumnBefore->unlist;
  my @post = $alignment->getColumnAfter->unlist;
  Debug("Halign $alignment: COLUMN Compare:\n"
      . " Column: " . ToString(Tokens(@tokens)) . "\n"
      . " Before: " . ToString(Tokens(@pre)) . "\n"
      . " After : " . ToString(Tokens(@post)) . "\n") if $LaTeXML::DEBUG{halign};
  # Walk ALL of the before-template; drop the column's leading token whenever
  # it equals the current template token (a mismatch does not stop the walk).
  while (scalar(@pre) && scalar(@tokens)) {
    my $t = shift(@pre);
    if ($t->equals($tokens[0])) {
      shift(@tokens); } }
  # Likewise from the right, walking the after-template backwards.
  while (scalar(@post) && scalar(@tokens)) {
    my $t = pop(@post);
    if ($t->equals($tokens[-1])) {
      pop(@tokens); } }
  Debug(" Trimmed: " . ToString(Tokens(@tokens))) if $LaTeXML::DEBUG{halign};
  return Tokens(@tokens); }

# Given the boxes for an alignment cell,
# extract & remove the various fills and rules from the ends to annotate the cell structure
# Returns the List of boxes that remain; the same List is stored in the column
# spec along with align, border, lspaces, rspaces and colspan.
sub extractAlignmentColumn {
  my ($alignment, $boxes) = @_;
  return () unless $alignment;    # ??
  # Note: $n0,$n1 is a VERY round-about way of tracking the column spanning!
  my $ismath  = $STATE->lookupValue('IN_MATH');    # NOTE(review): not used below
  my $n0      = (LookupValue('alignmentStartColumn') || 0) + 1;    # first column of this cell
  my $n1      = $alignment->currentColumnNumber;                   # last column of this cell
  my $colspec = $alignment->getColumn($n0);
  my $align   = $$colspec{align} || 'left';
  my $border  = '';
  # Peel off any boxes from both sides until we get the "meat" of the column.
  # from this we can establish borders, alignment and emptiness.
  # But we, of course, immediately put them back...
  my @boxes     = $boxes->unlist;
  my @saveleft  = ();    # skippable boxes peeled from the left, restored later
  my @saveright = ();    # skippable boxes peeled from the right, restored later
  my (@lspaces, @rspaces);
  # The column's tabskip, if any, is counted as leading space.
  if (my $skip = $$colspec{tabskip}) {
    push(@lspaces, Digest(Tokens(T_CS('\hskip'), $skip->revert, T_CS('\relax')))); }
  # Left edge.
  while (@boxes) {
    if (ref $boxes[0] eq 'LaTeXML::Core::List') {    # flatten a nested List in place
      unshift(@boxes, shift(@boxes)->unlist); }
    elsif ($boxes[0]->getProperty('isFill')) {       # fill on the left pushes content right
      $align = 'right';
      shift(@boxes);
      last; }
    elsif ($boxes[0]->getProperty('isVerticalRule')) {
      $border .= 'l';
      if (my $prev = $alignment->getColumn($n0 - 1)) {    # space before | ? move to previous column
        $$prev{rspaces} = List(($$prev{rspaces} || ()), @lspaces) if @lspaces; }
      @lspaces = ();    # then discard
      shift(@boxes); }
    elsif ($boxes[0]->getProperty('isSpace')) {
      push(@lspaces, shift(@boxes)); }
    elsif ($boxes[0]->getProperty('isHorizontalRule')
      || $boxes[0]->getProperty('alignmentSkippable')
      || (ref $boxes[0] eq 'LaTeXML::Core::Comment')) {
      push(@saveleft, shift(@boxes)); }
    else {
      last; } }
  # Right edge, mirroring the loop above.
  while (@boxes) {
    if (ref $boxes[-1] eq 'LaTeXML::Core::List') {
      push(@boxes, pop(@boxes)->unlist); }
    elsif ($boxes[-1]->getProperty('isFill')) {
      # Fill on both sides means centered; a right-hand fill alone leaves $align as it was.
      if ($align eq 'right') { $align = 'center'; }
      pop(@boxes);
      last; }
    elsif ($boxes[-1]->getProperty('isVerticalRule')) {
      $border .= 'r';
      @rspaces = ();    # discard spacing after rule!!! (should save for next column?)
      pop(@boxes); }
    elsif ($boxes[-1]->getProperty('isSpace')) {
      unshift(@rspaces, pop(@boxes)); }
    elsif ($boxes[-1]->getProperty('isHorizontalRule')
      || $boxes[-1]->getProperty('alignmentSkippable')
      || (ref $boxes[-1] eq 'LaTeXML::Core::Comment')) {
      unshift(@saveright, pop(@boxes)); }
    else {
      last; } }
  delete $$colspec{width} unless $align eq 'justify';
  # Replacing boxes with the fil padding & vertical rules stripped off
  @boxes = (@saveleft, @boxes, @saveright);
  $boxes = List(@boxes, mode => ($boxes->isMath ? 'math' : 'text'));
  # record relevant info in the Alignment.
  $$colspec{align}   = $align;
  $$colspec{border}  = $border = ($$colspec{border} || '') . $border;
  $$colspec{boxes}   = $boxes;
  $$colspec{lspaces} = List(@lspaces) if @lspaces;
  $$colspec{rspaces} = List(@rspaces) if @rspaces;
  $$colspec{colspan} = $n1 - $n0 + 1;

  if ($$alignment{in_tabular_head} || $$alignment{in_tabular_foot}) {
    $$colspec{thead}{column} = 1; }
  # Columns covered by this cell's span (after the first) are marked as skipped.
  for (my $i = $n0 + 1 ; $i <= $n1 ; $i++) {
    my $c = $alignment->getColumn($i);
    $$c{skipped} = 1 if $c; }
  Debug("Halign $alignment: INSTALL column " . join(',', map { $_ . "=" . ToString($$colspec{$_}); } sort keys %$colspec)) if $LaTeXML::DEBUG{halign};
  return $boxes; }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Cleanup the pre & post tokens for halign columns in math mode.
# If a pair of $..$ enclose stuff that is "OK" in math mode, we don't need the $.
# Note that the 1st $ is switching OUT of math mode!
# As written, only directly adjacent pairs ($$ with nothing between) are removed.
sub stripDupMath {
  my (@tokens) = @_;
  # Indices of every token that is defined as T_MATH
  my @poss = grep { $tokens[$_]->defined_as(T_MATH) } 0 .. $#tokens;
  shift(@poss) if scalar(@poss) % 2;    # Get pairs!
  # Work from the end so that earlier indices stay valid after each splice.
  while (@poss) {
    my ($p2, $p1) = (pop(@poss), pop(@poss));
    splice(@tokens, $p1, 2) if $p2 == $p1 + 1; }
  return @tokens; }

# "Initialized" alignment; presets spacing, but since we're ignoring it anyway...
Let('\ialign', '\halign');

# Overlapping alignments ???
DefMacro('\oalign{}',
  '\@@oalign{\@start@alignment#1\@finish@alignment}');
# NOTE(review): alignmentBindings is handed the string 'l' here rather than a
# template object -- confirm that is what it expects.
DefConstructor('\@@oalign{}',
  '#1',
  reversion    => '\oalign{#1}', bounded => 1, mode => 'text',
  beforeDigest => sub { alignmentBindings('l'); });

# This is actually different; the lines should lie on top of each other.
# How should this be represented?
DefMacro('\ooalign{}',
  '\@@ooalign{\@start@alignment#1\@finish@alignment}');
# Same treatment as \oalign: the body is digested as a bounded, text-mode
# alignment and reverts to \ooalign{...}.
DefConstructor('\@@ooalign{}',
  '#1',
  reversion    => '\ooalign{#1}',
  bounded      => 1,
  mode         => 'text',
  beforeDigest => sub { alignmentBindings('l'); });

#----------------------------------------------------------------------
# These determine whether the _next_ paragraph gets indented!
# thus it needs \par to check whether such indentation has been set.

# \indent: mark the open ltx:para as ltx_indent, or open a new one where
# a paragraph is allowed.
DefConstructorI('\indent', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    return unless $current;
    # Already inside a para: just flag it.
    if ($document->getNodeQName($current) eq 'ltx:para') {
      $current->setAttribute(class => "ltx_indent");
      return; }
    # Anywhere a para cannot go, \indent is ignored.
    return unless $document->canContainSomehow($current, "ltx:para");
    # A paragraph can be started here, so start one.
    # However, perversely ignore indent on 1st para after sectioning titles
    my $previous    = $current->lastChild;
    my $after_title = $previous && ($document->getNodeQName($previous) =~ /^ltx:(?:toc)?title$/);
    my @attributes  = ($after_title ? () : (class => "ltx_indent"));
    $document->openElement("ltx:para", @attributes);
    return; });

# \noindent: as \indent, but with class ltx_noindent and no special case
# after titles.
DefConstructorI('\noindent', undef, sub {
    my ($document) = @_;
    my $current = $document->getElement;
    return unless $current;
    if ($document->getNodeQName($current) eq 'ltx:para') {
      $current->setAttribute(class => "ltx_noindent");
      return; }
    # Start a para only where one is allowed; otherwise ignore.
    if ($document->canContainSomehow($current, "ltx:para")) {
      $document->openElement("ltx:para", class => "ltx_noindent"); }
    return; });

# <ltx:para> represents a Logical Paragraph, whereas <ltx:p> is a `physical paragraph'.
# A para can contain both p and displayed equations and such.

# Remember; \par _closes_, not opens, paragraphs!
# Here, we want to close both an open p and para (if either are open).
# NOTE Also that the whole inPreamble bit is, I think, overused.
# For example, \par should be a NOOP in vertical mode, and that would generally make it
# ignored in the preamble.
# Construction closes any open ltx:p and ltx:para (nothing happens in the
# preamble).  Digestion decides the class for the para being closed and
# stashes the one for the next para in the next_para_class state value.
DefConstructorI('\normal@par', undef, sub {
    my ($document, %props) = @_;
    if ($props{inPreamble}) { }
    else {
      $document->maybeCloseElement('ltx:p');
      my $open    = $document->getElement;
      my $in_para = $open && ($document->getNodeQName($open) eq 'ltx:para');
      # Only set on the para about to close, if unknown!
      if ($in_para && !$open->getAttribute("class") && $props{class}) {
        $document->setAttribute($open, class => $props{class}); }
      $document->maybeCloseElement('ltx:para'); } },
  afterDigest => sub {
    my ($stomach, $whatsit) = @_;
    return $whatsit->setProperty(inPreamble => 1) if LookupValue('inPreamble');
    # Check if flags were set by prior \par:
    my $pending_class = LookupValue("next_para_class");
    if ($pending_class) {
      $whatsit->setProperty(class => $pending_class);
      AssignValue(next_para_class => undef); }
    # Fish out flags for next ltx:para, to be used when the next \par closes:
    # respect \parindent if no overrides are given
    AssignValue(next_para_class => "ltx_noindent")
      unless LookupRegister('\parindent')->valueOf;
    # Vertical adjustments: whatever was queued is digested and returned.
    my $adjustments = LookupValue('vAdjust');
    return unless $adjustments;
    AssignValue(vAdjust => [], 'global');
    return Digest(Tokens(@$adjustments)); },
  properties => { alignmentSkippable => 1 },
  alias      => '\par');

Let('\par', '\normal@par');
DefMacro('\inner@par OptionalMatch:* [Glue]', '\normal@par');    # Obsolete, but in case still used...

Tag('ltx:para', autoClose => 1, autoOpen => 1, afterClose => \&pruneEmpty);

# afterClose hook for ltx:para: remove the para if it has no element children.
# In some cases we could have e.g. a \noindent followed by a {table},
# in which case we end up with an empty ltx:para which we can prune.
sub pruneEmpty {
  my ($document, $node) = @_;
  return if scalar(element_nodes($node));
  my $before = element_prev($node);
  # No preceding element, or one that is not a para: leave a marker class on the parent.
  unless ($before && ($document->getNodeQName($before) eq 'ltx:para')) {
    $document->addClass($node->parentNode, 'ltx_pruned_first'); }
  $node->unlinkNode;
  return; }

# afterClose hook for ltx:p: trim whitespace at both ends of the node's text.
sub trimNodeWhitespace {
  my ($document, $node) = @_;
  trimNodeLeftWhitespace($document, $node);
  trimNodeRightWhitespace($document, $node);
  return; }

# Strip leading spaces from the first text reached by descending through first children.
# Only plain spaces are removed: with some trepidation, I don't think we want to trim nbsp!
sub trimNodeLeftWhitespace {
  my ($document, $node) = @_;
  my $current = $node;
  while (my @kids = $current->childNodes) {
    my $first = $kids[0];
    my $kind  = $first->nodeType;
    if ($kind == XML_TEXT_NODE) {
      my $text = $first->data;
      $first->setData($text) if $text =~ s/^ +//;
      last; }
    last unless $kind == XML_ELEMENT_NODE;
    $current = $first; }    # descend into the leading element
  return; }

# Strip trailing whitespace from the last text reached by descending through last children.
# NOTE(review): this side uses \s, which is broader than the plain-space
# pattern used on the left -- confirm the asymmetry is intended.
sub trimNodeRightWhitespace {
  my ($document, $node) = @_;
  my $current = $node;
  while (my @kids = $current->childNodes) {
    my $final = $kids[-1];
    my $kind  = $final->nodeType;
    if ($kind == XML_TEXT_NODE) {
      my $text = $final->data;
      $final->setData($text) if $text =~ s/\s+$//;
      last; }
    last unless $kind == XML_ELEMENT_NODE;
    $current = $final; }    # descend into the trailing element
  return; }

Tag('ltx:p', autoClose => 1, autoOpen => 1, afterClose => \&trimNodeWhitespace);

# \dump ???

# \end flushes the gullet and produces nothing.
DefPrimitiveI('\end', undef, sub {
    my ($stomach) = @_;
    $stomach->getGullet->flush;
    return; });

#======================================================================
# Horizontal Mode primitives in Ch.25, pp.285--287

# The following cause tex to start a new paragraph -- they switch to horizontal mode.
# <horizontal command> = <letter> | <otherchar> | \char | <chardef token>
#    | \noboundary | \unhbox | \unhcopy | \valign | \vrule
#    | \hskip | \hfil | \hfill | \hss | \hfilneg
#    | \accent | \discretionary | \- | \  | $

# a candidate for use by \hskip, \hspace, etc... ?
# Convert a dimension into a string of Unicode space characters, measured
# against the current font size.  Returns nothing for widths under 0.01em
# (negative widths included), one narrow space character for widths under
# 0.40em, and otherwise a run of U+00A0 characters.
sub DimensionToSpaces {
  my ($dimen) = @_;
  my $fs  = LookupValue('font')->getSize;    # 1 em
  my $pt  = $dimen->ptValue;
  # NOTE(review): divides by the font size with no guard -- presumably never zero; confirm.
  my $ems = $pt / $fs;
  if    ($ems < 0.01) { return; }
  elsif ($ems < 0.17) { return "\x{2006}"; }    # 6/em
  elsif ($ems < 0.30) { return "\x{2005}"; }    # 4/em
  elsif ($ems < 0.40) { return "\x{2004}"; }    # 3/em (same as nbsp?)
  else {
    my $n = int(($ems + 0.3) / 0.333);          # 10pts per space...?
    return (UTF(0xA0) x $n); } }

DefPrimitiveI('\noboundary', undef, undef);

# \hskip handled similarly to \kern
# \hskip can be ignored in certain situations...
# Directly inside an svg:g the skip becomes a translate() appended to that
# element's transform; elsewhere in SVG it is dropped with a warning; in all
# other places it is absorbed as space characters.
DefConstructor('\hskip Glue', sub {
    my ($document, $length, %props) = @_;
    my $parent = $document->getNode;
    # Debug("HSKIP ".ToString($length)." at ".$document->getNodeQName($parent));
    if ($document->getNodeQName($parent) eq 'svg:g') {
      if (my $x = $length->pxValue) {
        # HACK HACK HACK
        my $transform = $parent->getAttribute('transform');
        $parent->setAttribute(transform => ($transform ? $transform . ' ' : '') . "translate($x,0)");
      } }
    elsif (inSVG()) {
      # NOTE(review): the warning is filed under 'kern' although this is \hskip.
      Warn('unexpected', 'kern', $_[0], "Lost hskip in SVG " . ToString($length)); }

    else {
      # $document->openText(DimensionToSpaces($length), $props{font}); } },
      $document->absorb(DimensionToSpaces($length)); } },
  properties => sub {
    my ($stomach, $length) = @_;
    (width => $length, isSpace => 1); });

# \mskip and \mkern both digest to a Box of space characters whose width
# property is the given MuGlue.
DefPrimitive('\mskip MuGlue', sub {
    my ($stomach, $length) = @_;
    my $s = DimensionToSpaces($length);
    Box($s, undef, undef, Invocation(T_CS('\mskip'), $length),
      width => $length, isSpace => 1); });
DefPrimitive('\mkern MuGlue', sub {
    my ($stomach, $length) = @_;
    my $s = DimensionToSpaces($length);
    Box($s, undef, undef, Invocation(T_CS('\mkern'), $length),
      width => $length, isSpace => 1); });

DefPrimitiveI('\hss',     undef, undef);
DefPrimitiveI('\hfilneg', undef, undef);

# \hfil and \hfill become a single-space Box flagged isFill.
DefPrimitiveI('\hfil', undef, sub {
    Box(' ', undef, undef, T_CS('\hfil'), isSpace => 1, isFill => 1); });
DefPrimitiveI('\hfill', undef, sub {
    Box(' ', undef, undef, T_CS('\hfill'), isSpace => 1, isFill => 1); });

# \lower
# \raise
# But apparently must really explicitly be an \hbox, \vbox or \vtop (?)
# OR something that expands into one!!

# Size of $box once raised by $y: same width, height increased and depth
# decreased by $y, each replaced by zero if it would go negative.
sub raisedSizer {
  my ($box, $y) = @_;
  my ($w, $h, $d) = $box->getSize;
  my $z = Dimension(0);
  $h = $h->add($y)->larger($z);
  $d = $d->subtract($y)->larger($z);
  return ($w, $h, $d); }

# NOTE(review): both branches of this template read "(#2)" in this copy;
# the wrapping element markup looks stripped -- confirm against upstream.
DefConstructor('\lower Dimension MoveableBox',
  "?&inSVG()(#2)"
    . "(#2)",
  sizer       => sub { raisedSizer($_[0]->getArg(2), $_[0]->getArg(1)->negate); },
  afterDigest => sub {
    # Lowering is stored as a negative y offset.
    my $y         = $_[1]->getArg(1)->multiply(-1);
    my $ypx       = $y->pxValue;
    my $transform = ($ypx ? "translate(0,$ypx)" : undef);
    $_[1]->setProperties(y => $y, transform => $transform); });

# NOTE(review): same apparently stripped template as \lower above.
DefConstructor('\raise Dimension MoveableBox',
  "?&inSVG()(#2)"
    . "(#2)",
  sizer       => sub { raisedSizer($_[0]->getArg(2), $_[0]->getArg(1)); },
  afterDigest => sub {
    my $y         = $_[1]->getArg(1);
    my $ypx       = $y->pxValue;
    my $transform = ($ypx ? "translate(0,$ypx)" : undef);
    $_[1]->setProperties(y => $y, transform => $transform); });

# \unhbox<8bit>, \unhcopy<8bit>
# \unhbox returns the contents of the numbered box and clears the box.
DefPrimitive('\unhbox Number', sub {
    my $box   = 'box' . $_[1]->valueOf;
    my $stuff = LookupValue($box);
    # NOTE(review): called before the defined check -- presumably tolerates undef; confirm.
    adjustBoxColor($stuff);
    AssignValue($box, undef);
    (defined $stuff ? $stuff->unlist : List()); });

# \unhcopy is the same but leaves the box register as it was.
DefPrimitive('\unhcopy Number', sub {
    my $box   = 'box' . $_[1]->valueOf;
    my $stuff = LookupValue($box);
    adjustBoxColor($stuff);
    (defined $stuff ? $stuff->unlist : List()); });

# Implement ???
# DefMacro('\vrule','\relax');
DefMacro('\valign', '');

DefMacro('\vspace{}', '\vskip#1\relax');
# \indent, \noindent, \par; see above.

DefMacro('\discretionary{}{}{}', '#3');    # No hyphenation here!
DefPrimitiveI('\-', undef, undef);
DefPrimitive('\setlanguage Number', undef);

#======================================================================
# Math mode stuff
# See TeXBook Ch.26
#======================================================================
# Decide whether we're going into or out of math, inline or display.
Tag('ltx:XMText', autoOpen => 1, autoClose => 1);
# This really should be T_MATH
# and it should (or not) check for a second $ only if not in restricted horizontal mode!
# (and then all the \@dollar@in@(text|math|normal)mode defns would not be needed.
# The current MODE and a one-token lookahead pick which math hook to invoke:
#   display_math + following $  => \@@ENDDISPLAYMATH
#   display_math, no second $   => Error, nothing invoked
#   inline_math                 => \@@ENDINLINEMATH
#   otherwise + following $     => \@@BEGINDISPLAYMATH
#   otherwise                   => \@@BEGININLINEMATH
DefPrimitiveI('\@dollar@in@normalmode', undef, sub {
    my ($stomach) = @_;
    my $gullet    = $stomach->getGullet;
    my $mode      = LookupValue('MODE');
    my $op        = '\@@BEGININLINEMATH';
    if ($mode eq 'display_math') {
      if ($gullet->ifNext(T_MATH)) {
        $gullet->readToken;
        $op = '\@@ENDDISPLAYMATH'; }
      else {
        # Avoid a Fatal, but we're likely in trouble.
        # Should we switch to text mode? (LaTeX normally wouldn't)
        # Did we miss something and should we already have been in text mode? Possibly...
        # OR, were we in a lenient package that allowed inline math mixed in with display?
        Error('expected', '$', $stomach,
          "Missing \$ closing display math.",
          "Ignoring; expect to be in wrong math/text mode.");
        $op = undef; } }
    elsif ($mode eq 'inline_math') {
      $op = '\@@ENDINLINEMATH'; }
    # elsif(!LookupValue('Alignment') && $gullet->ifNext(T_MATH)){
    elsif ($gullet->ifNext(T_MATH)) {
      $gullet->readToken;
      $op = '\@@BEGINDISPLAYMATH'; }
    $stomach->invokeToken(T_CS($op)) if $op; });
# Let this be the default, conventional $
Let(T_MATH, T_CS('\@dollar@in@normalmode'));

# Effectively these are the math hooks, redefine these to do what you want with math?
# NOTE(review): every piece of this template except "#body" is an empty string
# in this copy; the element markup looks stripped -- confirm against upstream.
DefConstructorI('\@@BEGINDISPLAYMATH', undef,
  ""
    . ""
    . ""
    . "#body"
    . ""
    . ""
    . "",
  reversion    => Tokens(T_MATH, T_MATH),
  beforeDigest => sub {
    $_[0]->beginMode('display_math');
    # Push back the contents of \everymath, then of \everydisplay, to be read before the body.
    if (my @everymath_toks = $STATE->lookupDefinition(T_CS('\everymath'))->valueOf->unlist()) {
      $_[0]->getGullet->unread(@everymath_toks); }
    if (my @everydisplay_toks = $STATE->lookupDefinition(T_CS('\everydisplay'))->valueOf->unlist()) {
      $_[0]->getGullet->unread(@everydisplay_toks); }
    return; }, captureBody => 1);
DefConstructorI('\@@ENDDISPLAYMATH', undef, "",
  reversion    => Tokens(T_MATH, T_MATH),
  beforeDigest => sub { $_[0]->endMode('display_math'); });

# NOTE(review): same apparently stripped template as \@@BEGINDISPLAYMATH above.
DefConstructorI('\@@BEGININLINEMATH', undef,
  ""
    . ""
    . "#body"
    . ""
    . "",
  reversion    => Tokens(T_MATH),
  beforeDigest => sub {
    $_[0]->beginMode('inline_math');
    if (my @everymath_toks = $STATE->lookupDefinition(T_CS('\everymath'))->valueOf->unlist()) {
      $_[0]->getGullet->unread(@everymath_toks); }
    return; }, captureBody => 1);
DefConstructorI('\@@ENDINLINEMATH', undef, "",
  reversion    => Tokens(T_MATH),
  beforeDigest => sub { $_[0]->endMode('inline_math'); });

# Add the TeX code from the object that created this node,
# unless it has already been recorded on another node.
# The presentation-branch TeX goes in the tex attribute; the content-branch
# TeX goes in content-tex, but only when it differs.  A false $thing is ignored.
# NOTE(review): no "already recorded" check is visible in this sub.
sub add_TeX {
  my ($document, $node, $thing) = @_;
  return unless $thing;
  # One localized DUAL_BRANCH covers both conversions; it is restored on return.
  local $LaTeXML::DUAL_BRANCH = 'presentation';
  my $presentation_tex = UnTeX($thing);
  $LaTeXML::DUAL_BRANCH = 'content';
  my $content_tex = UnTeX($thing);
  $document->setAttribute($node, tex => $presentation_tex);
  if ($content_tex ne $presentation_tex) {
    $document->setAttribute($node, 'content-tex' => $content_tex); }
  return; }

# Same as add_TeX, but add the code from the body of the object.
# Nothing is recorded when $thing is false or has no defined body property.
sub add_body_TeX {
  my ($document, $node, $thing) = @_;
  return unless $thing;
  my $body = $thing->getProperty('body');
  return unless defined($body);
  local $LaTeXML::DUAL_BRANCH = 'presentation';
  my $presentation_tex = UnTeX($body);
  $LaTeXML::DUAL_BRANCH = 'content';
  my $content_tex = UnTeX($body);
  $document->setAttribute($node, tex => $presentation_tex);
  if ($content_tex ne $presentation_tex) {
    $document->setAttribute($node, 'content-tex' => $content_tex); }
  return; }

Tag('ltx:Math', afterClose => \&add_body_TeX);
Tag('ltx:Math', afterClose => \&cleanup_Math);

# Cleanup ltx:Math elements; particularly if they aren't "really" math.
# But record the oddity with class=ltx_markedasmath
sub cleanup_Math {
  my ($document, $mathnode) = @_;
  # Matches any XMath child that makes this "real" math: anything other than
  # an XMText, an XMHint, or a lone PUNCT/PERIOD XMTok.
  my $real_math = 'ltx:XMath/ltx:*'
    . '[local-name() != "XMText" and local-name() != "XMHint" '
    . 'and not('
    . 'local-name() = "XMTok" and (@role="PUNCT" or @role="PERIOD") '
    . 'and not(preceding-sibling::*) and not(following-sibling::*) )]';
  if ($document->findnodes($real_math, $mathnode)) {
    # Genuine math: just cleanup any remaining XMTexts
    cleanup_XMText_outer($document, $mathnode);
    return; }
  # Otherwise it apparently isn't math at all!?!
  # So unwrap down to the contents of the XMText's.
  my @pieces = ();
  for my $xmnode (map { $_->childNodes } $mathnode->childNodes) {
    if ($document->getNodeQName($xmnode) eq 'ltx:XMHint') {
      # A hint with a width turns into the equivalent space characters, if any.
      my $width = $xmnode->getAttribute('width');
      next unless $width;
      my $space = DimensionToSpaces(Glue($width));
      push(@pieces, $space) if $space;
      next; }
    # is XMText
    for my $child ($xmnode->childNodes) {
      my $kind = $child->nodeType;
      next if $kind == XML_COMMENT_NODE;
      if ($kind == XML_ELEMENT_NODE) {
        $document->addClass($child, 'ltx_markedasmath');
        push(@pieces, $child); }
      else {    # Make sure we've got an element
        push(@pieces, ['ltx:text', { class => 'ltx_markedasmath' }, $child]); } } }
  # and replace the whole Math with the pieces
  $document->replaceTree([undef, undef, @pieces], $mathnode);
  return; }

# Here's for an inverse case: when an XMText isn't "really" just text
# if it only contains a Math OR, a tabular with only Math in the cells?
# First case: pull it back into the math, but in an XMWrap to isolate it for parsing.
# Should we just pull any mixed text math up or only a single Math?
# For the tabular case, convert it to an XMArray.

# Note that normally, we'd do afterClose on ltx:XMText,
# but since the ltx:XMText closes before the outer ltx:Math,
# we would keep cleanup_Math from recognizing the trivial case of
# a single ltx:tabular in an equation (perverse, but people do that).
# So, we put this one on ltx:Math also, and scan for any contained XMText to fixup.

sub cleanup_XMText_outer {
  my ($document, $mathnode) = @_;
  for my $xmtext ($document->findnodes('descendant::ltx:XMText', $mathnode)) {
    cleanup_XMText($document, $xmtext); }
  return; }

sub cleanup_XMText {
  my ($document, $textnode) = @_;
  # We're really only interested in reducing nested math, right?
  # But actually also collapsing ltx:XMText/ltx:text
  my $model = $document->getModel;
  # Apply "outer" simplifications: while the XMText holds exactly one child
  # and that child is an ltx:text, ltx:p, or single-child ltx:inline-block,
  # hoist the child's font and attributes (except xml:id) and unwrap it.
  my @children;
  while ((@children = $textnode->childNodes) && (scalar(@children) == 1)
    && $document->findnodes('ltx:text'
        . ' | ltx:inline-block[count(*)=1]'
        . ' | ltx:p',
      $textnode)) {
    my $wrapper = $children[0];
    $document->setNodeFont($textnode, $document->getNodeFont($wrapper));
    for my $attr ($wrapper->attributes) {    # Copy the child's attributes (should Merge!!)
      next if $attr->nodeName eq 'xml:id';
      $textnode->setAttribute($attr->nodeName => $attr->getValue); }
    $document->unwrapNodes($wrapper); }

  # @children now holds the child list as of the last loop test.
  my $only = (scalar(@children) == 1 ? $children[0] : undef);

  # Now apply a simplifying rule for nested Math
  # If the XMText contains a single Math, pull its content up in
  if (defined $only && $document->findnodes('ltx:Math', $textnode)) {
    # Replace XMText by XMWrap/* (this should preserve the parse?)
    $textnode = $document->renameNode($textnode, 'ltx:XMWrap');
    $document->replaceNode($only, map { $_->childNodes } $only->childNodes); }
  # # # RISKY!!!! If SOME nodes are math...
  # # # pull the whole sequence up, unwrap the math and putting the rest back in XMText.
  # # # Even with the XMWrap, this seems to wreak havoc on parsing and structure?
  # # if($document->findnodes('ltx:Math',$textnode)){
  # #   # Replace XMText by XMWrap/* (this should preserve the parse?)
  # #   $textnode=$document->renameNode($textnode,'ltx:XMWrap');
  # #   foreach my $child (@children){
  # #     if($model->getNodeQName($child) eq 'ltx:Math'){
  # #       $document->replaceNode($child,map($_->childNodes,$child->childNodes)); }
  # #     else {
  # #       $document->wrapNodes('ltx:XMText',$child); }}}
  # If a single tabular that ONLY(?) contains Math, turn into an XMArray
  # Well, a tabular REALLY shouldn't be in math;
  # How much math should determine the switch?
  # [will alignment attributes be lost?]
  ## Should we ALWAYS do this, or just for some minimal amount of math???
  ## && !$document->findnodes('ltx:tabular/ltx:tr/ltx:td/text()'
  ##    .' | ltx:tabular/ltx:tbody/ltx:tr/ltx:td/text()'
  ##    .' | ltx:tabular/ltx:tr/ltx:td[not(ltx:Math)]'
  ##    .' | ltx:tabular/ltx:tbody/ltx:tr/ltx:td[not(ltx:Math)]',
  ##    $textnode)
  elsif (defined $only && ($model->getNodeQName($only) eq 'ltx:tabular')) {
    # First step is remove any ltx:tbody from the tabular!
    for my $tbody ($document->findnodes('ltx:tabular/ltx:tbody', $textnode)) {
      $document->unwrapNodes($tbody); }
    # Now, we can start replacing tabular=>XMArray, tr=>XMRow, td=>XMCell
    my $array = $document->renameNode($only, 'ltx:XMArray');
    for my $tr ($array->childNodes) {
      my $xmrow = $document->renameNode($tr, 'ltx:XMRow');
      for my $td ($xmrow->childNodes) {
        my $xmcell = $document->renameNode($td, 'ltx:XMCell');
        for my $item ($xmcell->childNodes) {
          if ($model->getNodeQName($item) eq 'ltx:Math') {    # Math cell, unwrap the Math/XMath layer
            $document->replaceNode($item, map { $_->childNodes } $item->childNodes); }
          else {    # Otherwise, wrap whatever it is in an XMText
            $document->wrapNodes('ltx:XMText', $item); } } } }
    # And now we don't need the XMText any more;
    # its attributes (xml:id included) move onto the array first.
    for my $attr ($textnode->attributes) {
      $array->setAttribute($attr->nodeName => $attr->getValue); }
    my $unwrapped = $document->unwrapNodes($textnode);
    # Re-point the XMText's recorded id at what replaced it.
    if (my $id = $textnode->getAttribute('xml:id')) {
      $document->unRecordID($id);
      $document->recordID($id, $unwrapped); } }
  return; }

#**********************************************************************
# Support for MathFork.
#**********************************************************************
# [Note: this block of code seems like it belongs somewhere else]
# A MathFork supports document-level alignment of math,
# by collecting equations into an equationgroup. Each equation can contain
# one or more MathFork structures which separate the semantically meaningful
# equation (if possible) from the collection of rows and/or column fragments
# for alignment. The goal is to be able to present the aligned structure
# composed of various mathematical fragments in a grid, and yet still represent
# the (presumably) meaningful complete formula.
#
# The structure looks like
#   <ltx:MathFork>
#     <ltx:Math>...</ltx:Math>
#     <ltx:MathBranch>..</ltx:MathBranch>
#   </ltx:MathFork>
# The initial, "main", Math will contain a complete formula (hopefully).
# The MathBranch will typically contain one or more <ltx:tr>, each of which
# contains one or more <ltx:td>, each of which contains a <ltx:Math> representing
# a cell of the aligned structure.

#======================================================================
# openMathFork($document,$equation) will add a MathFork structure
# to the given $equation, and return ($mainfork, $branch)
# where $mainfork is the initial <ltx:Math> and $branch is the <ltx:MathBranch>.
# You'll probably want to be adding Stuff to one or both of $mainfork & $branch.
# Most typically, you'll be finding math fragments that you've found in the
# current content of $equation and adding them into both $mainfork & $branch
# using addColumnToMathFork.
sub openMathFork {
  my ($document, $equation) = @_;
  my $mathfork = $document->openElementAt($equation, 'ltx:MathFork');
  # The main Math starts EMPTY, carrying a freshly made (empty) math whatsit as its box.
  my $main_math = $document->openElementAt($mathfork, 'ltx:Math', _box => MathWhatsit());
  # Its XMath is opened too; callers reach it as the first child element of the main Math.
  $document->openElementAt($main_math, 'ltx:XMath');
  my $math_branch = $document->openElementAt($mathfork, 'ltx:MathBranch');
  return ($main_math, $math_branch); }

# Close the appropriate elements of an ltx:MathFork created with openMathFork.
-sub closeMathFork { - my ($document, $equation, $mainfork, $branch) = @_; - # Now, close them all. - $document->closeElementAt($branch); - $document->closeElementAt($document->getFirstChildElement($mainfork)); - $document->closeElementAt($mainfork); - # $document->closeElementAt($mainfork->parentNode); } - # More defensive? Sometimes we end up with a DocumentFragment as parent of $mainfork????!?!?!?! - my @mfs = $document->findnodes('ltx:MathFork', $equation); - $document->closeElementAt($mfs[-1]); - my $fork = $branch->parentNode; - my @branches = $fork->childNodes; - if (scalar(@branches) == 1) { # Whoops, came up empty! - $fork->unbindNode; } - return; } - -# Create an inline math Whatsit from a list of math Boxes, Lists or Whatsits. -# Note that we unwrap @hidden@bgroup's (!) and normalize \displaystyle (!) -# This is primarily useful for synthesizing the Box for a newly created ltx:Math -# that is synthesized from other math content within a ltx:MathFork. -sub MathWhatsit { - my (@items) = @_; - my $hbgd = LookupDefinition(T_CS('\@hidden@bgroup')); - @items = map { ((ref $_ eq 'LaTeXML::Core::Whatsit') && ($_->getDefinition eq $hbgd) - ? $_->getBody->unlist : ($_)) } - map { $_->unlist } grep { $_ } @items; - my $locator = undef; - foreach my $i (@items) { - last if $locator; - $locator = $i->getLocator; } - my @styles = grep { UnTeX($_) eq '\displaystyle' } @items; - if (@styles) { - @items = ($styles[0], grep { UnTeX($_) ne '\displaystyle' } @items); } - return LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS('\@@BEGININLINEMATH')), [], - body => List(@items, mode => 'math'), - trailer => T_CS('\@@ENDINLINEMATH'), - locator => $locator, isMath => 1); } - -#====================================================================== -# Add a new table column (ltx:td) into the ltx:MathBranch of a ltx:MathFork. 
-# The insertion point will be at $inbranch, presumably an ltx:tr -# [created in the ltx:MathBranch using: $document->openElementAt($branch,'ltx:tr'); ] -# The content of $cell (an ltx:_Capture_) is typically a single ltx:Math. -# (but occasionally mixed math & ltx:text; some cases may need more semantic analysis?) -# The content of $cell will be MOVED into the new column (ltx:td), (w/ ID's intact) -# and CLONED (w/modified ID's) onto the end of the first child of the main branch, $mainfork, -# of the ltx:MathFork [A Math Whatsit is also synthesized for the main branch, for TeX, etc!). -# Thus, the collection of rows/columns fragments represents the alignment, -# while the main branch synthesizes the (presumed) semantic whole. -# The now-empty $cell is then removed from its parent & the document. -sub addColumnToMathFork { - my ($document, $mainfork, $inbranch, $cell) = @_; - my $td = $document->openElementAt($inbranch, 'ltx:td'); - if (my $align = $cell->getAttribute('align')) { - $document->setAttribute($td, align => $align); } - if (my $colspan = $cell->getAttribute('colspan')) { - $document->setAttribute($td, colspan => $colspan); } - # Remove the _Capture_ from the document; parts will get cloned &/or reinserted - $cell->unbindNode; - # Usually, we will have captured a single ltx:Math node, but occasionally text? - # But in perverse cases, might have NOTHING! - foreach my $node ($cell->childNodes) { - # Add a Clone of the cell's contents to the main branch (This will get modified id's) - local $LaTeXML::Core::Document::ID_SUFFIX = '.mf'; - # Usually, an ltx:Math element will be the complete content of the _Capture_ (cell) - my $type = $document->getNodeQName($node); - my $box; - if ($type eq 'ltx:Math') { - if (my $xmath = $document->getFirstChildElement($node)) { - # But we CLONE the contents of it's ltx:XMath onto the end of - # the mainfork ltx:Math/ltx:XMath (under $mainfork), modifying id's along the way. 
- $document->appendClone($document->getFirstChildElement($mainfork), - $document->getChildElements($xmath)); - # Add the boxes from this cell to the previously collected ones in the main branch. - $box = $document->getNodeBox($node)->getBody; } } - # The next two cases are unusual (slightly unexpected?), - # typically coming from abused eqnarrays? May need more analysis preceding the MathFork'ing! - elsif (($type eq 'ltx:text') || ($type eq 'ltx:p')) { - next if $node->textContent eq ''; - my $txt = $document->openElementAt($document->getFirstChildElement($mainfork), 'ltx:XMText'); - $document->appendClone($txt, $node); - $document->closeElementAt($txt); - $box = $document->getNodeBox($node); } - elsif ($type eq '#PCDATA') { - my $string = $node->textContent; - # next if $string eq ''; - next if $string =~ /^\s*$/; - my $txt = $document->openElementAt($document->getFirstChildElement($mainfork), 'ltx:XMText'); - $txt->appendText($string); - $document->closeElementAt($txt); - $box = Box($string); } - elsif ($type eq '#Comment') { } - else { - Warn('unexpected', $type, $cell, - "Don't know how to synthesize equation with $type in column"); } - # Add the boxes from this cell to the previously collected ones in the main branch. - if ($box) { - my $composed = MathWhatsit($document->getNodeBox($mainfork)->getBody, $box); - $document->setNodeBox($mainfork, $composed); - $document->setNodeBox($mainfork->firstChild, $composed); } # And also to the XMath element! - # Finally MOVE (really copy) the node from the _Capture_ ($cell) to the td (in the fork) - # this keeps the same IDs as original; (& appendTree may remove id's from $node!) - $document->unRecordNodeIDs($node); - $document->appendTree($td, $node); } - # We can now remove the _Capture_ (and anything still in it?) 
- # $cell->unbindNode; - $document->closeElementAt($td); - return; } - -#====================================================================== -# Higher level support for equationgroups -# equationgroups hold a collection of equations -# each of which will likely have MathFork within that separates -# the complete semantic expression from a collection of rows & column cells. -# The latter are used to present an aligned set of equations; -# the former hopefully will be useful for the math....? -# -# Typically, there will be some sort of alignment macros, using & -# that will be set up to INITIALLY build an arrangement like: -# -# <_Capture>cellmath... -# that is, an for each row. -# Afterwards, we can analyze the cells and determine how the cells and/or rows -# will be divided up into "real" equations, and insert some MathFork's to reflect. - -# For example, the represents a whole eqnarray, -# and (initially, at least) the rows are represented as 's. -# Some analysis hopefully allows us to recognize - -# Given an ltx:equationgroup containing several ltx:equations (representing rows), -# equationgroupJoinRows combines one or more of those rows into a -# semantically meaningful equation and sets up the appropriate MathForks within. -# This is typically useful for eqnarray, after you have analyzed -# which subsequences of ltx:equations actually correspond to single semantic equations. -sub equationgroupJoinRows { - my ($document, $equationgroup, @equations) = @_; - # Make a new equation, with a single MathFork container - my $equation = $document->openElementAt($equationgroup, 'ltx:equation'); - $equationgroup->insertBefore($equation, $equations[0]); # Move to correct position. - # move labels, id, refnum to new equation - my ($labels, $id, $idctr, $idctrm, $tags); - foreach my $eq (@equations) { - if (my $l = $eq->getAttribute('labels')) { - $labels = ($labels ? 
"$labels $l" : $l); } - $id = $eq->getAttribute('xml:id') if $eq->hasAttribute('xml:id'); - $eq->removeAttribute('xml:id') if $id; - $tags = $document->findnode('ltx:tags', $eq); - # Annoying bookkeeping (should be more built in?) - $idctr = $eq->getAttribute('_ID_counter_') if $eq->hasAttribute('_ID_counter_'); - $idctrm = $eq->getAttribute('_ID_counter_m_') if $eq->hasAttribute('_ID_counter_m_'); } - $document->unRecordID($id) if $id; - $document->setAttribute($equation, labels => $labels) if $labels; - $document->setAttribute($equation, 'xml:id' => $id) if $id; - $document->setAttribute($equation, '_ID_counter_' => $idctr) if $idctr; - $document->setAttribute($equation, '_ID_counter_m_' => $idctrm) if $idctrm; - $equation->appendChild($tags) if $tags; - - # Scan equations to see which ones likely are continuations of previous - my ($mainfork, $branch) = openMathFork($document, $equation); - foreach my $eq (@equations) { - # remove equation; parts will be added in by adding to mathfork (hopefully taking care of ids) - $eq->unbindNode; - my $tr = $document->openElementAt($branch, 'ltx:tr'); - my @cells = $document->findnodes('ltx:_Capture_', $eq); - $document->setAttribute($tr, class => 'ltx_eqn_lefteqn') - if ($cells[0]->getAttribute('class') || '') =~ /\blefteqn\b/; - foreach my $cell (@cells) { - addColumnToMathFork($document, $mainfork, $tr, $cell); } - $document->closeElementAt($tr); } - closeMathFork($document, $equation, $mainfork, $branch); - $document->closeElementAt($equation); - return; } - -# Given an equation generated in an equationgroup, -# collect each $ncols columns into a MathFork structure, -# with the formatted portion being the columns. -# This is typically useful for AMS's align structures, -# which contain several columns, each pair of which represent a semantic equation. 
-sub equationgroupJoinCols { - my ($document, $ncols, $equation) = @_; - my ($col, $mainfork, $branch) = (0, undef, undef); - foreach my $cell ($document->findnodes('ltx:_Capture_', $equation)) { - next unless $document->getNodeQName($cell) =~ /(.*?:)?_Capture_$/; - if (($col++ % $ncols) == 0) { # Create new MathFork every $ncols cells. - closeMathFork($document, $equation, $mainfork, $branch) if $mainfork; - ($mainfork, $branch) = openMathFork($document, $equation); } - addColumnToMathFork($document, $mainfork, $branch, $cell); } - closeMathFork($document, $equation, $mainfork, $branch) if $mainfork; - return; } - -#********************************************************************** - -Let('\vcenter', '\vbox'); - -# \eqno & \leqno are really bizzare. -# They should seemingly digest until $ (or while still in math mode), -# and use that stuff as the reference number. -# However, since people abuse this, and we're really not quite TeX, -# we really can't do it Right. -# Even a \begin{array} ends up expanding into a $ !!! -DefMacroI('\eqno', undef, sub { - my ($gullet) = @_; - my $locator = $gullet->getLocator; - my @stuff = (); - # This is risky!!! - while (my $t = $gullet->readXToken(0)) { - if ($t->defined_as(T_BEGIN)) { - push(@stuff, $t, $gullet->readBalanced, T_END); } - # What do I need to explicitly list here!?!?!? UGGH! - elsif ($t->defined_as(T_MATH) - || $t->defined_as(T_CS('\]')) - # UGH from 2022: also don't jump over rows - || $t->defined_as(T_CS('\cr')) - # see arXiv:math/0001062, for one example - || $t->defined_as(T_CS('\hidden@cr')) - || $t->defined_as(T_CS('\@@ENDDISPLAYMATH')) - || $t->defined_as(T_CS('\begingroup')) # Totally wrong, but to catch expanded environments - || (ToString($t) =~ /^\\(?:begin|end)\{/) # any sort of environ begin or end??? 
- # This seems needed within AmSTeX environs - ) { - return (Invocation(T_CS('\@@eqno'), Tokens(@stuff)), $t); } - else { - push(@stuff, $t); } } - Error('unexpected', '\eqno', $gullet, "Fell of the end reading tag for \\eqno!", - "started " . ToString($locator)); - return Tokens(@stuff); }); - -Let('\leqno', '\eqno'); -# Revert to nothing, since it really doesn't belong in the TeX string(?) -DefConstructor('\@@eqno{}', - "^ #1", - reversion => ''); - -#====================================================================== -# Scripts are a bit of a strange beast, with respect to when the arguments -# are processed, and what kind of object should be created. -# -# While scripts look like they take a normal TeX argument, they really -# take the next BOX (AFTER expansion & digestion)! Thus, while -# a^\frac{b}{c} and a^\mathcal{B} -# DO work in TeX, other things like -# a^\sqrt{3} or a^\acute{b} -# DO NOT! (Hint: consider the expansions) -# Note that with -# \def\xyz{xyz} -# a^\xyz => a^{x}yz -# So, we try to mimic, but note that our boxes don't correspond 100% to TeX's -# -# Normally, sub/super scripts should be turned into a sort of postfix operator: -# The parser will attach the script to the appropriate preceding object. -# However, there are a few special cases involving empty boxes {}. -# If the argument is an empty box $x^{}$, the whole script should just disappear. -# If the PRECEDING box is {} (in ${}^{p}$, a sort of `floating' script should be created. -# This may combine, in the parser, with the following object to generate -# a prescript. - -# Remember a "safe" way to test a script Whatsit. -# Returns [ (FLOATING|POST) , (SUBSCRIPT|SUPERSCRIPT) ] or nothing -sub IsScript { - my ($object) = @_; - if (ref $object eq 'LaTeXML::Core::List') { - $object = [$object->unlist]->[-1]; } - if ((ref $object eq 'LaTeXML::Core::Whatsit') # careful w/alias in getCSName! 
- && ($object->getDefinition->getCS->getCSName =~ /^\\@@(FLOATING|POST)(SUBSCRIPT|SUPERSCRIPT)$/)) { - return [$1, $2]; } - return; } - -sub scriptHandler { - no warnings 'recursion'; - my ($stomach, $op) = @_; - my $gullet = $stomach->getGullet; - $gullet->skipSpaces; - my $font = LookupValue('font'); - my $style = $font->getMathstyle; - my @putback = (); - my $nscripts = 0; - - if (defined $style) { - my $cs = '\@@FLOATING' . $op; - my ($prevscript, $prevspace, $base); - # Check preceding boxes to determine possible attachment (floating vs post), - # Note that this analysis has to be done now (or sometime like it) before grouping lists go away; - # and whether there are conflicting preceding scripts, which is an error - # Parsing is too late! - while (my $prev = pop(@LaTeXML::LIST)) { - if (($prev->getProperty('isSpace')) - || ($prev->getProperty('isEmpty')) # EXPLICITLY empty, rather than {} - || (ref $prev eq 'LaTeXML::Core::Comment')) { - $prevspace = 1; # a space avoids double-scripts - unshift(@putback, $prev); # put back? assuming it will add rpadding to previous??? - next; } - elsif (IsEmpty($prev)) { # If empty, the script floats, can't conflict, but don't put back - last; } - elsif (my $prevop = IsScript($prev)) { - unshift(@putback, $prev); - if ($$prevop[1] eq $op) { # Whoops, duplicated; better use FLOATING - Error('unexpected', "double-" . lc($$prevop[1]), $stomach, "Double " . lc($$prevop[1])) - unless $prevspace; - $cs = '\@@FLOATING' . $op; - last; } - else { # Else, is OK (so far) assume POST (it will stack previous script) - $prevscript = $prev; # we'll overlap the width of the previous. - $cs = '\@@POST' . $op; } - # if we hit a FLOATING script, terminate, as the floating empty group avoids double scripts - last if ($$prevop[0] eq 'FLOATING'); - last if ++$nscripts > 1; } - else { - # We found something "normal", so assume we'll attach to it, and we're done. - $base = $prev; - unshift(@putback, $prev); - $cs = '\@@POST' . 
$op; - last; } } - push(@LaTeXML::LIST, @putback); - - MergeFont(scripted => 1); - # Now, get following boxes (may have to process several tokens!) - my @stuff = (); - while (my $tok = $gullet->readXToken(0)) { - @stuff = $stomach->invokeToken($tok); - last if @stuff; } - if (!@stuff) { - Error('expected', '{', $stomach, "Missing sub/superscript argument", $gullet->showUnexpected); - push(@stuff, Box()); } - my $script = shift(@stuff); # ONLY the first box is the script! - unshift(@stuff, - LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS($cs)), [$script], - locator => $gullet->getLocator, - font => $script->getFont, isMath => 1, - level => $stomach->getBoxingLevel, - scriptlevel => $stomach->getScriptLevel, - base => $base, # for sizing/positioning - prevscript => $prevscript)) - unless IsEmpty($script); - AssignValue(font => $font); # revert - return @stuff; } - else { # Non math use of _ ?? - my $c = (($op eq 'SUPERSCRIPT') ? '^' : '_'); - Error('unexpected', $c, $stomach, "Script $c can only appear in math mode"); - return Box($c, undef, undef, (($op eq 'SUPERSCRIPT') ? T_SUPER : T_SUB)); -} } - -DefPrimitiveI(T_SUPER, undef, sub { scriptHandler($_[0], 'SUPERSCRIPT'); }); -DefPrimitiveI(T_SUB, undef, sub { scriptHandler($_[0], 'SUBSCRIPT'); }); - -# The `argument' to a sub or superscript will typically be processed as a box, -# and either has braces, or is something that results in a single box. -# When we revert these, we DON'T want to wrap extra braces around, because they'll accumulate; -# at the least they're ugly; in some applications they affect "round trip" processing. -# OTOH, direct use of \@@POSTSUPERSCRIPT, etal, MAY need to have extra braces around them. -# So, when reverting, we're going to a bit of extra trouble to make sure we have ONE set -# of braces, but no extras!! 
-sub revertScript { - my ($script) = @_; - # We need to handle lists of lists, see arXiv:2210.11051 - my @tokens = Tokens($script->revert)->unlist; - my @t = @tokens; - my $l; - if ($t[0]->defined_as(T_BEGIN)) { - $l++; shift(@t); } - while (@t && $l) { - my $t = shift(@t); - if ($t->defined_as(T_BEGIN)) { $l++; } - elsif ($t->defined_as(T_END)) { $l--; } } - return (@tokens && !@t ? @tokens : (T_BEGIN, @tokens, T_END)); } - -# Compute the 'advance' of this script. -# can we do this before parsing? we can do the advance or something.... Hmmmm. -# * Need to know scriptpos (mid or post) to determine position. -# * need to know sub/super -sub scriptSizer { - my ($script, $base, $prev, $op, $pos) = @_; - - # NOTE: Currently, the mathstyle is NOT reflected in the font of the script!!!! - # Or is it now ????? - # [unless it's different from the 'expected' style!!!] - my ($ws, $hs, $ds) = map { $_->valueOf } $script->getSize; - $ws *= 0.8; $hs *= 0.8; $ds *= 0.8; # HACK!@!! - my ($wb, $hb, $db) = map { $_->valueOf } ($base ? $base->getSize - : LookupValue('font')->getNominalSize); - my ($w, $h, $d) = (0, 0, 0); - # Fishing for the scriptpos on the base (if any) - my $attr; - $pos = $base->getProperty('scriptpos') if !defined $pos && defined $base; - $pos = 'post' if !defined $pos; - if ($pos eq 'mid') { - $w = max(0, $ws - $wb); # as if max width of base & script - if ($op eq 'SUPERSCRIPT') { - $h = $hb + $ds + $hs; } - else { - $d = $db + $hs + $ds; } } - else { - my $wp = ($prev && $prev->getWidth) || 0; # as if max of width & prev script's width - $w = max(0, $ws - $wp); - if ($op eq 'SUPERSCRIPT') { - $h = $hb + $hs / 2; } - else { - $d = $hs / 2 + $ds; } } - $w = Dimension($w); $h = Dimension($h); $d = Dimension($d); - return ($w, $h, $d); } - -# NOTE: The When reverting these, the -DefConstructor('\@@POSTSUPERSCRIPT InScriptStyle', - "" - . "#1" - . 
"", - reversion => sub { (T_SUPER, revertScript($_[1])); }, - sizer => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'), - $_[0]->getProperty('prevscript'), 'SUPERSCRIPT', 'post'); }); -DefConstructor('\@@POSTSUBSCRIPT InScriptStyle', - "" - . "#1" - . "", - reversion => sub { (T_SUB, revertScript($_[1])); }, - sizer => sub { scriptSizer($_[0]->getArg(1), $_[0]->getProperty('base'), - $_[0]->getProperty('prevscript'), - 'SUBSCRIPT', 'post'); }); -DefConstructor('\@@FLOATINGSUPERSCRIPT InScriptStyle', - "" - . "#1" - . "", - reversion => sub { (T_BEGIN, T_END, T_SUPER, revertScript($_[1])); }, - sizer => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUPERSCRIPT', 'post'); }); -DefConstructor('\@@FLOATINGSUBSCRIPT InScriptStyle', - "" - . "#1" - . "", - reversion => sub { (T_BEGIN, T_END, T_SUB, revertScript($_[1])); }, - sizer => sub { scriptSizer($_[0]->getArg(1), undef, undef, 'SUBSCRIPT', 'post'); }); - -DefMacroI('\active@math@prime', undef, sub { - my ($gullet) = @_; - my @sup = (T_CS('\prime')); - # Collect up all ', convering to \prime - while ($gullet->ifNext(T_OTHER('\''))) { - $gullet->readToken; - push(@sup, T_CS('\prime')); } - # Combine with any following superscript! - # However, this is semantically screwed up! - # We really need to set up separate superscripts, but at same level! - if ($gullet->ifNext(T_SUPER)) { - $gullet->readToken; - push(@sup, $gullet->readArg->unlist); } - (T_SUPER, T_BEGIN, @sup, T_END); }, - locked => 1); # Only in math! -AssignMathcode("'" => 0x8000); -Let("'", '\active@math@prime'); - -# Experiment: When we detect a math element containing solely a floating superscript in the -# *Frontmatter* of a document, assume it is a note mark, and normalize it down to -# plain text. -DefRewrite(xpath => 'descendant::ltx:Math[child::ltx:XMath[child::ltx:XMApp[' . - '(@role="FLOATSUPERSCRIPT" or @role="FLOATSUBSCRIPT") and ' . - 'not(preceding-sibling::*) and not(following-sibling::*) ' . 
- 'and not(./*/*[not(self::ltx:XMTok)]) ]]]', - replace => sub { - my ($document, $math) = @_; - # We can assume the grandchild of the XMath node is the XMArg, - # which we need to normalize to scripted Unicode. - if (my @xmath = element_nodes($math)) { - if (my @xmapp = element_nodes($xmath[0])) { - if (my @xmarg = element_nodes($xmapp[0])) { - if (my $role = $xmapp[0]->getAttribute('role')) { - my $text = $xmarg[0]->textContent; - local $LaTeXML::BOX = $document->getNodeBox($xmarg[0]); - if ($role eq 'FLOATSUPERSCRIPT') { - $document->insertElement('ltx:sup', $text); - return; } - elsif ($role eq 'FLOATSUBSCRIPT') { - $document->insertElement('ltx:sub', $text); - return; } - } } } } - # should never happen, but just in case: - Info("rewrite", "footnotemark", "Failed to find floating node in: " . $math->toString(1)); - $document->getNode->appendChild($math); - return; }); - -#====================================================================== -# \choose & friends, also need VERY special argument handling - -# After digesting the \choose (or whatever), grab the previous and following material -# and store as args in the whatsit. - -# Increment the mathstyle stored in any boxes & whatsits. -# The tricky part is to know when NOT to increment! -# \displaystyle, constructors that set their own specific style,... -# And, any collateral adjustments that had been done in digestion depending on mathstyle -# WONT be adjusted! -# We don't have a clear API to find the displayable Boxes within; -# and we don't have a good handle on grouping... - -# ARGH!!!!!!!!! RETHINK!!!!!! -sub adjustMathstyle { - my ($outerstyle, $adjusted, @boxes) = @_; - foreach my $box (@boxes) { - next unless defined $box; - next if $$adjusted{$box}; # since we do args AND props, be careful not to adjust twice! 
- $$adjusted{$box} = 1; - my $r = ref $box; - next unless $r && ($r !~ /(?:SCALAR|HASH|ARRAY|CODE|REF|GLOB|LVALUE)/) && $r->isaBox; - return if $box->getProperty('explicit_mathstyle'); - next if $box->getProperty('own_mathstyle'); - - if ($r eq 'LaTeXML::Core::Box') { - adjustMathStyle_internal($outerstyle, $box); } - elsif ($r eq 'LaTeXML::Core::List') { - adjustMathstyle($outerstyle, $adjusted, $box->unlist); } - elsif ($r eq 'LaTeXML::Core::Whatsit') { - my $style = adjustMathStyle_internal($outerstyle, $box) || $outerstyle; - # now recurse on contained boxes (args AND properties!) - adjustMathstyle($style, $adjusted, $box->getArgs); - adjustMathstyle($style, $adjusted, values %{ $box->getPropertiesRef }); } } - return; } - -# Heursitic; -# we're wanting to adjust the style AS IF the numerator had been already in the next mathstyle -# This isn't the same as just shifting the mathstyle! -# we're sorta trying to infer WHY the box has a given style...? -our %mathstyle_adjust_map = ( - display => { display => 'text', text => 'script', script => 'script', scriptscript => 'scriptscript' }, - text => { display => 'text', text => 'script', script => 'scriptscript', scriptscript => 'scriptscript' }, - script => { display => 'display', text => 'text', script => 'scriptscript', scriptscript => 'scriptscript' }, - scriptscript => { display => 'display', text => 'text', script => 'scriptscript', scriptscript => 'scriptscript' }); - -sub adjustMathStyle_internal { - my ($outerstyle, $box) = @_; - $outerstyle = 'display' unless $outerstyle; - if (my $font = $box->getFont) { - my $origstyle = $font->getMathstyle || 'display'; - my $newstyle = $mathstyle_adjust_map{$outerstyle}{$origstyle}; - $box->setFont($font->merge(mathstyle => $newstyle)); - if (my $recstyle = $box->getProperty('mathstyle')) { # And adjust here, if recorded. 
- $box->setProperty(mathstyle => $newstyle); - return $newstyle; } } - return; } - -sub fracSizer { - my ($numerator, $denominator) = @_; - my $w = $numerator->getWidth->larger($denominator->getWidth); - my $d = $denominator->getTotalHeight->multiply(0.5); - my $h = $numerator->getTotalHeight->add($d); - return ($w, $h, $d); } - -# \lx@generalized@over{reversion}{keyvals}{top}{bottom} -# keyvals: role,meaning, left,right, thickness -DefConstructor('\lx@generalized@over Undigested RequiredKeyVals', - "?#needXMDual(" - . "" - . "" - . "" - . "" - . "" - . "" - . "" - . "#left)()" - . "" - . "" - . "#top" - . "#bottom" - . "" - . "?#needXMDual(#right" - . "" - . ")()", - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $kv = $whatsit->getArg(2); - # Really, we want the mathstyle that was in effect BEFORE the group starting the numerator! - # (there could be a \displaystyle INSIDE the numerator, but that's not the one we want) - # Of course the group that started the numerator may be the start of the Math, itself! - # AND, the numerator, which was already digested, needs it's mathstyle ADJUSTED - my $font = ($STATE->isValueBound('MODE', 0) # Last stack frame was a mode switch!?!?! - ? $STATE->lookupValue('font') # then just use whatever font we've got - : ($STATE->isValueBound('font', 0) # else if font was set in numerator - && $STATE->valueInFrame('font', 1)) - || $STATE->lookupValue('font') # then just use whatever font we've got - ); - my $style = $font->getMathstyle; - my $role = ToString($kv->getValue('role')); - my $meaning = ToString($kv->getValue('meaning')); - my $thickness = ToString($kv->getValue('thickness')); - $role = 'FRACOP' unless $role; - $meaning = 'divide' if (!$meaning) && ($thickness ne '0pt'); - # Unfortunately, the numerator's already digested! We have to adjust it's mathstyle - my @top = $stomach->regurgitate; - # really have to pass +/-1, +/-2 etc..! 
- adjustMathstyle($style, {}, @top); - MergeFont(fraction => 1); - my @bot = $stomach->digestNextBody(); - my $closing = pop(@bot); # We'll leave whatever closed the list (endmath, endgroup...) - $whatsit->setProperties( - top => List(@top, mode => 'math'), - bottom => List(@bot, mode => 'math'), - role => $role, - meaning => $meaning, - thickness => $thickness, - mathstyle => $style); - if ($kv->getValue('left') || $kv->getValue('right')) { - $whatsit->setProperties(needXMDual => 1, - xmkey0 => LaTeXML::Package::getXMArgID(), - xmkey1 => LaTeXML::Package::getXMArgID(), - xmkey2 => LaTeXML::Package::getXMArgID()); } - return $closing; }, # and leave the closing bit, whatever it is. - properties => sub { %{ $_[2]->getKeyVals }; }, - sizer => sub { fracSizer($_[0]->getProperty('top'), $_[0]->getProperty('bottom')); }, - reversion => sub { - my ($whatsit) = @_; - (Revert($whatsit->getProperty('top')), - $whatsit->getArg(1)->unlist, - Revert($whatsit->getProperty('bottom'))); }); - -DefMacro('\choose', - '\lx@generalized@over{\choose}{meaning=binomial,thickness=0pt,left=\@left(,right=\@right)}'); -DefMacro('\brace', - '\lx@generalized@over{\brace}{thickness=0pt,left=\@left\{,right=\@right\}}'); -DefMacro('\brack', - '\lx@generalized@over{\brack}{thickness=0pt,left=\@left[,right=\@right]}'); -DefMacro('\atop', - '\lx@generalized@over{\atop}{thickness=0pt}'); -DefMacro('\atopwithdelims Token Token', - '\lx@generalized@over{\atopwithdelims #1 #2}{thickness=0pt,left={\@left#1},right={\@right#2}}'); -DefMacro('\over', - '\lx@generalized@over{\over}{meaning=divide}'); -DefMacro('\overwithdelims Token Token', - '\lx@generalized@over{\overwithdelims #1 #2}{left={\@left#1},right={\@right#2},meaning=divide}'); -# My thinking was that this is a "fraction" providing the dimension is > 0! 
-DefMacro('\above Dimension', - '\lx@generalized@over{\above #1}{meaning=divide,thickness=#1}'); -DefMacro('\abovewithdelims Token Token Dimension', -'\lx@generalized@over{\abovewithdelims #1 #2 #3}{left={\@left#1},right={\@right#2},meaning=divide,thickness=#3}'); - -#====================================================================== -DefPrimitiveI('\cal', undef, undef, - font => { family => 'caligraphic', series => 'medium', shape => 'upright' }); - -# In principle, is a nice markup for emphasized. -# Unfortunately, TeX really just treats it as a font switch. -# Something like: \em et.al. \rm more stuff -# works in TeX, but in our case, since there is no explicit {}, -# the stays open! Ugh! -# This could still be made to work, but merge font would -# need to look at any open , and then somehow close it! -DefPrimitiveI('\em', undef, undef, - beforeDigest => sub { - my $font = LookupValue('font'); - my $shape = $font->getShape; - AssignValue(font => $font->merge(shape => ($shape eq 'italic' ? 'normal' : 'italic')), - 'local'); }); - -# Change math font while still in text! -DefPrimitiveI('\boldmath', undef, undef, - beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 1), 'local'); }, - forbidMath => 1); -DefPrimitiveI('\unboldmath', undef, undef, - beforeDigest => sub { AssignValue(mathfont => LookupValue('mathfont')->merge(forcebold => 0), 'local'); }, - forbidMath => 1); - -#====================================================================== -# Alignments - -# & gives an error except within the right context -# (which should redefine it!) -DefConstructorI('&', undef, sub { Error('unexpected', '&', $_[0], "Stray alignment \"&\""); }); - -#********************************************************************** -# Plain; Extracted from Appendix B. -#********************************************************************** - -#====================================================================== -# TeX Book, Appendix B, p. 
344 -#====================================================================== -RawTeX('\outer\def^^L{\par}'); -DefMacro('\dospecials', '\do\ \do\\\do\{\do\}\do\$\do\&\do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~'); - -# Normally, the content branch contains the pure structure and meaning of a construct, -# and the presentation is generated from lower level TeX macros that only concern -# themselves with how to display the object. -# Nevertheless, it is sometimes useful to know where the tokens in the presentation branch -# came from; particularly what their presumed "meaning" is. -# For example, when search-indexing pmml, or providing links to definitions from the pmml. -# -# The following constructor (see how it's used in DefMath), adds meaning attributes -# whereever it seems sensible on the presentation branch, after it has been generated. -DefConstructor('\@ASSERT@MEANING{}{}', '#2', - reversion => '#2', - afterConstruct => sub { - my ($document, $whatsit) = @_; - my $node = $document->getNode; # This should be the wrapper just added. - my $meaning = ToString($whatsit->getArg(1)); - addMeaningRec($document, $node, $meaning); - $node; }); - -sub addMeaningRec { - my ($document, $node, $meaning) = @_; - if ($node->nodeType == XML_ELEMENT_NODE) { - my $qname = $document->getModel->getNodeQName($node); - if ($qname eq 'ltx:XMArg') { } # DONT cross through into arguments! - elsif ($qname eq 'ltx:XMTok') { - if ((($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN') - && !$node->getAttribute('meaning')) { - $document->setAttribute($node, meaning => $meaning); } } - else { - foreach my $c ($node->childNodes) { - addMeaningRec($document, $c, $meaning); } } } - return; } - -#====================================================================== -# Properties for plain characters. -# These are allowed in plain text, but need to act a bit special in math. 
-DefMathI('=', undef, '=', role => 'RELOP', meaning => 'equals'); -DefMathI('+', undef, '+', role => 'ADDOP', meaning => 'plus'); -DefMathI('-', undef, '-', role => 'ADDOP', meaning => 'minus'); -## Redefine, if we want Unicode minus -##DefMathI('-', undef, "\x{2212}", role => 'ADDOP', meaning => 'minus'); -DefMathI('*', undef, "\x{2217}", role => 'MULOP', meaning => 'times'); -DefMathI('/', undef, '/', role => 'MULOP', meaning => 'divide'); -DefMathI('!', undef, '!', role => 'POSTFIX', meaning => 'factorial'); -DefMathI(',', undef, ',', role => 'PUNCT'); -DefMathI('.', undef, '.', role => 'PERIOD'); -DefMathI(';', undef, ';', role => 'PUNCT'); -DefMathI('(', undef, '(', role => 'OPEN', stretchy => 'false'); -DefMathI(')', undef, ')', role => 'CLOSE', stretchy => 'false'); -DefMathI('[', undef, '[', role => 'OPEN', stretchy => 'false'); -DefMathI(']', undef, ']', role => 'CLOSE', stretchy => 'false'); -DefMathI('|', undef, '|', role => 'VERTBAR', stretchy => 'false'); -DefMathI(':', undef, ':', role => 'METARELOP', name => 'colon'); # Seems like good default role -DefMathI('<', undef, '<', role => 'RELOP', meaning => 'less-than'); -DefMathI('>', undef, '>', role => 'RELOP', meaning => 'greater-than'); - -# NOTE: Need to evolve Ligatures to be easier to write. -# rough draft of tool to make ligatures more sane to write... -# It is tempting to handle these with macros, -# But that tends to run afoul of tricky packages like babel that make : active as well! -# Even using mathactive doesn't help. -sub TestNode { - my ($node, $qname, $content, %attrib) = @_; - return $node - && ($LaTeXML::DOCUMENT->getModel->getNodeQName($node) eq $qname) - && ((!defined $content) || (($node->textContent || '') eq $content)) - && !grep { $node->getAttribute($_) ne $attrib{$_} } keys %attrib; } - -# Recognize !! -DefMathLigature("!!" 
=> "!!", role => 'POSTFIX', meaning => 'double-factorial'); - -# Recognize := -DefMathLigature(":=" => ":=", role => 'RELOP', meaning => 'assign'); - -#====================================================================== -# Combine letters, when the fonts are right. (sorta related to mathcode) -# well, maybe a letter followed by letters & digits? -DefMathLigature(matcher => sub { my ($document, $node) = @_; - my @chars = (); - my $font = $document->getNodeFont($node); - if ($font->isSticky) { - my $n = 0; - my $string = ''; - my $s = ''; - while ($node - && ($document->getModel->getNodeQName($node) eq 'ltx:XMTok') - && ($document->getNodeFont($node)->equals($font)) - && (!$node->hasAttribute('name')) - && ((($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN') - || (($node->getAttribute('role') || 'UNKNOWN') eq 'NUMBER')) - && (($s = $node->textContent . $s) =~ /^[0-9a-zA-Z]+$/)) { - $string = $s; - do { $node = $node->previousSibling; $n++; - } while $node && ($node->nodeType == XML_COMMENT_NODE); } - (($string =~ /^[a-zA-Z]/) && ($n > 1) ? ($n, $string, role => 'UNKNOWN', meaning => undef) : undef); -} }); - -#====================================================================== -# Combine digits in math. - -foreach my $digit (qw(0 1 2 3 4 5 6 7 8 9)) { - DefMathI($digit, undef, $digit, role => 'NUMBER', meaning => $digit); } - -# Would probably be best to collapse all XMHint/spaces at the earliest stage. -our %space_chars = (negthinspace => '', thinspace => "\x{2009}", - medspace => "\x{2005}", thickspace => "\x{2004}", space => ' '); - -# This is getting out-of-hand; -# (1) this gets done after document build, so we query the document/node for language -# rather than using something specified during digestion (eg. macros, roles...) -# (2) the way we've specified the decimal & thousands separators (language dependent) -# is completely insufficient; should leverage numprint or babel or ... ? 
-# (3) the way we're detecting the chars is a mess: a mix of string content & role! -# If we could accommodate multiple roles, maybe a separate role could be set on the tokens -# (a period could be a PERIOD or a DECIMAL_SEPARATOR, eg) - -my %decimal_separator = (en => '.', de => ',', fr => ',', nl => ',', pt => ',', es => ','); -my %thousands_separator = (en => ',', de => '.', fr => '.', nl => '.', pt => '.', es => '.'); -DefMathLigature(matcher => sub { my ($document, $node) = @_; - my $lang = $document->getNodeLanguage($node); - $lang =~ s/-\w+$// if $lang; # strip off region code, if any. - my $dec = ($lang && $decimal_separator{$lang}) || '.'; - my $thou = ($lang && $thousands_separator{$lang}) || ','; - my $decrole = ($dec eq '.' ? 'PERIOD' : ''); - # my $skip = Dimension('5mu')->valueOf; - my @chars = (); - my ($n, $string, $number, $w, $font) = (0, '', '', 0, undef); - # NOTE: We're scanning chars from END! - while ($node) { - my $qn = $document->getModel->getNodeQName($node); - if ($qn =~ /^(ltx:XMTok|ltx:XMWrap)$/) { - my $r = ($node->getAttribute('role') || ''); - my $f = $document->getNodeFont($node); - my $text = $node->textContent; - if (($r eq 'NUMBER') && (!$font || ($f->equals($font)))) { # A number in same font? - $font = $f; - $string = $text . $string; - $number = $node->getAttribute('meaning') . $number; } - elsif (!$n) { # any following cases are not allowed as LAST char - last; } - # if thousands separator (but NOT simultaneously PUNCT!!!! Be paranoid about lists) - elsif (($text eq $thou) && ($r ne 'PUNCT')) { - $string = $text . $string; } # Add to string, but omit from number - # if decimal separator, turn it into "standard" "." - elsif (($text eq $dec) || ($r eq $decrole)) { # was $r eq 'PERIOD' - $string = $node->textContent . $string; - $number = '.' . $number; } - else { - last; } } - # OR if XMHint with 0 <= width <= thickmuskip (5mu == ?) 
- elsif ($qn eq 'ltx:XMHint') { - my $s; - if (($s = $node->getAttribute('name')) && ($s = $space_chars{$s})) { - $string = $s . $string; } - else { - last; } } - else { - last; } - do { $node = $node->previousSibling; $n++; - } while $node && ($node->nodeType == XML_COMMENT_NODE); } - if (($n > 1) && ($number =~ /\d/)) { - ($n, $string, meaning => $number, role => 'NUMBER'); } }); - -# This needs to be applied AFTER numbers have been resolved! -# If we have a non-negative integer (no signs, decimals,...) -# followed by a fraction dividing two non-negative integers, -# Figure it's a mixed fraction --- ADDING the fraction to the number, not multiplying! -DefRewrite(select => ['descendant-or-self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]' - . '[ following-sibling::*[1][self::ltx:XMApp]' - . ' [child::*[1][self::ltx:XMTok[@meaning="divide"]]]' - . ' [child::*[2][' - . 'self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]' - . 'or self::ltx:XMArg[count(child::*)=1]/ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]' - . ']]' - . ' [child::*[3][' - . 'self::ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]' - . 'or self::ltx:XMArg[count(child::*)=1]/ltx:XMTok[@role="NUMBER" and translate(@meaning,"0123456789","")=""]' - . ']]' - . ']', - 2], - replace => sub { my ($document, $number, $frac) = @_; - my $box = $document->getNodeBox($number); - $document->openElement('ltx:XMApp', _box => $box); - $document->insertMathToken("\x{2064}", # Invisible Plus! - meaning => 'plus', role => "ADDOP", _box => $box); - $document->getNode->appendChild($number); - $document->getNode->appendChild($frac); - $document->closeElement('ltx:XMApp'); }); - -#====================================================================== -# TeX Book, Appendix B, p. 
345 - -RawTeX(<<'EoTeX'); - \chardef\active=13 - \chardef\@ne=1 - \chardef\tw@=2 - \chardef\thr@@=3 - \chardef\sixt@@n=16 - \chardef\@cclv=255 - \mathchardef\@cclvi=256 - \mathchardef\@m=1000 - \mathchardef\@M=10000 - \mathchardef\@MM=20000 - \countdef\m@ne=21\relax - \m@ne=-1 -EoTeX - -#====================================================================== -# TeX Book, Appendix B, p. 346 - -RawTeX(<<'EoTeX'); - \countdef\count@=255 - \toksdef\toks@=0 - \skipdef\skip@=0 - \dimendef\dimen@=0 - \dimendef\dimen@i=1 - \dimendef\dimen@ii=2 -\count10=22 % allocates \count registers 23, 24, ... -\count11=9 % allocates \dimen registers 10, 11, ... -\count12=9 % allocates \skip registers 10, 11, ... -\count13=9 % allocates \muskip registers 10, 11, ... -\count14=9 % allocates \box registers 10, 11, ... -\count15=9 % allocates \toks registers 10, 11, ... -\count16=-1 % allocates input streams 0, 1, ... -\count17=-1 % allocates output streams 0, 1, ... -\count18=3 % allocates math families 4, 5, ... -\count19=0 % allocates \language codes 1, 2, ... -\count20=255 % allocates insertions 254, 253, ... -\countdef\insc@unt=20 -\countdef\allocationnumber=21 -\countdef\m@ne=22 \m@ne=-1 -EoTeX -# Various \count's are set; should we? - -#====================================================================== -# TeX Book, Appendix B, p. 
347 -DefPrimitive('\wlog{}', sub { - NoteLog(ToString(Expand($_[1]))); - return; }, - locked => 1); -# From plain.tex -DefPrimitive('\newcount DefToken', sub { - DefRegisterI($_[1], undef, Number(0), allocate => '\count'); }); -DefPrimitive('\newdimen DefToken', sub { - DefRegisterI($_[1], undef, Dimension(0), allocate => '\dimen'); }); -DefPrimitive('\newskip DefToken', sub { - DefRegisterI($_[1], undef, Glue(0), allocate => '\skip'); }); -DefPrimitive('\newmuskip DefToken', sub { - DefRegisterI($_[1], undef, MuGlue(0), allocate => '\muskip'); }); -AssignValue(allocated_boxes => 0); -DefPrimitive('\newbox DefToken', sub { - my $n = LookupValue('allocated_boxes'); - AssignValue(allocated_boxes => $n + 1, 'global'); - AssignValue("box$n", List()); - DefRegisterI($_[1], undef, Number($n), readonly => 1); }); -DefPrimitive('\newhelp DefToken {}', sub { AssignValue(ToString($_[1]) => $_[2]); }); -DefPrimitive('\newtoks DefToken', sub { DefRegisterI($_[1], undef, Tokens()); }); -# the next 4 actually work by doing a \chardef instead of \countdef, etc. -# which means they actually work quite differently -DefPrimitive('\alloc@@ {}', sub { - my ($stomach, $type) = @_; - my $c = 'allocation @' . 
ToString($type); - my $n = LookupValue($c) || '0'; - $n = $n->valueOf if ref $n; - AssignValue($c => $n + 1, 'global'); - AssignRegister('\allocationnumber' => Number($n), 'global'); }); -DefMacro('\newread DefToken', '\alloc@@{read}\global\chardef#1=\allocationnumber'); -DefMacro('\newwrite DefToken', '\alloc@@{write}\global\chardef#1=\allocationnumber'); -DefMacro('\newfam DefToken', '\alloc@@{fam}\global\chardef#1=\allocationnumber'); -DefMacro('\newlanguage DefToken', '\alloc@@{language}\global\chardef#1=\allocationnumber'); - -DefMacro('\e@alloc{}{}{}{}{}{}', - '\global\advance#3\@ne -% \e@ch@ck{#3}{#4}{#5}#1% - \allocationnumber#3\relax - \global#2#6\allocationnumber -% \wlog{\string#6=\string#1\the\allocationnumber} -'); -DefMacro('\alloc@{}{}{}{}', '\e@alloc#2#3{\count1#1}#4\float@count'); -DefMacro('\newread', '\e@alloc\read \chardef{\count16}\m@ne\sixt@@n'); -DefMacro('\newwrite', '\e@alloc\write - {\ifnum\allocationnumber=18 - \advance\count17\@ne - \allocationnumber\count17 % - \fi - \global\chardef}% - {\count17}% - \m@ne - {128}'); - -# This implementation is quite wrong -DefPrimitive('\newinsert Token', sub { DefRegisterI($_[1], undef, Number(0)); }); -# \alloc@, \ch@ck - -# TeX plain uses \newdimen, etc. for these. -# Is there any advantage to that? -DefRegister('\maxdimen', Dimension(16383.99999 * $UNITY)); -DefRegister('\hideskip', Glue('-1000pt plus 1fill')); -DefRegister('\centering', Glue('0pt plus 1000pt minus 1000pt')); -DefRegister('\p@', Dimension($UNITY)); -DefRegister('\z@', Dimension(0)); -DefRegister('\z@skip', Glue(0, 0, 0)); - -# First approximation. till I figure out \newbox -RawTeX('\newbox\voidb@x'); -#====================================================================== -# TeX Book, Appendix B, p. 348 - -DefPrimitive('\newif DefToken', sub { - my ($ignore, $cs) = @_; - DefConditionalI($cs, undef); - return; }); - -# See the section Registers & Parameters, above for setting default values. 
-#====================================================================== -# TeX Book, Appendix B, p. 349 -# See the section Registers & Parameters, above for setting default values. - -# These are originally defined with \newskip, etc -DefRegister('\smallskipamount' => Glue('3pt plus 1pt minus 1pt')); -DefRegister('\medskipamount' => Glue('6pt plus 2pt minus 2pt')); -DefRegister('\bigskipamount' => Glue('12pt plus 4pt minus 4pt')); -DefRegister('\normalbaselineskip' => Glue('12pt')); -DefRegister('\normallineskip' => Glue('1pt')); -DefRegister('\normallineskiplimit' => Dimension('0pt')); -DefRegister('\jot' => Dimension('3pt')); -DefRegister('\lx@default@jot' => LookupRegister('\jot')); -DefRegister('\interdisplaylinepenalty' => Number(100)); -DefRegister('\interfootnotelinepenalty' => Number(100)); - -DefMacroI('\magstephalf', undef, '1095'); -our @mags = (1000, 1200, 1440, 1728, 2074, 2488); -DefMacro('\magstep{}', sub { - my $level = ToString($_[1]); - $level = ($level =~ /^\d$/) ? int($level) : 0; - $level = 0 unless $level >= 0 and $level < 6; - Explode($mags[$level]); }); - -#====================================================================== -# TeX Book, Appendix B, p. 350 - -# Font stuff ... -RawTeX(<<'EoTeX'); - \font\tenrm=cmr10 - \font\sevenrm=cmr7 - \font\fiverm=cmr5 - \font\teni=cmmi10 - \font\seveni=cmmi7 - \font\fivei=cmmi7 - \font\tensy=cmsy10 - \font\sevensy=cmsy7 - \font\fivesy=cmsy5 - \font\tenex=cmex10 - \font\tenbf=cmbx10 - \font\sevenbf=cmbx7 - \font\fivebf=cmbx5 - \font\tensl=cmsl10 - \font\tentt=cmtt10 - \font\tenit=cmti10 - \newfam\itfam - \newfam\slfam - \newfam\bffam - \newfam\ttfam -\textfont0=\tenrm\scriptfont0=\sevenrm\scriptscriptfont0=\fiverm -\textfont1=\teni\scriptfont1=\seveni\scriptscriptfont1=\fivei -\textfont2=\tensy\scriptfont2=\sevensy\scriptscriptfont2=\fivesy -\textfont3=\tenex -EoTeX -# Note: \newfam in math should be font switching(?) 
- -#====================================================================== -# TeX Book, Appendix B, p. 351 - -# Old style font styles. -# The trick is to create an empty Whatsit preserved till assimilation (for reversion'ing) -# but to change the current font used in boxes. -# (some of these were defined on different pages? or even latex...) -Tag('ltx:text', autoOpen => 1, autoClose => 1); - -# Note that these, unlike \rmfamily, should set the other attributes to the defaults! -DefPrimitiveI('\rm', undef, undef, - font => { family => 'serif', series => 'medium', shape => 'upright' }); -DefPrimitiveI('\sf', undef, undef, - font => { family => 'sansserif', series => 'medium', shape => 'upright' }); -DefPrimitiveI('\bf', undef, undef, - font => { series => 'bold', family => 'serif', shape => 'upright' }); -DefPrimitiveI('\it', undef, undef, - font => { shape => 'italic', family => 'serif', series => 'medium' }); -DefPrimitiveI('\tt', undef, undef, - font => { family => 'typewriter', series => 'medium', shape => 'upright' }); -# No effect in math for the following 2 ? 
-DefPrimitiveI('\sl', undef, undef, - font => { shape => 'slanted', family => 'serif', series => 'medium' }); -DefPrimitiveI('\sc', undef, undef, - font => { shape => 'smallcaps', family => 'serif', series => 'medium' }); - -# Ideally, we should set these sizes from class files -AssignValue(NOMINAL_FONT_SIZE => 10); -DefPrimitiveI('\tiny', undef, undef, font => { size => 5 }); -DefPrimitiveI('\scriptsize', undef, undef, font => { size => 7 }); -DefPrimitiveI('\footnotesize', undef, undef, font => { size => 8 }); -DefPrimitiveI('\small', undef, undef, font => { size => 9 }); -DefPrimitiveI('\normalsize', undef, undef, font => { size => 10 }); -DefPrimitiveI('\large', undef, undef, font => { size => 12 }); -DefPrimitiveI('\Large', undef, undef, font => { size => 14.4 }); -DefPrimitiveI('\LARGE', undef, undef, font => { size => 17.28 }); -DefPrimitiveI('\huge', undef, undef, font => { size => 20.74 }); -DefPrimitiveI('\Huge', undef, undef, font => { size => 29.8 }); - -DefPrimitiveI('\mit', undef, undef, requireMath => 1, font => { family => 'italic' }); - -DefPrimitiveI('\frenchspacing', undef, undef); -DefPrimitiveI('\nonfrenchspacing', undef, undef); -DefMacroI('\normalbaselines', undef, - '\lineskip=\normallineskip\baselineskip=\normalbaselineskip\lineskiplimit=\normallineskiplimit'); -DefMacroI('\space', undef, Tokens(T_SPACE)); -DefMacroI('\lq', undef, "`"); -DefMacroI('\rq', undef, "'"); -Let('\empty', '\@empty'); -DefMacroI('\null', undef, '\hbox{}'); -Let('\bgroup', T_BEGIN); -Let('\egroup', T_END); -Let('\endgraf', '\par'); -Let('\endline', '\cr'); - -DefPrimitiveI('\endline', undef, undef); - -# Use \r for the newline from TeX!!! -DefMacroI("\\\r", undef, '\ '); # \ == \ Interesting (see latex.ltx) -Let(T_ACTIVE("\r"), '\par'); # (or is this just LaTeX?) - -Let("\\\t", "\\\r"); # \ == \, also - -#====================================================================== -# TeX Book, Appendix B, p. 
352 - -DefPrimitiveI('\obeyspaces', undef, sub { - AssignCatcode(" " => 13); - Let(T_ACTIVE(" "), '\space'); - return }); -# Curiously enough, " " (a space) is ALREADY defined to be the same as "\space" -# EVEN before it is made active. (see p.380) -Let(T_ACTIVE(" "), '\space'); - -DefPrimitiveI('\obeylines', undef, sub { - AssignCatcode("\r" => 13); - Let(T_ACTIVE("\r"), '\@break'); # More appropriate than \par, I think? - return }); - -DefConstructor('\@break', "", properties => { isBreak => 1 }); - -RawTeX(<<'EoTeX'); -\def\loop#1\repeat{\def\body{#1}\iterate} -\def\iterate{\body \let\next=\iterate \else\let\next=\relax\fi \next} -\let\repeat=\fi -EoTeX - -DefPrimitiveI('\enskip', undef, sub { - Box("\x{2002}", undef, undef, T_CS('\enskip'), - name => 'enskip', width => Dimension('0.5em'), isSpace => 1); }); - -DefPrimitiveI('\enspace', undef, sub { - Box("\x{2002}", undef, undef, T_CS('\enspace'), - name => 'enskip', width => Dimension('0.5em'), isSpace => 1); }); - -DefPrimitiveI('\quad', undef, sub { - Box("\x{2003}", undef, undef, T_CS('\quad'), - name => 'quad', width => Dimension('1em'), isSpace => 1); }); - -# Conceivably should be treated as punctuation! (but maybe even \quad should !?!) 
-DefPrimitiveI('\qquad', undef, sub { - Box("\x{2003}\x{2003}", undef, undef, T_CS('\qquad'), - name => 'qquad', width => Dimension('2em'), isSpace => 1, asHint => 1); }); - -DefPrimitiveI('\thinspace', undef, sub { - Box("\x{2009}", undef, undef, T_CS('\thinspace'), - name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); }); - -DefPrimitiveI('\negthinspace', undef, sub { - Box("", undef, undef, T_CS('\negthinspace'), - name => 'negthinspace', width => Dimension('-0.16667em'), isSpace => 1); }); - -# DefConstructor('\hglue Glue', "?#isMath()(\x{2003})", -# properties => sub { (isSpace => 1, width => $_[1]); }); - -DefPrimitive('\hglue Glue', sub { - my ($stomach, $length) = @_; - my $s = DimensionToSpaces($length); - return unless defined $s; - Box($s, undef, undef, Invocation(T_CS('\hglue'), $length), - name => 'hglue', width => $length, isSpace => 1); }); - -DefPrimitive('\vglue Glue', undef); -DefPrimitiveI('\topglue', undef, undef); -DefPrimitiveI('\nointerlineskip', undef, undef); -DefPrimitiveI('\offinterlineskip', undef, undef); - -DefMacroI('\smallskip', undef, '\vskip\smallskipamount'); -DefMacroI('\medskip', undef, '\vskip\medskipamount'); -DefMacroI('\bigskip', undef, '\vskip\bigskipamount'); - -#====================================================================== -# TeX Book, Appendix B, p. 
353 - -DefPrimitiveI('\break', undef, undef); -DefPrimitiveI('\nobreak', undef, undef); -DefPrimitiveI('\allowbreak', undef, undef); - -DefPrimitiveI('\nobreakspace', undef, sub { - Box(UTF(0xA0), undef, undef, T_ACTIVE("~"), - width => Dimension('0.333em'), isSpace => 1); }); - -DefMacro("~", '\nobreakspace{}'); - -DefMacroI('\slash', undef, '/'); -DefPrimitiveI('\filbreak', undef, undef); -DefMacroI('\goodbreak', undef, '\par'); -DefMacroI('\eject', undef, '\par\LTX@newpage'); -Let('\newpage', '\eject'); -DefConstructorI('\LTX@newpage', undef, "^"); - -DefMacroI('\supereject', undef, '\par\LTX@newpage'); -DefPrimitiveI('\removelastskip', undef, undef); -DefMacroI('\smallbreak', undef, '\par'); -DefMacroI('\medbreak', undef, '\par'); -DefMacroI('\bigbreak', undef, '\par'); - -DefMacroI('\line', undef, '\hbox to \hsize'); -DefMacro('\leftline Undigested', '\ltx@leftline{\hbox{#1}}'); -DefMacro('\rightline Undigested', '\ltx@rightline{\hbox{#1}}'); -DefMacro('\centerline Undigested', '\ltx@centerline{\hbox{#1}}'); -DefConstructor('\ltx@leftline{}', sub { - alignLine($_[0], $_[1], 'left'); }, - alias => '\leftline', - bounded => 1); -DefConstructor('\ltx@rightline{}', sub { - alignLine($_[0], $_[1], 'right'); }, - alias => '\rightline', - bounded => 1); -DefConstructor('\ltx@centerline{}', sub { - alignLine($_[0], $_[1], 'center'); }, - alias => '\centerline', - bounded => 1); - -sub alignLine { - my ($document, $line, $alignment) = @_; - if ($document->isOpenable('ltx:p')) { - $document->insertElement('ltx:p', $line, class => 'ltx_align_' . $alignment); } - elsif ($document->isOpenable('ltx:text')) { - $document->insertElement('ltx:text', $line, class => 'ltx_align_' . $alignment); - $document->insertElement('ltx:break'); } - else { - $document->absorb($line); } - return; } - -# These should be 0 width, but perhaps also shifted? 
-DefMacro('\llap{}', '\hbox to 0pt{\hss#1}'); -DefMacro('\rlap{}', '\hbox to 0pt{#1\hss}'); - -DefMacroI('\m@th', undef, '\mathsurround=0pt '); - -# \strutbox -DefMacroI('\strut', undef, Tokens()); -RawTeX('\newbox\strutbox'); - -#====================================================================== -# TeX Book, Appendix B. p. 354 - -# TODO: Not yet done!! -# tabbing stuff!!! - -DefMacroI('\settabs', undef, undef); - -#====================================================================== -# TeX Book, Appendix B. p. 355 - -# TODO: \item, \itemitem not done! -# This could probably be adopted from LaTeX, if the could auto-open -# and close! -DefMacro('\hang', '\hangindent\parindent'); -DefMacro('\item', '\par\hang\textindent'); -DefMacro('\itemitem', '\par\indent \hangindent2\parindent \textindent'); -DefMacro('\textindent{}', '\indent\llap{#1\enspace}\ignorespaces'); -DefMacro('\narrower', '\advance\leftskip by\parindent' - . '\advance\rightskip by\parindent'); - -#---------------------------------------------------------------------- -# General support for Front Matter. -# Not (yet) used by TeX (finish plain?) -# But provides support for LaTeX (and other formats?) for handling frontmatter. -# -# The idea is to accumulate any frontmatter material (title, author,...) -# rather than directly drop it into the digested stream. -# When we begin constructing the document, all accumulated material is output. -# See LaTeX.ltxml for usage. -# Note: could be circumstances where you'd want modular frontmatter? -# (ie. frontmatter for each sectional unit) -AssignValue(frontmatter => {}, 'global'); - -DefConditionalI('\if@in@preamble', undef, sub { LookupValue('inPreamble'); }); - -# Add a new frontmatter item that will be enclosed in <$tag %attr>... -# The content is the result of digesting $tokens. 
-# \@add@frontmatter[keys]{tag}[attributes]{content} -# keys can have -# replace (to replace the current entry, if any) -# ifnew (only add if no previous entry) -DefPrimitive('\@add@frontmatter OptionalKeyVals {} OptionalKeyVals {}', sub { - my ($stomach, $keys, $tag, $attr, $tokens) = @_; - # Digest this as if we're already in the document body! - my $frontmatter = LookupValue('frontmatter'); - my $inpreamble = LookupValue('inPreamble'); - AssignValue(inPreamble => 0); - # Be careful since the contents may also want to add frontmatter - # (which should be inside or after this one!) - # So, we append this entry before digesting - $tag = ToString($tag); - if ($keys && $keys->hasKey('replace') && $$frontmatter{$tag}) { # if replace and previous entries - $$frontmatter{$tag} = []; } # Remove previous entries - if ($keys && $keys->hasKey('ifnew') && $$frontmatter{$tag}) { # if ifnew and previous entries - return; } # Skip this one. - my $entry = [$tag, undef, 'to-be-filled-in']; - push(@{ $$frontmatter{$tag} }, $entry); - if ($attr) { - $$entry[1] = { $attr->beDigested($stomach)->getHash }; } - $$entry[2] = Digest(Tokens(T_BEGIN, $tokens, T_END)); - AssignValue(inPreamble => $inpreamble); - return; }, - beforeDigest => sub { - $_[0]->bgroup; }, - afterDigest => sub { - $_[0]->egroup; }); - -# Append a piece of data to an existing frontmatter item that is contained in <$tag> -# If $label is given, look for an item which has label=>$label, -# otherwise, just append to the last item in $tag. 
- -# \@add@to@frontmatter{tag}[label]{content} -DefPrimitive('\@add@to@frontmatter {} [] {}', sub { - my ($stomach, $tag, $label, $tokens) = @_; - $tag = ToString($tag); - $label = ToString($label) if $label; - my $frontmatter = LookupValue('frontmatter'); - - my $inpreamble = LookupValue('inPreamble'); - AssignValue(inPreamble => 0); - my $datum = Digest(Tokens(T_BEGIN, $tokens, T_END)); - AssignValue(inPreamble => $inpreamble); - if ($label) { - my $entry; - foreach my $item (@{ $$frontmatter{$tag} || [] }) { - my ($itag, $iattr, @stuff) = @$item; - if ($label eq ($$iattr{label} || '')) { - push(@$item, $datum); - return; } } } - elsif (my $list = $$frontmatter{$tag}) { - push(@{ $$list[-1] }, $datum); - return; } - push(@{ $$frontmatter{$tag} }, [$tag, ($label ? { label => $label } : undef), $datum]); - return; }, - beforeDigest => sub { - $_[0]->bgroup; }, - afterDigest => sub { - $_[0]->egroup; }); - -# This is called by afterOpen (by default on ) to -# output any frontmatter that was accumulated. - -my @frontmatter_elements = (qw(ltx:title ltx:toctitle ltx:subtitle - ltx:creator ltx:date - ltx:abstract ltx:keywords ltx:classification ltx:acknowledgements)); -my %frontmatter_elements = map { ($_ => 1) } @frontmatter_elements; - -# Insert FrontMatter into document, if not already added -sub insertFrontMatter { - my ($document) = @_; - return if LookupValue('frontmatter_done'); - my $frontmatter = LookupValue('frontmatter'); - my @set_keys = $frontmatter ? (keys %$frontmatter) : (); - # if doc ONLY has abstract as frontmatter, defer until abstract's document location - if ((scalar(@set_keys) == 1) && ($set_keys[0] eq 'ltx:abstract') && - !LookupValue('frontmatter_deferred')) { - AssignValue(frontmatter_deferred => 1, 'global'); - return; } - AssignValue(frontmatter_done => 1, 'global'); # OK, we're placing FrontMatter here, now. 
- foreach my $key (@frontmatter_elements, grep { !$frontmatter_elements{$_} } @set_keys) { - if (my $list = $$frontmatter{$key}) { - # Dubious, but assures that frontmatter appears in text mode... - local $LaTeXML::BOX = Box('', $STATE->lookupValue('font'), '', T_SPACE); - foreach my $item (@$list) { - my ($tag, $attr, @stuff) = @$item; - # add a dedicated class for frontmatter notes, - # in the case we want to style those uniformly. - if ($tag eq 'ltx:note') { - $attr ||= {}; - $$attr{class} = ($$attr{class} ? $$attr{class} . ' ' : '') . 'ltx_note_frontmatter'; } - $document->openElement($tag, ($attr ? %$attr : ()), - (scalar(@stuff) && $document->canHaveAttribute($tag, 'font') - ? (font => $stuff[0]->getFont, _force_font => 'true') : ())); - map { $document->absorb($_) } @stuff; - my $completed_node = $document->closeElement($tag); - # At this time, the frontmatter element should really carry the actual literal values intended. - # Thus, if we see an empty element, something went wrong -- including our bindings are too verbose, - # as e.g. \preprint{} always generates a ltx:note element. - # - # To solve this in a single location: prune here! - if (($tag ne "ltx:rdf") && !scalar($completed_node->childNodes)) { - $document->removeNode($completed_node); } } } } - return; } - -# Add FrontMatter at document begin, unless deferred to a better position. -Tag('ltx:document', 'afterOpen:late' => sub { - insertFrontMatter($_[0]) unless LookupValue('frontmatter_deferred'); }); -# Request Frontmatter to appear HERE (if not already done), -# deferring it from document begin. -DefConstructor('\lx@frontmatterhere', sub { insertFrontMatter($_[0]); }, - afterDigest => sub { AssignValue(frontmatter_deferred => 1, 'global'); }); - -# Maintain a list of classes that apply to the document root. -# This might involve global style options, like leqno. 
-Tag('ltx:document', 'afterOpen:late' => sub { - my ($document, $root) = @_; - if (my $classes = join(' ', LookupMappingKeys('DOCUMENT_CLASSES'))) { - $document->addClass($root, $classes); } }); - -# If folks start using plain TeX macros, and never load LaTeX.pool, -# they might benefit from a ltx-plain.css? -DefMacro('\beginsection Until:\par', '\@beginsection{{\bf #1}}'); -DefConstructor('\@beginsection {}', - "#1"); - -# POSSIBLY #1 is a name or reference number and #2 is the theoremm TITLE -# If so, how do know when the theorem ends? -DefMacroI('\proclaim', parseDefParameters('\proclaim', Tokenize('#1. #2\par')), - '\@proclaim{{\bf #1}}{{\sl #2}}'); -DefConstructor('\@proclaim{}{}', - "" - . "#title" - . "#2", - afterConstruct => sub { $_[0]->maybeCloseElement('ltx:theorem'); }, - properties => sub { - my $title = $_[1]; - (title => $title, titlefont => $title->getFont); }); - -#====================================================================== -# Tags & Titles -# The reference numbers, titles, captions etc, for various objects have -# different styling conventions, and the styling various depending on context. -# We'll use ltx:tags as a container for the various forms of ltx:tag with different @role's. -# The role=refnum form is simply formatted by \the and used by \ref; -# An ltx:tag w/o @role are for the numbers, often formatted differently, which -# appear alongside the object; Such a tag also may be embedded within the title or caption. -# Cross-references automatically generated by LaTeXML benefit from a bit more context: -# these are the role=typerefnum forms. -# Additional forms are needed for bibliographies, hyperref's autoref, etc. -# An additional complication is that while the "type" determines the formatting -# of the various forms, some types (eg. theorems) share the same counter. -# LaTeX defines this handling on an adhoc basis; defines \fnum@table, \fnum@figure for some types -# but \labelenumi, etc for others. 
- -# This section synthesizes a more uniform support for reference numbers, -# references to reference numbers, title formatting etc. -# It allows you to customize each of the forms for each type encountered. -# The design reflects LaTeX needs, more than TeX, but support starts here! - -# This collects up the various declared ltx:tag's into an ltx:tags -DefMacro('\lx@make@tags {}', sub { - my ($gullet, $type) = @_; - my @tags = (); - my $formatters = LookupValue('type_tag_formatter'); - foreach my $role (sort keys %{$formatters}) { - my $formatter = $$formatters{$role}; - push(@tags, Invocation(T_CS('\lx@tag@intags'), T_OTHER($role), - Invocation($formatter, $type))); } - return (T_CS('\lx@tags'), T_BEGIN, @tags, T_END); }); - -# Remove the last closed node, if it's empty. -sub removeEmptyElement { - my ($document, $whatsit) = @_; - my $node = $document->getNode->lastChild; # This should be the wrapper just added. - if (!$node->childNodes) { - $document->removeNode($node); } - return; } - -# \lx@tag[open][close]{stuff} -DefConstructor('\lx@tag[][][]{}', - "#4", - bounded => 1, mode => 'text', - afterConstruct => \&removeEmptyElement); - -# \lx@tag@intags{role}{stuff} -DefConstructor('\lx@tag@intags[]{}', - "#2", - bounded => 1, mode => 'text', - beforeDigest => sub { reenterTextMode(); neutralizeFont() }, - afterConstruct => \&removeEmptyElement); - -DefConstructor('\lx@tags{}', - "#1", - afterConstruct => \&removeEmptyElement); - -#---------------------------------------------------------------------- -# "refnum" is the lowest level reference number for an object is typically \the -# but be sure to use the right counter! This is how \ref will show the number. -# You'll typically customize this by defining \the (and \p@ '\lx@therefnum@@'); - -#---------------------------------------------------------------------- -# \lx@fnum@@{type} Gets the formatted form of the refnum, as part of the object, (no @role). 
-# Customize by defining \fnum@ or \name and \fnum@font@ -# Default uses \fnum@font@ \name prefix + space (if any) and \the. -# When using the "name", uses \name in preference to fallback \lx@name@ -DefMacro('\lx@refnum@compose{}{}', '\expandafter\lx@refnum@compose@\expandafter{#2}{#1}'); -DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2\space#1\fi'); -####DefMacro('\lx@refnum@compose@{}{}', '\if.#1.#2\else#2~#1\fi'); - -DefMacro('\lx@fnum@@{}', - '{\normalfont\@ifundefined{fnum@font@#1}{}{\csname fnum@font@#1\endcsname}' - . '\@ifundefined{fnum@#1}{\lx@@fnum@@{#1}}{\csname fnum@#1\endcsname}}'); - -# Really seems like name should take precedence over \lx@name@, -# since users might define it. -# BUT amsthm defines \thmname{}! -DefMacro('\lx@@fnum@@ {}', - '\@ifundefined{lx@name@#1}{' - . '\@ifundefined{#1name}{' - . '\lx@the@@{#1}' - . '}{' - . '\lx@refnum@compose{\csname #1name\endcsname}{\lx@the@@{#1}}' - . '}}{' - . '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\lx@the@@{#1}}' - . '}'); - -AssignMapping('type_tag_formatter', '' => '\lx@fnum@@'); # Default! - -#---------------------------------------------------------------------- -# \lx@fnum@toc@{type} is similar, but formats the number for use within \toctitle -# Customize by defining \fnum@toc@ or \fnum@tocfont@ -# Default uses just \the, else composes using \lx@@fnum@@{type} -DefMacro('\lx@fnum@toc@@{}', - '{\normalfont\@ifundefined{fnum@tocfont@#1}{}{\csname fnum@tocfont@#1\endcsname}' - . '\@ifundefined{fnum@toc@#1}{\lx@the@@{#1}}{\csname fnum@toc@#1\endcsname}}'); - -#---------------------------------------------------------------------- -# "typerefnum" form is used by automatic cross-references, typically "type number" or similar. -# Customize by defining \typerefnum@ or \typerefnum@font@ -# Default uses either \typerefname or \name (if any, followed by space, then \the -DefMacro('\lx@typerefnum@@{}', - '{\normalfont\@ifundefined{typerefnum@font@#1}{}{\csname typerefnum@font@#1\endcsname}' - . 
'\@ifundefined{typerefnum@#1}{\lx@@typerefnum@@{#1}}{\csname typerefnum@#1\endcsname}}'); - -DefMacro('\lx@@typerefnum@@{}', - '\@ifundefined{#1typerefname}{' - . '\@ifundefined{lx@name@#1}{' - . '\@ifundefined{#1name}{' - . '}{' - . '\lx@refnum@compose{\csname #1name\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}' - . '}}{' - . '\lx@refnum@compose{\csname lx@name@#1\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}' - . '}}{' - . '\lx@refnum@compose{\csname #1typerefname\endcsname}{\csname p@#1\endcsname\lx@the@@{#1}}' - . '}'); - -AssignMapping('type_tag_formatter', 'typerefnum' => '\lx@typerefnum@@'); - -#---------------------------------------------------------------------- -# The following macros provide similar customization for titles & toctitles -# in particular for supporting localization for different languages. -# Redefine these if you want to assemble the name (eg. \chaptername), refnum and titles differently -#---------------------------------------------------------------------- -# \lx@format@title@@{type}{title} -# Format a title (or caption) appropriately for type. -# Customize by defining \format@title@type{title} -# Default composes \lx@fnum@@{type} space title. -DefMacro('\lx@format@title@@{}{}', - '\lx@@format@title@@{#1}' - . '{{\lx@format@title@font@@{#1}#2}}'); -DefMacro('\lx@@format@title@@{}{}', - '{\@ifundefined{format@title@#1}' - . '{\lx@@compose@title{\lx@fnum@@{#1}}{#2}}' - . '{\csname format@title@#1\endcsname{#2}}}'); - -# \lx@format@toctitle@@{type}{toctitle} -# Similar for toctitle, typically briefer -# Customize by defining \format@toctitle@type{title} -# Default composes \lx@fnum@toc@@{type} space title. -DefMacro('\lx@format@toctitle@@{}{}', - '\lx@@format@toctitle@@{#1}' - . '{{\lx@format@toctitle@font@@{#1}#2}}'); - -DefMacro('\lx@@format@toctitle@@{}{}', - '{\@ifundefined{format@toctitle@#1}' - . '{\lx@@compose@title{\lx@fnum@toc@@{#1}}{#2}}' - . 
'{\csname format@toctitle@#1\endcsname{#2}}}'); - -DefMacro('\lx@@compose@title{}{}', '\lx@tag[][ ]{#1}#2'); - -DefMacro('\lx@format@title@font@@{}', - '\@ifundefined{format@title@font@#1}{}{\csname format@title@font@#1\endcsname}'); -DefMacro('\lx@format@toctitle@font@@{}', - '\@ifundefined{format@toctitle@font@#1}{}{\csname format@toctitle@font@#1\endcsname}'); - -## NOTE that a 3rd form seems desirable: an concise form that cannot rely on context for the type. -## This would be useful for the titles in links; thus can be plain (unicode) text. - -#====================================================================== -# TeX Book, Appendix B. p. 356 - -DefPrimitiveI('\raggedright', undef, undef); -DefPrimitiveI('\raggedleft', undef, undef); # this is actually LaTeX -DefPrimitiveI('\ttraggedright', undef, undef); -DefPrimitiveI('\leavevmode', undef, undef); -DefMacro('\mathhexbox{}{}{}', '\leavevmode\hbox{$\m@th \mathchar"#1#2#3$}'); - -#---------------------------------------------------------------------- -# Actually from LaTeX; Table 3.2. Non-English Symbols, p.39 - -# The following shouldn't appear in math. -DefPrimitiveI('\OE', undef, "\x{0152}"); # LATIN CAPITAL LIGATURE OE -DefPrimitiveI('\oe', undef, "\x{0153}"); # LATIN SMALL LIGATURE OE -DefPrimitiveI('\AE', undef, UTF(0xC6)); # LATIN CAPITAL LETTER AE -DefPrimitiveI('\ae', undef, UTF(0xE6)); # LATIN SMALL LETTER AE -DefPrimitiveI('\AA', undef, UTF(0xC5)); # LATIN CAPITAL LETTER A WITH RING ABOVE -DefPrimitiveI('\aa', undef, UTF(0xE5)); # LATIN SMALL LETTER A WITH RING ABOVE -DefPrimitiveI('\O', undef, UTF(0xD8)); # LATIN CAPITAL LETTER O WITH STROKE -DefPrimitiveI('\o', undef, UTF(0xF8)); # LATIN SMALL LETTER O WITH STROKE -DefPrimitiveI('\L', undef, "\x{0141}"); # LATIN CAPITAL LETTER L WITH STROKE -DefPrimitiveI('\l', undef, "\x{0142}"); # LATIN SMALL LETTER L WITH STROKE -DefPrimitiveI('\ss', undef, UTF(0xDF)); # LATIN SMALL LETTER SHARP S - -# apparently the rest can appear in math. 
-DefPrimitiveI('\lx@sectionsign', undef, UTF(0xa7), alias => '\S'); # SECTION SIGN -DefPrimitiveI('\lx@paragraphsign', undef, UTF(0xB6), alias => '\P'); # PILCROW SIGN -DefMacroI('\S', undef, '\lx@sectionsign'); -DefMacroI('\P', undef, '\lx@paragraphsign'); -DefPrimitiveI('\dag', undef, "\x{2020}"); # DAGGER -DefPrimitiveI('\ddag', undef, "\x{2021}"); # DOUBLE DAGGER -DefPrimitiveI('\copyright', undef, UTF(0xA9)); # COPYRIGHT SIGN -DefPrimitiveI('\pounds', undef, UTF(0xA3)); # POUND SIGN - -#---------------------------------------------------------------------- -# Accents. LaTeX Table 3.1, p.38 -#---------------------------------------------------------------------- -# All of TeX's accents can (sorta) be handled by Unicode's combining accents -# (which follow the character to be accented). -# We'll let unicode normalization do the combination, if needed. -# Also, note that \t is intended to combine multiple chars, but it appears to -# work (via mozilla !?) best when the combining char is after the 1st char. -# Further, the accents \d and \b seem to center the under dot or bar under multiple -# chars --- how should this be handled in Unicode? - -# Since people sometimes try to get fancy by using an empty argument, -# for each, I'm providing the combining code and an equivalent(?) spacing one. -# (doesn't look quite the same to use a combining char after a space) - -# Create a box applying an accent to a letter -# Hopefully, we'll get a Box from digestion with a plain string. -# Then we can apply combining accents to it. -sub applyAccent { - my ($stomach, $letter, $combiningchar, $standalonechar, $reversion) = @_; - my $box = $stomach->digest($letter); - my $locator = $box->getLocator; - my $font = $box->getFont; - my $string = $box->toString; - $string =~ tr/\x{0131}\x{0237}/ij/; - $string =~ s/\s/ /g; - my @letters = split(//, $string); - return Box(($string =~ /^\s*$/ - ? $standalonechar - : NFC($letters[0] . $combiningchar . join('', @letters[1 .. 
$#letters]))), - $font, $locator, $reversion); } - -# Defines an accent command using a combining char that follows the -# 1st char of the argument. In cases where there is no argument, $standalonechar is used. -sub DefAccent { - my ($accent, $combiningchar, $standalonechar, %options) = @_; - $options{above} = 1 if !(defined $options{above}) && !$options{below}; - # Used for converting a char used as an above-accent to a combining char (See \accent) - AssignMapping('accent_combiner_above', $standalonechar => $combiningchar) if $options{above}; - AssignMapping('accent_combiner_below', $standalonechar => $combiningchar) unless $options{above}; - DefMacroI($accent, "{}", - Tokens(T_CS('\lx@applyaccent'), T_OTHER($accent), - T_OTHER($combiningchar), T_OTHER($standalonechar), - T_BEGIN, T_ARG(1), T_END), - protected => 1); - return; } - -DefPrimitiveI('\lx@applyaccent', "DefToken Token Token {}", sub { - my ($stomach, $accent, $combiningchar, $standalonechar, $letter) = @_; - applyAccent($stomach, $letter, $combiningchar->getString, $standalonechar->getString, - Tokens(T_CS($accent->getString), T_BEGIN, $letter, T_END)); }, - mode => 'text'); - -DefAccent('\`', "\x{0300}", UTF(0x60)); # COMBINING GRAVE ACCENT & GRAVE ACCENT -DefAccent("\\'", "\x{0301}", UTF(0xB4)); # COMBINING ACUTE ACCENT & ACUTE ACCENT -DefAccent('\^', "\x{0302}", UTF(0x5E)); # COMBINING CIRCUMFLEX ACCENT & CIRCUMFLEX ACCENT -DefAccent('\"', "\x{0308}", UTF(0xA8)); # COMBINING DIAERESIS & DIAERESIS -DefAccent('\~', "\x{0303}", "~"); # COMBINING TILDE -DefAccent('\=', "\x{0304}", UTF(0xAF)); # COMBINING MACRON & MACRON -DefAccent('\.', "\x{0307}", "\x{02D9}"); # COMBINING DOT ABOVE & DOT ABOVE -DefAccent('\u', "\x{0306}", "\x{02D8}"); # COMBINING BREVE & BREVE -DefAccent('\v', "\x{030C}", "\x{02C7}"); # COMBINING CARON & CARON -DefAccent('\@ringaccent', "\x{030A}", "o"); # COMBINING RING ABOVE & non-combining -DefAccent('\r', "\x{030A}", "o"); # COMBINING RING ABOVE & non-combining -DefAccent('\H', 
"\x{030B}", "\x{02DD}"); # COMBINING DOUBLE ACUTE ACCENT & non-combining -DefAccent('\c', "\x{0327}", UTF(0xB8), below => 1); # COMBINING CEDILLA & CEDILLA - # NOTE: The next two get define for math, as well; See below -DefAccent('\@text@daccent', "\x{0323}", '.', below => 1); # COMBINING DOT BELOW & DOT (?) -DefAccent('\@text@baccent', "\x{0331}", UTF(0xAF), below => 1); # COMBINING MACRON BELOW & MACRON -DefAccent('\t', "\x{0361}", "-"); # COMBINING DOUBLE INVERTED BREVE & ???? What???? - # this one's actually defined in mathscinet.sty, but just stick it here! -DefAccent('\lfhook', "\x{0326}", ",", below => 1); # COMBINING COMMA BELOW - # I doubt that latter covers multiple chars...? - #DefAccent('\bar',"\x{0304}", ?); # COMBINING MACRON or is this the longer overbar? - -# This will fail if there really are "assignments" after the number! -# We're given a number pointing into the font, from which we can derive the standalone char. -# From that, we want to figure out the combining character, but there could be one for -# both the above & below cases! We'll prefer the above case. -DefPrimitive('\accent Number {}', sub { - my ($stomach, $num, $letter) = @_; - my $n = $num->valueOf; - my $fontinfo = lookupFontinfo(LookupValue('textfont_0')); - my $acc = ($fontinfo && $$fontinfo{encoding} ? FontDecode($n, $$fontinfo{encoding}) : chr($n)); - my $reversion = Invocation(T_CS('\accent'), $num, $letter); - # NOTE: REVERSE LOOKUP in above accent list for the non-spacing accent char - # BUT, \accent always (?) makes an above type accent... doesn't it? - if (my $combiner = LookupMapping('accent_combiner_above', $acc) - || LookupMapping('accent_combiner_below', $acc)) { - applyAccent($stomach, $letter, $combiner, $acc, $reversion); } - else { - Warn('unexpected', "accent$n", $stomach, "Accent '$n' not recognized"); - Box(ToString($letter), undef, undef, $reversion); } }); - -# Note that these two apparently work in Math? BUT the argument is treated as text!!! 
-DefMacro('\d{}', '\ifmmode\@math@daccent{#1}\else\@text@daccent{#1}\fi'); -DefMacro('\b{}', '\ifmmode\@math@baccent{#1}\else\@text@baccent{#1}\fi'); - -DefConstructor('\@math@daccent {}', - "\x{22c5}" - . "?#textarg(#textarg)(#matharg)" - . "", - mode => 'text', alias => '\d', - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $arg = $whatsit->getArg(1); - if ($arg->isMath) { - $whatsit->setProperty(matharg => $arg->getBody); } - else { - $whatsit->setProperty(textarg => $arg); } - return; }); - -DefConstructor('\@math@baccent {}', - "" . UTF(0xAF) . "" - . "?#textarg(#textarg)(#matharg)" - . "", - mode => 'text', alias => '\b', - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $arg = $whatsit->getArg(1); - if ($arg->isMath) { - $whatsit->setProperty(matharg => $arg->getBody); } - else { - $whatsit->setProperty(textarg => $arg); } - return; }); - -#====================================================================== -# TeX Book, Appendix B. p. 357 - -foreach my $op ('\hrulefill', '\dotfill', '\rightarrowfill', '\leftarrowfill', - '\upbracefill', '\downbracefill') { - DefPrimitiveI($op, undef, undef); } - -Let('\bye', '\end'); - -Let('\sp', T_SUPER); -Let('\sb', T_SUB); - -DefPrimitiveI('\lx@thinmuskip', undef, sub { - Box("\x{2009}", undef, undef, T_CS('\,'), - name => 'thinspace', isSpace => 1, - width => LookupRegister('\thinmuskip')); }); -DefPrimitiveI('\lx@thinspace', undef, sub { - Box("\x{2009}", undef, undef, T_CS('\,'), - name => 'thinspace', width => Dimension('0.16667em'), isSpace => 1); }); -DefMacroI('\,', undef, '\ifmmode\lx@thinmuskip\else\lx@thinspace\fi', protected => 1); - -DefPrimitiveI('\!', undef, sub { - Box("\x{200B}", undef, undef, T_CS('\!'), # zero width space - name => 'negthinspace', isSpace => 1, - width => LookupRegister('\thinmuskip')->negate); }); -DefPrimitiveI('\>', undef, sub { - Box("\x{2005}", undef, undef, T_CS('\>'), - name => 'medspace', isSpace => 1, - width => LookupRegister('\medmuskip')); }); - 
-DefPrimitiveI('\;', undef, sub { - Box("\x{2004}", undef, undef, T_CS('\;'), - name => 'thickspace', isSpace => 1, - width => LookupRegister('\thickmuskip')); }); - -Let('\:', '\>'); - -DefPrimitiveI('\ ', undef, sub { - Box(UTF(0xA0), undef, undef, T_CS('\ '), - name => 'space', isSpace => 1, width => Dimension('0.5em')); }); - -DefPrimitiveI("\\\t", undef, sub { - Box(UTF(0xA0), undef, undef, T_CS("\\\t"), - isSpace => 1, width => Dimension('1em')); }); - -DefPrimitiveI('\/', undef, sub { - Box("", undef, undef, T_CS('\/'), - isSpace => 1, name => 'italiccorr', width => Dimension('0em')); }); - -#====================================================================== -# TeX Book, Appendix B. p. 358 - -#---------------------------------------------------------------------- -# Actually from LaTeX; Table 3.3, Greek, p.41 -#---------------------------------------------------------------------- -DefMathI('\alpha', undef, "\x{03B1}"); -DefMathI('\beta', undef, "\x{03B2}"); -DefMathI('\gamma', undef, "\x{03B3}"); -DefMathI('\delta', undef, "\x{03B4}"); -DefMathI('\epsilon', undef, "\x{03F5}"); -DefMathI('\varepsilon', undef, "\x{03B5}"); -DefMathI('\zeta', undef, "\x{03B6}"); -DefMathI('\eta', undef, "\x{03B7}"); -DefMathI('\theta', undef, "\x{03B8}"); -DefMathI('\vartheta', undef, "\x{03D1}"); -DefMathI('\iota', undef, "\x{03B9}"); -DefMathI('\kappa', undef, "\x{03BA}"); -DefMathI('\lambda', undef, "\x{03BB}"); -DefMathI('\mu', undef, "\x{03BC}"); -DefMathI('\nu', undef, "\x{03BD}"); -DefMathI('\xi', undef, "\x{03BE}"); -DefMathI('\pi', undef, "\x{03C0}"); -DefMathI('\varpi', undef, "\x{03D6}"); -DefMathI('\rho', undef, "\x{03C1}"); -DefMathI('\varrho', undef, "\x{03F1}"); -DefMathI('\sigma', undef, "\x{03C3}"); -DefMathI('\varsigma', undef, "\x{03C2}"); -DefMathI('\tau', undef, "\x{03C4}"); -DefMathI('\upsilon', undef, "\x{03C5}"); -DefMathI('\phi', undef, "\x{03D5}"); -DefMathI('\varphi', undef, "\x{03C6}"); -DefMathI('\chi', undef, "\x{03C7}"); -DefMathI('\psi', 
undef, "\x{03C8}"); -DefMathI('\omega', undef, "\x{03C9}"); -DefMathI('\Gamma', undef, "\x{0393}"); -DefMathI('\Delta', undef, "\x{0394}"); -DefMathI('\Theta', undef, "\x{0398}"); -DefMathI('\Lambda', undef, "\x{039B}"); -DefMathI('\Xi', undef, "\x{039E}"); -DefMathI('\Pi', undef, "\x{03A0}"); -DefMathI('\Sigma', undef, "\x{03A3}"); -DefMathI('\Upsilon', undef, "\x{03A5}"); -DefMathI('\Phi', undef, "\x{03A6}"); -DefMathI('\Psi', undef, "\x{03A8}"); -DefMathI('\Omega', undef, "\x{03A9}"); - -#---------------------------------------------------------------------- -# Actually from LaTeX; Table 3.7. Miscellaneous Symbols, p.43 -#---------------------------------------------------------------------- -# Some should be differential operators, qualifiers, ... -DefMathI('\aleph', undef, "\x{2135}"); -DefMathI('\hbar', undef, "\x{210F}", role => 'ID', meaning => 'Planck-constant-over-2-pi'); -DefMathI('\imath', undef, "\x{0131}"); -DefMathI('\jmath', undef, "\x{0237}"); -DefMathI('\ell', undef, "\x{2113}"); -DefMathI('\wp', undef, "\x{2118}", meaning => 'Weierstrass-p'); -DefMathI('\Re', undef, "\x{211C}", role => 'OPFUNCTION', meaning => 'real-part'); -DefMathI('\Im', undef, "\x{2111}", role => 'OPFUNCTION', meaning => 'imaginary-part'); -DefMathI('\mho', undef, "\x{2127}"); - -DefMathI('\prime', undef, "\x{2032}", role => 'SUPOP', locked => 1); -DefMathI('\emptyset', undef, "\x{2205}", role => 'ID', meaning => 'empty-set'); -DefMathI('\nabla', undef, "\x{2207}", role => 'OPERATOR'); -DefMathI('\surd', undef, "\x{221A}", role => 'OPERATOR', meaning => 'square-root'); -DefMathI('\top', undef, "\x{22A4}", role => 'ADDOP', meaning => 'top'); -DefMathI('\bot', undef, "\x{22A5}", role => 'ADDOP', meaning => 'bottom'); -DefMathI('\|', undef, "\x{2225}", role => 'VERTBAR', name => '||'); -# should get meaning => 'parallel-to' when used as infix, but NOT when for OPEN|CLOSE -DefMathI('\angle', undef, "\x{2220}"); - -# NOTE: This is probably the wrong role. 
-# Also, should probably carry info about Binding for OpenMath -DefMathI('\forall', undef, "\x{2200}", role => 'BIGOP', meaning => 'for-all'); -DefMathI('\exists', undef, "\x{2203}", role => 'BIGOP', meaning => 'exists'); -DefMathI('\neg', undef, UTF(0xAC), role => 'BIGOP', meaning => 'not'); -DefMathI('\lnot', undef, UTF(0xAC), role => 'BIGOP', meaning => 'not'); -DefMathI('\flat', undef, "\x{266D}"); -DefMathI('\natural', undef, "\x{266E}"); -DefMathI('\sharp', undef, "\x{266F}"); -DefMathI('\backslash', undef, UTF(0x5C), role => 'MULOP'); -DefMathI('\partial', undef, "\x{2202}", role => 'DIFFOP', meaning => 'partial-differential'); - -DefMathI('\infty', undef, "\x{221E}", role => 'ID', meaning => 'infinity'); -DefMathI('\Box', undef, "\x{25A1}"); -DefMathI('\Diamond', undef, "\x{25C7}"); -DefMathI('\triangle', undef, "\x{25B3}"); -DefMathI('\clubsuit', undef, "\x{2663}"); -DefMathI('\diamondsuit', undef, "\x{2662}"); -DefMathI('\heartsuit', undef, "\x{2661}"); -DefMathI('\spadesuit', undef, "\x{2660}"); - -#---------------------------------------------------------------------- -DefMath('\smallint', "\x{222B}", meaning => 'integral', role => 'INTOP', - font => { size => 9 }, scriptpos => \&doScriptpos, mathstyle => 'text'); # INTEGRAL - -#---------------------------------------------------------------------- -# Actually LaTeX; Table 3.8. Variable-sized Symbols, p.44. -#---------------------------------------------------------------------- -sub doScriptpos { - return (LookupValue('font')->getMathstyle eq 'display' ? 'mid' : 'post'); } - -sub doVariablesizeOp { - return (LookupValue('font')->getMathstyle eq 'display' ? 
'display' : 'text'); } - -DefMathI('\sum', undef, "\x{2211}", - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'sum', - mathstyle => \&doVariablesizeOp); -DefMathI('\prod', undef, "\x{220F}", - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'product', - mathstyle => \&doVariablesizeOp); -DefMathI('\coprod', undef, "\x{2210}", - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'coproduct', - mathstyle => \&doVariablesizeOp); -DefMathI('\int', undef, "\x{222B}", - role => 'INTOP', - meaning => 'integral', - mathstyle => \&doVariablesizeOp); -DefMathI('\oint', undef, "\x{222E}", - role => 'INTOP', - meaning => 'contour-integral', - mathstyle => \&doVariablesizeOp); -DefMathI('\bigcap', undef, "\x{22C2}", # versus \x{2229} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'intersection', - mathstyle => \&doVariablesizeOp); -DefMathI('\bigcup', undef, "\x{22C3}", # versus \x{222A} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'union', - mathstyle => \&doVariablesizeOp); -DefMathI('\bigsqcup', undef, "\x{2A06}", # versus \x{2294} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'square-union', - mathstyle => \&doVariablesizeOp); -DefMathI('\bigvee', undef, "\x{22C1}", # versus \x{2229} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'or', - mathstyle => \&doVariablesizeOp); -DefMathI('\bigwedge', undef, "\x{22C0}", # versus \x{2227} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'and', - mathstyle => \&doVariablesizeOp); -DefMathI('\bigodot', undef, "\x{2A00}", # versus \x{2299} - role => 'SUMOP', #meaning=> ? 
- scriptpos => \&doScriptpos, - mathstyle => \&doVariablesizeOp); -DefMathI('\bigotimes', undef, "\x{2A02}", # versus \x{2297} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'tensor-product', - mathstyle => \&doVariablesizeOp); -DefMathI('\bigoplus', undef, "\x{2A01}", # versus \x{2295} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'direct-sum', - mathstyle => \&doVariablesizeOp); -DefMathI('\biguplus', undef, "\x{2A04}", # versus \x{228e} - role => 'SUMOP', - scriptpos => \&doScriptpos, - meaning => 'symmetric-difference', - mathstyle => \&doVariablesizeOp); -DefConstructorI('\limits', undef, '', - afterDigest => sub { mergeLimits('mid'); }, - properties => { isEmpty => 1 }); -DefConstructorI('\nolimits', undef, '', - afterDigest => sub { mergeLimits('post'); }, - properties => { isEmpty => 1 }); -DefConstructorI('\displaylimits', undef, '', - afterDigest => sub { - mergeLimits((($_[1]->getProperty('mathstyle') || '') eq 'display' ? 'mid' : 'post')); }, - properties => { isEmpty => 1 }); - -sub mergeLimits { - my ($pos) = @_; - for (my $i = scalar(@LaTeXML::LIST) - 1 ; $i >= 0 ; $i--) { - my $box = $LaTeXML::LIST[$i]; - my $prev = $box->getProperty('scriptpos') || ''; - my $level = ($prev =~ /^\w*(\d+)$/ ? $1 : $STATE->getStomach->getScriptLevel || ''); - $box->setProperty(scriptpos => $pos . $level); - last unless IsEmpty($box) || IsScript($box); } - return; } - -#---------------------------------------------------------------------- -# Actually from LaTeX; Table 3.4. 
Binary Operation Symbols, p.42 -#---------------------------------------------------------------------- -DefMathI('\pm', undef, UTF(0xB1), role => 'ADDOP', meaning => 'plus-or-minus'); -DefMathI('\mp', undef, "\x{2213}", role => 'ADDOP', meaning => 'minus-or-plus'); -DefMathI('\times', undef, UTF(0xD7), role => 'MULOP', meaning => 'times'); -DefMathI('\div', undef, UTF(0xF7), role => 'MULOP', meaning => 'divide'); -DefMathI('\ast', undef, "\x{2217}", role => 'MULOP'); -DefMathI('\star', undef, "\x{22C6}", role => 'MULOP'); -DefMathI('\circ', undef, "\x{2218}", role => 'MULOP', meaning => 'compose'); -DefMathI('\bullet', undef, "\x{2219}", role => 'MULOP'); -DefMathI('\cdot', undef, "\x{22C5}", role => 'MULOP'); -## , meaning=>'inner-product'); that's pushing it a bit far... - -# Need to classify set operations more carefully.... -DefMathI('\cap', undef, "\x{2229}", role => 'ADDOP', meaning => 'intersection'); -DefMathI('\cup', undef, "\x{222A}", role => 'ADDOP', meaning => 'union'); -DefMathI('\uplus', undef, "\x{228E}", role => 'ADDOP'); -DefMathI('\sqcap', undef, "\x{2293}", role => 'ADDOP', meaning => 'square-intersection'); -DefMathI('\sqcup', undef, "\x{2294}", role => 'ADDOP', meaning => 'square-union'); -DefMathI('\vee', undef, "\x{2228}", role => 'ADDOP', meaning => 'or'); -DefMathI('\lor', undef, "\x{2228}", role => 'ADDOP', meaning => 'or'); -DefMathI('\wedge', undef, "\x{2227}", role => 'ADDOP', meaning => 'and'); -DefMathI('\land', undef, "\x{2227}", role => 'ADDOP', meaning => 'and'); -DefMathI('\setminus', undef, "\x{2216}", role => 'ADDOP', meaning => 'set-minus'); -DefMathI('\wr', undef, "\x{2240}", role => 'MULOP'); - -# Should this block be ADDOP or something else? 
-DefMathI('\diamond', undef, "\x{22C4}", role => 'ADDOP'); -DefMathI('\bigtriangleup', undef, "\x{25B3}", role => 'ADDOP'); -DefMathI('\bigtriangledown', undef, "\x{25BD}", role => 'ADDOP'); -DefMathI('\triangleleft', undef, "\x{25C1}", role => 'ADDOP'); -DefMathI('\triangleright', undef, "\x{25B7}", role => 'ADDOP'); -DefMathI('\lhd', undef, "\x{22B2}", role => 'ADDOP', meaning => 'subgroup-of'); -DefMathI('\rhd', undef, "\x{22B3}", role => 'ADDOP', meaning => 'contains-as-subgroup'); -DefMathI('\unlhd', undef, "\x{22B4}", role => 'ADDOP', meaning => 'subgroup-of-or-equals'); -DefMathI('\unrhd', undef, "\x{22B5}", role => 'ADDOP', meaning => 'contains-as-subgroup-or-equals'); - -DefMathI('\oplus', undef, "\x{2295}", role => 'ADDOP', meaning => 'direct-sum'); -DefMathI('\ominus', undef, "\x{2296}", role => 'ADDOP', meaning => 'symmetric-difference'); -DefMathI('\otimes', undef, "\x{2297}", role => 'MULOP', meaning => 'tensor-product'); -DefMathI('\oslash', undef, "\x{2298}", role => 'MULOP'); -DefMathI('\odot', undef, "\x{2299}", role => 'MULOP', meaning => 'direct-product'); -DefMathI('\bigcirc', undef, "\x{25CB}", role => 'MULOP'); -DefMathI('\dagger', undef, "\x{2020}", role => 'MULOP'); -DefMathI('\ddagger', undef, "\x{2021}", role => 'MULOP'); -DefMathI('\amalg', undef, "\x{2210}", role => 'MULOP', meaning => 'coproduct'); - -#---------------------------------------------------------------------- -# LaTeX; Table 3.5. 
Relation Symbols, p.43 -#---------------------------------------------------------------------- -DefMathI('\leq', undef, "\x{2264}", role => 'RELOP', meaning => 'less-than-or-equals'); -DefMathI('\prec', undef, "\x{227A}", role => 'RELOP', meaning => 'precedes'); -DefMathI('\preceq', undef, "\x{2AAF}", role => 'RELOP', meaning => 'precedes-or-equals'); -DefMathI('\ll', undef, "\x{226A}", role => 'RELOP', meaning => 'much-less-than'); -DefMathI('\subset', undef, "\x{2282}", role => 'RELOP', meaning => 'subset-of'); -DefMathI('\subseteq', undef, "\x{2286}", role => 'RELOP', meaning => 'subset-of-or-equals'); -DefMathI('\sqsubset', undef, "\x{228F}", role => 'RELOP', meaning => 'square-image-of'); -DefMathI('\sqsubseteq', undef, "\x{2291}", role => 'RELOP', meaning => 'square-image-of-or-equals'); -DefMathI('\in', undef, "\x{2208}", role => 'RELOP', meaning => 'element-of'); -DefMathI('\vdash', undef, "\x{22A2}", role => 'METARELOP', meaning => 'proves'); - -DefMathI('\geq', undef, "\x{2265}", role => 'RELOP', meaning => 'greater-than-or-equals'); -DefMathI('\succ', undef, "\x{227B}", role => 'RELOP', meaning => 'succeeds'); -DefMathI('\succeq', undef, "\x{2AB0}", role => 'RELOP', meaning => 'succeeds-or-equals'); -DefMathI('\gg', undef, "\x{226B}", role => 'RELOP', meaning => 'much-greater-than'); -DefMathI('\supset', undef, "\x{2283}", role => 'RELOP', meaning => 'superset-of'); -DefMathI('\supseteq', undef, "\x{2287}", role => 'RELOP', meaning => 'superset-of-or-equals'); -DefMathI('\sqsupset', undef, "\x{2290}", role => 'RELOP', meaning => 'square-original-of'); -DefMathI('\sqsupseteq', undef, "\x{2292}", role => 'RELOP', meaning => 'square-original-of-or-equals'); -DefMathI('\ni', undef, "\x{220B}", role => 'RELOP', meaning => 'contains'); -DefMathI('\dashv', undef, "\x{22A3}", role => 'METARELOP', meaning => 'does-not-prove'); - -# I have the impression think that "identical" is a stronger notion than "equivalence" -# Note that the unicode here is called 
"Identical To", -# and that the notion of "equivalent to" usually involves the tilde operator. -DefMathI('\equiv', undef, "\x{2261}", role => 'RELOP', meaning => 'equivalent-to'); -DefMathI('\sim', undef, "\x{223C}", role => 'RELOP', meaning => 'similar-to'); -DefMathI('\simeq', undef, "\x{2243}", role => 'RELOP', meaning => 'similar-to-or-equals'); -DefMathI('\asymp', undef, "\x{224D}", role => 'RELOP', meaning => 'asymptotically-equals'); -DefMathI('\approx', undef, "\x{2248}", role => 'RELOP', meaning => 'approximately-equals'); -DefMathI('\cong', undef, "\x{2245}", role => 'RELOP', meaning => 'approximately-equals'); -DefMathI('\neq', undef, "\x{2260}", role => 'RELOP', meaning => 'not-equals'); -DefMathI('\doteq', undef, "\x{2250}", role => 'RELOP', meaning => 'approaches-limit'); -DefMathI('\notin', undef, "\x{2209}", role => 'RELOP', meaning => 'not-element-of'); - -DefMathI('\models', undef, "\x{22A7}", role => 'RELOP', meaning => 'models'); -DefMathI('\perp', undef, "\x{27C2}", role => 'RELOP', meaning => 'perpendicular-to'); -DefMathI('\mid', undef, "\x{2223}", role => 'VERTBAR'); # DIVIDES (RELOP?) ?? well, sometimes... -DefMathI('\parallel', undef, "\x{2225}", role => 'VERTBAR', meaning => 'parallel-to'); -DefMathI('\bowtie', undef, "\x{22C8}", role => 'RELOP'); # BOWTIE -DefMathI('\Join', undef, "\x{2A1D}", role => 'RELOP', meaning => 'join'); -DefMathI('\smile', undef, "\x{2323}", role => 'RELOP'); # SMILE -DefMathI('\frown', undef, "\x{2322}", role => 'RELOP'); # FROWN -DefMathI('\propto', undef, "\x{221D}", role => 'RELOP', meaning => 'proportional-to'); - -# TeX defines these as alternate names... -Let('\le', '\leq'); -Let('\ge', '\geq'); -Let('\ne', '\neq'); -# And it defines some others as alternate names, but they seem to -# potentially imply slightly different meanings??? Leave them out for now.. 
- -#---------------------------------------------------------------------- -# Not; (Is fullwidth solidus appropriate for when \not appears in isolation?) -DefMathI('\not', undef, "\x{FF0F}", role => 'OPFUNCTION', meaning => 'not'); -# Match negations of many operators -our %NOTS = ('=' => "\x{2260}", '<' => "\x{226E}", '>' => "\x{226F}", - "\x{2208}" => "\x{2209}", #\in=>\notin - "\x{2264}" => "\x{2270}", "\x{2265}" => "\x{2271}", # Less eq, greater eq. - "\x{227A}" => "\x{2280}", "\x{227B}" => "\x{2281}", # prec, succ - "\x{2AAF}" => "\x{22E0}", "\x{2AB0}" => "\x{22E1}", # preceq, succeq - "\x{2282}" => "\x{2284}", "\x{2283}" => "\x{2285}", # subset, supset - "\x{2286}" => "\x{2288}", "\x{2287}" => "\x{2289}", # subseteq, supseteq - "\x{2291}" => "\x{22E2}", "\x{2290}" => "\x{22E3}", # sqsubseteq, sqsupseteq - "\x{2261}" => "\x{2262}", # equiv - "\x{224D}" => "\x{226D}", "\x{2248}" => "\x{2249}", # asymp, approx - "\x{22B2}" => "\x{22EA}", "\x{22B3}" => "\x{22EB}", # lhd, rhd - "\x{22B4}" => "\x{22EC}", "\x{22B5}" => "\x{22ED}", # unlhd, unrhd - "\x{2203}" => "\x{2204}", # Exists -); - -# For a \not operator that is followed by anything, concoct an appropriate not or cancelation. -DefRewrite(select => ["descendant-or-self::ltx:XMTok[text()='\x{FF0F}' and \@meaning='not']" - . "[ following-sibling::*]", 2], - replace => sub { - my ($doc, $not, $thing) = @_; - my $text = ($doc->getModel->getNodeQName($thing) eq 'ltx:XMTok') - && $thing->textContent; - - if ((!defined $text) || (length($text) != 1)) { # Not simple char token. 
- my $box = $doc->getNodeBox($not); - $doc->openElement('ltx:XMApp', _box => $box); # Wrap with a cancel op - my $strike = $doc->insertMathToken(undef, role => 'ENCLOSE', enclose => 'updiagonalstrike', - meaning => 'not', _box => $box); - if (my $id = $not->getAttribute('xml:id')) { - $not->removeAttribute('xml:id'); - $doc->unRecordID($id); - $doc->setAttribute($strike, 'xml:id' => $id); } - $doc->getNode->appendChild($thing); - $doc->closeElement('ltx:XMApp'); } - else { - # For simple tokens, we'll modify the relevant content & attributes - # [children removed, id's presumably ignorable] - map { $_->unbindNode() } $thing->childNodes; - my $new = defined $NOTS{$text} ? $NOTS{$text} : $text . "\x{0338}"; - $thing->appendText($new); - if (my $meaning = $thing->getAttribute('meaning')) { - $doc->setAttribute($thing, meaning => "not-$meaning"); } - if (my $name = $thing->getAttribute('name') || $text) { - $doc->setAttribute($thing, name => "not-$name"); } - # and put the node back in - $doc->getNode->appendChild($thing); - # Since the element is disappearing, if it had an id that was referenced...!?!? - if (my $id = $not->getAttribute('xml:id')) { - foreach my $n ($doc->findnodes("descendant-or-self::ltx:XMRef[\@idref='$id']")) { - $doc->removeNode($n); } } # ? Hopefully this is safe. -} }); - -#---------------------------------------------------------------------- -# \joinrel -DefMathI('\relbar', undef, "-", role => 'RELOP'); # ??? -DefMathI('\Relbar', undef, "=", role => 'RELOP'); # ??? - -# \joinrel is \mathrel{\mkern-3\mu} -# Ah, but the Effect is to join 2 "relations" into one! -DefPrimitiveI('\joinrel', undef, sub { - my ($stomach, $op) = @_; - my $gullet = $stomach->getGullet; - $gullet->skipSpaces; - my $left = $LaTeXML::LIST[-1]; - if (!$left) { # Nothing there?... - return (); } # I guess this becomes a no-op??? 
- else { - pop(@LaTeXML::LIST); - my @stuff = (); - while (my $tok = $gullet->readXToken(0)) { - @stuff = $stomach->invokeToken($tok); - last if @stuff; } - return () unless @stuff; # no-op ???? - my $right = shift(@stuff); - (@stuff, - LaTeXML::Core::Whatsit->new(LookupDefinition(T_CS('\@@joinrel')), [$left, $right], - locator => $gullet->getLocator, - font => $right->getFont, isMath => 1)); } }); - -DefConstructor('\@@joinrel{}{}', sub { - my ($document, $left, $right) = @_; - $document->absorb($left); - $document->absorb($right); - # Now if last 2 items are XMTok, replace by a single token with joined content (& attr?) - my $node = $document->getNode; - my @nodes = $document->getChildElements($node); - if (scalar(@nodes) >= 2) { - my @rels = ($nodes[-2], $nodes[-1]); - if (grep { $document->getNodeQName($_) eq 'ltx:XMTok' } @rels) { - my %roles = (); - map { $roles{ $_->getAttribute('role') } = 1 } @rels; - my $role = (scalar(keys %roles) == 1 ? [keys %roles]->[0] : ($roles{ARROW} ? 'ARROW' : 'RELOP')); - map { $node->removeChild($_) } @rels; - $document->insertElement('ltx:XMTok', [map { $_->textContent } @rels], role => $role); - } } }, - reversion => '#1\joinrel #2'); - -#---------------------------------------------------------------------- -# LaTeX; Table 3.6. Arrow Symbols, p.43 -#---------------------------------------------------------------------- -# Arrows get treated somewhat like relations (or meta-relations), -# but it's hard to associate any particular "meaning" to them. 
- -DefMathI('\leftarrow', undef, "\x{2190}", role => 'ARROW'); # LEFTWARDS ARROW -DefMathI('\Leftarrow', undef, "\x{21D0}", role => 'ARROW'); # LEFTWARDS DOUBLE ARROW -DefMathI('\rightarrow', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW -DefMathI('\Rightarrow', undef, "\x{21D2}", role => 'ARROW'); # RIGHTWARDS DOUBLE ARROW -DefMathI('\leftrightarrow', undef, "\x{2194}", role => 'METARELOP'); # LEFT RIGHT ARROW -DefMathI('\Leftrightarrow', undef, "\x{21D4}", role => 'METARELOP'); # LEFT RIGHT DOUBLE ARROW -DefMathI('\iff', undef, "\x{21D4}", role => 'METARELOP', meaning => 'iff'); # LEFT RIGHT DOUBLE ARROW -DefMathI('\mapsto', undef, "\x{21A6}", role => 'ARROW', meaning => 'maps-to'); -DefMathI('\hookleftarrow', undef, "\x{21A9}", role => 'ARROW'); # LEFTWARDS ARROW WITH HOOK -DefMathI('\leftharpoonup', undef, "\x{21BC}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB UPWARDS -DefMathI('\leftharpoondown', undef, "\x{21BD}", role => 'ARROW'); # LEFTWARDS HARPOON WITH BARB DOWNWARDS -DefMathI('\rightleftharpoons', undef, "\x{21CC}", role => 'METARELOP'); # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON -DefMathI('\longleftarrow', undef, "\x{27F5}", role => 'ARROW'); # LONG LEFTWARDS ARROW -DefMathI('\Longleftarrow', undef, "\x{27F8}", role => 'ARROW'); # LONG LEFTWARDS DOUBLE ARROW -DefMathI('\longrightarrow', undef, "\x{27F6}", role => 'ARROW'); # LONG RIGHTWARDS ARROW -DefMathI('\Longrightarrow', undef, "\x{27F9}", role => 'ARROW'); # LONG RIGHTWARDS DOUBLE ARROW -DefMathI('\longleftrightarrow', undef, "\x{27F7}", role => 'METARELOP'); # LONG LEFT RIGHT ARROW -DefMathI('\Longleftrightarrow', undef, "\x{27FA}", role => 'METARELOP'); # LONG LEFT RIGHT DOUBLE ARROW -DefMathI('\longmapsto', undef, "\x{27FC}", role => 'ARROW'); # LONG RIGHTWARDS ARROW FROM BAR -DefMathI('\hookrightarrow', undef, "\x{21AA}", role => 'ARROW'); # RIGHTWARDS ARROW WITH HOOK -DefMathI('\rightharpoonup', undef, "\x{21C0}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB UPWARDS 
-DefMathI('\rightharpoondown', undef, "\x{21C1}", role => 'ARROW'); # RIGHTWARDS HARPOON WITH BARB DOWNWARDS -DefMathI('\leadsto', undef, "\x{219D}", role => 'ARROW', meaning => 'leads-to'); - -DefMathI('\uparrow', undef, "\x{2191}", role => 'ARROW'); # UPWARDS ARROW -DefMathI('\Uparrow', undef, "\x{21D1}", role => 'ARROW'); # UPWARDS DOUBLE ARROW -DefMathI('\downarrow', undef, "\x{2193}", role => 'ARROW'); # DOWNWARDS ARROW -DefMathI('\Downarrow', undef, "\x{21D3}", role => 'ARROW'); # DOWNWARDS DOUBLE ARROW -DefMathI('\updownarrow', undef, "\x{2195}", role => 'ARROW'); # UP DOWN ARROW -DefMathI('\Updownarrow', undef, "\x{21D5}", role => 'ARROW'); # UP DOWN DOUBLE ARROW -DefMathI('\nearrow', undef, "\x{2197}", role => 'ARROW'); # NORTH EAST ARROW -DefMathI('\searrow', undef, "\x{2198}", role => 'ARROW'); # SOUTH EAST ARROW -DefMathI('\swarrow', undef, "\x{2199}", role => 'ARROW'); # SOUTH WEST ARROW -DefMathI('\nwarrow', undef, "\x{2196}", role => 'ARROW'); # NORTH WEST ARROW - -# \mapstochar (3237), \lhook(312C), \rhook(312D) -# These are really wrong; I can't find the right Unicode Glyphs. -# These are only fragments intended to be assembled into meaningful(?) symbols. -DefMathI('\mapstochar', undef, "\x{2E20}"); # TeX 3237 -DefMathI('\lhook', undef, "\x{2E26}"); # TeX 312C -DefMathI('\rhook', undef, "\x{2E27}"); # TeX 312D - -#====================================================================== -# TeX Book, Appendix B. p. 359 - -# Ah, since \ldots can appear in text and math.... -DefMacroI('\ldots', undef, '\lx@ldots'); -DefConstructorI('\lx@ldots', undef, - "?#isMath(\x{2026})(\x{2026})", - sizer => "\x{2026}", - reversion => '\ldots', - properties => sub { - (LookupValue('IN_MATH') - ? (font => LookupValue('font')->merge(family => 'serif', - series => 'medium', shape => 'upright')->specialize("\x{2026}")) - : ()); }); # Since not DefMath! 
- # And so can \vdots -DefConstructorI('\vdots', undef, - "?#isMath(\x{22EE})(\x{22EE})", - sizer => "\x{22EE}", - properties => sub { - (LookupValue('IN_MATH') - ? (font => LookupValue('font')->merge(family => 'serif', - series => 'medium', shape => 'upright')->specialize("\x{22EE}")) - : ()); }); # Since not DefMath! - # But not these! -DefMathI('\cdots', undef, "\x{22EF}", role => 'ID'); # MIDLINE HORIZONTAL ELLIPSIS - -DefMathI('\ddots', undef, "\x{22F1}", role => 'ID'); # DOWN RIGHT DIAGONAL ELLIPSIS -DefMathI('\colon', undef, ':', role => 'METARELOP'); # Seems like good default role - # Note that amsmath redefines \dots to be `smart'. - # Aha, also can be in text... -DefConstructorI('\dots', undef, - "?#isMath(\x{2026})(\x{2026})", - sizer => "\x{2026}", - properties => sub { - (LookupValue('IN_MATH') - ? (font => LookupValue('font')->merge(family => 'serif', - series => 'medium', shape => 'upright')->specialize("\x{2026}")) - : ()); }); # Since not DefMath! - -# And while we're at it... - -# Pretest for XMath to keep from interpreting math that the DOM may not allow!! -##DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'\cdot\cdot\cdot',replace=>'\cdots'); - -DefMathLigature("\x{22C5}\x{22C5}\x{22C5}" => "\x{22EF}", role => 'ID', name => 'cdots'); - -DefLigature(qr{\.\.\.}, "\x{2026}", fontTest => sub { $_[0]->getFamily ne 'typewriter'; }); # ldots - -#DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'...',replace=>'\ldots'); -DefMathLigature("..." => "\x{2026}", role => 'ID', name => 'ldots'); - -#---------------------------------------------------------------------- -# Math Accents. -#---------------------------------------------------------------------- -# LaTeX; Table 3.11. Math Mode Accents, p.50. -# Are these all TeX (or LaTeX)? -# Note that most of these should NOT be stretchy, by default! 
-DefMath('\hat Digested', UTF(0x5E), - operator_role => 'OVERACCENT', operator_stretchy => 'false'); -DefMath('\check Digested', "\x{02C7}", - operator_role => 'OVERACCENT', operator_stretchy => 'false'); # CARON -DefMath('\breve Digested', "\x{02D8}", operator_role => 'OVERACCENT'); # BREVE -DefMath('\acute Digested', UTF(0xB4), operator_role => 'OVERACCENT'); # ACUTE ACCENT -DefMath('\grave Digested', UTF(0x60), operator_role => 'OVERACCENT'); # GRAVE ACCENT -DefMath('\tilde Digested', UTF(0x7E), - operator_role => 'OVERACCENT', operator_stretchy => 'false'); # TILDE -DefMath('\bar Digested', UTF(0xAF), - operator_role => 'OVERACCENT', operator_stretchy => 'false'); # MACRON -DefMath('\vec Digested', "\x{2192}", - operator_role => 'OVERACCENT', operator_stretchy => 'false'); # RIGHTWARDS ARROW -DefMath('\dot Digested', "\x{02D9}", operator_role => 'OVERACCENT'); # DOT ABOVE -DefMath('\ddot Digested', UTF(0xA8), operator_role => 'OVERACCENT'); # DIAERESIS -DefMath('\overline Digested', UTF(0xAF), operator_role => 'OVERACCENT'); # MACRON -DefMath('\widehat Digested', UTF(0x5E), operator_role => 'OVERACCENT'); # CIRCUMFLEX ACCENT [plain? also amsfonts] -DefMath('\widetilde Digested', UTF(0x7E), operator_role => 'OVERACCENT'); # TILDE [plain? also amsfonts] -# These aren't handled as simple accents by TeX, so no Digested -DefMath('\overbrace {}', "\x{23DE}", operator_role => 'OVERACCENT', # TOP CURLY BRACKET - scriptpos => 'mid', robust => 1); -DefMath('\underbrace {}', "\x{23DF}", operator_role => 'UNDERACCENT', # BOTTOM CURLY BRACKET - scriptpos => 'mid', robust => 1); - -# NOTE that all the above accents REQUIRE math mode -# EXCEPT underline, overrightarrow and overleftarrow! 
- -DefMath('\math@underline{}', UTF(0xAF), operator_role => 'UNDERACCENT', - name => 'underline', alias => '\underline'); -DefConstructor('\text@underline{}', "#1"); -DefMath('\math@overrightarrow{}', "\x{2192}", operator_role => 'OVERACCENT', - name => 'overrightarrow', alias => '\overrightarrow'); -DefMath('\math@overleftarrow{}', "\x{2190}", operator_role => 'OVERACCENT', - name => 'overleftarrow', alias => '\overleftarrow'); - -# Careful: Use \protect so that it doesn't expand too early in alignments, etc. -DefMacro('\underline{}', '\protect\ifmmode\math@underline{#1}\else\text@underline{#1}\fi'); -Let('\underbar', '\underline'); # Will anyone notice? - -DefMacro('\overrightarrow{}', '\protect\ifmmode\math@overrightarrow{#1}\else$\math@overrightarrow{#1}$\fi'); -DefMacro('\overleftarrow{}', '\protect\ifmmode\math@overleftarrow{#1}\else$\math@overleftarrow{#1}$\fi'); - -DefMacro('\skew{}{}{}', '{#2{#3\mkern#1mu}\mkern-#1mu}{}'); # ignore the subtle spacing for now? - #---------------------------------------------------------------------- - # LaTeX; Table 3.10. Delimiters, p.47 - #---------------------------------------------------------------------- - # The meaning of OPEN/CLOSE tends to depend upon the pairing, - # rather than the individual tokens. - # This meaning is handled in MathParser (for now) - -DefMacroI('\{', undef, '\ifmmode\lx@math@lbrace\else\lx@text@lbrace\fi', protected => 1); -DefMacroI('\}', undef, '\ifmmode\lx@math@rbrace\else\lx@text@rbrace\fi', protected => 1); -DefMathI('\lx@math@lbrace', undef, '{', role => 'OPEN', stretchy => 'false', alias => '\{'); -DefMathI('\lx@math@rbrace', undef, '}', role => 'CLOSE', stretchy => 'false', alias => '\}'); -DefPrimitiveI('\lx@text@lbrace', undef, '{', alias => '\{', - # font => { specialize => "{" }); - font => { shape => 'upright' }, bounded => 1); # Since not DefMath! -DefPrimitiveI('\lx@text@rbrace', undef, '}', alias => '\}', - # font => { specialize => "}" }); # Since not DefMath! 
- font => { shape => 'upright' }, bounded => 1); # Since not DefMath! -Let('\lbrace', '\{'); -Let('\lbrack', T_OTHER('[')); -Let('\rbrace', '\}'); -Let('\rbrack', T_OTHER(']')); -DefMathI('\lceil', undef, "\x{2308}", role => 'OPEN', stretchy => 'false'); # LEFT CEILING -DefMathI('\rceil', undef, "\x{2309}", role => 'CLOSE', stretchy => 'false'); # RIGHT CEILING -DefMathI('\lfloor', undef, "\x{230A}", role => 'OPEN', stretchy => 'false'); # LEFT FLOOR -DefMathI('\rfloor', undef, "\x{230B}", role => 'CLOSE', stretchy => 'false'); # RIGHT FLOOR - # Note: We should be using 27E8,27E9, which are "mathematical", not 2329,232A -DefMathI('\langle', undef, "\x{27E8}", role => 'OPEN', stretchy => 'false'); # LEFT-POINTING ANGLE BRACKET -DefMathI('\rangle', undef, "\x{27E9}", role => 'CLOSE', stretchy => 'false'); # RIGHT-POINTING ANGLE BRACKET - -# Not sure these should be defined here, or latex, or even latex compat mode. -DefMathI('\lgroup', undef, "(", font => { series => 'bold' }, role => 'OPEN', stretchy => 'false'); -DefMathI('\rgroup', undef, ")", font => { series => 'bold' }, role => 'CLOSE', stretchy => 'false'); -DefMathI('\bracevert', undef, "|", font => { series => 'bold' }, role => 'VERTBAR'); - -## DefMath('\lmoustache',"???", font=>{series=>'bold'}, role=>'OPEN'); -## DefMath('\rmoustache',"???", font=>{series=>'bold'}, role=>'OPEN'); - -# TeX marks some symbols as delimiters which can be used with \left,\right, -# but many of which have different grammatical roles otherwise, eg. arrows, <, >. -# Short of setting up TeX's complicated encoding machinery, I need an explicit -# mapping. Unfortunately, this doesn't (yet) support people declaring thier own delimiters! - -# This duplicates in slightly different way what DefMath has put together. 
-our %DELIMITER_MAP = - ('(' => { char => "(", lrole => 'OPEN', rrole => 'CLOSE' }, - ')' => { char => ")", lrole => 'OPEN', rrole => 'CLOSE' }, - '[' => { char => "[", lrole => 'OPEN', rrole => 'CLOSE' }, - ']' => { char => "]", lrole => 'OPEN', rrole => 'CLOSE' }, - '\{' => { char => "{", lrole => 'OPEN', rrole => 'CLOSE' }, - '\}' => { char => "}", lrole => 'OPEN', rrole => 'CLOSE' }, - '\lfloor' => { char => "\x{230A}", lrole => 'OPEN', rrole => 'CLOSE', name => 'lfloor' }, - '\rfloor' => { char => "\x{230B}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rfloor' }, - '\lceil' => { char => "\x{2308}", lrole => 'OPEN', rrole => 'CLOSE', name => 'lceil' }, - '\rceil' => { char => "\x{2309}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rceil' }, - '\langle' => { char => "\x{27E8}", lrole => 'OPEN', rrole => 'CLOSE', name => 'langle' }, - '\rangle' => { char => "\x{27E9}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rangle' }, - '<' => { char => "\x{27E8}", lrole => 'OPEN', rrole => 'CLOSE', name => 'langle' }, - '>' => { char => "\x{27E9}", lrole => 'OPEN', rrole => 'CLOSE', name => 'rangle' }, - '/' => { char => "/", lrole => 'MULOP', rrole => 'MULOP' }, - '\backslash' => { char => UTF(0x5C), lrole => 'MULOP', rrole => 'MULOP', name => 'backslash' }, - '|' => { char => "|", lrole => 'VERTBAR', rrole => 'VERTBAR' }, - '\|' => { char => "\x{2225}", lrole => 'VERTBAR', rrole => 'VERTBAR' }, - '\uparrow' => { char => "\x{2191}", lrole => 'OPEN', rrole => 'CLOSE', name => 'uparrow' }, # ?? - '\Uparrow' => { char => "\x{21D1}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Uparrow' }, # ?? - '\downarrow' => { char => "\x{2193}", lrole => 'OPEN', rrole => 'CLOSE', name => 'downarrow' }, # ?? - '\Downarrow' => { char => "\x{21D3}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Downarrow' }, # ?? - '\updownarrow' => { char => "\x{2195}", lrole => 'OPEN', rrole => 'CLOSE', name => 'updownarrow' }, # ?? 
- '\Updownarrow' => { char => "\x{21D5}", lrole => 'OPEN', rrole => 'CLOSE', name => 'Updownarrow' }, # ?? - ); - -# With new treatment of Simple Symbols as just Box's with assigned attributes, -# we're not getting whatsits, and so we're not looking them up the same way!!! -# TEMPORARILY (?) hack the Delimiter map -foreach my $entry (values %DELIMITER_MAP) { - $DELIMITER_MAP{ $$entry{char} } = $entry; } - -sub lookup_delimiter { - my ($delim) = @_; - return $DELIMITER_MAP{$delim}; } - -# This is a little messier than you'd think. -# These effectively create a group between the \left,\right. -# And this also gives us a single list of things to parse separately. -# Since \left,\right are TeX, primitives and must be paired up, -# we use a bit of macro trickery to simulate. -# [The \@hidden@bgroup/egroup keep from putting a {} into the UnTeX] -# HOWEVER, an additional complication is that it is a common mistake to omit the balancing \right! -# Using an \egroup (or hidden) makes it hard to recover, so use a special egroup -DefMacro('\left XToken', '\@left #1\@hidden@bgroup'); -# Like \@hidden@egroup, but softer about missing \left -DefConstructor('\right@hidden@egroup', '', - afterDigest => sub { - my ($stomach) = @_; - if ($STATE->isValueBound('MODE', 0) # Last stack frame was a mode switch!?!?! - || $STATE->lookupValue('groupNonBoxing')) { # or group was opened with \begingroup - Error('unexpected', '\right', undef, "Unbalanced \\right, no balancing \\left."); } - else { - $stomach->egroup; } }, - reversion => ''); - -DefMacro('\right XToken', '\right@hidden@egroup\@right #1'); - -DefConstructor('\@left Token', - "?#char(#char)" - . 
"(?#hint()(#1))", - afterDigest => sub { my ($stomach, $whatsit) = @_; - my $arg = $whatsit->getArg(1); - my $delim = ToString($arg); - if ($delim eq '.') { - $whatsit->setProperty(hint => 1); } - elsif (my $entry = $DELIMITER_MAP{$delim}) { - $whatsit->setProperties(role => $$entry{lrole}, - char => $$entry{char}, - name => $$entry{name}, - stretchy => 'true'); - $whatsit->setFont($arg->getFont()); } - elsif (($arg->getProperty('role') || '') eq 'OPEN') { - $arg->setProperty(stretchy => 'true'); } - else { - Warn('unexpected', $delim, $stomach, - "Missing delimiter; '.' inserted"); } - return; }, - alias => '\left'); -DefConstructor('\@right Token', - "?#char(#char)" - . "(?#hint()(#1))", - afterDigest => sub { my ($stomach, $whatsit) = @_; - my $arg = $whatsit->getArg(1); - my $delim = ToString($arg); - if ($delim eq '.') { - $whatsit->setProperty(hint => 1); } - elsif (my $entry = $DELIMITER_MAP{$delim}) { - $whatsit->setProperties(role => $$entry{rrole}, - char => $$entry{char}, - name => $$entry{name}, - stretchy => 'true'); - $whatsit->setFont($arg->getFont()); } - elsif (($arg->getProperty('role') || '') eq 'CLOSE') { - $arg->setProperty(stretchy => 'true'); } - else { - Warn('unexpected', $delim, $stomach, - "Missing delimiter; '.' inserted)"); } - return; }, - alias => '\right'); - -# These originally had Token as parameter, rather than {}..... Why? -# Note that in TeX, \big{((} will only enlarge the 1st paren!!! 
-DefConstructor('\big {}', '#1', bounded => 1, font => { size => 'big' }); -DefConstructor('\Big {}', '#1', bounded => 1, font => { size => 'Big' }); -DefConstructor('\bigg {}', '#1', bounded => 1, font => { size => 'bigg' }); -DefConstructor('\Bigg {}', '#1', bounded => 1, font => { size => 'Bigg' }); - -sub addDelimiterRole { - my ($document, $role) = @_; - my $current = $document->getNode; - my $delim = $document->getLastChildElement($current) || $current; - my $delim_role = (($delim && ($delim->nodeType == XML_ELEMENT_NODE) && $delim->getAttribute('role')) || ''); - # if there is some delimiter-like role on the "delimiter", switch it, otherwise, leave it alone! - if ($delim && ($delim_role =~ /^(OPEN|MIDDLE|CLOSE|VERTBAR|)$/)) { - ## Maybe we shouldn't switch VERTBAR ? - ## The catch is that occasionally people use a single \Bigl (or whatever) - ## where they should have used a \Big - $document->setAttribute($delim, role => $role); } - return; } - -# The "m" versions are defined in e-Tex and other places. 
-DefConstructor('\bigl {}', '#1', bounded => 1, font => { size => 'big' }, - afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); -DefConstructor('\bigm {}', '#1', bounded => 1, font => { size => 'big' }, - afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); -DefConstructor('\bigr {}', '#1', bounded => 1, font => { size => 'big' }, - afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); - -DefConstructor('\Bigl {}', '#1', bounded => 1, font => { size => 'Big' }, - afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); -DefConstructor('\Bigm {}', '#1', bounded => 1, font => { size => 'Big' }, - afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); -DefConstructor('\Bigr {}', '#1', bounded => 1, font => { size => 'Big' }, - afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); - -DefConstructor('\biggl {}', '#1', bounded => 1, font => { size => 'bigg' }, - afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); -DefConstructor('\biggm {}', '#1', bounded => 1, font => { size => 'bigg' }, - afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); -DefConstructor('\biggr {}', '#1', bounded => 1, font => { size => 'bigg' }, - afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); - -DefConstructor('\Biggl {}', '#1', bounded => 1, font => { size => 'Bigg' }, - afterConstruct => sub { addDelimiterRole($_[0], 'OPEN'); }); -DefConstructor('\Biggm {}', '#1', bounded => 1, font => { size => 'Bigg' }, - afterConstruct => sub { addDelimiterRole($_[0], 'MIDDLE'); }); -DefConstructor('\Biggr {}', '#1', bounded => 1, font => { size => 'Bigg' }, - afterConstruct => sub { addDelimiterRole($_[0], 'CLOSE'); }); - -Let('\vert', T_OTHER('|')); -Let('\Vert', '\|'); - -#====================================================================== -# TeX Book, Appendix B. p. 360 - -# \choose, et al, already handle above. -# Note that in TeX, all 4 args get digested(!) -# and the choice is made when absorbing! 
-DefConstructor('\mathchoice Digested Digested Digested Digested', sub { - my ($document, $d, $t, $s, $ss, %props) = @_; - my $style = $props{mathstyle}; - my $choice = ($style eq 'display' ? $d - : ($style eq 'text' ? $t - : ($style eq 'script' ? $s - : $ss))); - $document->absorb($choice); }, - properties => { mathstyle => sub { LookupValue('font')->getMathstyle; } }); - -DefMacro('\mathpalette{}{}', - '\mathchoice{#1\displaystyle{#2}}{#1\textstyle{#2}}' - . '{#1\scriptstyle{#2}}{#1\scriptscriptstyle{#2}}'); - -DefConstructor('\phantom{}', - "?#isMath()" - . "(#1)", # !?!?!?! - properties => { isSpace => 1 }, - afterDigest => sub { - my $whatsit = $_[1]; - my ($w, $h, $d) = $whatsit->getArg(1)->getSize; - $whatsit->setProperties(width => $w, height => $h, depth => $d); - return; }); - -DefConstructor('\hphantom{}', - "?#isMath()" - . "(#1)", # !?!?!?! - properties => { isSpace => 1 }, - afterDigest => sub { - my $whatsit = $_[1]; - my ($w, $h, $d) = $whatsit->getArg(1)->getSize; - $whatsit->setProperties(width => $w, height => $h, depth => $d); - return; }); - -DefConstructor('\vphantom{}', - "?#isMath()" - . "(#1)", # !?!?!?! - properties => { isSpace => 1 }, - afterDigest => sub { - my $whatsit = $_[1]; - my ($w, $h, $d) = $whatsit->getArg(1)->getSize; - $whatsit->setProperties(width => $w, height => $h, depth => $d); - return; }); - -DefConstructor('\mathstrut', "?#isMath()()", - properties => { isSpace => 1 }); -DefConstructor('\smash{}', "#1"); # well, what? - -#====================================================================== -# TeX Book, Appendix B. p. 361 - -# This is actually LaTeX's definition, but let's just do it this way. -DefConstructor('\sqrt OptionalInScriptStyle Digested', - "?#1(" - . "#1#2" - . ")" - . "(" - . 
"#2)"); - -DefParameterType('ScriptStyleUntil', sub { - my ($gullet, $until) = @_; - $gullet->readUntil($until); }, - beforeDigest => sub { - $_[0]->bgroup; - MergeFont(mathstyle => 'script'); }, - afterDigest => sub { - $_[0]->egroup; }, - reversion => sub { (T_BEGIN, Revert($_[0]), T_END); }); - -DefConstructor('\root ScriptStyleUntil:\of {}', - "" - . "#1#2" - . "", - reversion => '\root #1 \of {#2}'); - -#---------------------------------------------------------------------- -# LaTeX; Table 3.9. Log-like Functions, p.44. -#---------------------------------------------------------------------- -# NOTE: Classifying some as TRIGFUNCTION might clarify 'pi' ambiguities ? -DefMathI('\arccos', undef, "arccos", role => 'OPFUNCTION', meaning => 'inverse-cosine'); -DefMathI('\arcsin', undef, "arcsin", role => 'OPFUNCTION', meaning => 'inverse-sine'); -DefMathI('\arctan', undef, "arctan", role => 'OPFUNCTION', meaning => 'inverse-tangent'); -DefMathI('\arg', undef, "arg", role => 'OPFUNCTION', meaning => 'argument'); - -DefMathI('\cos', undef, "cos", role => 'TRIGFUNCTION', meaning => 'cosine'); -DefMathI('\cosh', undef, "cosh", role => 'TRIGFUNCTION', meaning => 'hyperbolic-cosine'); -DefMathI('\cot', undef, "cot", role => 'TRIGFUNCTION', meaning => 'cotangent'); -DefMathI('\coth', undef, "coth", role => 'TRIGFUNCTION', meaning => 'hyperbolic-cotangent'); - -DefMathI('\csc', undef, "csc", role => 'TRIGFUNCTION', meaning => 'cosecant'); -DefMathI('\deg', undef, "deg", role => 'OPFUNCTION', meaning => 'degree'); -DefMathI('\det', undef, "det", role => 'LIMITOP', meaning => 'determinant', - scriptpos => \&doScriptpos); -DefMathI('\dim', undef, "dim", role => 'LIMITOP', meaning => 'dimension'); - -DefMathI('\exp', undef, "exp", role => 'OPFUNCTION', meaning => 'exponential'); -DefMathI('\gcd', undef, "gcd", role => 'OPFUNCTION', meaning => 'gcd', - scriptpos => \&doScriptpos); -DefMathI('\hom', undef, "hom", role => 'OPFUNCTION'); -DefMathI('\inf', undef, "inf", role => 
'LIMITOP', meaning => 'infimum', - scriptpos => \&doScriptpos); - -DefMathI('\ker', undef, "ker", role => 'OPFUNCTION', meaning => 'kernel'); -DefMathI('\lg', undef, "lg", role => 'OPFUNCTION'); -DefMathI('\lim', undef, "lim", role => 'LIMITOP', meaning => 'limit', - scriptpos => \&doScriptpos); -DefMathI('\liminf', undef, "lim inf", role => 'LIMITOP', meaning => 'limit-infimum', - scriptpos => \&doScriptpos); - -DefMathI('\limsup', undef, "lim sup", role => 'LIMITOP', meaning => 'limit-supremum', - scriptpos => \&doScriptpos); -DefMathI('\ln', undef, "ln", role => 'OPFUNCTION', meaning => 'natural-logarithm'); -DefMathI('\log', undef, "log", role => 'OPFUNCTION', meaning => 'logarithm'); -DefMathI('\max', undef, "max", role => 'OPFUNCTION', meaning => 'maximum', - scriptpos => \&doScriptpos); - -DefMathI('\min', undef, "min", role => 'OPFUNCTION', meaning => 'minimum', - scriptpos => \&doScriptpos); -DefMathI('\Pr', undef, "Pr", role => 'OPFUNCTION', scriptpos => \&doScriptpos); -DefMathI('\sec', undef, "sec", role => 'TRIGFUNCTION', meaning => 'secant'); -DefMathI('\sin', undef, "sin", role => 'TRIGFUNCTION', meaning => 'sine'); - -DefMathI('\sinh', undef, "sinh", role => 'TRIGFUNCTION', meaning => 'hyperbolic-sine'); -DefMathI('\sup', undef, "sup", role => 'LIMITOP', meaning => 'supremum', - scriptpos => \&doScriptpos); -DefMathI('\tan', undef, "tan", role => 'TRIGFUNCTION', meaning => 'tangent'); -DefMathI('\tanh', undef, "tanh", role => 'TRIGFUNCTION', meaning => 'hyperbolic-tangent'); - -#---------------------------------------------------------------------- -# Modulo - -DefMath('\pmod{}', '\;\;(\mathop{{\rm mod}} #1)', role => 'MODIFIER'); # , meaning=>'modulo'); -DefMath('\bmod', 'mod', role => 'MODIFIEROP', meaning => 'modulo'); - -#====================================================================== -# TeX Book, Appendix B. p. 
362 - -#---------------------------------------------------------------------- -# Matrices; Generalized - -# The delimiters around a matrix may simply be notational, or for readability, -# and don't affect the "meaning" of the array structure as a matrix. -# In that case, we'll use an XMDual to indidate the content is simply the matrix, -# but the presentation includes the delimiters. -# HOWEVER, the delimeters may also signify an OPERATION on the matrix -# in which case the application & meaning of that operator must be supplied. - -# keys are -# name : the name of the environment (for reversion) -# datameaning: the (presumed) meaning of the array construct (typically 'matrix') -# delimitermeaning : the operator meaning due to delimiters (eg. norm)(as applied to the array) -# style : typically \displaystyle, \textstyle... -# left : TeX code for left of matrix -# right : TeX code for right -# ncolumns : the number of columns (default is not limited) -DefKeyVal('lx@GEN', 'style', 'UndigestedKey'); - -DefPrimitive('\lx@gen@matrix@bindings RequiredKeyVals:lx@GEN', sub { - my ($stomach, $kv) = @_; - $stomach->bgroup; - my $style = $kv->getValue('style') || T_CS('\textstyle'); - my $align = ToString($kv->getValue('alignment')) || 'c'; - # We really should be using ReadAlignmentTemplate (LaTeXML::Core::Alignment) - # but we'd have to convert it to a repeating spec somehow. - my @colspec = (before => Tokens(($align =~ /^(?:c|r)/ ? (T_CS('\hfil')) : ()), $style), - after => Tokens(($align =~ /^(?:c|l)/ ? (T_CS('\hfil')) : ()))); - my $ncols = ToString($kv->getValue('ncolumns')); - my %attributes = (); - foreach my $key (qw(rowsep)) { # Probably more? - if (my $value = $kv->getValue($key)) { - $attributes{$key} = $value; } } - alignmentBindings(LaTeXML::Core::Alignment::Template->new( - ($ncols ? (columns => [map { { @colspec } } 1 .. $ncols]) - : (repeated => [{@colspec}]))), - 'math', - (keys %attributes ? 
(attributes => {%attributes}) : ())); # }); - Let("\\\\", '\@alignment@newline'); - Let('\lx@intercol', '\lx@math@intercol'); - Let('\@row@before', '\@empty'); # Disable special row treatment (eg. numbering) unless requested - Let('\@row@after', '\@empty'); -}); - -DefPrimitive('\lx@end@gen@matrix', sub { $_[0]->egroup; }); - -DefMacro('\lx@gen@plain@matrix{}{}', - '\lx@gen@matrix@bindings{#1}' - . '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\@finish@alignment}' - # . '\lx@gen@plain@matrix@{#1}{\@start@alignment#2\cr\@finish@alignment}' - . '\lx@end@gen@matrix'); - -# The delimiters on a matrix are presumably just for notation or readability (not an operator); -# the array data itself is the matrix. -DefConstructor('\lx@gen@plain@matrix@ RequiredKeyVals:lx@GEN {}', - "?#needXMDual(" - . "" - . "?#delimitermeaning()()" - . "?#datameaning()()" - . "" - . "?#delimitermeaning()()" - . "?#datameaning()()" - . "#left#2#right" - . "" - . ")(" - . "#2" - . ")", - properties => sub { %{ $_[1]->getKeyVals }; }, - reversion => sub { - my ($whatsit, $kv, $body) = @_; - my $name = ToString($kv->getValue('name')); - my $alignment = $whatsit->getProperty('alignment'); -## (T_CS('\\' . $name), T_BEGIN, Revert($body), T_END); }, -## (T_CS('\\' . $name), T_BEGIN, Revert($alignment), T_END); }, - (T_CS('\\' . $name), T_BEGIN, $alignment->revert, T_END); }, - - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $kv = $whatsit->getArg(1); - if ($kv->getValue('datameaning') || $kv->getValue('delimitermeaning')) { - $whatsit->setProperties( - needXMDual => 1, - xmkey => LaTeXML::Package::getXMArgID()); } - $whatsit->setProperties(alignment => LookupValue('Alignment')); - return; }); - -DefMacro('\matrix{}', - '\lx@gen@plain@matrix{name=matrix,datameaning=matrix}{#1}'); - -DefMacro('\bordermatrix{}', # Semantics? - '\lx@hack@bordermatrix{\lx@gen@plain@matrix{name=bordermatrix}{#1}}'); -# HACK the newly created border matrix to add columns for the (spanned) parentheses!!! 
-# Assume (for now) that there's no XMDual structure here. -# What is the semantics, anyway? -DefConstructor('\lx@hack@bordermatrix{}', sub { - my ($document, $matrix) = @_; - $document->absorb($matrix); - my $marray = $document->getNode->lastChild; - my @rows = $document->findnodes('ltx:XMRow', $marray); - my ($h, $d) = (10.0 * $UNITY, 0); # 10pts. - # Contrived, since $matrix may be a List or... - my ($alignment) = grep { $_ } map { $_->getProperty('alignment') } $matrix->unlist; - if ($alignment) { - my $arrayh = $alignment->getHeight->ptValue; - my ($row0, $row1) = $alignment->rows; # What's row 0 ? - $h = $$row1{y}->valueOf; - $d = $h - $arrayh; } - my $md = Dimension(-$d); - $h = Dimension($h); $d = Dimension($d); - - foreach my $row (@rows) { # Add empty cells for 2nd & last colum - $document->openElementAt($row, 'ltx:XMCell'); - $document->openElementAt($row, 'ltx:XMCell'); - $row->insertAfter($row->lastChild, $row->firstChild); # Move to 2nd pos! - } - my @cols = element_nodes($rows[1]); - my $col1 = $cols[1]; - my $coln = $cols[-1]; - my $n = scalar(@rows) - 1; - $col1->setAttribute(rowspan => $n); - $coln->setAttribute(rowspan => $n); - my $pfont = $STATE->lookupValue('font')->specialize('('); - $document->appendTree($col1, - ['ltx:XMWrap', { depth => $d }, - ['ltx:XMTok', { role => 'OPEN', height => 0, depth => $d, yoffset => $md, font => $pfont }, '('], - ['ltx:XMTok', { height => $h, yoffset => $md, font => $pfont }, ' ']]); # Effectively, a strut - $document->appendTree($coln, - ['ltx:XMWrap', {}, - ['ltx:XMTok', { role => 'CLOSE', height => 0, depth => $d, yoffset => $md, font => $pfont }, ')'], - ['ltx:XMTok', { height => $h, yoffset => $md, font => $pfont }, ' ']]); - return; }, - reversion => '#1'); - -DefMacro('\pmatrix{}', - '\lx@gen@plain@matrix{name=pmatrix,datameaning=matrix,left=\@left(,right=\@right)}{#1}'); - -#---------------------------------------------------------------------- -# Cases: Generalized -# keys are -# name : the name of 
the command (for reversion) -# meaning: the (presumed) meaning of the construct -# style : \textstyle or \displaystyle -# conditionmode : mode of 2nd column, text or math -# left : TeX code for left of cases -# right : TeX code for right - -DefConstructorI('\lx@cases@condition', undef, - "#body", - alias => '', beforeDigest => sub { $_[0]->beginMode('text'); }, captureBody => 1); -DefConstructorI('\lx@cases@end@condition', undef, "", alias => '', - beforeDigest => sub { $_[0]->endMode('text'); }); - -DefPrimitive('\lx@gen@cases@bindings RequiredKeyVals:lx@GEN', sub { - my ($stomach, $kv) = @_; - $stomach->bgroup; - my $style = $kv->getValue('style') || T_CS('\textstyle'); - $style = T_CS($style) unless ref $style; - my @mode = (ToString($kv->getValue('conditionmode')) eq 'text' - ? (T_MATH) : ()); - my $condtext = ToString($kv->getValue('conditionmode')) eq 'text'; - alignmentBindings(LaTeXML::Core::Alignment::Template->new( - columns => [ - { before => Tokens($style), after => Tokens(T_CS('\hfil')) }, - { before => Tokens($style, - ($condtext ? (T_CS('\lx@cases@condition')) : ())), - after => Tokens(T_CS('\lx@column@trimright'), - ($condtext ? (T_CS('\lx@cases@end@condition')) : ()), - T_CS('\hfil')) }]), - 'math'); - Let("\\\\", '\@alignment@newline'); - Let('\lx@intercol', '\lx@math@intercol'); - DefMacro('\@row@before', ''); # Don't inherit counter stepping from containing environments - DefMacro('\@row@after', ''); -}); - -DefMacro('\lx@gen@plain@cases{}{}', - '\lx@gen@cases@bindings{#1}' - . '\lx@gen@plain@cases@{#1}{\@start@alignment#2\@finish@alignment}' - . '\lx@end@gen@cases'); -DefPrimitive('\lx@end@gen@cases', sub { $_[0]->egroup; }); - -# The logical structure for cases extracts the columns of the alignment -# to give alternating value,condition (an empty condition is replaced by "otherwise" !?!?!) 
-DefConstructor('\lx@gen@plain@cases@ RequiredKeyVals:lx@GEN {}', - '#left#2#right', - properties => sub { %{ $_[1]->getKeyVals }; }, - afterConstruct => sub { - my ($document) = @_; - if (my $point = $document->getElement->lastChild) { - # Get the sequence of alternating (case, condition). - # Expecting ltx:XMArray/ltx:XMRow/ltx:XMCell [should have /ltx:XMArg, but could be empty!!!] - my @cells = $document->findnodes('ltx:XMArray/ltx:XMRow/ltx:XMCell', $point); - my @stuff = map { ($_->hasChildNodes ? createXMRefs($document, element_nodes($_)) - : ['ltx:XMText', {}, 'otherwise']) } @cells; - $document->replaceTree(['ltx:XMDual', {}, - ['ltx:XMApp', {}, ['ltx:XMTok', { meaning => 'cases' }], @stuff], - $point], - $point); } }, - reversion => sub { - my ($whatsit, $kv, $body) = @_; - my $name = $kv->getValue('name'); - (T_CS('\cases'), T_BEGIN, Revert($body), T_END); }); - -# Note that 2nd column in \cases is in text mode! -DefMacro('\cases{}', - '\lx@gen@plain@cases{meaning=cases,left=\@left\{,conditionmode=text,style=\textstyle}{#1}'); - -#---------------------------------------------------------------------- -DefPrimitive('\openup Dimension', undef); - -# What should this do? (needs to work with alignments..) 
-# see https://www.tug.org/TUGboat/tb07-1/tb14beet.pdf -# use in arXiv:hep-th/0001208 -DefMacro('\displaylines{}', '\halign{\hbox to\displaywidth{$\hfil\displaystyle##\hfil$}\crcr#1\crcr}'); - -DefMacro('\eqalign{}', - '\@@eqalign{\@start@alignment#1\@finish@alignment}'); -DefConstructor('\@@eqalign{}', - '#1', - reversion => '\eqalign{#1}', bounded => 1, - beforeDigest => sub { alignmentBindings('rl', 'math', - attributes => { vattach => 'baseline' }); }); - -DefMacro('\eqalignno{}', - '\@@eqalignno{\@start@alignment#1\@finish@alignment}'); -DefConstructor('\@@eqalignno{}', - '#1', - reversion => '\eqalignno{#1}', bounded => 1, - beforeDigest => sub { alignmentBindings('rll', 'math', - attributes => { vattach => 'baseline' }); }); - -DefMacro('\leqalignno{}', - '\@@leqalignno{\@start@alignment#1\@finish@alignment}'); -DefConstructor('\@@leqalignno{}', - '#1', - reversion => '\leqalignno{#1}', bounded => 1, - beforeDigest => sub { alignmentBindings('rll', 'math', - attributes => { vattach => 'baseline' }); }); - -DefRegister('\pageno' => Number(0)); -DefRegister('\headline' => Tokens()); -DefRegister('\footline' => Tokens()); -DefMacroI('\folio', undef, "1"); # What else? - -DefPrimitiveI('\nopagenumbers', undef, undef); -DefMacroI('\advancepageno', undef, '\advance\pageno1\relax'); - -#====================================================================== -# TeX Book, Appendix B. p. 363 - -DefPrimitive('\raggedbottom', undef); -DefPrimitive('\normalbottom', undef); - -# if the mark is not simple, we add it to the content of the note -# otherwise, to the attribute. 
-DefConstructor('\footnote{}{}', - "^?#prenote(#prenote )()#2", - mode => 'text', bounded => 1, - beforeDigest => sub { reenterTextMode(1); neutralizeFont(); }, - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $mark = $whatsit->getArg(1); - my $change = 0; - foreach my $token (Revert($mark)) { - unless ($token->getCatcode == CC_LETTER || $token->getCatcode == CC_SPACE || - $token->getCatcode == CC_OTHER) { - $change = 1; last; } } - $whatsit->setProperty(($change ? 'prenote' : 'mark') => $mark); - return; }); -# Until we can do the "v" properly: -DefMacro('\vfootnote', '\footnote'); -DefMacro('\fo@t', '\ifcat\bgroup\noexpand\next \let\next\f@@t \else\let\next\f@t\fi \next'); -DefMacro('\f@@t', '\bgroup\aftergroup\@foot\let\next'); -DefMacro('\f@t{}', '#1\@foot'); -DefMacro('\@foot', '\strut\egroup'); - -DefPrimitiveI('\footstrut', undef, undef); -DefRegister('\footins' => Number(0)); - -DefPrimitiveI('\topinsert', undef, undef); -DefPrimitiveI('\midinsert', undef, undef); -DefPrimitiveI('\pageinsert', undef, undef); -DefPrimitiveI('\endinsert', undef, undef); -# \topins ? - -#====================================================================== -# TeX Book, Appendix B. p. 364 - -# Let's hope nobody is messing with the output routine... - -DefPrimitiveI('\footnoterule', undef, undef); - -#====================================================================== -# End of TeX Book definitions. -#====================================================================== - -#********************************************************************** -# Stray stuff .... where to ? -#********************************************************************** - -# Mostly ignorable, although it could add an attribute to an ancestor -# to record the desired justification. -# Spacing stuff -DefConstructor('\@', ''); -# Math spacing. - -# Math style. 
-# Also record that this explicitly sets the mathstyle (support for \over, etal) -DefPrimitiveI('\displaystyle', undef, sub { - MergeFont(mathstyle => 'display'); - Box(undef, undef, undef, T_CS('\displaystyle'), explicit_mathstyle => 1); }); -DefPrimitiveI('\textstyle', undef, sub { - MergeFont(mathstyle => 'text'); - Box(undef, undef, undef, T_CS('\textstyle'), explicit_mathstyle => 1); }); -DefPrimitiveI('\scriptstyle', undef, sub { - MergeFont(mathstyle => 'script'); - Box(undef, undef, undef, T_CS('\scriptstyle'), explicit_mathstyle => 1); }); -DefPrimitiveI('\scriptscriptstyle', undef, sub { - MergeFont(mathstyle => 'scriptscript'); - Box(undef, undef, undef, T_CS('\scriptscriptstyle'), explicit_mathstyle => 1); }); - -#====================================================================== - -# Special Characters. -# Try to give them some sense in math... -DefMacroI('\#', undef, '\ifmmode\lx@math@hash\else\lx@text@hash\fi', protected => 1); -DefMacroI('\&', undef, '\ifmmode\lx@math@amp\else\lx@text@amp\fi', protected => 1); -DefMacroI('\%', undef, '\ifmmode\lx@math@percent\else\lx@text@percent\fi', protected => 1); -DefMacroI("\\\$", undef, '\ifmmode\lx@math@dollar\else\lx@text@dollar\fi', protected => 1); -DefMacroI('\_', undef, '\ifmmode\lx@math@underscore\else\lx@text@underscore\fi', protected => 1); -DefPrimitiveI('\lx@text@hash', undef, '#', alias => '\#'); -DefPrimitiveI('\lx@text@amp', undef, '&', alias => '\&'); -DefPrimitiveI('\lx@text@percent', undef, '%', alias => '\%'); -DefPrimitiveI('\lx@text@dollar', undef, "\$", alias => "\\\$"); -DefPrimitiveI('\lx@text@underscore', undef, '_', alias => '\_'); -DefMathI('\lx@math@hash', undef, '#', alias => '\#'); -DefMathI('\lx@math@amp', undef, '&', role => 'ADDOP', meaning => 'and', alias => '\&'); -DefMathI('\lx@math@percent', undef, '%', role => 'POSTFIX', meaning => 'percent', alias => '\%'); -DefMathI('\lx@math@dollar', undef, "\$", role => 'OPERATOR', meaning => 'currency-dollar', - alias => "\\\$"); 
-DefMathI('\lx@math@underscore', undef, '_', alias => '\_'); - -# Discretionary times; just treat as invisible ? -DefMathI('\*', undef, "\x{2062}", role => 'MULOP', name => '', meaning => 'times'); # INVISIBLE TIMES (or MULTIPLICATION SIGN = 00D7) - -# These 3 should have some `name' assigned ... but what??? - -# Is XMWrap the right thing to wrap with (instead of XMArg)? -# We can't really assume that the stuff inside is sensible math. -# NOTE that \mathord and \mathbin aren't really right here. -# We need a finer granularity than TeX does: an ORD could be several things, -# a BIN could be a MULOP or ADDOP. -# AND, rarely, they're empty.... Is it wrong to drop them? -DefConstructor('\mathord{}', "?#1(#1)()", bounded => 1); -DefConstructor('\mathop{}', "?#1(#1)()", - bounded => 1, properties => { scriptpos => \&doScriptpos }); -DefConstructor('\mathbin{}', "?#1(#1)()", bounded => 1); -DefConstructor('\mathrel{}', "?#1(#1)()", bounded => 1); -DefConstructor('\mathopen{}', "?#1(#1)()", bounded => 1); -DefConstructor('\mathclose{}', "?#1(#1)()", bounded => 1); -DefConstructor('\mathpunct{}', "?#1(#1)()", bounded => 1); -DefConstructor('\mathinner{}', "?#1(#1)()", bounded => 1); - -# If an XMWrap (presumably from \mathop, \mathbin, etc) -# has multiple children, ALL are XMTok, within a restricted set of roles, -# we want to concatenate the text content into a single XMTok. -DefMathRewrite(xpath => 'descendant-or-self::ltx:XMWrap[' - # Only XMWrap's from the above class of operators - . '(@role="OP" or @role="BIGOP" or @role="RELOP" ' - . 'or @role="ADDOP" or @role="MULOP" or @role="BINOP" ' - . 'or @role="OPEN" or @role="CLOSE")' - . ' and count(child::*) > 1 ' - # with only XMTok as children with the roles in (roughly) the same set - . ' and not(child::*[local-name() != "XMTok"])' - . ' and not(ltx:XMTok[' - . '@role !="OP" and @role!="BIGOP" and @role!="RELOP" and @role!="METARELOP" ' - . 'and @role!="ADDOP" and @role!="MULOP" and @role!="BINOP" ' - . 
'and @role!="OPEN" and @role!="CLOSE"' - . '])]', - replace => sub { - my ($document, $node) = @_; - my $replacement = $node->cloneNode(0); - my $content = $node->textContent; - $replacement->appendText($content); - $replacement->setName('ltx:XMTok'); - $document->getNode->appendChild($replacement); - }); - -DefMacro('\hiderel{}', "#1"); # Just ignore, for now... - -DefMathI('\to', undef, "\x{2192}", role => 'ARROW'); # RIGHTWARDS ARROW??? a bit more explicitly relation-like? - -# TeX's ligatures handled by rewrite regexps. -# Note: applied in reverse order of definition (latest defined applied first!) -# Note also, these area only applied in text content, not in attributes! -DefPrimitive('\@@endash', sub { Box("\x{2013}", undef, undef, T_CS('\@@endash')); }); -DefPrimitive('\@@emdash', sub { Box("\x{2014}", undef, undef, T_CS('\@@emdash')); }); - -sub nonTypewriter { - my ($font) = @_; - return ($font->getFamily ne 'typewriter'); } - -sub nonTypewriterT1 { - my ($font) = @_; - return ($font->getFamily ne 'typewriter') && (($font->getEncoding || 'OT1') =~ /^(OT1|T1)$/); } - -# EN DASH (NOTE: With digits before & aft => \N{FIGURE DASH}) -DefLigature(qr{--}, "\x{2013}", fontTest => \&nonTypewriter); # EN dash -DefLigature(qr{---}, "\x{2014}", fontTest => \&nonTypewriter); # EM dash - -# Ligatures for doubled single left & right quotes to convert to double quotes -# [should ligatures be part of a font, in the first place? (it is in TeX!) -DefLigature(qr{\x{2018}\x{2018}}, "\x{201C}", fontTest => \&nonTypewriterT1); # double left quote -DefLigature(qr{\x{2019}\x{2019}}, "\x{201D}", fontTest => \&nonTypewriterT1); # double right quote -DefLigature(qr{\?\x{2018}}, UTF(0xBF), fontTest => \&nonTypewriterT1); # ? backquote -DefLigature(qr{!\x{2018}}, UTF(0xA1), fontTest => \&nonTypewriterT1); # ! backquote -# These ligatures are also handled by TeX. -# However, it appears that decent modern fonts in modern browsers handle these at that level. 
-# So it's likely not worth doing it at the conversion level, possibly adversely affecting search. -# DefLigature(qr{ff}, "\x{FB00}", fontTest => \&nonTypewriterT1); -# DefLigature(qr{fi}, "\x{FB01}", fontTest => \&nonTypewriterT1); -# DefLigature(qr{fl}, "\x{FB02}", fontTest => \&nonTypewriterT1); -# DefLigature(qr{ffi}, "\x{FB03}", fontTest => \&nonTypewriterT1); -# DefLigature(qr{ffl}, "\x{FB04}", fontTest => \&nonTypewriterT1); - -DefConstructor('\TeX', - "", - sizer => sub { (Dimension('1.9em'), Dimension('1.6ex'), Dimension('0.5ex')); }); -DefPrimitiveI('\i', undef, "\x{0131}"); # LATIN SMALL LETTER DOTLESS I -DefPrimitiveI('\j', undef, "\x{0237}"); - -DefConstructor('\buildrel Until:\over {}', - "" - . "" - . "#2" - . "#1" - . "", - properties => { scriptpos => sub { "mid" . $_[0]->getScriptLevel; } }); - -#********************************************************************** -# LaTeX Hook -#********************************************************************** -# This is used for plain TeX, but needs to be undone for LaTeX (or...)! -RelaxNGSchema("LaTeXML"); -Tag('ltx:section', autoClose => 1); -Tag('ltx:document', autoClose => 1, autoOpen => 1); -Tag('ltx:document', afterOpen => sub { - my ($document, $root) = @_; - if (my $font = $document->getNodeFont($root)) { - if (my $bg = $font->getBackground) { - if ($bg ne 'white') { - $document->setAttribute($root, backgroundcolor => $bg); } } } }); - -# No, \documentclass isn't really a primitive -- It's not even TeX! -# But we define a number of stubs here that will automatically load -# the LaTeX pool (or AmSTeX.pool) (which will presumably redefine them), and then -# stuff the token back to be reexecuted. 
-foreach my $ltxtrigger (qw(documentclass - newcommand renewcommand newenvironment renewenvironment - NeedsTeXFormat ProvidesFile - ProvidesPackage RequirePackage PassOptionsToPackage - makeatletter makeatother - typeout begin listfiles nofiles)) { - DefAutoload($ltxtrigger, 'LaTeX.pool.ltxml'); } - -foreach my $ltx3trigger (qw(ExplSyntaxOn - ProvidesExplClass ProvidesExplPackage)) { - # DG: note that these auto-loads are not perfect -- - # if they are triggered with a raw .sty file for example, - # the expl3 support will "expire" at the end of the current scope, - # and e.g. \ExplSyntaxOn will once again be undefined. - DefAutoload($ltx3trigger, 'expl3.pool.ltxml'); } - -# Seemingly good candidates to trigger AmSTeX ?? -foreach my $amstrigger (qw(BlackBoxes NoBlackBoxes - TagsAsMath TagsAsText TagsOnLeft TagsOnRight CenteredTagsOnSplits TopOrBottomTagsOnSplits - LimitsOnInts NoLimitsOnInts LimitsOnNames NoLimitsOnNames LimitsOnSums NoLimitsOnSums - loadbold loadeufb loadeufm loadeurb loadeurm loadeusb - loadeusm loadmathfont loadmsam loadmsbm)) { - DefAutoload($amstrigger, 'AmSTeX.pool.ltxml'); } - -# Darn; we need to be even more clever, since we need to simulate an amstex command, as well. -# For example \documentstyle[...]{amsppt} must switch to AMSTeX mode, _NOT_ LaTeX mode!!!! -DefMacro('\documentstyle OptionalSemiverbatim SkipSpaces Semiverbatim', sub { - my ($gullet, $options, $class) = @_; - LoadPool((ToString($class) =~ /^amsppt$/ ? "AmSTeX" : "LaTeX")); - (T_CS('\\documentstyle'), - ($options ? 
(T_OTHER('['), $options->unlist, T_OTHER(']')) : ()), - T_BEGIN, $class->unlist, T_END); }); - -# Technically should be in LaTeX.pool, but we try to maintain the bookkeeping from the very start, -# in order to avoid partially defined behavior when --preload directives are mixed with \usepackage{} loads -DefMacro('\@pushfilename', '\xdef\@currnamestack{{\@currname}{\@currext}{\the\catcode`\@}\@currnamestack}'); -DefMacro('\@popfilename', '\expandafter\@p@pfilename\@currnamestack\@nil'); -DefMacro('\@p@pfilename {}{}{} Until:\@nil', - '\gdef\@currname{#1}% - \gdef\@currext{#2}% - \catcode`\@#3\relax - \gdef\@currnamestack{#4}'); -DefMacroI(T_CS('\@currnamestack'), undef, Tokens()); -Let('\@currname', '\@empty'); -Let('\@currext', '\@empty'); - -#********************************************************************** -# LaTeXML Specific. -# Support for Declarations & Presentation/Semantic Duality -#********************************************************************** - -#====================================================================== -# Normally definitions disappear; the macros are expanded or have their expected effect. -# But in a few cases (eg tabular column definitions, or LaTeX \Declarexxxx) -# they will need declarations in the (La)TeX preamble to allow (La)TeX to process snippets -# (eg. math) in order to create images. -# Returning a call to this utility from Primitives will add a preamble Processing Instruction -sub AddToPreamble { - my ($cs, @args) = @_; - return Digest(Invocation(T_CS('\lx@add@Preamble@PI'), Invocation((ref $cs ? $cs : T_CS($cs)), @args))); } - -DefConstructor('\lx@add@Preamble@PI Undigested', - ""); - -#====================================================================== -# Support for constructing mathematical expressions - -# Common XMath pattern for assigning attributes from Whatsit properties. -our $XMath_attributes = - " role='#role' name='#name' meaning='#meaning' omcd='#omcd'" - . 
" width='#width' height='#height' xoffset='#xoffset' yoffset='#yoffset'" - . " lpadding='#lpadding' rpadding='#rpadding'"; - -sub XMath_copy_keyvals { - my ($stomach, $whatsit) = @_; - my $kv = $whatsit->getArg(1); - $whatsit->setProperties($kv->getPairs) if $kv; - return; } - -# Build an ltx:XMApp, application of function/operator to arguments -# first piece of (TeX) argument is expected to be the operator -# Usually used on content side, but at least the arguments should be properly encapsulated: -# They should build individual subtrees; use ltx::XMArg, ltx:XMWrap ... if needed -DefConstructor('\lx@apply OptionalKeyVals:XMath {}{}', - "#2#3", - reversion => '#2#3', - afterDigest => sub { XMath_copy_keyvals(@_); }); - -# Build an ltx:XMTok, a mathematical symbol, with given attributes -# the argument should create text to be the content of the token. -DefConstructor('\lx@symbol OptionalKeyVals:XMath {}', - "#2", - reversion => '#2', - afterDigest => sub { - $_[1]->setFont($_[1]->getArg(2)->getFont); - XMath_copy_keyvals(@_); }); - -# Wrap the contents in an ltx:XMWrap, to stand as a single subtree & providing attributes -# The ltx:XMWrap may be collapsed, later, by parsing -DefConstructor('\lx@wrap OptionalKeyVals:XMath {}', - "#2", - reversion => '#2', - afterDigest => sub { XMath_copy_keyvals(@_); }); - -# Convert a hashref into a list of tokens of the form key=value,... -sub I_keyvals { - my ($keyvals) = @_; - my @options = (); - if ($keyvals) { - while (my ($key, $value) = each %$keyvals) { - $value = TokenizeInternal($value) if defined $value && !ref $value; - push(@options, T_OTHER(',')) if @options; - push(@options, T_OTHER($key), T_OTHER('='), T_BEGIN, $value, T_END); } } - return (@options ? 
Tokens(T_OTHER('['), @options, T_OTHER(']')) : ()); } - -sub I_apply { - my ($kv, $op, @args) = @_; - return Tokens(T_CS('\lx@apply'), I_keyvals($kv), - T_BEGIN, T_CS('\lx@wrap'), T_BEGIN, $op, T_END, T_END, - T_BEGIN, (map { (T_CS('\lx@wrap'), T_BEGIN, $_, T_END); } @args), T_END); } - -sub I_symbol { - my ($kv, $text) = @_; - return Tokens(T_CS('\lx@symbol'), I_keyvals($kv), T_BEGIN, (defined $text ? $text : ()), T_END); } - -sub I_wrap { - my ($kv, @stuff) = @_; - return Tokens(T_CS('\lx@wrap'), I_keyvals($kv), T_BEGIN, @stuff, T_END); } - -# These two accept key operator_meaning, operator_omcd to give a meaning to the sub/superscript -# NOTE (BUG): We SHOULD nest paired sub/superscripts, but avoid conflicting double scripts -# To do that we need to sniff at the base, whether it already contains scripts. -# However, IsScript isn't quite sufficient if the scripts are hidden within Whatsits, duals, etc. -# Currently, LaTeXML manages to deal with the double scripts anyway; -# The reversion ALWAYS wraps the base (which will render non-optimally in images but avoid Errors) -DefConstructor('\lx@superscript OptionalKeyVals:XMath {} InScriptStyle', - "" - . "" - . "#2" - . "#3" - . "", - afterDigest => sub { XMath_copy_keyvals(@_); }, - reversion => sub { - my ($whatsit, $kv, $base, $sup) = @_; - my $bump = $whatsit->getProperty('bump'); - $bump = 1; # For now: ALWAYS {} wrap base in the reversion! - (IsEmpty($sup) - ? Revert($base) - : (($bump ? (T_BEGIN, Revert($base), T_END) : Revert($base)), T_SUPER, revertScript($sup))); }, - properties => sub { - my ($stomach, $kv, $base, $script) = @_; - my $basetype = IsScript($base); - my $bump = ($basetype && ($$basetype[1] eq 'SUPERSCRIPT') ? 1 : 0); - (scriptpos => "post" . ($_[0]->getScriptLevel + $bump), - bump => $bump); }, - sizer => sub { scriptSizer($_[0]->getArg(3), $_[0]->getArg(2), undef, 'SUPERSCRIPT', 'post'); }); - -DefConstructor('\lx@subscript OptionalKeyVals:XMath {} InScriptStyle', - "" - . "" - . "#2" - . 
"#3" - . "", - afterDigest => sub { XMath_copy_keyvals(@_); }, - reversion => sub { - my ($whatsit, $kv, $base, $sub) = @_; - my $bump = $whatsit->getProperty('bump'); - $bump = 1; # For now: ALWAYS {} wrap base in the reversion! - (IsEmpty($sub) - ? Revert($base) - : (($bump ? (T_BEGIN, Revert($base), T_END) : Revert($base)), T_SUB, revertScript($sub))); }, - properties => sub { - my ($stomach, $kv, $base, $script) = @_; - my $basetype = IsScript($base); - my $bump = ($basetype && ($$basetype[1] eq 'SUBSCRIPT') ? 1 : 0); - (scriptpos => "post" . ($_[0]->getScriptLevel + $bump), - bump => $bump); }, - sizer => sub { scriptSizer($_[0]->getArg(3), $_[0]->getArg(2), undef, 'SUBSCRIPT', 'post'); }); - -# Ignore $kv for the moment????? -sub I_subscript { - my ($kv, $base, $script) = @_; - return Tokens(T_CS('\lx@subscript'), I_keyvals($kv), T_BEGIN, $base, T_END, T_BEGIN, $script, T_END); } - -sub I_superscript { - my ($kv, $base, $script) = @_; - return Tokens(T_CS('\lx@superscript'), I_keyvals($kv), T_BEGIN, $base, T_END, T_BEGIN, $script, T_END); } - -# Superscript meaning power -DefMacro('\lx@power{}{}', '\lx@superscript[operator_meaning=power]{#1}{#2}'); -# Superscript meaning functional (or applicative) power; iterated function/operator application -DefMacro('\lx@functionalpower{}{}', '\lx@superscript[operator_meaning=functional-power]{#1}{#2}'); - -# These to be used in presentation side -DefMathI('\lx@ApplyFunction', undef, "\x{2061}", reversion => '', name => '', role => 'APPLYOP'); -DefMathI('\lx@InvisibleTimes', undef, "\x{2062}", reversion => '', name => '', meaning => 'times', role => 'MULOP'); -DefMathI('\lx@InvisibleComma', undef, "\x{2063}", reversion => '', name => '', role => 'PUNCT'); -DefMathI('\lx@InvisiblePlus', undef, "\x{2064}", reversion => '', name => '', meaning => 'plus', role => 'ADDOP'); - -DefConstructor('\lx@kludged{}', - "?#isMath(#1)(#1)", - reversion => '#1'); -DefConstructor('\lx@padded[MuDimension]{MuDimension}{}', - '#3', - 
afterConstruct => sub { - my ($document, $whatsit) = @_; - my $node = $document->getLastChildElement($document->getNode); - if ($document->getNodeQName($node) eq 'ltx:XMDual') { - my (@ch) = $node->childNodes; - $node = $ch[1]; } - if (my $lpadding = $whatsit->getArg(1)) { - $document->setAttribute($node, lpadding => $lpadding); } - if (my $rpadding = $whatsit->getArg(2)) { - $document->setAttribute($node, rpadding => $rpadding); } }, - reversion => '#3'); - -#====================================================================== -# Building XMDuals for Mathematical Parallel markup -# Used when the content and presentation forms have different structure. - -DefKeyVal('XMath', 'reversion', 'UndigestedDefKey'); -DefKeyVal('XMath', 'content_reversion', 'UndigestedDefKey'); -DefKeyVal('XMath', 'presentation_reversion', 'UndigestedDefKey'); -DefConstructor('\lx@dual OptionalKeyVals:XMath {}{}', - "#2#3", - beforeDigest => sub { - PushValue(PENDING_DUAL_XMARGS => {}); - return; }, - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my $kv = $whatsit->getArg(1); - my $xmargs = PopValue('PENDING_DUAL_XMARGS'); # Really SHOULD be a hash - $whatsit->setProperties(%$xmargs) if $xmargs; # Hopefully no name class with XM - $whatsit->setProperties($kv->getPairs) if $kv; - my %props = $whatsit->getProperties; - my $cr = $props{content_reversion}; - my $pr = $props{presentation_reversion}; - my $r = ToString($props{revert_as}) || 'content'; # ????? - - if (!defined $props{reversion}) { - $whatsit->setProperty(reversion => sub { - my ($self, $kvs, $c, $p) = @_; - ($r eq 'content' ? $cr || Revert($c) - : ($r eq 'presentation' ? $pr || Revert($p) - : ($r eq 'dual' - ? Tokens(T_CS('\lx@dual'), I_keyvals($kvs), - T_BEGIN, ($cr || Revert($c)), T_END, - T_BEGIN, ($pr || Revert($p)), T_END) - : (($LaTeXML::DUAL_BRANCH || '') eq 'presentation' # Context dependent reversion - ? 
$pr || Revert($p) - : $cr || Revert($c))))); }); } - return; }, - sizer => '#3'); # size according to presentation - -# These are used within XMDual -# The XMDual represents both a content & presentation representation of some -# possibly exotic structure ("Transfix notation"), -# or just a somewhat complex presentation that corresponds (often) to a simpler -# applicative content structure. -# Invoking such a mathematical object to "arguments" requires that both the -# content & presentation branches contain those arguments. -# There will be an XMArg, with an ID, containing the actual markup, and an XMRef that referrs to it. -# The XMArg will usually be in the presentation branch (so that it inherits appropriate style), -# unless the arg is "hidden" (ie. semantic, but not displayed). -# This means that we don't know which one appears first! (See Package's dualize_arglist) -# -# To get a "proper id", we'll use a temporary label-like attribute (_xmkey) -# and establish an id and idref later. -DefConstructor('\lx@xmarg{}{}', "#2", - reversion => '#2', - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my ($xmid, $arg) = $whatsit->getArgs(); - $xmid = ToString($xmid); - if (my $pending = LookupValue('PENDING_DUAL_XMARGS')) { - if (my $xmargs = $$pending[-1]) { - $$xmargs{$xmid} = $arg; } } - AssignValue('xref:' . ToString($_[1]->getArg(1)) => $_[1], 'global'); }); - -DefConstructor('\lx@xmref{}', "", - reversion => sub { - Revert(LookupValue('xref:' . ToString($_[1]))); }, - sizer => sub { LookupValue('xref:' . ToString($_[0]->getArg(1)))->getSize; }); - -# Connect up the XMRef/XMArg pairs (actually can be multiple XMRef's) -# We want to set the idref of the XMRef's to point to the id of the XMArg (or other XM element), -# but usually the XMRef is created first, and we want to let the referred to element -# get it's id computed by whatever means it prefers. 
-# so we have to work both ways (use state to record associations, to avoid expensive xpath) -# Set id's on any non-XMRef nodes that have an _xmkey -# This gets a more natural ordering -Tag('ltx:*', 'afterOpen:late' => sub { - my ($document, $node) = @_; - if (my $key = $node->getAttribute('_xmkey')) { - my $qname = $document->getNodeQName($node); - if (($qname ne 'ltx:XMRef') && ($qname =~ /^ltx:XM/) && !$node->hasAttribute('xml:id')) { - GenerateID($document, $node, undef, ''); } } }); - -Tag('ltx:XMDual', 'afterClose:late' => sub { - my ($document, $node) = @_; - my %ids = (); - my @refs = (); - # Collect all children with _xmkey attribute - foreach my $n ($document->findnodes('descendant::*[@_xmkey]', $node)) { - if (($document->getNodeQName($n) eq 'ltx:XMRef') && !$n->hasAttribute('idref')) { - push(@refs, $n); } # we'll fill these in next - else { # generate & record ids for all referenced noces - my $key = $n->getAttribute('_xmkey'); - if (!$ids{$key}) { - GenerateID($document, $n, undef, ''); # Generate id if none already. - $ids{$key} = $n->getAttribute('xml:id'); } } } - foreach my $r (@refs) { # Now fill in the references - $document->setAttribute($r, idref => $ids{ $r->getAttribute('_xmkey') }); - $r->removeAttribute('_xmkey'); } -}); - -# Construction aids -# Build an XMDual (via \lx@dual) given the content & presentation forms. -# These forms are provided as Tokens, invoking the appropriate constructor macros, -# and refering to any arguments using #1, #2.... (see T_XMArg for syntactic sugar) -# The arguments (if any) are given separately; within the content & presentation -# they are replaced by \lx@xmref and \lx@xmarg, appropriately, -# so that they will be linked/shared in the XML tree. -# The keyvals argument is a hash containing any properties of the construct, -# along with reversion, content_reversion & presentation_reversion, which are -# substituted for arguments as well. 
-sub I_dual { - my ($keyvals, $content, $presentation, @args) = @_; - $content = TokenizeInternal($content) if $content && !ref $content; - $presentation = TokenizeInternal($presentation) if $presentation && !ref $presentation; - my (@revargs, @pargs, @cargs); - foreach my $arg (@args) { - my $id = LaTeXML::Package::getXMArgID(); - push(@revargs, Tokens(I_arg(ToString($id)))); - push(@pargs, Invocation(T_CS('\lx@xmarg'), $id, $arg)); - push(@cargs, Invocation(T_CS('\lx@xmref'), $id)); } - my $optional = undef; - if ($keyvals) { - my @options = (); - while (my ($key, $value) = each %$keyvals) { - $value = TokenizeInternal($value) if $value && !ref $value; - if ($key =~ /^(?:presentation_|content_|)reversion$/) { - $value = $value->substituteParameters(@revargs); } - push(@options, T_OTHER(',')) if @options; - push(@options, T_OTHER($key), T_OTHER('='), T_BEGIN, $value, T_END); } - $optional = Tokens(@options); } - return - Invocation(T_CS('\lx@dual'), $optional, - $content->substituteParameters(@cargs), - I_wrap({}, $presentation->substituteParameters(@pargs))); } - -# A little helper to shorten things up a bit; simply generates #1 (or whatever) -sub I_arg { # uncoditionally create an arg token - return bless ["$_[0]", CC_ARG], 'LaTeXML::Core::Token'; } - -sub I_xmarg { - my ($id, $arg) = @_; - return Tokens(T_CS('\lx@xmarg'), - T_BEGIN, (ref $id ? $id : T_OTHER($id)), T_END, T_BEGIN, $arg, T_END); } - -sub I_xmref { - my ($id) = @_; - return Tokens(T_CS('\lx@xmref'), T_BEGIN, (ref $id ? $id : T_OTHER($id)), T_END); } - -#---------------------------------------------------------------------- -# This group should be renamed to \lx@somethings and deprecated -# NOTE: work through this systematically! -DefMacro('\FCN{}', '\lx@wrap[role=FUNCTION]{#1}'); -DefMacro('\ROLE{}{}', '\lx@wrap[role={#1}]{#2}'); -DefMacro('\@SYMBOL{}', '\lx@wrap[role=ID]{#1}'); -DefMacro('\@CSYMBOL{}', '\lx@symbol[meaning={#1}]{}'); -DefMacro('\@APPLY{}', '\lx@apply[]{#1}{}'); # Sorta broken? 
-DefMacro('\@MAYBEAPPLY{}{}', '\ifx.#2.#1\else\lx@apply{#1}{#2}\fi'); -DefMacro('\@WRAP{}', '\lx@wrap[]{#1}'); -DefMacro('\@TOKEN{}', '\lx@symbol[name={#1}]{}'); -DefMacro('\@SUPERSCRIPT{}{}', '\ifx.#2.#1\else\lx@superscript[]{#1}{#2}\fi'); -DefMacro('\@SUBSCRIPT{}{}', '\ifx.#2.#1\else\lx@subscript[]{#1}{#2}\fi'); -Let('\@PADDED', '\lx@padded'); -Let('\DUAL', '\lx@dual'); -Let('\@XMArg', '\lx@xmarg'); -Let('\@XMRef', '\lx@xmref'); -Let('\@APPLYFUNCTION', '\lx@ApplyFunction'); -Let('\@INVISIBLETIMES', '\lx@InvisibleTimes'); -Let('\@INVISIBLECOMMA', '\lx@InvisibleComma'); -Let('\@INVISIBLEPLUS', '\lx@InvisiblePlus'); - -# End of stuff to be deprecated. -#---------------------------------------------------------------------- - -#====================================================================== - -# We OUGHT to be able to do this using \llap,\rlap,\hss... -DefMacro('\lx@tweaked{}{}', '\ifmmode\lx@math@tweaked{#1}{#2}\else\lx@text@tweaked{#1}{#2}\fi'); -DefConstructor('\lx@math@tweaked RequiredKeyVals {}', - "#2", - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my ($kv, $body) = $whatsit->getArgs; - XMath_copy_keyvals($stomach, $whatsit); - $whatsit->setFont($body->getFont); - return; }, - reversion => '#2'); - -DefConstructor('\lx@text@tweaked RequiredKeyVals {}', - "#2", - afterDigest => sub { - my ($stomach, $whatsit) = @_; - my ($kv, $body) = $whatsit->getArgs; - $whatsit->setProperties($kv->getPairs); }); - -DefMacro('\lx@nounicode {}', '\ifmmode\lx@math@nounicode#1\else\lx@text@nounicode#1\fi'); - -DefConstructor('\lx@framed[]{}', - "#2", - properties => { frame => sub { ToString($_[1] || 'rectangle'); } }); -DefConstructor('\lx@hflipped{}', - "#1"); - -sub reportNoUnicode { - my ($cs) = @_; - $cs = ToString($cs); - if (!LookupMapping('missing_unicode' => $cs)) { - Warn('expected', 'unicode', $cs, - "There's no Unicode equivalent for the symbol '$cs'"); - AssignMapping('missing_unicode' => $cs => 1); } - return; } -# Slightly contrived so that this 
can be used within a DefMath -# and still declare & get the semantic properties. -DefPrimitive('\lx@math@nounicode DefToken', sub { - my ($stomach, $cs) = @_; - reportNoUnicode($cs); - Box(ToString($cs), undef, undef, $cs, class => 'ltx_nounicode'); }); - -DefConstructor('\lx@text@nounicode DefToken', - "#1", - afterDigest => sub { - reportNoUnicode(ToString($_[1]->getArg(0))); }); - -DefConstructor('\@ERROR{}{}', "#2"); - -#********************************************************************** -DefConstructor('\WildCard[]', "<_WildCard_>#1"); -DefConstructorI('\WildCardA', undef, "<_WildCard_/>"); -DefConstructorI('\WildCardB', undef, "<_WildCard_/>"); -DefConstructorI('\WildCardC', undef, "<_WildCard_/>"); -#********************************************************************** -# After all other rewrites have acted, a little cleanup - -DefRewrite(xpath => 'descendant-or-self::ltx:XMWrap[count(child::*)=1]', - replace => sub { my ($document, $wrap) = @_; - if (my $node = $document->getFirstChildElement($wrap)) { - # Copy attributes but NOT internal ones, - # NOR xml:id, else we get clashes - foreach my $attribute ($wrap->attributes) { - if ($attribute->nodeType == XML_ATTRIBUTE_NODE) { - my $attr = $document->getNodeQName($attribute); - $document->setAttribute($node, $attr => $attribute->getValue) - unless ($attr eq 'xml:id') || $attr =~ /^_/; - if ($attr =~ /^_/) { } - elsif ($attr eq 'xml:id') { - my $id = $attribute->getValue; - if (my $previd = $node->getAttribute('xml:id')) { # Keep original id - # but swap any references to the one on the wrapper! 
- foreach my $ref ($document->findnodes("//*[\@idref='$id']")) { - $ref->setAttribute(idref => $previd); } - $wrap->removeAttribute('xml"id'); - $document->unRecordID($id); } - else { - $wrap->removeAttribute('xml:id'); - $document->unRecordID($id); - $document->setAttribute($node, 'xml:id' => $id); } } - else { - $document->setAttribute($node, $attr => $attribute->getValue); } } } - # But keep $node's font from being overwritten. - $document->setNodeFont($wrap, $document->getNodeFont($node)); - ## WHY THIS???? - $document->getNode->appendChild($node); -} }); -#====================================================================== - -sub aligningEnvironment { - my ($align, $class, $document, %props) = @_; - map { setAlignOrClass($document, $_, $align, $class) } - insertBlock($document, $props{body}); # Add class attribute to new nodes. - return; } - -# should be obsolete!!! -sub addClass { - my ($node, $class) = @_; - if ($node && $class && ($node->nodeType == XML_ELEMENT_NODE)) { - if ($node->hasAttribute('class')) { - $node->setAttribute(class => $node->getAttribute('class') . ' ' . $class); } - else { - $node->setAttribute(class => $class); } } - return; } - -DefConstructor('\@ADDCLASS Semiverbatim', sub { - $_[0]->addClass($_[0]->getElement, ToString($_[1])); }, - sizer => 0); - -sub setAlignOrClass { - my ($document, $node, $align, $class) = @_; - my $model = $document->getModel; - my $qname = $model->getNodeQName($node); - if ($qname eq 'ltx:tag') { } # HACK - elsif ($align && $document->canHaveAttribute($qname, 'align')) { - $node->setAttribute(align => $align); } - elsif ($class && $document->canHaveAttribute($qname, 'class')) { - $document->addClass($node, $class); } - return; } - -#====================================================================== -# A random collection of Tokens utility functions. -# [probably should be exported from Tokens.pm ?] -# [maybe need to do some reorganization?] 
-# Since this is used for textual tokens, typically to split author lists, -# we don't split within braces or math -sub SplitTokens { - my ($tokens, @delims) = @_; - my @items = (); - my @toks = (); - if ($tokens) { - my @tokens = $tokens->unlist; - my $t; - while ($t = shift(@tokens)) { - if (grep { Equals($t, $_) } @delims) { - push(@items, [@toks]); @toks = (); } - elsif ($t->defined_as(T_BEGIN)) { - push(@toks, $t); - my $level = 1; - while ($level && defined($t = shift(@tokens))) { - my $cc = $t->getCatcode; - $level++ if $cc == CC_BEGIN; - $level-- if $cc == CC_END; - push(@toks, $t); } } - elsif ($t->defined_as(T_MATH)) { - push(@toks, $t); - while (defined($t = shift(@tokens))) { - my $cc = $t->getCatcode; - push(@toks, $t); - last if $cc == CC_MATH; } } - else { - push(@toks, $t); } } } - return (@items, [@toks]); } - -sub andSplit { - my ($cs, $tokens) = @_; - return map { ($cs, T_BEGIN, @$_, T_END) } SplitTokens($tokens, T_CS('\and')); } - -sub orNull { - return (grep { defined } @_) ? @_ : undef; } - -# Inverse operation -sub JoinTokens { - my ($conjunction, @things) = @_; - if (!@things) { return (); } - my @result = (shift(@things)); - while (my $thing = shift(@things)) { - push(@result, $conjunction, $thing); } - return Tokens(@result); } - -DefMacro('\dump', sub { - Warn('unexpected', 'dump', $_[0], "Do not know how to \\dump yet, sorry"); }); - -#********************************************************************** -LoadPool('eTeX'); # unless.... ? -LoadPool('pdfTeX'); # unless.... ? -#********************************************************************** - -1; diff --git a/lib/LaTeXML/Util/Pathname.pm b/lib/LaTeXML/Util/Pathname.pm index 9407c65fe..762c44a24 100644 --- a/lib/LaTeXML/Util/Pathname.pm +++ b/lib/LaTeXML/Util/Pathname.pm @@ -429,10 +429,11 @@ sub build_kpse_cache { # These are directories which contain the tex related files we're interested in. # (but they're typically below where the ls-R indexes are!) 
my $texpaths = `"$kpsewhich" --show-path tex $kpse_toolchain`; chomp($texpaths); - my @filters = (); + my @filters = (); # Really shouldn't end up empty. foreach my $path (split(/$KPATHSEP/, $texpaths)) { $path =~ s/^!!//; $path =~ s|//+$|/|; push(@filters, $path) if -d $path; } + my $filterre = scalar(@filters) && '(?:' . join('|', map { "\Q$_\E"; } @filters) . ')'; $texmf =~ s/^["']//; $texmf =~ s/["']$//; $texmf =~ s/^\s*\\\{(.+?)}\s*$/$1/s; $texmf =~ s/\{\}//g; @@ -447,13 +448,12 @@ sub build_kpse_cache { open($LSR, '<', "$dir/ls-R") or die "Cannot read $dir/ls-R: $!"; while (<$LSR>) { chop; - next unless $_; - if (/^%/) { } - elsif (/^(.*?):$/) { # Move to a new subdirectory + next if !$_ || /^%/; + if (/^(.*?):$/) { # Move to a new subdirectory $subdir = $1; - $subdir =~ s|^\./||; # remove prefix - my $d = $dir . '/' . $subdir; # Hopefully OS safe, for comparison? - $skip = !grep { $d =~ /^\Q$_\E/ } @filters; # check if one of the TeX paths + $subdir =~ s|^\./||; # remove prefix + my $d = $dir . '/' . $subdir; # Hopefully OS safe, for comparison? + $skip = !$filterre || $d !~ /$filterre/; $skip |= ($d =~ m|-dev[$//]|) unless $LaTeXML::DEBUG{'latex-dev'}; } elsif (!$skip) {