Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rearrangements #2387

Merged
merged 16 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 67 additions & 42 deletions lib/LaTeXML/Common/Font.pm
Original file line number Diff line number Diff line change
Expand Up @@ -60,44 +60,53 @@ my $FLAG_EMPH = 0x10;
# NOTE: This probably doesn't really belong in here...

my %font_family = (
cmr => { family => 'serif' }, cmss => { family => 'sansserif' },
cmtt => { family => 'typewriter' }, cmvtt => { family => 'typewriter' },
cmt => { family => 'serif' }, # for cmti "text italic"
cmfib => { family => 'serif' }, cmfr => { family => 'serif' },
cmdh => { family => 'serif' }, cm => { family => 'serif' },
ptm => { family => 'serif' }, ppl => { family => 'serif' },
pnc => { family => 'serif' }, pbk => { family => 'serif' },
phv => { family => 'sansserif' }, pag => { family => 'serif' },
pcr => { family => 'typewriter' }, pzc => { family => 'script' },
put => { family => 'serif' }, bch => { family => 'serif' },
psy => { family => 'symbol' }, pzd => { family => 'dingbats' },
ccr => { family => 'serif' }, ccy => { family => 'symbol' },
cmbr => { family => 'sansserif' }, cmtl => { family => 'typewriter' },
cmbrs => { family => 'symbol' }, ul9 => { family => 'typewriter' },
txr => { family => 'serif' }, txss => { family => 'sansserif' },
txtt => { family => 'typewriter' }, txms => { family => 'symbol' },
txsya => { family => 'symbol' }, txsyb => { family => 'symbol' },
pxr => { family => 'serif' }, pxms => { family => 'symbol' },
pxsya => { family => 'symbol' }, pxsyb => { family => 'symbol' },
futs => { family => 'serif' },
uaq => { family => 'serif' }, ugq => { family => 'sansserif' },
eur => { family => 'serif' }, eus => { family => 'script' },
euf => { family => 'fraktur' }, euex => { family => 'symbol' },
cmr => { family => 'serif' },
cmss => { family => 'sansserif' },
cmssq => { family => 'sansserif' }, # quote style?
cmssqi => { family => 'sansserif', shape => 'italic' }, # quote style?
cmtt => { family => 'typewriter' }, cmvtt => { family => 'typewriter' },
cmt => { family => 'serif' }, # for cmti "text italic"
cmfib => { family => 'serif' },
cmfr => { family => 'serif' },
cm => { family => 'serif' },
cmdh => { family => 'serif' },
cmr => { family => 'serif' },
cmdunh => { family => 'serif' }, # like cmr10 but with tall body heights
cmu => { family => 'serif' }, # unslanted italic ??
ptm => { family => 'serif' }, ppl => { family => 'serif' },
pnc => { family => 'serif' }, pbk => { family => 'serif' },
phv => { family => 'sansserif' }, pag => { family => 'serif' },
pcr => { family => 'typewriter' }, pzc => { family => 'script' },
put => { family => 'serif' }, bch => { family => 'serif' },
psy => { family => 'symbol' }, pzd => { family => 'dingbats' },
ccr => { family => 'serif' }, ccy => { family => 'symbol' },
cmbr => { family => 'sansserif' }, cmtl => { family => 'typewriter' },
cmbrs => { family => 'symbol' }, ul9 => { family => 'typewriter' },
txr => { family => 'serif' }, txss => { family => 'sansserif' },
txtt => { family => 'typewriter' }, txms => { family => 'symbol' },
txsya => { family => 'symbol' }, txsyb => { family => 'symbol' },
pxr => { family => 'serif' }, pxms => { family => 'symbol' },
pxsya => { family => 'symbol' }, pxsyb => { family => 'symbol' },
futs => { family => 'serif' },
uaq => { family => 'serif' }, ugq => { family => 'sansserif' },
eur => { family => 'serif' }, eus => { family => 'script' },
euf => { family => 'fraktur' }, euex => { family => 'symbol' },
# The following are actually math fonts.
ms => { family => 'symbol' },
ccm => { family => 'serif', shape => 'italic' },
cmm => { family => 'italic', encoding => 'OML' },
cmex => { family => 'symbol', encoding => 'OMX' }, # Not really symbol, but...
cmsy => { family => 'symbol', encoding => 'OMS' },
ccitt => { family => 'typewriter', shape => 'italic' },
cmbrm => { family => 'sansserif', shape => 'italic' },
futm => { family => 'serif', shape => 'italic' },
futmi => { family => 'serif', shape => 'italic' },
txmi => { family => 'serif', shape => 'italic' },
pxmi => { family => 'serif', shape => 'italic' },
bbm => { family => 'blackboard' },
bbold => { family => 'blackboard' },
bbmss => { family => 'blackboard' },
ms => { family => 'symbol' },
ccm => { family => 'serif', shape => 'italic' },
cmm => { family => 'math', shape => 'italic', encoding => 'OML' },
cmex => { family => 'symbol', encoding => 'OMX' }, # Not really symbol, but...
cmsy => { family => 'symbol', encoding => 'OMS' },
ccitt => { family => 'typewriter', shape => 'italic' },
cmsltt => { family => 'typewriter', shape => 'slanted' },
cmbrm => { family => 'sansserif', shape => 'italic' },
futm => { family => 'serif', shape => 'italic' },
futmi => { family => 'serif', shape => 'italic' },
txmi => { family => 'serif', shape => 'italic' },
pxmi => { family => 'serif', shape => 'italic' },
bbm => { family => 'blackboard' },
bbold => { family => 'blackboard' },
bbmss => { family => 'blackboard' },
# some ams fonts
cmmib => { family => 'italic', series => 'bold' },
cmbsy => { family => 'symbol', series => 'bold' },
Expand Down Expand Up @@ -177,16 +186,17 @@ sub decodeFontname {
if (my $ffam = lookupFontFamily($fam)) { map { $props{$_} = $$ffam{$_} } keys %$ffam; }
if (my $fser = lookupFontSeries($ser)) { map { $props{$_} = $$fser{$_} } keys %$fser; }
if (my $fsh = lookupFontShape($shp)) { map { $props{$_} = $$fsh{$_} } keys %$fsh; }
$size = 1 unless $size; # Yes, also if 0, "" (from regexp)
$size = $at if defined $at;
$size *= $scaled if defined $scaled;
$size = 1 unless $size; # Yes, also if 0, "" (from regexp)
$size = $at if defined $at;
$size = $size * $scaled if defined $scaled;
$props{name} = $name;
$props{size} = $size;
# Experimental Hack !?!?!?
$props{encoding} = 'OT1' unless defined $props{encoding};
$props{at} = $at . "pt" if defined $at;
return %props; }
else {
return; } }
Info('unrecognized', 'font', undef, "Unrecognized fontname '$name'");
return (family => $name, size => DEFSIZE()); } }

sub lookupTeXFont {
my ($fontname, $seriescode, $shapecode) = @_;
Expand Down Expand Up @@ -266,6 +276,7 @@ sub stringify {
no warnings 'recursion';
my ($self) = @_;
my ($fam, $ser, $shp, $siz, $col, $bkg, $opa, $enc, $lang, $mstyle, $flags) = @$self;
# !!!!!
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What could be some words to describe that exclamation?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that got added? Hmm...

$fam = 'serif' if $fam && ($fam eq 'math');
return 'Font[' . join(',', map { Stringify($_) } grep { $_ }
(isDiff($fam, $DEFFAMILY) ? ($fam) : ()),
Expand All @@ -280,6 +291,16 @@ sub stringify {
)
. ']'; }

# Return a Fontinfo-like hash
# Eventually a more integrated representation of Fonts that accommodates
# both low-level TeX-like commands, and higher-level CSS-like ones.
sub asFontinfo {
  my ($self) = @_;
  # The font object is an array; its first ten slots map, in order, onto these keys.
  # The eleventh slot (flags) is intentionally not included in the result.
  my @keys = qw(family series shape size color background opacity
    encoding language mathstyle);
  my %info;
  @info{@keys} = @$self[0 .. $#keys];
  # A missing (or otherwise false) encoding falls back to 'OT1'.
  $info{encoding} ||= 'OT1';
  return \%info; }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

$flags are not returned?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they'll only show up in the CSS style of specifying fonts, so shouldn't be needed for "fontinfo", but the whole synthesization idea is a bit up-in-the-air.


sub equals {
my ($self, $other) = @_;
return (defined $other) && ((ref $self) eq (ref $other))
Expand Down Expand Up @@ -328,6 +349,7 @@ sub relativeTo {
my ($self, $other) = @_;
my ($fam, $ser, $shp, $siz, $col, $bkg, $opa, $enc, $lang, $mstyle, $flags) = @$self;
my ($ofam, $oser, $oshp, $osiz, $ocol, $obkg, $oopa, $oenc, $olang, $omstyle, $oflags) = @$other;
# !!!!
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[2] What could be some words to describe that exclamation?

$fam = 'serif' if $fam && ($fam eq 'math');
$ofam = 'serif' if $ofam && ($ofam eq 'math');
## my $emph = 0;
Expand Down Expand Up @@ -358,6 +380,9 @@ sub relativeTo {
(isDiff($opa, $oopa)
? (opacity => { value => $opa, properties => { opacity => $opa } })
: ()),
(isDiff($enc, $oenc)
? (encoding => { value => $enc, properties => { encoding => $enc } })
: ()),
(isDiff($lang, $olang)
? ('xml:lang' => { value => $lang, properties => { language => $lang } })
: ()),
Expand Down
14 changes: 9 additions & 5 deletions lib/LaTeXML/Core/Definition/CharDef.pm
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,17 @@ sub invoke {
my $mathglyph = $$self{mathglyph};
# A dilemma: If the \chardef were in a style file, you'd prefer to revert to the $cs
# but if defined in the document source, better to use \char ###\relax, so it still "works"
if (defined $mathglyph) { # Must be a math char
my $src = $$self{locator} && $$self{locator}->toString;
my $local = $src && $src !~ /\.(?:sty|ltxml|ltxmlc)/; # Dumps currently have undefined src!
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is .ltxmlc a new extension? We don't have them in the repository at the moment. If it's dump related, why c and not d? Or even better .ltxml.dump or .dump.ltxml

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly for dumps and your naming suggestion sounds good

if (defined $mathglyph) { # Must be a math char
return Box($mathglyph, undef, undef,
Tokens(T_CS('\mathchar'), $value->revert, T_CS('\relax')),
($local ? Tokens(T_CS('\mathchar'), $value->revert, T_CS('\relax')) : $$self{cs}),
role => $$self{role}); }
else { # else text; but note defered font/encoding till digestion!
return Box(LaTeXML::Package::FontDecode($value->valueOf), undef, undef,
Tokens(T_CS('\char'), $value->revert, T_CS('\relax'))); } }
else { # else text; but note defered font/encoding till digestion!
my ($char, %props) = LaTeXML::Package::FontDecode($value->valueOf);
return Box($char, undef, undef,
($local ? Tokens(T_CS('\char'), $value->revert, T_CS('\relax')) : $$self{cs}),
%props); } }

sub equals {
my ($self, $other) = @_;
Expand Down
7 changes: 3 additions & 4 deletions lib/LaTeXML/Core/State.pm
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,12 @@ sub new {
$$self{delcode} = {};
$$self{tracing_definitions} = {};
# Initializations that INITEX would have set.
$$self{mathcode}{'.'} = [0];
for (my $c = ord('0') ; $c <= ord('9') ; $c++) {
$$self{mathcode}{ chr($c) } = [0x7000]; }
$$self{mathcode}{ chr($c) } = [0x7000 + $c]; }
for (my $c = ord('a') ; $c <= ord('z') ; $c++) {
my $C = $c + ord('A') - ord('a');
$$self{mathcode}{ chr($c) } = [0x7100];
$$self{mathcode}{ chr($C) } = [0x7100];
$$self{mathcode}{ chr($c) } = [0x7100 + $c];
$$self{mathcode}{ chr($C) } = [0x7100 + $C];
$$self{uccode}{ chr($c) } = [$C];
$$self{lccode}{ chr($C) } = [$c];
$$self{sfcode}{ chr($C) } = [999]; }
Expand Down
1 change: 1 addition & 0 deletions lib/LaTeXML/Engine/LaTeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -4889,6 +4889,7 @@ DefConstructor('\@framebox[Dimension][]{}',
$document->setAttribute($c[0], $k => $v); } } } }
);

AssignValue(allocated_boxes => 0, 'global');
DefPrimitive('\newsavebox DefToken', sub {
my $n = LookupValue('allocated_boxes') + 1;
AssignValue(allocated_boxes => $n, 'global');
Expand Down
2 changes: 1 addition & 1 deletion lib/LaTeXML/Engine/TeX_Character.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ DefRegister('\catcode Number', Number(0),
# Not used anywhere (yet)
DefRegister('\sfcode Number', Number(0),
getter => sub { my $code = $STATE->lookupSFcode(chr($_[0]->valueOf));
Number(defined $code ? $code : 0); },
Number(defined $code ? $code : 1000); },
setter => sub { $STATE->assignSFcode(chr($_[2]->valueOf) => $_[0]->valueOf, $_[1]); });
DefRegister('\lccode Number', Number(0),
getter => sub { my $code = $STATE->lookupLCcode(chr($_[0]->valueOf));
Expand Down
33 changes: 33 additions & 0 deletions lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -285,5 +285,38 @@ DeclareFontMap('OMX',
# [missing tips for horizontal curly braces]
"\x{2191}", "\x{2193}", undef, undef, undef, undef, "\x{21D1}", "\x{21D3}"]);

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# TeX's ligatures handled by rewrite regexps.
# Note: applied in reverse order of definition (latest defined applied first!)
# Note also, these are only applied in text content, not in attributes!
# Font test for ligatures: true unless the font's family is 'typewriter'.
sub nonTypewriter {
  my ($font) = @_;
  my $family = $font->getFamily;
  return !($family eq 'typewriter'); }

# Font test for ligatures: true when the font's family is not 'typewriter'
# AND its encoding (taken as 'OT1' when unset) is OT1 or T1.
sub nonTypewriterT1 {
  my ($font) = @_;
  my $proportional = $font->getFamily ne 'typewriter';
  # Typewriter fonts fail immediately; the encoding is not consulted.
  return $proportional unless $proportional;
  my $encoding = $font->getEncoding || 'OT1';
  return $encoding =~ /^(OT1|T1)$/; }

# EN DASH (NOTE: With digits before & aft => \N{FIGURE DASH})
# Ligatures are applied in reverse order of definition, so the longer '---'
# (defined second) is applied before '--'.
# Dashes are converted in any font whose family is not typewriter.
DefLigature(qr{--}, "\x{2013}", fontTest => \&nonTypewriter); # EN dash
DefLigature(qr{---}, "\x{2014}", fontTest => \&nonTypewriter); # EM dash

# Ligatures for doubled single left & right quotes to convert to double quotes
# [should ligatures be part of a font, in the first place? (it is in TeX!)]
# These are restricted to non-typewriter fonts in the OT1 or T1 encodings.
DefLigature(qr{\x{2018}\x{2018}}, "\x{201C}", fontTest => \&nonTypewriterT1); # double left quote
DefLigature(qr{\x{2019}\x{2019}}, "\x{201D}", fontTest => \&nonTypewriterT1); # double right quote
DefLigature(qr{\?\x{2018}}, UTF(0xBF), fontTest => \&nonTypewriterT1); # ? backquote
DefLigature(qr{!\x{2018}}, UTF(0xA1), fontTest => \&nonTypewriterT1); # ! backquote
# These ligatures are also handled by TeX.
# However, it appears that decent modern fonts in modern browsers handle these at that level.
# So it's likely not worth doing it at the conversion level, possibly adversely affecting search.
# DefLigature(qr{ff}, "\x{FB00}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fi}, "\x{FB01}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{fl}, "\x{FB02}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffi}, "\x{FB03}", fontTest => \&nonTypewriterT1);
# DefLigature(qr{ffl}, "\x{FB04}", fontTest => \&nonTypewriterT1);

# Three consecutive periods become a horizontal ellipsis (non-typewriter fonts only).
DefLigature(qr{\.\.\.}, "\x{2026}", fontTest => \&nonTypewriter); # ldots

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
9 changes: 9 additions & 0 deletions lib/LaTeXML/Engine/TeX_Math.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -1193,5 +1193,14 @@ DefConstructor('\lx@eqno{}',
"^ <ltx:tags><ltx:tag><ltx:Math><ltx:XMath>#1</ltx:XMath></ltx:Math></ltx:tag></ltx:tags>",
reversion => '');

#======================================================================
# Pretest for XMath to keep from interpreting math that the DOM may not allow!!
##DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'\cdot\cdot\cdot',replace=>'\cdots');

DefMathLigature("\x{22C5}\x{22C5}\x{22C5}" => "\x{22EF}", role => 'ID', name => 'cdots');

#DefMathRewrite(xpath=>'descendant-or-self::ltx:XMath',match=>'...',replace=>'\ldots');
DefMathLigature("..." => "\x{2026}", role => 'ID', name => 'ldots');

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1;
13 changes: 13 additions & 0 deletions lib/LaTeXML/Engine/TeX_Paragraph.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ DefConstructorI('\noindent', undef, sub {
# Otherwise ignore.
return; });

sub alignLine {
my ($document, $line, $alignment) = @_;
if ($document->isOpenable('ltx:p')) {
$document->insertElement('ltx:p', $line, class => 'ltx_align_' . $alignment); }
elsif ($document->isOpenable('ltx:text')) {
$document->insertElement('ltx:text', $line, class => 'ltx_align_' . $alignment);
$document->insertElement('ltx:break'); }
else {
Info('unexpected', 'alignment', $document,
"Lost requested alignment '$alignment'; no suitable element");
$document->absorb($line); }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this case emit an Info or Warning message? Silently absorbing while dropping the alignment may be worth logging.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not a bad idea...

return; }

# <ltx:para> represents a Logical Paragraph, whereas <ltx:p> is a `physical paragraph'.
# A para can contain both p and displayed equations and such.

Expand Down
Loading
Loading