Skip to content

Commit

Permalink
Merge pull request #2167 from Omikhleia/feat-operator-table
Browse files Browse the repository at this point in the history
feat(math): Support the MathML operator dictionary and many TeX-like aliases
  • Loading branch information
alerque authored Nov 23, 2024
2 parents 6bc618c + 48c2011 commit f0ddaed
Show file tree
Hide file tree
Showing 19 changed files with 13,017 additions and 2,926 deletions.
6 changes: 3 additions & 3 deletions Makefile-fonts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

if FONT_DOWNLOAD_TOOLS

# Target defined in Makefile.am, just adding a dependency here
.sources: fonttooling

.fonts: fonttooling
[ -h .fonts ] || mkdir -p $@

.sources: fonttooling
[ -h .sources ] || mkdir -p $@

fonttooling:
$(if $(BSDTAR),,$(error Please set BSDTAR with path or `./configure --enable-developer-mode`))
$(if $(CURL),,$(error Please set CURL with path or `./configure --enable-developer-mode`))
Expand Down
17 changes: 17 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ dist_license_DATA = LICENSE.md
EXTRA_DIST = spec tests documentation sile-dev-1.rockspec fontconfig.conf
EXTRA_DIST += build-aux/action-updater.js build-aux/cargo-updater.js build-aux/config.ld build-aux/decore-automake.sh build-aux/git-version-gen
EXTRA_DIST += Dockerfile build-aux/docker-bootstrap.sh build-aux/docker-fontconfig.conf hooks/build
EXTRA_DIST += build-aux/xml-entities-to-lua.xsl
EXTRA_DIST += default.nix flake.nix flake.lock shell.nix build-aux/pkg.nix
EXTRA_DIST += package.json # imported by both Nix and Docker
EXTRA_DIST += $(FIGURES)
Expand All @@ -94,6 +95,10 @@ else !SHARED
EXTRA_RUNTIME_DEPS =
endif

# Lua operator dictionary produced by build-aux/xml-entities-to-lua.xsl (see the
# $(MATHML_ENTITIES) rule in this file). It is added to EXTRA_DIST so it is part
# of the distribution, and to BUILT_SOURCES so it is among the files made first.
MATHML_ENTITIES = packages/math/mathml-entities.lua
EXTRA_DIST += $(MATHML_ENTITIES)
BUILT_SOURCES += $(MATHML_ENTITIES)

CLEANFILES = $(MANUAL)

DISTCLEANFILES = @AMINCLUDE@
Expand Down Expand Up @@ -312,6 +317,18 @@ patterndeps = $(_FORCED) $(_TEST_DEPS) $(_DOCS_DEPS) | $(bin_PROGRAMS) $(EXTRA_R
$(DOT) -Tpdf $< -o $@.gs
$(GS) -q -sDEVICE=pdfwrite -dCompatibilityLevel=1.5 -o $@ $@.gs

# Local copy of the W3C xml-entities character database, fetched at a pinned
# upstream commit so that regenerating the Lua dictionary is reproducible.
XML_ENTITIES = .sources/unicode.xml
XML_ENTITIES_COMMIT = 77acf14428202e4e1dba54ff1e5ed43fe5ab474f

# Download directory. Nothing is created when .sources is already a symlink.
.sources:
	[ -h .sources ] || mkdir -p $@

# The directory is an order-only prerequisite: it must exist before curl can
# write into it, but its timestamp must never trigger a new download.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as unicode.xml.
$(XML_ENTITIES): | .sources
	$(CURL) --fail https://raw.githubusercontent.com/w3c/xml-entities/$(XML_ENTITIES_COMMIT)/unicode.xml -o $@

# Regenerate the Lua operator dictionary from the stylesheet, formatting it with
# stylua when available (plain cat otherwise).
# NOTE(review): $(XML_ENTITIES) is read by the recipe but is not listed as a
# prerequisite, presumably so that ordinary builds never need to download it;
# run `make $(XML_ENTITIES)` first when regenerating. Also, unless the shell is
# configured with pipefail, a failing xsltproc is masked by the exit status of
# the last command in the pipeline. Confirm both are intended.
$(MATHML_ENTITIES): build-aux/xml-entities-to-lua.xsl
	$(XSLTPROC) $< $(XML_ENTITIES) | $(or $(STYLUA),cat) - > $@

.PHONY: force
force: ;

Expand Down
2 changes: 2 additions & 0 deletions build-aux/pkg.nix
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
icu,
fontconfig,
libiconv,
libxslt,
stylua,
taplo,
typos,
Expand Down Expand Up @@ -125,6 +126,7 @@ stdenv.mkDerivation (finalAttrs: {
icu
fontconfig
libiconv
libxslt
stylua
taplo
typos
Expand Down
199 changes: 199 additions & 0 deletions build-aux/xml-entities-to-lua.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" indent="no"/>

<!--
  Write an attribute value as a Lua literal.
  Integers and the words true/false are written bare; everything else is
  wrapped in double quotes.
-->
<xsl:template name="format-value">
  <xsl:param name="value" />
  <xsl:variable name="text" select="string($value)" />
  <xsl:choose>
    <!-- Lua booleans -->
    <xsl:when test="$text = 'true' or $text = 'false'">
      <xsl:value-of select="$text" />
    </xsl:when>
    <!-- Integers: number() gives NaN for non-numeric text, and NaN never compares equal -->
    <xsl:when test="number($text) = floor(number($text))">
      <xsl:value-of select="$text" />
    </xsl:when>
    <!-- Anything else becomes a quoted Lua string -->
    <xsl:otherwise>
      <xsl:value-of select="concat('&quot;', $text, '&quot;')" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!--
  Turn a character id into a call to the Lua U() helper.
  The first character of the id (the leading U) is dropped and the rest is
  used as hexadecimal digits: UXXXX becomes U(0xXXXX).
  An id made of two codepoints joined by a hyphen, UXXXX-YYYY, becomes
  U(0xXXXX, 0xYYYY). Only the first hyphen is split on, so ids with more than
  two codepoints are not supported.
-->
<xsl:template name="format-codepoint">
  <xsl:param name="codepoint" />
  <xsl:variable name="digits" select="substring($codepoint, 2)" />
  <xsl:choose>
    <xsl:when test="contains($digits, '-')">
      <xsl:variable name="first" select="substring-before($digits, '-')" />
      <xsl:variable name="rest" select="substring-after($digits, '-')" />
      <xsl:value-of select="concat('U(0x', $first, ', 0x', $rest, ')')" />
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="concat('U(0x', $digits, ')')" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!--
  Map a Unicode math class (plus combining class and description) to the short
  atom name used in the generated Lua file.

    B (Binary), V (Vary)  : bin    (Vary is assumed binary; later logic decides)
    C (Closing)           : close
    O (Opening)           : open
    P (Punctuation)       : punct
    R (Relation)          : rel
    D (Diacritic)         : botaccent for combining class 220,
                            accent for combining class 230, otherwise ord
    L (Large)             : op, except integral signs which are ord
    N, A, F, G, S, U, or anything else (including no class): ord

  SILE currently derives spacing from the atom type (lspace and rspace are
  ignored), which is why integral signs are deliberately not treated as big
  operators here.
-->
<xsl:template name="format-class">
  <xsl:param name="class" />
  <xsl:param name="combclass" />
  <xsl:param name="description" />
  <xsl:choose>
    <xsl:when test="$class = 'B' or $class = 'V'">bin</xsl:when>
    <xsl:when test="$class = 'C'">close</xsl:when>
    <xsl:when test="$class = 'O'">open</xsl:when>
    <xsl:when test="$class = 'P'">punct</xsl:when>
    <xsl:when test="$class = 'R'">rel</xsl:when>
    <xsl:when test="$class = 'D' and $combclass = '220'">botaccent</xsl:when>
    <xsl:when test="$class = 'D' and $combclass = '230'">accent</xsl:when>
    <!-- HACK: descriptions mentioning INTEGRAL or INTEGRATION fall through to ord -->
    <xsl:when test="$class = 'L' and not(contains($description, 'INTEGRAL') or contains($description, 'INTEGRATION'))">op</xsl:when>
    <xsl:otherwise>ord</xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!--
  Write the TeX-like symbol name as a quoted Lua string with its first
  character (the leading backslash) removed, or nil when there is no name.
-->
<xsl:template name="format-mathlatex">
  <xsl:param name="mathlatex" />
  <xsl:if test="not($mathlatex)">nil</xsl:if>
  <xsl:if test="$mathlatex">
    <xsl:value-of select="concat('&quot;', substring($mathlatex, 2), '&quot;')" />
  </xsl:if>
</xsl:template>

<!--
  Root template for the unicode element. Everything inside it that is not an
  xsl instruction is literal Lua copied verbatim into the generated file:
  the file header, the U() helper that builds a string from codepoints, and
  addSymbol(), which fills the symbols and operatorDict tables.
  The character template then contributes one addSymbol(...) call per
  selected charlist/character, and the file ends by returning both tables.
-->
<xsl:template match="unicode">--- GENERATED FILE, DO NOT EDIT MANUALLY
--
-- Operator dictionary for unicode characters
--
-- Extracted from https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml
-- (https://github.com/w3c/xml-entities)
-- Copyright David Carlisle 1999-2024
-- Use and distribution of this code are permitted under the terms of the
-- W3C Software Notice and License.
-- http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html
-- This file is a collection of information about how to map Unicode entities to LaTeX,
-- and various SGML/XML entity sets (ISO and MathML/HTML).
-- A Unicode character may be mapped to several entities.
-- Originally designed by Sebastian Rahtz in conjunction with Barbara Beeton for the STIX project
--

local atoms = require("packages.math.atoms")

--- Transform a list of codepoints into a string
local function U (...)
local t = { ... }
local str = ""
for i = 1, #t do
str = str .. luautf8.char(t[i])
end
return str
end

local symbols = {}
local operatorDict = {}

--- Register a symbol
-- @tparam string str String representation of the symbol
-- @tparam string shortatom Short atom type
-- @tparam string mathlatex TeX-like name of the symbol (from unicode-math)
-- @tparam string _ Unicode name of the symbol (informative)
-- @tparam table ops List of operator forms and their properties
local function addSymbol (str, shortatom, mathlatex, _, ops)
if mathlatex then
SU.debug("math.symbols", "Registering symbol", str, "as", mathlatex)
symbols[mathlatex] = str
end
local op = {}
op.atom = atoms.types[shortatom]
if ops then
op.forms = {}
for _, v in pairs(ops) do
if v.form then
-- NOTE: At this point the mu unit is not yet defined, so keep it as a string.
v.lspace = v.lspace and ("%smu"):format(v.lspace) or "0mu"
v.rspace = v.rspace and ("%smu"):format(v.rspace) or "0mu"
op.forms[v.form] = v
else
SU.warn("No form for operator " .. str .. " (operator dictionary is probably incomplete)")
end
end
end
operatorDict[str] = op
end

<xsl:apply-templates select="charlist/character" />

return {
operatorDict = operatorDict,
symbols = symbols,
}
</xsl:template>

<!--
  Write one addSymbol(...) call for a character, in argument order:
  codepoints, atom short name, TeX-like name or nil, description, and the
  operator-dictionary entries (highest priority first) or nil.
  Characters that are plain ord atoms with neither a unicode-math name nor an
  operator-dictionary entry produce no output.
-->
<xsl:template match="character">
  <xsl:variable name="entries" select="operator-dictionary" />
  <xsl:variable name="texname" select="mathlatex[@set='unicode-math']/text()" />
  <xsl:variable name="atom">
    <xsl:call-template name="format-class">
      <xsl:with-param name="class" select="unicodedata/@mathclass" />
      <xsl:with-param name="combclass" select="unicodedata/@combclass" />
      <xsl:with-param name="description" select="description" />
    </xsl:call-template>
  </xsl:variable>
  <xsl:if test="$atom != 'ord' or $texname or $entries">
    <xsl:text>addSymbol(</xsl:text>
    <!-- 1: codepoints -->
    <xsl:call-template name="format-codepoint">
      <xsl:with-param name="codepoint" select="@id" />
    </xsl:call-template>
    <!-- 2: atom type -->
    <xsl:value-of select="concat(',&quot;', $atom, '&quot;,')" />
    <!-- 3: TeX-like name or nil -->
    <xsl:call-template name="format-mathlatex">
      <xsl:with-param name="mathlatex" select="$texname" />
    </xsl:call-template>
    <!-- 4: description -->
    <xsl:value-of select="concat(',&quot;', description, '&quot;')" />
    <!-- 5: operator forms, sorted by descending numeric priority, or nil -->
    <xsl:if test="$entries">
      <xsl:text>,{</xsl:text>
      <xsl:apply-templates select="$entries">
        <xsl:sort select="@priority" data-type="number" order="descending" />
      </xsl:apply-templates>
      <xsl:text>}</xsl:text>
    </xsl:if>
    <xsl:if test="not($entries)">
      <xsl:text>,nil</xsl:text>
    </xsl:if>
    <xsl:text>)</xsl:text>
  </xsl:if>
</xsl:template>

<!--
  Write one operator-dictionary element as a Lua table constructor followed by
  a comma. Every attribute of the element is written as a field, in attribute
  name order, with its value rendered by the format-value template.
  The braces, equals sign and commas are literal text nodes, so the line
  breaks around them are copied into the output as well.
-->
<xsl:template match="operator-dictionary">
{
<xsl:for-each select="@*">
<xsl:sort select="name()" />
<xsl:value-of select="name()" />
=
<xsl:call-template name="format-value">
<xsl:with-param name="value" select="." />
</xsl:call-template>,
</xsl:for-each>
},
</xsl:template>

</xsl:stylesheet>
3 changes: 2 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ AM_CONDITIONAL([ICU], [test "x$with_icu" = "xyes"])
# Required for downloading fonts for the manual and for tests
# Since the source tarball includes a prebuilt manual we only need this for Git source builds
AM_COND_IF([FONT_DOWNLOAD_TOOLS], [
QUE_PROGVAR([curl])
QUE_PROGVAR([bsdtar])
QUE_PROGVAR([curl])
])

AM_COND_IF([DEVELOPER_MODE], [
Expand All @@ -197,6 +197,7 @@ AM_COND_IF([DEVELOPER_TOOLS], [
QUE_PROGVAR([taplo])
QUE_PROGVAR([tr])
QUE_PROGVAR([typos])
QUE_PROGVAR([xsltproc])
])

AX_PROG_LUA([5.1], [], [], [], [prefer])
Expand Down
20 changes: 20 additions & 0 deletions packages/math/atoms.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- Short names for math atom types, as accepted by the `atom` command option
-- and as referenced by the unicode symbols table / operator dictionary.
-- Every name is mapped to its zero-based position in the list below.
local atomType = {}
for position, shortName in ipairs({
   -- 0 to 7: the 8 atom types of the TeXbook's spacing table
   "ord",
   "op",
   "bin",
   "rel",
   "open",
   "close",
   "punct",
   "inner", -- unused for now (fractions in The TeXbook)
   -- 8 to 11: other atom types, treated as "ord" for spacing
   "over", -- unused for now (overlines etc. in The TeXbook)
   "under", -- unused for now (underlines etc. in The TeXbook)
   "accent",
   "botaccent", -- unused for now, but present in the operator dictionary
}) do
   atomType[shortName] = position - 1
end

return { types = atomType }
Loading

0 comments on commit f0ddaed

Please sign in to comment.