From 95d962bb09180df82cfc1ec93ad476df3fa327ef Mon Sep 17 00:00:00 2001 From: Derek Rhodes Date: Sun, 22 Dec 2024 17:31:13 -0500 Subject: [PATCH] This project now uses HTML as the base language instead of XML. XML is too strict. --- pyproject.toml | 10 +++++++++- src/auxml/macro_manager.py | 8 +++++--- src/auxml/main.py | 2 +- src/auxml/parser.py | 14 ++++++++------ tests/macro_test.py | 6 +++++- tests/macrofile1.html | 9 +++++++++ 6 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 tests/macrofile1.html diff --git a/pyproject.toml b/pyproject.toml index bbcb375..b43421c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,15 @@ build-backend = "setuptools.build_meta" [project] name = "auxml" version = "0.1.0" -dependencies = [ "lxml", "pytest", "pudb", "pytest-watcher", "xmldiff", "livereload" ] +dependencies = [ + "lxml", + "pytest", + "pudb", + "pytest-watcher", + "xmldiff", + "livereload", + "beautifulsoup4", + ] requires-python = ">=3.8" authors = [ {name = "Derek A. 
Rhodes", email = "physci@gmail.com"} ] description = "An author friendly macro system for XML" diff --git a/src/auxml/macro_manager.py b/src/auxml/macro_manager.py index bef3c73..73fcfbe 100644 --- a/src/auxml/macro_manager.py +++ b/src/auxml/macro_manager.py @@ -4,7 +4,7 @@ from auxml.macro import MacroDef, MacroCall from auxml.util import * from auxml.html_tags import is_an_html_tag -from auxml.parser import parse_xml_file +from auxml.parser import parse_html_file from auxml.err import SyntaxErrorAuXML @@ -17,7 +17,7 @@ def register_directive(self, name, cls): self.directives[name] = cls def load_macro_file(self, infile): - macros = parse_xml_file(infile) + macros = parse_html_file(infile) for el in macros.findall(".//define-macro"): md = MacroDef(el) self.add_macro_def(md) @@ -28,7 +28,9 @@ def add_macro_def(self, macdef): if macdef.name in self.macro_defs: raise Exception(f"macro already defined: {macdef.name}") if len(macdef.el.getchildren()) > 1: - raise SyntaxErrorAuXML(f"macro `{macdef.name}` may not have more than one child element") + msg = f"macro `{macdef.name}` may not have more than one child element\n" + msg += f"Got `{macdef.el.getchildren()}` child elements\n" + raise SyntaxErrorAuXML(msg) self.macro_defs[macdef.name] = macdef def cant_find(self, tag): diff --git a/src/auxml/main.py b/src/auxml/main.py index 01e4b66..e66a96a 100644 --- a/src/auxml/main.py +++ b/src/auxml/main.py @@ -18,7 +18,7 @@ def __init__(self): mm.load_macro_file(cmdline.macros()) fname = cmdline.infile() - root = parse_xml_file(fname) + root = parse_html_file(fname) exp = mm.expand_all(fname, root) self.save(exp) diff --git a/src/auxml/parser.py b/src/auxml/parser.py index 83bde81..f6d001c 100644 --- a/src/auxml/parser.py +++ b/src/auxml/parser.py @@ -1,13 +1,15 @@ from lxml import etree - +from bs4 import BeautifulSoup _html_parser = etree.HTMLParser(remove_blank_text=True, remove_comments=True) _xml_parser = etree.XMLParser(remove_blank_text=True, remove_comments=True) def 
parse_html_file(fname): - tree = etree.parse(fname, _html_parser) - return tree.getroot() + text = open(fname).read() + soup = BeautifulSoup(text, "html.parser") + tree = etree.fromstring(str(soup), _xml_parser) + return tree -def parse_xml_file(fname): - tree = etree.parse(fname, _xml_parser) - return tree.getroot() +# def parse_xml_file(fname): +# tree = etree.parse(fname, _xml_parser) +# return tree.getroot() diff --git a/tests/macro_test.py b/tests/macro_test.py index 01c0e9f..d8195b6 100644 --- a/tests/macro_test.py +++ b/tests/macro_test.py @@ -234,6 +234,10 @@ def test_mac_multi_rearrange_bug(): e = '''
A
B
''' #pudb.set_trace() with_mm([m], c, e) - +def with_mm_file(filename): + mm = MacroManager() + mm.load_macro_file(filename) +def test_mac_multi_els_with_html_bug(): + with_mm_file("tests/macrofile1.html") diff --git a/tests/macrofile1.html b/tests/macrofile1.html new file mode 100644 index 0000000..9bdd2ed --- /dev/null +++ b/tests/macrofile1.html @@ -0,0 +1,9 @@ + + + +
&
+
+ +
+
+