Skip to content

Commit

Permalink
perf(html5): optimize gumbo code to add a libxml2 property
Browse files Browse the repository at this point in the history
Introduce code that is an optimized version of libxml2's xmlNewNsProp
to avoid traversing the properties linked list to append an attribute
every time we add one.
  • Loading branch information
flavorjones committed Dec 26, 2024
1 parent 729c96c commit 8637196
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
1 change: 1 addition & 0 deletions ext/nokogiri/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,7 @@ def compile
have_func("xmlCtxtSetOptions") # introduced in libxml2 2.13.0
have_func("xmlCtxtGetOptions") # introduced in libxml2 2.14.0
have_func("xmlSwitchEncodingName") # introduced in libxml2 2.13.0
have_func("xmlAddIDSafe") # introduced in libxml2 2.13.0
have_func("rb_category_warning") # introduced in Ruby 3.0 but had trouble resolving this symbol in truffleruby

other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
Expand Down
55 changes: 54 additions & 1 deletion ext/nokogiri/gumbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,56 @@ set_line(xmlNodePtr node, size_t line)
}
}

// Append a new attribute to `node`. This is essentially libxml2's xmlNewNsProp, except that the
// caller hands us the tail of the property list so we don't have to traverse the whole linked
// list on every append.
//
//   node      - element that receives the attribute; must not be NULL.
//   ns        - namespace to record on the attribute, or NULL.
//   name      - attribute name; interned in the document dictionary when there is one,
//               otherwise duplicated.
//   value     - attribute value; when non-NULL a text child is created and, if xmlIsID says
//               the attribute is an ID, it is registered via xmlAddIDSafe.
//   last_prop - the attribute most recently appended to `node` (the list tail), or NULL.
//
// Returns the newly appended attribute, which the caller should pass back as `last_prop` on the
// next call. Returns NULL, leaving `node` untouched, if the attribute or its name cannot be
// allocated.
static xmlAttrPtr
append_property(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name, const xmlChar *value, xmlAttrPtr last_prop)
{
  xmlAttrPtr cur = (xmlAttrPtr) xmlMalloc(sizeof(xmlAttr));
  if (cur == NULL) {
    return NULL;
  }
  memset(cur, 0, sizeof(xmlAttr));

  xmlDocPtr doc = node->doc;
  cur->type = XML_ATTRIBUTE_NODE;
  cur->parent = node;
  cur->doc = doc;
  cur->ns = ns;

  if ((doc != NULL) && (doc->dict != NULL)) {
    cur->name = (xmlChar *) xmlDictLookup(doc->dict, name, -1);
  } else {
    cur->name = xmlStrdup(name);
  }
  if (cur->name == NULL) {
    // Nothing else hangs off `cur` yet, so releasing the struct itself is enough. Bailing out
    // here avoids linking a nameless attribute into the tree.
    xmlFree(cur);
    return NULL;
  }

  if (value != NULL) {
    cur->children = xmlNewDocText(doc, value);
    cur->last = NULL;

    // Re-parent the value node(s) onto the attribute and remember the last one.
    for (xmlNodePtr child = cur->children; child != NULL; child = child->next) {
      child->parent = (xmlNodePtr) cur;
      if (child->next == NULL) {
        cur->last = child;
      }
    }

    if ((doc != NULL) && (xmlIsID(doc, node, cur) == 1)) {
      xmlAddIDSafe(cur, value);
    }
  }

  if (node->properties == NULL) {
    node->properties = cur;
  } else {
    // Fast path: `last_prop` is the tail and the loop below runs zero times. If the caller did
    // not give us a usable hint (NULL, or an attribute of some other node), start from the
    // head instead of dereferencing a bad pointer.
    xmlAttrPtr tail = last_prop;
    if ((tail == NULL) || (tail->parent != node)) {
      tail = node->properties;
    }
    while (tail->next != NULL) {
      tail = tail->next;
    }
    tail->next = cur;
    cur->prev = tail;
  }

  return cur;
}

// Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted
// at gumbo_node.
static void
Expand Down Expand Up @@ -200,6 +250,7 @@ build_tree(
xmlAddChild(xml_node, xml_child);

// Add the attributes.
xmlAttrPtr last_prop = NULL;
const GumboVector *attrs = &gumbo_child->v.element.attributes;
for (size_t i = 0; i < attrs->length; i++) {
const GumboAttribute *attr = attrs->data[i];
Expand All @@ -220,7 +271,9 @@ build_tree(
default:
ns = NULL;
}
xmlNewNsProp(xml_child, ns, (const xmlChar *)attr->name, (const xmlChar *)attr->value);

// We micromanage the attribute list for performance reasons.
last_prop = append_property(xml_child, ns, (const xmlChar *)attr->name, (const xmlChar *)attr->value, last_prop);
}

// Add children for this element.
Expand Down
11 changes: 11 additions & 0 deletions ext/nokogiri/libxml2_polyfill.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,14 @@ xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding)
return (xmlSwitchToEncoding(ctxt, handler));
}
#endif

#ifndef HAVE_XMLADDIDSAFE
// Stand-in for xmlAddIDSafe, compiled only when HAVE_XMLADDIDSAFE is not defined.
// Registers `value` as an ID for `attr` by calling xmlAddID on the attribute's document.
//
// Returns 1 when xmlAddID hands back an ID record, 0 otherwise. A NULL `attr` yields 0 rather
// than being dereferenced.
int
xmlAddIDSafe(xmlAttrPtr attr, const xmlChar *value)
{
  if (attr == NULL) {
    return 0;
  }

  return (xmlAddID(NULL, attr->doc, value, attr) != NULL) ? 1 : 0;
}
#endif
3 changes: 3 additions & 0 deletions ext/nokogiri/nokogiri.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ int xmlCtxtGetOptions(xmlParserCtxtPtr ctxt);
#ifndef HAVE_XMLSWITCHENCODINGNAME
int xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding);
#endif
#ifndef HAVE_XMLADDIDSAFE
/* Prototype for the local fallback used when HAVE_XMLADDIDSAFE is not defined.
 * Takes the attribute holding the ID and the ID value; returns an int status. */
int xmlAddIDSafe(xmlAttrPtr attr, const xmlChar *value);
#endif

#define XMLNS_PREFIX "xmlns"
#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
Expand Down

0 comments on commit 8637196

Please sign in to comment.