/*
Language: HTML, XML
Website: https://www.w3.org/XML/
Category: common, web
Audit: 2020
*/
/** @type LanguageFn */
function xml(hljs) {
const regex = hljs.regex;
// XML names can have the following additional letters: https://www.w3.org/TR/xml/#NT-NameChar
// OTHER_NAME_CHARS = /[:\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]/;
// Element names start with NAME_START_CHAR followed by optional other Unicode letters, ASCII digits, hyphens, underscores, and periods
// const TAG_NAME_RE = regex.concat(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/, regex.optional(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*:/), /[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*/);;
// const XML_IDENT_RE = /[A-Z_a-z:\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]+/;
// const TAG_NAME_RE = regex.concat(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/, regex.optional(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*:/), /[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*/);
// however, to cater for performance and more Unicode support rely simply on the Unicode letter class
const TAG_NAME_RE = regex.concat(/[\p{L}_]/u, regex.optional(/[\p{L}0-9_.-]*:/u), /[\p{L}0-9_.-]*/u);
const XML_IDENT_RE = /[\p{L}0-9._:-]+/u;
const XML_ENTITIES = {
className: 'symbol',
begin: /&[a-z]+;|[0-9]+;|[a-f0-9]+;/
};
const XML_META_KEYWORDS = {
begin: /\s/,
contains: [
{
className: 'keyword',
begin: /#?[a-z_][a-z1-9_-]+/,
illegal: /\n/
}
]
};
const XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, {
begin: /\(/,
end: /\)/
});
const APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, { className: 'string' });
const QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, { className: 'string' });
const TAG_INTERNALS = {
endsWithParent: true,
illegal: /,
relevance: 0,
contains: [
{
className: 'attr',
begin: XML_IDENT_RE,
relevance: 0
},
{
begin: /=\s*/,
relevance: 0,
contains: [
{
className: 'string',
endsParent: true,
variants: [
{
begin: /"/,
end: /"/,
contains: [ XML_ENTITIES ]
},
{
begin: /'/,
end: /'/,
contains: [ XML_ENTITIES ]
},
{ begin: /[^\s"'=<>`]+/ }
]
}
]
}
]
};
return {
name: 'HTML, XML',
aliases: [
'html',
'xhtml',
'rss',
'atom',
'xjb',
'xsd',
'xsl',
'plist',
'wsf',
'svg'
],
case_insensitive: true,
unicodeRegex: true,
contains: [
{
className: 'meta',
begin: //,
relevance: 10,
contains: [
XML_META_KEYWORDS,
QUOTE_META_STRING_MODE,
APOS_META_STRING_MODE,
XML_META_PAR_KEYWORDS,
{
begin: /\[/,
end: /\]/,
contains: [
{
className: 'meta',
begin: //,
contains: [
XML_META_KEYWORDS,
XML_META_PAR_KEYWORDS,
QUOTE_META_STRING_MODE,
APOS_META_STRING_MODE
]
}
]
}
]
},
hljs.COMMENT(
//,
{ relevance: 10 }
),
{
begin: //,
relevance: 10
},
XML_ENTITIES,
// xml processing instructions
{
className: 'meta',
end: /\?>/,
variants: [
{
begin: /<\?xml/,
relevance: 10,
contains: [
QUOTE_META_STRING_MODE
]
},
{
begin: /<\?[a-z][a-z0-9]+/,
}
]
},
{
className: 'tag',
/*
The lookahead pattern (?=...) ensures that 'begin' only matches
'