2023-10-03 11:14:36 +08:00
|
|
|
/*
Language: HTML, XML
Website: https://www.w3.org/XML/
Category: common, web
Audit: 2020
*/
|
|
|
|
|
|
|
|
/**
 * Builds the highlight.js grammar for HTML and XML documents.
 *
 * Reads `hljs.regex` (`concat`, `optional`, `lookahead`, `either`),
 * `hljs.inherit`, `hljs.COMMENT`, `hljs.APOS_STRING_MODE` and
 * `hljs.QUOTE_STRING_MODE` from the object it is given, and returns the
 * language definition object (`name`, `aliases`, the `case_insensitive` and
 * `unicodeRegex` flags, and the top-level `contains` mode list).
 *
 * @type LanguageFn
 */
function xml(hljs) {
  const regex = hljs.regex;
  // XML names can have the following additional letters: https://www.w3.org/TR/xml/#NT-NameChar
  // OTHER_NAME_CHARS = /[:\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]/;
  // Element names start with NAME_START_CHAR followed by optional other Unicode letters, ASCII digits, hyphens, underscores, and periods
  // Spec-accurate versions, kept for reference only:
  // const TAG_NAME_RE = regex.concat(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/, regex.optional(/[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*:/), /[A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]*/);
  // const XML_IDENT_RE = /[A-Z_a-z:\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\-.0-9\u00B7\u0300-\u036F\u203F-\u2040]+/;
  // however, to cater for performance and more Unicode support rely simply on the Unicode letter class

  // Tag name: a letter or underscore, an optional "prefix:" part, then the local name.
  const TAG_NAME_RE = regex.concat(/[\p{L}_]/u, regex.optional(/[\p{L}0-9_.-]*:/u), /[\p{L}0-9_.-]*/u);
  // Attribute name: any run of letters, digits, '.', '_', ':' and '-'.
  const XML_IDENT_RE = /[\p{L}0-9._:-]+/u;

  // Character/entity references: named (&amp;, &frac12;), decimal (&#38;) and
  // hexadecimal (&#x26;). Named references may contain digits after the first
  // letter, so the name is a letter followed by letters or digits.
  const XML_ENTITIES = {
    className: 'symbol',
    begin: /&[a-z][a-z0-9]*;|&#[0-9]+;|&#x[a-f0-9]+;/
  };

  // A keyword inside a `<!...>` declaration, introduced by whitespace.
  const XML_META_KEYWORDS = {
    begin: /\s/,
    contains: [
      {
        className: 'keyword',
        // Optional leading '#', then a name of at least two characters;
        // all ASCII digits (including 0) are accepted after the first character.
        begin: /#?[a-z_][a-z0-9_-]+/,
        illegal: /\n/
      }
    ]
  };

  // Same keyword mode, but delimited by parentheses instead of whitespace.
  const XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, {
    begin: /\(/,
    end: /\)/
  });

  // Quoted strings inside meta sections, derived from the shared string modes.
  const APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, { className: 'string' });
  const QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, { className: 'string' });

  // Everything between a tag name and the closing bracket: attribute names
  // and their (quoted or unquoted) values.
  const TAG_INTERNALS = {
    endsWithParent: true,
    illegal: /</,
    relevance: 0,
    contains: [
      {
        className: 'attr',
        begin: XML_IDENT_RE,
        relevance: 0
      },
      {
        begin: /=\s*/,
        relevance: 0,
        contains: [
          {
            className: 'string',
            endsParent: true,
            variants: [
              {
                begin: /"/,
                end: /"/,
                contains: [ XML_ENTITIES ]
              },
              {
                begin: /'/,
                end: /'/,
                contains: [ XML_ENTITIES ]
              },
              // unquoted attribute value
              { begin: /[^\s"'=<>`]+/ }
            ]
          }
        ]
      }
    ]
  };

  return {
    name: 'HTML, XML',
    aliases: [
      'html',
      'xhtml',
      'rss',
      'atom',
      'xjb',
      'xsd',
      'xsl',
      'plist',
      'wsf',
      'svg'
    ],
    case_insensitive: true,
    unicodeRegex: true,
    contains: [
      // <!DOCTYPE ...>, <!ELEMENT ...> and similar declarations
      {
        className: 'meta',
        begin: /<![a-z]/,
        end: />/,
        relevance: 10,
        contains: [
          XML_META_KEYWORDS,
          QUOTE_META_STRING_MODE,
          APOS_META_STRING_MODE,
          XML_META_PAR_KEYWORDS,
          // bracketed subset holding nested declarations
          {
            begin: /\[/,
            end: /\]/,
            contains: [
              {
                className: 'meta',
                begin: /<![a-z]/,
                end: />/,
                contains: [
                  XML_META_KEYWORDS,
                  XML_META_PAR_KEYWORDS,
                  QUOTE_META_STRING_MODE,
                  APOS_META_STRING_MODE
                ]
              }
            ]
          }
        ]
      },
      hljs.COMMENT(
        /<!--/,
        /-->/,
        { relevance: 10 }
      ),
      // CDATA section
      {
        begin: /<!\[CDATA\[/,
        end: /\]\]>/,
        relevance: 10
      },
      XML_ENTITIES,
      // xml processing instructions
      {
        className: 'meta',
        end: /\?>/,
        variants: [
          {
            begin: /<\?xml/,
            relevance: 10,
            contains: [
              QUOTE_META_STRING_MODE
            ]
          },
          {
            begin: /<\?[a-z][a-z0-9]+/,
          }
        ]

      },
      {
        className: 'tag',
        /*
        The lookahead pattern (?=...) ensures that 'begin' only matches
        '<style' as a single word, followed by a whitespace or an
        ending bracket.
        */
        begin: /<style(?=\s|>)/,
        end: />/,
        keywords: { name: 'style' },
        contains: [ TAG_INTERNALS ],
        starts: {
          end: /<\/style>/,
          returnEnd: true,
          subLanguage: [
            'css',
            'xml'
          ]
        }
      },
      {
        className: 'tag',
        // See the comment in the <style tag about the lookahead pattern
        begin: /<script(?=\s|>)/,
        end: />/,
        keywords: { name: 'script' },
        contains: [ TAG_INTERNALS ],
        starts: {
          end: /<\/script>/,
          returnEnd: true,
          subLanguage: [
            'javascript',
            'handlebars',
            'xml'
          ]
        }
      },
      // we need this for now for JSX
      {
        className: 'tag',
        begin: /<>|<\/>/
      },
      // open tag
      {
        className: 'tag',
        begin: regex.concat(
          /</,
          regex.lookahead(regex.concat(
            TAG_NAME_RE,
            // <tag/>
            // <tag>
            // <tag ...
            regex.either(/\/>/, />/, /\s/)
          ))
        ),
        end: /\/?>/,
        contains: [
          {
            className: 'name',
            begin: TAG_NAME_RE,
            relevance: 0,
            starts: TAG_INTERNALS
          }
        ]
      },
      // close tag
      {
        className: 'tag',
        begin: regex.concat(
          /<\//,
          regex.lookahead(regex.concat(
            TAG_NAME_RE, />/
          ))
        ),
        contains: [
          {
            className: 'name',
            begin: TAG_NAME_RE,
            relevance: 0
          },
          {
            begin: />/,
            relevance: 0,
            endsParent: true
          }
        ]
      }
    ]
  };
}
|
|
|
|
|
|
|
|
module.exports = xml;
|