hexo/node_modules/highlight.js/es/utils/regex.js

156 lines
4.0 KiB
JavaScript

/**
* @param {string} value
* @returns {RegExp}
* */
function escape(value) {
return new RegExp(value.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&'), 'm');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
function source(re) {
if (!re) return null;
if (typeof re === "string") return re;
return re.source;
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
function lookahead(re) {
return concat('(?=', re, ')');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
function anyNumberOfTimes(re) {
return concat('(?:', re, ')*');
}
/**
* @param {RegExp | string } re
* @returns {string}
*/
function optional(re) {
return concat('(?:', re, ')?');
}
/**
* @param {...(RegExp | string) } args
* @returns {string}
*/
function concat(...args) {
const joined = args.map((x) => source(x)).join("");
return joined;
}
/**
* @param { Array<string | RegExp | Object> } args
* @returns {object}
*/
function stripOptionsFromArgs(args) {
const opts = args[args.length - 1];
if (typeof opts === 'object' && opts.constructor === Object) {
args.splice(args.length - 1, 1);
return opts;
} else {
return {};
}
}
/** @typedef { {capture?: boolean} } RegexEitherOptions */
/**
* Any of the passed expresssions may match
*
* Creates a huge this | this | that | that match
* @param {(RegExp | string)[] | [...(RegExp | string)[], RegexEitherOptions]} args
* @returns {string}
*/
function either(...args) {
/** @type { object & {capture?: boolean} } */
const opts = stripOptionsFromArgs(args);
const joined = '('
+ (opts.capture ? "" : "?:")
+ args.map((x) => source(x)).join("|") + ")";
return joined;
}
/**
* @param {RegExp | string} re
* @returns {number}
*/
function countMatchGroups(re) {
return (new RegExp(re.toString() + '|')).exec('').length - 1;
}
/**
* Does lexeme start with a regular expression match at the beginning
* @param {RegExp} re
* @param {string} lexeme
*/
function startsWith(re, lexeme) {
const match = re && re.exec(lexeme);
return match && match.index === 0;
}
// BACKREF_RE matches an open parenthesis or backreference. To avoid
// an incorrect parse, it additionally matches the following:
// - [...] elements, where the meaning of parentheses and escapes change
// - other escape sequences, so we do not misparse escape sequences as
// interesting elements
// - non-matching or lookahead parentheses, which do not capture. These
// follow the '(' with a '?'.
const BACKREF_RE = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./;
// **INTERNAL** Not intended for outside usage
// join logically computes regexps.join(separator), but fixes the
// backreferences so they continue to match.
// it also places each individual regular expression into it's own
// match group, keeping track of the sequencing of those match groups
// is currently an exercise for the caller. :-)
/**
* @param {(string | RegExp)[]} regexps
* @param {{joinWith: string}} opts
* @returns {string}
*/
function _rewriteBackreferences(regexps, { joinWith }) {
let numCaptures = 0;
return regexps.map((regex) => {
numCaptures += 1;
const offset = numCaptures;
let re = source(regex);
let out = '';
while (re.length > 0) {
const match = BACKREF_RE.exec(re);
if (!match) {
out += re;
break;
}
out += re.substring(0, match.index);
re = re.substring(match.index + match[0].length);
if (match[0][0] === '\\' && match[1]) {
// Adjust the backreference.
out += '\\' + String(Number(match[1]) + offset);
} else {
out += match[0];
if (match[0] === '(') {
numCaptures++;
}
}
}
return out;
}).map(re => `(${re})`).join(joinWith);
}
export { _rewriteBackreferences, anyNumberOfTimes, concat, countMatchGroups, either, escape, lookahead, optional, source, startsWith };