2023-10-03 11:14:36 +08:00
|
|
|
/**
 * Builds a regular expression that matches `value` literally.
 *
 * Every character that has a special meaning inside a regular expression
 * is prefixed with a backslash before the pattern is compiled. The result
 * is compiled with the multiline (`m`) flag.
 *
 * @param {string} value - text to be matched verbatim
 * @returns {RegExp} a multiline regex matching exactly `value`
 */
function escape(value) {
  // Characters that must be backslash-escaped to lose their regex meaning.
  const specialChars = /[-/\\^$*+?.()|[\]{}]/g;
  const literalPattern = value.replace(specialChars, (ch) => `\\${ch}`);
  return new RegExp(literalPattern, 'm');
}
|
|
|
|
|
|
|
|
/**
 * Extracts the pattern text from a regex-like value.
 *
 * @param {RegExp | string } re - a regular expression or a pattern string
 * @returns {string | null} the string itself, the `source` of a RegExp,
 *   or `null` when `re` is falsy (including the empty string)
 */
function source(re) {
  if (!re) {
    return null;
  }

  return typeof re === "string" ? re : re.source;
}
|
|
|
|
|
|
|
|
/**
 * Wraps a pattern in a positive lookahead group, `(?=...)`.
 *
 * @param {RegExp | string } re - pattern to look ahead for
 * @returns {string} pattern text of the lookahead group
 */
function lookahead(re) {
  const pieces = ['(?=', re, ')'];
  return concat(...pieces);
}
|
|
|
|
|
|
|
|
/**
 * Wraps a pattern in a non-capturing group repeated zero or more times,
 * `(?:...)*`.
 *
 * @param {RegExp | string } re - pattern to repeat
 * @returns {string} pattern text of the repeated group
 */
function anyNumberOfTimes(re) {
  const pieces = ['(?:', re, ')*'];
  return concat(...pieces);
}
|
|
|
|
|
|
|
|
/**
 * Wraps a pattern in a non-capturing group that may match once or not at
 * all, `(?:...)?`.
 *
 * @param {RegExp | string } re - pattern to make optional
 * @returns {string} pattern text of the optional group
 */
function optional(re) {
  const pieces = ['(?:', re, ')?'];
  return concat(...pieces);
}
|
|
|
|
|
|
|
|
/**
 * Joins the pattern text of every argument, in order, into one pattern.
 *
 * Each argument is converted with `source()`; the pieces are then joined
 * with no separator.
 *
 * @param {...(RegExp | string) } args - patterns to place one after another
 * @returns {string} the combined pattern text
 */
function concat(...args) {
  return args.map((piece) => source(piece)).join("");
}
|
|
|
|
|
|
|
|
/**
 * Removes a trailing options object from an argument list.
 *
 * If the last element of `args` is a plain object (its `constructor` is
 * `Object`), it is removed from `args` IN PLACE and returned. Otherwise
 * `args` is left untouched and a new empty object is returned.
 *
 * @param { Array<string | RegExp | Object> } args - argument list; mutated
 *   when a trailing options object is present
 * @returns {object} the stripped options object, or `{}` when there is none
 */
function stripOptionsFromArgs(args) {
  const opts = args[args.length - 1];

  // `typeof null` is also 'object', so null must be ruled out before
  // `.constructor` is read; otherwise a trailing null argument would throw.
  const isPlainObject = opts !== null
    && typeof opts === 'object'
    && opts.constructor === Object;

  if (!isPlainObject) {
    return {};
  }

  args.pop();
  return opts;
}
|
|
|
|
|
|
|
|
/** @typedef { {capture?: boolean} } RegexEitherOptions */
|
|
|
|
|
|
|
|
/**
 * Any of the passed expressions may match.
 *
 * Builds one alternation group of the form `(?:a|b|c)`. When the last
 * argument is an options object with a truthy `capture`, the group is a
 * capturing one, `(a|b|c)`, instead.
 *
 * @param {(RegExp | string)[] | [...(RegExp | string)[], RegexEitherOptions]} args
 * @returns {string} pattern text of the alternation group
 */
function either(...args) {
  /** @type { object & {capture?: boolean} } */
  const opts = stripOptionsFromArgs(args);
  const groupOpen = opts.capture ? "(" : "(?:";
  const alternatives = args.map((x) => source(x)).join("|");

  return `${groupOpen}${alternatives})`;
}
|
|
|
|
|
|
|
|
/**
 * Counts the capture groups contained in a pattern.
 *
 * @param {RegExp | string} re - pattern to inspect
 * @returns {number} number of capture groups in `re`
 */
function countMatchGroups(re) {
  // The appended empty alternative lets the pattern match the empty string,
  // so `exec` yields a result array with one slot per capture group.
  const alwaysMatching = new RegExp(`${re.toString()}|`);
  const result = alwaysMatching.exec('');

  // Slot 0 is the whole match; the remaining slots are the groups.
  return result.length - 1;
}
|
|
|
|
|
|
|
|
/**
 * Does lexeme start with a regular expression match at the beginning
 *
 * Returns `true` when the first match of `re` in `lexeme` is at index 0 and
 * `false` when the first match is further in. A falsy `re` is returned
 * unchanged, and a failed match yields the `null` produced by `exec`, so
 * callers should treat the result as truthy/falsy.
 *
 * @param {RegExp} re
 * @param {string} lexeme
 */
function startsWith(re, lexeme) {
  // No regex supplied: hand the falsy value straight back.
  if (!re) {
    return re;
  }

  const found = re.exec(lexeme);
  // No match at all: propagate the falsy `exec` result.
  if (!found) {
    return found;
  }

  return found.index === 0;
}
|
|
|
|
|
|
|
|
// BACKREF_RE matches an open parenthesis or backreference. To avoid
// an incorrect parse, it additionally matches the following:
// - [...] elements, where the meaning of parentheses and escapes change
// - other escape sequences, so we do not misparse escape sequences as
//   interesting elements
// - non-capturing, lookahead and lookbehind parentheses, which do not
//   capture. These follow the '(' with a '?' and are matched as '(?'.
//
// Named capture groups, `(?<name>...)`, also follow the '(' with a '?' but
// DO capture. For those only the bare '(' is matched (the negative lookahead
// refuses to consume the '?'), so code that counts '(' matches counts them
// as capturing. Lookbehinds, `(?<=` and `(?<!`, are still matched as '(?'.
const BACKREF_RE = /\[(?:[^\\\]]|\\.)*\]|\((?:\?(?!<[^=!]))?|\\([1-9][0-9]*)|\\./;
|
|
|
|
|
|
|
|
// **INTERNAL** Not intended for outside usage
// Logically computes regexps.join(joinWith), but renumbers the
// backreferences so they continue to match after joining.
// It also places each individual regular expression into its own
// match group; keeping track of the sequencing of those match groups
// is currently an exercise for the caller. :-)
/**
 * @param {(string | RegExp)[]} regexps - patterns to wrap and join
 * @param {{joinWith: string}} opts - `joinWith` is the separator placed
 *   between the wrapped patterns
 * @returns {string} the joined pattern text
 */
function _rewriteBackreferences(regexps, { joinWith }) {
  // Capture groups emitted so far across all patterns, wrappers included.
  let groupCount = 0;

  const wrapped = regexps.map((regex) => {
    // The wrapper group added around this pattern takes the next number,
    // so every backreference inside it shifts by the count reached here.
    groupCount += 1;
    const shift = groupCount;

    let remaining = source(regex);
    let rewritten = '';

    while (remaining.length > 0) {
      const token = BACKREF_RE.exec(remaining);
      if (token === null) {
        break;
      }

      const [text, backrefNumber] = token;
      rewritten += remaining.substring(0, token.index);
      remaining = remaining.substring(token.index + text.length);

      if (text[0] === '\\' && backrefNumber) {
        // Adjust the backreference.
        rewritten += `\\${Number(backrefNumber) + shift}`;
        continue;
      }

      rewritten += text;
      if (text === '(') {
        // A bare '(' opens a capture group of the pattern itself.
        groupCount++;
      }
    }

    // Whatever is left holds no further tokens (or is empty): copy verbatim.
    rewritten += remaining;

    return `(${rewritten})`;
  });

  return wrapped.join(joinWith);
}
|
|
|
|
|
|
|
|
export { _rewriteBackreferences, anyNumberOfTimes, concat, countMatchGroups, either, escape, lookahead, optional, source, startsWith };
|