/** * @param {string} value * @returns {RegExp} * */ function escape(value) { return new RegExp(value.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&'), 'm'); } /** * @param {RegExp | string } re * @returns {string} */ function source(re) { if (!re) return null; if (typeof re === "string") return re; return re.source; } /** * @param {RegExp | string } re * @returns {string} */ function lookahead(re) { return concat('(?=', re, ')'); } /** * @param {RegExp | string } re * @returns {string} */ function anyNumberOfTimes(re) { return concat('(?:', re, ')*'); } /** * @param {RegExp | string } re * @returns {string} */ function optional(re) { return concat('(?:', re, ')?'); } /** * @param {...(RegExp | string) } args * @returns {string} */ function concat(...args) { const joined = args.map((x) => source(x)).join(""); return joined; } /** * @param { Array } args * @returns {object} */ function stripOptionsFromArgs(args) { const opts = args[args.length - 1]; if (typeof opts === 'object' && opts.constructor === Object) { args.splice(args.length - 1, 1); return opts; } else { return {}; } } /** @typedef { {capture?: boolean} } RegexEitherOptions */ /** * Any of the passed expresssions may match * * Creates a huge this | this | that | that match * @param {(RegExp | string)[] | [...(RegExp | string)[], RegexEitherOptions]} args * @returns {string} */ function either(...args) { /** @type { object & {capture?: boolean} } */ const opts = stripOptionsFromArgs(args); const joined = '(' + (opts.capture ? "" : "?:") + args.map((x) => source(x)).join("|") + ")"; return joined; } /** * @param {RegExp | string} re * @returns {number} */ function countMatchGroups(re) { return (new RegExp(re.toString() + '|')).exec('').length - 1; } /** * Does lexeme start with a regular expression match at the beginning * @param {RegExp} re * @param {string} lexeme */ function startsWith(re, lexeme) { const match = re && re.exec(lexeme); return match && match.index === 0; } // BACKREF_RE matches an open parenthesis or backreference. To avoid // an incorrect parse, it additionally matches the following: // - [...] elements, where the meaning of parentheses and escapes change // - other escape sequences, so we do not misparse escape sequences as // interesting elements // - non-matching or lookahead parentheses, which do not capture. These // follow the '(' with a '?'. const BACKREF_RE = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./; // **INTERNAL** Not intended for outside usage // join logically computes regexps.join(separator), but fixes the // backreferences so they continue to match. // it also places each individual regular expression into it's own // match group, keeping track of the sequencing of those match groups // is currently an exercise for the caller. :-) /** * @param {(string | RegExp)[]} regexps * @param {{joinWith: string}} opts * @returns {string} */ function _rewriteBackreferences(regexps, { joinWith }) { let numCaptures = 0; return regexps.map((regex) => { numCaptures += 1; const offset = numCaptures; let re = source(regex); let out = ''; while (re.length > 0) { const match = BACKREF_RE.exec(re); if (!match) { out += re; break; } out += re.substring(0, match.index); re = re.substring(match.index + match[0].length); if (match[0][0] === '\\' && match[1]) { // Adjust the backreference. out += '\\' + String(Number(match[1]) + offset); } else { out += match[0]; if (match[0] === '(') { numCaptures++; } } } return out; }).map(re => `(${re})`).join(joinWith); } export { _rewriteBackreferences, anyNumberOfTimes, concat, countMatchGroups, either, escape, lookahead, optional, source, startsWith };