hexo/node_modules/entities/lib/encode.js

"use strict";
// Interop helper: reuses `this.__importDefault` when one already exists;
// otherwise it wraps any module that is not flagged `__esModule` as
// `{ "default": mod }` so the imported value can always be read via `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this module's exports object as an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every exported name as `undefined`; the real values are
// assigned further down, next to each definition.
exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;
var xml_json_1 = __importDefault(require("./maps/xml.json"));
var encode_trie_1 = require("./encode-trie");
var entities_json_1 = __importDefault(require("./maps/entities.json"));
// Global regex over the ASCII first characters of the HTML entity values,
// plus the whole \x80-\uFFFF range (second argument `true`).
var htmlReplacer = getCharRegExp(entities_json_1.default, true);
// Same construction, but based on the XML entity map.
var xmlReplacer = getCharRegExp(xml_json_1.default, true);
// Only the first characters of the XML entity values; no non-ASCII range
// is appended (second argument `false`).
var xmlInvalidChars = getCharRegExp(xml_json_1.default, false);
// Lookup from the char code of the first character of each XML entity value
// to the corresponding "&name;" reference.
var xmlCodeMap = new Map(Object.keys(xml_json_1.default).map(function (k) { return [
    xml_json_1.default[k].charCodeAt(0),
    "&" + k + ";",
]; }));
/**
 * Encodes all non-ASCII characters, as well as the characters that have an
 * XML entity, so that the result only contains plain ASCII text and
 * references.
 *
 * Characters found in `xmlCodeMap` are replaced by their named entity. Every
 * other matched character is written as a numeric hexadecimal reference
 * (eg. `&#xfc;`). A valid surrogate pair produces a single reference for the
 * whole code point; an unpaired surrogate is encoded on its own and the
 * character that follows it is left untouched.
 *
 * @param str String to encode.
 * @returns The encoded string.
 */
function encodeXML(str) {
    var ret = "";
    var lastIdx = 0;
    var match;
    // `xmlReplacer` is a shared global regex. Start scanning from the
    // beginning even if an earlier call left `lastIndex` in a stale state.
    xmlReplacer.lastIndex = 0;
    while ((match = xmlReplacer.exec(str)) !== null) {
        var i = match.index;
        var char = str.charCodeAt(i);
        var next = xmlCodeMap.get(char);
        if (next) {
            ret += str.substring(lastIdx, i) + next;
            lastIdx = i + 1;
        }
        else {
            ret += str.substring(lastIdx, i) + "&#x" + encode_trie_1.getCodePoint(str, i).toString(16) + ";";
            // High surrogates span 0xD800-0xDBFF and low surrogates
            // 0xDC00-0xDFFF, so both are tested with the 0xFC00 mask. Only a
            // complete pair consumes a second code unit; `charCodeAt` past
            // the end yields NaN, which never passes the low-surrogate test.
            var isSurrogatePair = (char & 0xfc00) === 0xd800 &&
                (str.charCodeAt(i + 1) & 0xfc00) === 0xdc00;
            lastIdx = xmlReplacer.lastIndex += Number(isSurrogatePair);
        }
    }
    return ret + str.substring(lastIdx);
}
exports.encodeXML = encodeXML;
/**
 * Encodes every entity-capable character and every non-ASCII character of
 * the input.
 *
 * ASCII characters that are perfectly valid in HTML are encoded as well; for
 * example `#` becomes `&num;`. Use `encodeNonAsciiHTML` when a more compact
 * result is preferred.
 *
 * Characters without a named entity are emitted as a numeric hexadecimal
 * reference (eg. `&#xfc;`).
 *
 * @param data String to encode.
 * @returns Whatever `encodeHTMLTrieRe` produces for `htmlReplacer` and `data`.
 */
function encodeHTML(data) {
    // The trie-based encoder does the work; this wrapper only selects the regex.
    var encoded = encode_trie_1.encodeHTMLTrieRe(htmlReplacer, data);
    return encoded;
}
exports.encodeHTML = encodeHTML;
/**
 * Encodes the non-ASCII characters of the input, plus the characters matched
 * by `xmlReplacer`, using HTML entities.
 *
 * Characters without a named entity are emitted as a numeric hexadecimal
 * reference (eg. `&#xfc;`).
 *
 * @param data String to encode.
 * @returns Whatever `encodeHTMLTrieRe` produces for `xmlReplacer` and `data`.
 */
function encodeNonAsciiHTML(data) {
    // Same trie-based encoder as `encodeHTML`, driven by the narrower XML regex.
    var encoded = encode_trie_1.encodeHTMLTrieRe(xmlReplacer, data);
    return encoded;
}
exports.encodeNonAsciiHTML = encodeNonAsciiHTML;
/**
 * Builds a global character-class regex from the first character of every
 * value in `map`.
 *
 * @param map Object whose values are strings; only each value's first
 *     character is used.
 * @param nonAscii When true, first characters with a code of 128 or above
 *     are left out of the explicit list and the range `\x80-\uFFFF` is
 *     appended to the class instead.
 * @returns A `RegExp` with the `g` flag.
 */
function getCharRegExp(map, nonAscii) {
    // Backslash-escape the first character of each value, keeping only the
    // ones that must be listed explicitly.
    // NOTE(review): escaping an ASCII letter or digit this way produces a
    // regex escape with its own meaning (e.g. `\f` is form feed) - confirm
    // that this is acceptable for the maps passed in.
    var escaped = [];
    Object.keys(map).forEach(function (name) {
        var piece = "\\" + map[name].charAt(0);
        if (!nonAscii || piece.charCodeAt(1) < 128) {
            escaped.push(piece);
        }
    });
    escaped.sort(function (left, right) {
        return left.charCodeAt(1) - right.charCodeAt(1);
    });
    // Equal entries are adjacent after sorting; keep one of each.
    var unique = escaped.filter(function (piece, idx, all) {
        return piece !== all[idx + 1];
    });
    // Walk the sorted list run by run. A run of three or more consecutive
    // char codes collapses into a `first-last` range; shorter runs are kept
    // as individual characters.
    var pieces = [];
    var pos = 0;
    while (pos < unique.length) {
        var runEnd = pos;
        while (runEnd + 1 < unique.length &&
            unique[runEnd].charCodeAt(1) + 1 === unique[runEnd + 1].charCodeAt(1)) {
            runEnd += 1;
        }
        var runLength = runEnd - pos + 1;
        if (runLength >= 3) {
            pieces.push(unique[pos] + "-" + unique[runEnd]);
            pos = runEnd + 1;
        }
        else {
            pieces.push(unique[pos]);
            pos += 1;
        }
    }
    var tail = nonAscii ? "\\x80-\\uFFFF" : "";
    return new RegExp("[" + pieces.join("") + tail + "]", "g");
}
/**
 * Alias of `encodeXML`: the very same function is exported under the name
 * `escape`.
 *
 * Like `encodeXML`, it replaces characters that have an entry in
 * `xmlCodeMap` with their named XML entity and writes every other matched
 * character as a numeric hexadecimal reference (eg. `&#xfc;`).
 *
 * Have a look at `escapeUTF8` if you want a more concise output at the expense
 * of reduced transportability.
 *
 * @param data String to escape.
 */
exports.escape = encodeXML;
/**
 * Replaces the characters matched by `xmlInvalidChars` with their XML
 * entities and leaves everything else, including non-ASCII text, untouched.
 *
 * Because non-ASCII characters are passed through as-is, the output depends
 * on the character set used to transport it.
 *
 * @param data String to escape.
 * @returns The escaped string.
 */
function escapeUTF8(data) {
    var escaped = "";
    var cursor = 0;
    for (var hit = xmlInvalidChars.exec(data); hit; hit = xmlInvalidChars.exec(data)) {
        var at = hit.index;
        // Copy the untouched text between the previous match and this one.
        if (at !== cursor) {
            escaped += data.substring(cursor, at);
        }
        // Every character the regex can match is expected to have an entry
        // in `xmlCodeMap`, since both are derived from the same entity map.
        escaped += xmlCodeMap.get(hit[0].charCodeAt(0));
        // Matches are always a single character long.
        cursor = at + 1;
    }
    return escaped + data.substring(cursor);
}
exports.escapeUTF8 = escapeUTF8;
20231003 update 2023-10-03 11:14:36 +08:00			`"use strict";`
			`var __importDefault = (this && this.__importDefault) \|\| function (mod) {`
			`return (mod && mod.__esModule) ? mod : { "default": mod };`
			`};`
			`Object.defineProperty(exports, "__esModule", { value: true });`
			`exports.escapeUTF8 = exports.escape = exports.encodeNonAsciiHTML = exports.encodeHTML = exports.encodeXML = void 0;`
			`var xml_json_1 = __importDefault(require("./maps/xml.json"));`
			`var encode_trie_1 = require("./encode-trie");`
			`var entities_json_1 = __importDefault(require("./maps/entities.json"));`
			`var htmlReplacer = getCharRegExp(entities_json_1.default, true);`
			`var xmlReplacer = getCharRegExp(xml_json_1.default, true);`
			`var xmlInvalidChars = getCharRegExp(xml_json_1.default, false);`
			`var xmlCodeMap = new Map(Object.keys(xml_json_1.default).map(function (k) { return [`
			`xml_json_1.default[k].charCodeAt(0),`
			`"&" + k + ";",`
			`]; }));`
			`/**`
			`* Encodes all non-ASCII characters, as well as characters not valid in XML`
			`* documents using XML entities.`
			`*`
			`* If a character has no equivalent entity, a`
			* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
			`*/`
			`function encodeXML(str) {`
			`var ret = "";`
			`var lastIdx = 0;`
			`var match;`
			`while ((match = xmlReplacer.exec(str)) !== null) {`
			`var i = match.index;`
			`var char = str.charCodeAt(i);`
			`var next = xmlCodeMap.get(char);`
			`if (next) {`
			`ret += str.substring(lastIdx, i) + next;`
			`lastIdx = i + 1;`
			`}`
			`else {`
			`ret += str.substring(lastIdx, i) + "&#x" + encode_trie_1.getCodePoint(str, i).toString(16) + ";";`
			`// Increase by 1 if we have a surrogate pair`
			`lastIdx = xmlReplacer.lastIndex += Number((char & 65408) === 0xd800);`
			`}`
			`}`
			`return ret + str.substr(lastIdx);`
			`}`
			`exports.encodeXML = encodeXML;`
			`/**`
			`* Encodes all entities and non-ASCII characters in the input.`
			`*`
			`* This includes characters that are valid ASCII characters in HTML documents.`
			* For example `#` will be encoded as `&num;`. To get a more compact output,
			* consider using the `encodeNonAsciiHTML` function.
			`*`
			`* If a character has no equivalent entity, a`
			* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
			`*/`
			`function encodeHTML(data) {`
			`return encode_trie_1.encodeHTMLTrieRe(htmlReplacer, data);`
			`}`
			`exports.encodeHTML = encodeHTML;`
			`/**`
			`* Encodes all non-ASCII characters, as well as characters not valid in HTML`
			`* documents using HTML entities.`
			`*`
			`* If a character has no equivalent entity, a`
			* numeric hexadecimal reference (eg. `&#xfc;`) will be used.
			`*/`
			`function encodeNonAsciiHTML(data) {`
			`return encode_trie_1.encodeHTMLTrieRe(xmlReplacer, data);`
			`}`
			`exports.encodeNonAsciiHTML = encodeNonAsciiHTML;`
			`function getCharRegExp(map, nonAscii) {`
			`// Collect the start characters of all entities`
			`var chars = Object.keys(map)`
			`.map(function (k) { return "\\" + map[k].charAt(0); })`
			`.filter(function (v) { return !nonAscii \|\| v.charCodeAt(1) < 128; })`
			`.sort(function (a, b) { return a.charCodeAt(1) - b.charCodeAt(1); })`
			`// Remove duplicates`
			`.filter(function (v, i, a) { return v !== a[i + 1]; });`
			`// Add ranges to single characters.`
			`for (var start = 0; start < chars.length - 1; start++) {`
			`// Find the end of a run of characters`
			`var end = start;`
			`while (end < chars.length - 1 &&`
			`chars[end].charCodeAt(1) + 1 === chars[end + 1].charCodeAt(1)) {`
			`end += 1;`
			`}`
			`var count = 1 + end - start;`
			`// We want to replace at least three characters`
			`if (count < 3)`
			`continue;`
			`chars.splice(start, count, chars[start] + "-" + chars[end]);`
			`}`
			`return new RegExp("[" + chars.join("") + (nonAscii ? "\\x80-\\uFFFF" : "") + "]", "g");`
			`}`
			`/**`
			`* Encodes all non-ASCII characters, as well as characters not valid in XML`
			* documents using numeric hexadecimal reference (eg. `&#xfc;`).
			`*`
			* Have a look at `escapeUTF8` if you want a more concise output at the expense
			`* of reduced transportability.`
			`*`
			`* @param data String to escape.`
			`*/`
			`exports.escape = encodeXML;`
			`/**`
			`* Encodes all characters not valid in XML documents using XML entities.`
			`*`
			`* Note that the output will be character-set dependent.`
			`*`
			`* @param data String to escape.`
			`*/`
			`function escapeUTF8(data) {`
			`var match;`
			`var lastIdx = 0;`
			`var result = "";`
			`while ((match = xmlInvalidChars.exec(data))) {`
			`if (lastIdx !== match.index) {`
			`result += data.substring(lastIdx, match.index);`
			`}`
			// We know that this character will be in `xmlCodeMap`
			`result += xmlCodeMap.get(match[0].charCodeAt(0));`
			`// Every match will be of length 1`
			`lastIdx = match.index + 1;`
			`}`
			`return result + data.substring(lastIdx);`
			`}`
			`exports.escapeUTF8 = escapeUTF8;`