hexo/node_modules/js-tiktoken/dist/index.cjs

274 lines
3.1 MiB
JavaScript
Raw Normal View History

2024-05-09 16:35:20 +08:00
'use strict';
var base64 = require('base64-js');
/**
 * Normalizes a required module so it can always be read through `.default`.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it is truthy and carries a truthy
 *   `__esModule` property; otherwise a fresh `{ default: mod }` wrapper.
 */
function _interopDefault(mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
}
// Normalized handle on the required base64 module; its API is reached through `.default`.
var base64__default = /*#__PURE__*/_interopDefault(base64);
// Shorthand for Object.defineProperty.
var __defProp = Object.defineProperty;

/**
 * Stores `value` under `key` on `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited) the property is
 * (re)defined as an enumerable, configurable, writable own data property;
 * otherwise a plain assignment is used.
 */
var __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value });
  }
  return obj[key] = value;
};

/**
 * Sets a field on `obj` and hands the value back.
 *
 * Non-symbol keys are coerced to strings before being stored; symbol keys are
 * passed through unchanged.
 *
 * @returns {*} The `value` argument.
 */
var __publicField = (obj, key, value) => {
  const propertyKey = typeof key === "symbol" ? key : key + "";
  __defNormalProp(obj, propertyKey, value);
  return value;
};
// src/ranks/gpt2.js
var gpt2_default = { "explicit_n_vocab": 50257, "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= IHdpdGg= 
IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= IHNoZQ== 
IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXVzZQ== IGhpbQ== b25z IFs= IHJv Zm9ybQ== aWxk YXRlcw==
// src/ranks/p50k_base.js
var p50k_base_default = { "explicit_n_vocab": 50281, "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= 
IHdpdGg= IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= 
IHNoZQ== IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXVzZQ== IGhpbQ== b25z IFs= IHJv Zm9ybQ== aWxk YXRl
// src/ranks/p50k_edit.js
var p50k_edit_default = { "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256, "<|fim_prefix|>": 50281, "<|fim_middle|>": 50282, "<|fim_suffix|>": 50283 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo 
aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= IHdpdGg= IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu 
aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= IHNoZQ== IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXV
// src/ranks/r50k_base.js
var r50k_base_default = { "explicit_n_vocab": 50257, "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 50256 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== IHQ= IGE= aGU= aW4= cmU= b24= IHRoZQ== ZXI= IHM= YXQ= IHc= IG8= ZW4= IGM= aXQ= aXM= YW4= b3I= ZXM= IGI= ZWQ= IGY= aW5n IHA= b3U= IGFu YWw= YXI= IHRv IG0= IG9m IGlu IGQ= IGg= IGFuZA== aWM= YXM= bGU= IHRo aW9u b20= bGw= ZW50 IG4= IGw= c3Q= IHJl dmU= IGU= cm8= bHk= IGJl IGc= IFQ= Y3Q= IFM= aWQ= b3Q= IEk= dXQ= ZXQ= IEE= IGlz IG9u aW0= YW0= b3c= YXk= YWQ= c2U= IHRoYXQ= IEM= aWc= IGZvcg== YWM= IHk= dmVy dXI= IHU= bGQ= IHN0 IE0= J3M= IGhl IGl0 YXRpb24= aXRo aXI= Y2U= IHlvdQ== aWw= IEI= IHdo b2w= IFA= 
IHdpdGg= IDE= dGVy Y2g= IGFz IHdl ICg= bmQ= aWxs IEQ= aWY= IDI= YWc= ZXJz a2U= ICI= IEg= ZW0= IGNvbg== IFc= IFI= aGVy IHdhcw== IHI= b2Q= IEY= dWw= YXRl IGF0 cmk= cHA= b3Jl IFRoZQ== IHNl dXM= IHBybw== IGhh dW0= IGFyZQ== IGRl YWlu YW5k IG9y aWdo ZXN0 aXN0 YWI= cm9t IE4= dGg= IGNvbQ== IEc= dW4= b3A= MDA= IEw= IG5vdA== ZXNz IGV4 IHY= cmVz IEU= ZXc= aXR5 YW50 IGJ5 ZWw= b3M= b3J0 b2M= cXU= IGZyb20= IGhhdmU= IHN1 aXZl b3VsZA== IHNo IHRoaXM= bnQ= cmE= cGU= aWdodA== YXJ0 bWVudA== IGFs dXN0 ZW5k LS0= YWxs IE8= YWNr IGNo IGxl aWVz cmVk YXJk 4oA= b3V0 IEo= IGFi ZWFy aXY= YWxseQ== b3Vy b3N0 Z2g= cHQ= IHBs YXN0 IGNhbg== YWs= b21l dWQ= VGhl IGhpcw== IGRv IGdv IGhhcw== Z2U= J3Q= IFU= cm91 IHNh IGo= IGJ1dA== IHdvcg== IGFsbA== ZWN0 IGs= YW1l IHdpbGw= b2s= IHdoZQ== IHRoZXk= aWRl MDE= ZmY= aWNo cGw= dGhlcg== IHRy Li4= IGludA== aWU= dXJl YWdl IG5l aWFs YXA= aW5l aWNl IG1l IG91dA== YW5z b25l b25n aW9ucw== IHdobw== IEs= IHVw IHRoZWly IGFk IDM= IHVz YXRlZA== b3Vz IG1vcmU= dWU= b2c= IFN0 aW5k aWtl IHNv aW1l cGVy LiI= YmVy aXo= YWN0 IG9uZQ== IHNhaWQ= IC0= YXJl IHlvdXI= Y2M= IFRo IGNs ZXA= YWtl YWJsZQ== aXA= IGNvbnQ= IHdoaWNo aWE= IGlt IGFib3V0 IHdlcmU= dmVyeQ== dWI= IGhhZA== IGVu IGNvbXA= LCI= IElu IHVu IGFn aXJl YWNl YXU= YXJ5 IHdvdWxk YXNz cnk= IOKA Y2w= b29r ZXJl c28= IFY= aWdu aWI= IG9mZg== IHRl dmVu IFk= aWxl b3Nl aXRl b3Jt IDIwMQ== IHJlcw== IG1hbg== IHBlcg== IG90aGVy b3Jk dWx0 IGJlZW4= IGxpa2U= YXNl YW5jZQ== a3M= YXlz b3du ZW5jZQ== IGRpcw== Y3Rpb24= IGFueQ== IGFwcA== IHNw aW50 cmVzcw== YXRpb25z YWls IDQ= aWNhbA== IHRoZW0= IGhlcg== b3VudA== IENo IGFy IGlm IHRoZXJl IHBl IHllYXI= YXY= IG15 IHNvbWU= IHdoZW4= b3VnaA== YWNo IHRoYW4= cnU= b25k aWNr IG92ZXI= dmVs IHF1 Cgo= IHNj cmVhdA== cmVl IEl0 b3VuZA== cG9ydA== IGFsc28= IHBhcnQ= ZnRlcg== IGtu IGJlYw== IHRpbWU= ZW5z IDU= b3BsZQ== IHdoYXQ= IG5v ZHU= bWVy YW5n IG5ldw== LS0tLQ== IGdldA== b3J5 aXRpb24= aW5ncw== IGp1c3Q= IGludG8= IDA= ZW50cw== b3Zl dGU= IHBlb3BsZQ== IHByZQ== IGl0cw== IHJlYw== IHR3 aWFu aXJzdA== YXJr b3Jz IHdvcms= YWRl b2I= 
IHNoZQ== IG91cg== d24= aW5r bGlj IDE5 IEhl aXNo bmRlcg== YXVzZQ== IGhpbQ== b25z IFs= IHJv Zm9ybQ== aWxk YXRl
// src/ranks/cl100k_base.js
var cl100k_base_default = { "pat_str": "('s|'S|'t|'T|'re|'rE|'Re|'RE|'ve|'vE|'Ve|'VE|'m|'M|'ll|'lL|'Ll|'LL|'d|'D)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", "special_tokens": { "<|endoftext|>": 100257, "<|fim_prefix|>": 100258, "<|fim_middle|>": 100259, "<|fim_suffix|>": 100260, "<|endofprompt|>": 100276 }, "bpe_ranks": "! 0 IQ== Ig== Iw== JA== JQ== Jg== Jw== KA== KQ== Kg== Kw== LA== LQ== Lg== Lw== MA== MQ== Mg== Mw== NA== NQ== Ng== Nw== OA== OQ== Og== Ow== PA== PQ== Pg== Pw== QA== QQ== Qg== Qw== RA== RQ== Rg== Rw== SA== SQ== Sg== Sw== TA== TQ== Tg== Tw== UA== UQ== Ug== Uw== VA== VQ== Vg== Vw== WA== WQ== Wg== Ww== XA== XQ== Xg== Xw== YA== YQ== Yg== Yw== ZA== ZQ== Zg== Zw== aA== aQ== ag== aw== bA== bQ== bg== bw== cA== cQ== cg== cw== dA== dQ== dg== dw== eA== eQ== eg== ew== fA== fQ== fg== oQ== og== ow== pA== pQ== pg== pw== qA== qQ== qg== qw== rA== rg== rw== sA== sQ== sg== sw== tA== tQ== tg== tw== uA== uQ== ug== uw== vA== vQ== vg== vw== wA== wQ== wg== ww== xA== xQ== xg== xw== yA== yQ== yg== yw== zA== zQ== zg== zw== 0A== 0Q== 0g== 0w== 1A== 1Q== 1g== 1w== 2A== 2Q== 2g== 2w== 3A== 3Q== 3g== 3w== 4A== 4Q== 4g== 4w== 5A== 5Q== 5g== 5w== 6A== 6Q== 6g== 6w== 7A== 7Q== 7g== 7w== 8A== 8Q== 8g== 8w== 9A== 9Q== 9g== 9w== +A== +Q== +g== +w== /A== /Q== /g== /w== AA== AQ== Ag== Aw== BA== BQ== Bg== Bw== CA== CQ== Cg== Cw== DA== DQ== Dg== Dw== EA== EQ== Eg== Ew== FA== FQ== Fg== Fw== GA== GQ== Gg== Gw== HA== HQ== Hg== Hw== IA== fw== gA== gQ== gg== gw== hA== hQ== hg== hw== iA== iQ== ig== iw== jA== jQ== jg== jw== kA== kQ== kg== kw== lA== lQ== lg== lw== mA== mQ== mg== mw== nA== nQ== ng== nw== oA== rQ== ICA= ICAgIA== aW4= IHQ= ICAgICAgICA= ZXI= ICAg b24= IGE= cmU= YXQ= c3Q= ZW4= b3I= IHRo Cgo= IGM= bGU= IHM= aXQ= YW4= YXI= YWw= IHRoZQ== Owo= IHA= IGY= b3U= ID0= aXM= ICAgICAgIA== aW5n ZXM= IHc= aW9u ZWQ= aWM= IGI= IGQ= ZXQ= IG0= IG8= CQk= cm8= YXM= ZWw= Y3Q= bmQ= IGlu IGg= ZW50 aWQ= IG4= YW0= ICAgICAgICAgICA= IHRv IHJl LS0= 
IHs= IG9m b20= KTsK aW0= DQo= ICg= aWw= Ly8= IGFuZA== dXI= c2U= IGw= ZXg= IFM= YWQ= ICI= Y2g= dXQ= aWY= Kio= IH0= ZW0= b2w= ICAgICAgICAgICAgICAgIA== dGg= KQo= IHsK IGc= aWc= aXY= LAo= Y2U= b2Q= IHY= YXRl IFQ= YWc= YXk= ICo= b3Q= dXM= IEM= IHN0 IEk= dW4= dWw= dWU= IEE= b3c= ICc= ZXc= IDw= YXRpb24= KCk= IGZvcg== YWI= b3J0 dW0= YW1l IGlz cGU= dHI= Y2s= 4oA= IHk= aXN0 LS0tLQ== LgoK aGU= IGU= bG8= IE0= IGJl ZXJz IG9u IGNvbg== YXA= dWI= IFA= ICAgICAgICAgICAgICAg YXNz aW50 Pgo= bHk= dXJu ICQ= OwoK YXY= cG9ydA== aXI= LT4= bnQ= Y3Rpb24= ZW5k IGRl MDA= aXRo b3V0 dHVybg== b3Vy ICAgICA= bGlj cmVz cHQ= PT0= IHRoaXM= IHdo IGlm IEQ= dmVy YWdl IEI= aHQ= ZXh0 PSI= IHRoYXQ= KioqKg== IFI= IGl0 ZXNz IEY= IHI= b3M= YW5k IGFz ZWN0 a2U= cm9t IC8v Y29u IEw= KCI= cXU= bGFzcw== IHdpdGg= aXo= ZGU= IE4= IGFs b3A= dXA= Z2V0 IH0K aWxl IGFu YXRh b3Jl cmk= IHBybw== Ow0K CQkJCQ== dGVy YWlu IFc= IEU= IGNvbQ== IHJldHVybg== YXJ0 IEg= YWNr aW1wb3J0 dWJsaWM= IG9y ZXN0 bWVudA== IEc= YWJsZQ== IC0= aW5l aWxs aW5k ZXJl Ojo= aXR5 ICs= IHRy ZWxm aWdodA== KCc= b3Jt dWx0 c3Ry Li4= Iiw= IHlvdQ== eXBl cGw= IG5ldw== IGo= ICAgICAgICAgICAgICAgICAgIA== IGZyb20= IGV4 IE8= MjA= bGQ= IFs= b2M= Ogo= IHNl IGxl LS0tLS0tLS0= LnM= ewo= Jyw= YW50 IGF0 YXNl LmM= IGNo PC8= YXZl YW5n IGFyZQ== IGludA== 4oCZ X3Q= ZXJ0 aWFs YWN0 fQo= aXZl b2Rl b3N0 IGNsYXNz IG5vdA== b2c= b3Jk YWx1ZQ== YWxs ZmY= KCk7Cg== b250 aW1l YXJl IFU= IHBy IDo= aWVz aXpl dXJl IGJ5 aXJl IH0KCg== LnA= IHNo aWNl YXN0 cHRpb24= dHJpbmc= b2s= X18= Y2w= IyM= IGhl YXJk KS4= IEA= aWV3 CQkJ IHdhcw== aXA= dGhpcw== IHU= IFRoZQ== aWRl YWNl aWI= YWM= cm91 IHdl amVjdA== IHB1YmxpYw== YWs= dmU= YXRo b2lk ID0+ dXN0 cXVl IHJlcw== KSk= J3M= IGs= YW5z eXN0 dW5jdGlvbg== KioqKioqKio= IGk= IHVz cHA= MTA= b25l YWls PT09PQ== bmFtZQ== IHN0cg== IC8= ICY= YWNo ZGl2 eXN0ZW0= ZWxs IGhhdmU= ZXJy b3VsZA== dWxs cG9u IEo= X3A= ID09 aWdu U3Q= Lgo= IHBs KTsKCg== Zm9ybQ== cHV0 b3VudA== fQoK ZGQ= aXRl IGdldA== cnI= b21l IOKA YXJhbQ== Y2M= ICov RVI= SW4= bGVz X3M= b25n aWU= IGNhbg== IFY= ZXJ2 cHI= 
IHVu cm93 YmVy IGRv bGw= IGVs IHNlbGY= YXRlZA== YXJ5 IC4= J10= dWQ= IGVu IFRo ICAgICAgICAgICAgICAgIC
// src/core.ts
/**
 * Greedily merges adjacent byte ranges of `piece` according to `ranks`.
 *
 * Starts with one range per byte. On every pass the adjacent pair whose
 * combined bytes have the lowest rank is fused (the leftmost pair wins ties);
 * merging stops once no adjacent pair has a rank.
 *
 * @param {Uint8Array} piece - Bytes to segment.
 * @param {Map<string, number>} ranks - Rank lookup keyed by the byte values
 *   joined with ",".
 * @returns {{start: number, end: number}[]} Half-open ranges into `piece`.
 */
function bytePairMerge(piece, ranks) {
  const segments = [];
  for (let offset = 0; offset < piece.length; offset++) {
    segments.push({ start: offset, end: offset + 1 });
  }

  while (segments.length > 1) {
    let bestRank;
    let bestIndex = -1;

    // Inspect every neighbouring pair (left, right) and remember the cheapest.
    for (let right = 1; right < segments.length; right++) {
      const left = right - 1;
      const key = piece.slice(segments[left].start, segments[right].end).join(",");
      const candidate = ranks.get(key);
      if (candidate == null) {
        continue;
      }
      if (bestIndex === -1 || candidate < bestRank) {
        bestRank = candidate;
        bestIndex = left;
      }
    }

    if (bestIndex === -1) {
      // Nothing left that the rank table knows how to merge.
      break;
    }

    // Replace the winning pair with a single range spanning both.
    const fused = {
      start: segments[bestIndex].start,
      end: segments[bestIndex + 1].end,
    };
    segments.splice(bestIndex, 2, fused);
  }

  return segments;
}
/**
 * Converts the bytes of one text piece into rank values.
 *
 * A single-byte piece is looked up directly (the result may contain
 * `undefined` if the byte has no rank). Longer pieces are segmented with
 * `bytePairMerge`, and segments without a rank are dropped.
 *
 * @param {Uint8Array} piece - Bytes of the piece.
 * @param {Map<string, number>} ranks - Rank lookup keyed by the byte values
 *   joined with ",".
 * @returns {number[]} Ranks in piece order.
 */
function bytePairEncode(piece, ranks) {
  const rankOf = (bytes) => ranks.get(bytes.join(","));

  if (piece.length === 1) {
    return [rankOf(piece)];
  }

  const encoded = [];
  for (const segment of bytePairMerge(piece, ranks)) {
    const rank = rankOf(piece.slice(segment.start, segment.end));
    if (rank != null) {
      encoded.push(rank);
    }
  }
  return encoded;
}
/**
 * Backslash-escapes regular-expression metacharacters in `str` so the result
 * can be embedded in a pattern and matched literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with each of \ ^ $ * + ? . ( ) | [ ] { } prefixed
 *   by a backslash.
 */
function escapeRegex(str) {
  const metaCharacters = /[\\^$*+?.()|[\]{}]/g;
  // "$&" re-inserts the matched character after the added backslash.
  const escapedForm = "\\$&";
  return str.replace(metaCharacters, escapedForm);
}
/**
 * Byte-pair tokenizer built from a rank table.
 *
 * The constructor expands the compact `bpe_ranks` text into two lookups:
 * `rankMap` (comma-joined byte values -> rank) and `textMap` (rank -> bytes).
 * Special tokens are kept separately in `specialTokens` (text -> rank) and
 * `inverseSpecialTokens` (rank -> UTF-8 bytes).
 */
var _Tiktoken = class {
  /** @internal */
  specialTokens;
  /** @internal */
  inverseSpecialTokens;
  /** @internal */
  patStr;
  /** @internal */
  textEncoder = new TextEncoder();
  /** @internal */
  textDecoder = new TextDecoder("utf-8");
  /** @internal */
  rankMap = /* @__PURE__ */ new Map();
  /** @internal */
  textMap = /* @__PURE__ */ new Map();

  /**
   * @param {{pat_str: string, special_tokens: Object<string, number>, bpe_ranks: string}} ranks
   *   Encoding definition. Each non-empty line of `bpe_ranks` has the form
   *   `<ignored> <offset> <base64 token> <base64 token> ...`; the n-th token
   *   on a line receives rank `offset + n`.
   * @param {Object<string, number>} [extendedSpecialTokens] - Extra special
   *   tokens; entries here override same-named entries of `ranks.special_tokens`.
   */
  constructor(ranks, extendedSpecialTokens) {
    this.patStr = ranks.pat_str;

    // Expand the line-compressed table into base64 token -> rank.
    const rankByBase64 = {};
    for (const line of ranks.bpe_ranks.split("\n")) {
      if (!line) {
        continue;
      }
      const [, offsetStr, ...tokens] = line.split(" ");
      const firstRank = Number.parseInt(offsetStr, 10);
      tokens.forEach((token, position) => {
        rankByBase64[token] = firstRank + position;
      });
    }

    // Decode every token once and index it in both directions.
    Object.entries(rankByBase64).forEach(([token, rank]) => {
      const bytes = base64__default.default.toByteArray(token);
      this.rankMap.set(bytes.join(","), rank);
      this.textMap.set(rank, bytes);
    });

    this.specialTokens = { ...ranks.special_tokens, ...extendedSpecialTokens };

    const bytesBySpecialRank = {};
    for (const [text, rank] of Object.entries(this.specialTokens)) {
      bytesBySpecialRank[rank] = this.textEncoder.encode(text);
    }
    this.inverseSpecialTokens = bytesBySpecialRank;
  }

  /**
   * Tokenizes `text` into an array of ranks.
   *
   * @param {string} text - Text to encode.
   * @param {"all"|Iterable<string>} [allowedSpecial=[]] - Special tokens that
   *   are emitted as their special rank when they occur in `text`; "all"
   *   allows every known special token.
   * @param {"all"|Iterable<string>} [disallowedSpecial="all"] - Special tokens
   *   whose presence in `text` is an error; "all" means every known special
   *   token that is not allowed.
   * @returns {number[]} Token ranks in text order.
   * @throws {Error} If `text` contains a disallowed special token.
   */
  encode(text, allowedSpecial = [], disallowedSpecial = "all") {
    const piecePattern = new RegExp(this.patStr, "ug");
    const knownSpecials = Object.keys(this.specialTokens);
    const specialPattern = _Tiktoken.specialTokenRegex(knownSpecials);
    const output = [];

    const allowed = new Set(
      allowedSpecial === "all" ? Object.keys(this.specialTokens) : allowedSpecial
    );
    const disallowed = new Set(
      disallowedSpecial === "all"
        ? Object.keys(this.specialTokens).filter((name) => !allowed.has(name))
        : disallowedSpecial
    );

    if (disallowed.size > 0) {
      const forbiddenPattern = _Tiktoken.specialTokenRegex([...disallowed]);
      const offending = text.match(forbiddenPattern);
      if (offending != null) {
        throw new Error(
          `The text contains a special token that is not allowed: ${offending[0]}`
        );
      }
    }

    // Returns the first *allowed* special-token match at or after `from`,
    // stepping over occurrences that are not allowed; null when none remain.
    const findAllowedSpecial = (from) => {
      let searchFrom = from;
      for (;;) {
        specialPattern.lastIndex = searchFrom;
        const found = specialPattern.exec(text);
        if (found == null || allowed.has(found[0])) {
          return found;
        }
        searchFrom = found.index + 1;
      }
    };

    let cursor = 0;
    for (;;) {
      const special = findAllowedSpecial(cursor);
      const stop = special == null ? text.length : special.index;

      // Ordinary text up to the next allowed special token (or end of text).
      for (const [chunk] of text.substring(cursor, stop).matchAll(piecePattern)) {
        const bytes = this.textEncoder.encode(chunk);
        const wholeRank = this.rankMap.get(bytes.join(","));
        if (wholeRank == null) {
          output.push(...bytePairEncode(bytes, this.rankMap));
        } else {
          // The entire chunk is itself a token; no merging required.
          output.push(wholeRank);
        }
      }

      if (special == null) {
        return output;
      }

      output.push(this.specialTokens[special[0]]);
      cursor = special.index + special[0].length;
    }
  }

  /**
   * Converts ranks back into text.
   *
   * Ranks found in neither the regular table nor the special-token table are
   * skipped.
   *
   * @param {ArrayLike<number>} tokens - Ranks to decode.
   * @returns {string} The concatenated token bytes decoded as UTF-8.
   */
  decode(tokens) {
    const chunks = [];
    let totalLength = 0;
    for (let index = 0; index < tokens.length; ++index) {
      const id = tokens[index];
      const bytes = this.textMap.get(id) ?? this.inverseSpecialTokens[id];
      if (bytes == null) {
        continue;
      }
      chunks.push(bytes);
      totalLength += bytes.length;
    }

    // Concatenate first so multi-byte characters split across tokens decode correctly.
    const merged = new Uint8Array(totalLength);
    let writeAt = 0;
    chunks.forEach((bytes) => {
      merged.set(bytes, writeAt);
      writeAt += bytes.length;
    });
    return this.textDecoder.decode(merged);
  }
};
var Tiktoken = _Tiktoken;
// Static helper: compiles one global regex that matches any of the given
// special-token strings literally (each token is escaped before being joined with "|").
__publicField(Tiktoken, "specialTokenRegex", (tokens) => {
  const alternatives = tokens.map((token) => escapeRegex(token));
  return new RegExp(alternatives.join("|"), "g");
});
/**
 * Maps a model name to the name of the encoding it uses.
 *
 * @param {string} model - Exact model identifier.
 * @returns {string} One of "gpt2", "p50k_base", "p50k_edit", "r50k_base" or
 *   "cl100k_base".
 * @throws {Error} "Unknown model" when `model` is not in the table.
 */
function getEncodingNameForModel(model) {
  const modelsByEncoding = [
    ["gpt2", ["gpt2"]],
    [
      "p50k_base",
      [
        "code-cushman-001",
        "code-cushman-002",
        "code-davinci-001",
        "code-davinci-002",
        "cushman-codex",
        "davinci-codex",
        "davinci-002",
        "text-davinci-002",
        "text-davinci-003",
      ],
    ],
    ["p50k_edit", ["code-davinci-edit-001", "text-davinci-edit-001"]],
    [
      "r50k_base",
      [
        "ada",
        "babbage",
        "babbage-002",
        "code-search-ada-code-001",
        "code-search-babbage-code-001",
        "curie",
        "davinci",
        "text-ada-001",
        "text-babbage-001",
        "text-curie-001",
        "text-davinci-001",
        "text-search-ada-doc-001",
        "text-search-babbage-doc-001",
        "text-search-curie-doc-001",
        "text-search-davinci-doc-001",
        "text-similarity-ada-001",
        "text-similarity-babbage-001",
        "text-similarity-curie-001",
        "text-similarity-davinci-001",
      ],
    ],
    [
      "cl100k_base",
      [
        "gpt-3.5-turbo-instruct-0914",
        "gpt-3.5-turbo-instruct",
        "gpt-3.5-turbo-16k-0613",
        "gpt-3.5-turbo-16k",
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-0301",
        "gpt-3.5-turbo",
        "gpt-4-32k-0613",
        "gpt-4-32k-0314",
        "gpt-4-32k",
        "gpt-4-0613",
        "gpt-4-0314",
        "gpt-4",
        "gpt-3.5-turbo-1106",
        "gpt-35-turbo",
        "gpt-4-1106-preview",
        "gpt-4-vision-preview",
        "gpt-3.5-turbo-0125",
        "gpt-4-turbo",
        "gpt-4-turbo-2024-04-09",
        "gpt-4-turbo-preview",
        "gpt-4-0125-preview",
        "text-embedding-ada-002",
      ],
    ],
  ];

  for (const [encodingName, models] of modelsByEncoding) {
    if (models.includes(model)) {
      return encodingName;
    }
  }
  throw new Error("Unknown model");
}
// src/index.ts
/**
 * Creates a tokenizer for a named encoding.
 *
 * @param {string} encoding - "gpt2", "r50k_base", "p50k_base", "p50k_edit"
 *   or "cl100k_base".
 * @param {Object<string, number>} [extendSpecialTokens] - Extra special tokens
 *   passed through to the `Tiktoken` constructor.
 * @returns {Tiktoken} A new tokenizer built from the bundled rank table.
 * @throws {Error} "Unknown encoding" for any other name.
 */
function getEncoding(encoding, extendSpecialTokens) {
  let ranks;
  switch (encoding) {
    case "gpt2":
      ranks = gpt2_default;
      break;
    case "r50k_base":
      ranks = r50k_base_default;
      break;
    case "p50k_base":
      ranks = p50k_base_default;
      break;
    case "p50k_edit":
      ranks = p50k_edit_default;
      break;
    case "cl100k_base":
      ranks = cl100k_base_default;
      break;
    default:
      throw new Error("Unknown encoding");
  }
  return new Tiktoken(ranks, extendSpecialTokens);
}
/**
 * Creates the tokenizer that belongs to a model.
 *
 * @param {string} model - Model identifier accepted by `getEncodingNameForModel`.
 * @param {Object<string, number>} [extendSpecialTokens] - Extra special tokens
 *   forwarded to `getEncoding`.
 * @returns {Tiktoken} Whatever `getEncoding` returns for the model's encoding.
 */
function encodingForModel(model, extendSpecialTokens) {
  const encodingName = getEncodingNameForModel(model);
  return getEncoding(encodingName, extendSpecialTokens);
}
// Public CommonJS API of this bundle: the tokenizer class plus the factory
// and model-lookup helpers defined above.
exports.Tiktoken = Tiktoken;
exports.encodingForModel = encodingForModel;
exports.getEncoding = getEncoding;
exports.getEncodingNameForModel = getEncodingNameForModel;