fastify核心功能示例测试!!!

2025-09-21 14:50:41 +08:00
commit 9145aea047
1958 changed files with 230098 additions and 0 deletions
--- a/node_modules/ret/dist/tokenizer.js
+++ b/node_modules/ret/dist/tokenizer.js
@@ -0,0 +1,361 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { // reuse a helper already present on `this`, else define one: exposes m[k] on o under the key k2
+    if (k2 === undefined) k2 = k; // the target key defaults to the source key
+    Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } }); // getter: m[k] is read again on every access
+}) : (function(o, m, k, k2) { // variant used when Object.create is falsy
+    if (k2 === undefined) k2 = k; // the target key defaults to the source key
+    o[k2] = m[k]; // plain assignment: copies the value as it is at call time
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { // stores v as the enumerable "default" property of o
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) { // variant used when Object.create is falsy
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) { // wraps the result of require() in a fresh object that also carries a "default" property
+    if (mod && mod.__esModule) return mod; // modules flagged with __esModule are returned untouched
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); // bind every own enumerable key except "default"
+    __setModuleDefault(result, mod); // the original module itself becomes result.default
+    return result;
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.tokenizer = void 0;
+const util = __importStar(require("./util"));
+const types_1 = require("./types");
+const sets = __importStar(require("./sets"));
+/**
+ * Matches a single character that may open a capture group name.
+ */
+const captureGroupFirstChar = /^[a-zA-Z_$]$/i;
+/**
+ * Matches a single character that may appear after the first one in a capture group name.
+ */
+const captureGroupChars = /^[a-zA-Z0-9_$]$/i;
+const digit = /\d/; // unanchored: true for any tested string that contains a decimal digit
+/**
+ * Tokenizes a regular expression (that is currently a string)
+ * @param {string} regexpStr String of regular expression to be tokenized
+ *
+ * @returns {Root}
+ */
+exports.tokenizer = (regexpStr) => {
+    let i = 0, c;
+    let start = { type: types_1.types.ROOT, stack: [] };
+    // Keep track of last clause/group and stack.
+    let lastGroup = start;
+    let last = start.stack;
+    let groupStack = [];
+    let referenceQueue = [];
+    let groupCount = 0;
+    const repeatErr = (col) => {
+        throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Nothing to repeat at column ${col - 1}`);
+    };
+    // Decode a few escaped characters.
+    let str = util.strToChars(regexpStr);
+    // Iterate through each character in string.
+    while (i < str.length) {
+        switch (c = str[i++]) {
+            // Handle escaped characters, inclues a few sets.
+            case '\\':
+                if (i === str.length) {
+                    throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: \\ at end of pattern`);
+                }
+                switch (c = str[i++]) {
+                    case 'b':
+                        last.push({ type: types_1.types.POSITION, value: 'b' });
+                        break;
+                    case 'B':
+                        last.push({ type: types_1.types.POSITION, value: 'B' });
+                        break;
+                    case 'w':
+                        last.push(sets.words());
+                        break;
+                    case 'W':
+                        last.push(sets.notWords());
+                        break;
+                    case 'd':
+                        last.push(sets.ints());
+                        break;
+                    case 'D':
+                        last.push(sets.notInts());
+                        break;
+                    case 's':
+                        last.push(sets.whitespace());
+                        break;
+                    case 'S':
+                        last.push(sets.notWhitespace());
+                        break;
+                    default:
+                        // Check if c is integer.
+                        // In which case it's a reference.
+                        if (digit.test(c)) {
+                            let digits = c;
+                            while (i < str.length && digit.test(str[i])) {
+                                digits += str[i++];
+                            }
+                            let value = parseInt(digits, 10);
+                            const reference = { type: types_1.types.REFERENCE, value };
+                            last.push(reference);
+                            referenceQueue.push({ reference, stack: last, index: last.length - 1 });
+                            // Escaped character.
+                        }
+                        else {
+                            last.push({ type: types_1.types.CHAR, value: c.charCodeAt(0) });
+                        }
+                }
+                break;
+            // Positionals.
+            case '^':
+                last.push({ type: types_1.types.POSITION, value: '^' });
+                break;
+            case '$':
+                last.push({ type: types_1.types.POSITION, value: '$' });
+                break;
+            // Handle custom sets.
+            case '[': {
+                // Check if this class is 'anti' i.e. [^abc].
+                let not;
+                if (str[i] === '^') {
+                    not = true;
+                    i++;
+                }
+                else {
+                    not = false;
+                }
+                // Get all the characters in class.
+                let classTokens = util.tokenizeClass(str.slice(i), regexpStr);
+                // Increase index by length of class.
+                i += classTokens[1];
+                last.push({
+                    type: types_1.types.SET,
+                    set: classTokens[0],
+                    not,
+                });
+                break;
+            }
+            // Class of any character except \n.
+            case '.':
+                last.push(sets.anyChar());
+                break;
+            // Push group onto stack.
+            case '(': {
+                // Create group.
+                let group = {
+                    type: types_1.types.GROUP,
+                    stack: [],
+                    remember: true,
+                };
+                // If this is a special kind of group.
+                if (str[i] === '?') {
+                    c = str[i + 1];
+                    i += 2;
+                    // Match if followed by.
+                    if (c === '=') {
+                        group.followedBy = true;
+                        group.remember = false;
+                        // Match if not followed by.
+                    }
+                    else if (c === '!') {
+                        group.notFollowedBy = true;
+                        group.remember = false;
+                    }
+                    else if (c === '<') {
+                        let name = '';
+                        if (captureGroupFirstChar.test(str[i])) {
+                            name += str[i];
+                            i++;
+                        }
+                        else {
+                            throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid capture group name, character '${str[i]}'` +
+                                ` after '<' at column ${i + 1}`);
+                        }
+                        while (i < str.length && captureGroupChars.test(str[i])) {
+                            name += str[i];
+                            i++;
+                        }
+                        if (!name) {
+                            throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid capture group name, character '${str[i]}'` +
+                                ` after '<' at column ${i + 1}`);
+                        }
+                        if (str[i] !== '>') {
+                            throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unclosed capture group name, expected '>', found` +
+                                ` '${str[i]}' at column ${i + 1}`);
+                        }
+                        group.name = name;
+                        i++;
+                    }
+                    else if (c === ':') {
+                        group.remember = false;
+                    }
+                    else {
+                        throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Invalid group, character '${c}'` +
+                            ` after '?' at column ${i - 1}`);
+                    }
+                }
+                else {
+                    groupCount += 1;
+                }
+                // Insert subgroup into current group stack.
+                last.push(group);
+                // Remember the current group for when the group closes.
+                groupStack.push(lastGroup);
+                // Make this new group the current group.
+                lastGroup = group;
+                last = group.stack;
+                break;
+            }
+            // Pop group out of stack.
+            case ')':
+                if (groupStack.length === 0) {
+                    throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unmatched ) at column ${i - 1}`);
+                }
+                lastGroup = groupStack.pop();
+                // Check if this group has a PIPE.
+                // To get back the correct last stack.
+                last = lastGroup.options ?
+                    lastGroup.options[lastGroup.options.length - 1] :
+                    lastGroup.stack;
+                break;
+            // Use pipe character to give more choices.
+            case '|': {
+                // Create array where options are if this is the first PIPE
+                // in this clause.
+                if (!lastGroup.options) {
+                    lastGroup.options = [lastGroup.stack];
+                    delete lastGroup.stack;
+                }
+                // Create a new stack and add to options for rest of clause.
+                let stack = [];
+                lastGroup.options.push(stack);
+                last = stack;
+                break;
+            }
+            // Repetition.
+            // For every repetition, remove last element from last stack
+            // then insert back a RANGE object.
+            // This design is chosen because there could be more than
+            // one repetition symbols in a regex i.e. `a?+{2,3}`.
+            case '{': {
+                let rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
+                if (rs !== null) {
+                    if (last.length === 0) {
+                        repeatErr(i);
+                    }
+                    min = parseInt(rs[1], 10);
+                    max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
+                    i += rs[0].length;
+                    last.push({
+                        type: types_1.types.REPETITION,
+                        min,
+                        max,
+                        value: last.pop(),
+                    });
+                }
+                else {
+                    last.push({
+                        type: types_1.types.CHAR,
+                        value: 123,
+                    });
+                }
+                break;
+            }
+            case '?':
+                if (last.length === 0) {
+                    repeatErr(i);
+                }
+                last.push({
+                    type: types_1.types.REPETITION,
+                    min: 0,
+                    max: 1,
+                    value: last.pop(),
+                });
+                break;
+            case '+':
+                if (last.length === 0) {
+                    repeatErr(i);
+                }
+                last.push({
+                    type: types_1.types.REPETITION,
+                    min: 1,
+                    max: Infinity,
+                    value: last.pop(),
+                });
+                break;
+            case '*':
+                if (last.length === 0) {
+                    repeatErr(i);
+                }
+                last.push({
+                    type: types_1.types.REPETITION,
+                    min: 0,
+                    max: Infinity,
+                    value: last.pop(),
+                });
+                break;
+            // Default is a character that is not `\[](){}?+*^$`.
+            default:
+                last.push({
+                    type: types_1.types.CHAR,
+                    value: c.charCodeAt(0),
+                });
+        }
+    }
+    // Check if any groups have not been closed.
+    if (groupStack.length !== 0) {
+        throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unterminated group`);
+    }
+    updateReferences(referenceQueue, groupCount);
+    return start;
+};
+/**
+ * This is a side effecting function that changes references to chars
+ * if there are not enough capturing groups to reference
+ * See: https://github.com/fent/ret.js/pull/39#issuecomment-1006475703
+ * See: https://github.com/fent/ret.js/issues/38
+ * @param {(Reference | Char)[]} referenceQueue
+ * @param {number} groupCount
+ * @returns {void}
+ */
+function updateReferences(referenceQueue, groupCount) {
+    // Note: We go through the queue in reverse order so
+    // that index we use is correct even if we have to add
+    // multiple tokens to one stack
+    for (const elem of referenceQueue.reverse()) {
+        if (groupCount < elem.reference.value) {
+            // If there is nothing to reference then turn this into a char token
+            elem.reference.type = types_1.types.CHAR;
+            const valueString = elem.reference.value.toString();
+            elem.reference.value = parseInt(valueString, 8);
+            // If the number is not octal then we need to create multiple tokens
+            // https://github.com/fent/ret.js/pull/39#issuecomment-1008229226
+            if (!/^[0-7]+$/.test(valueString)) {
+                let i = 0;
+                while (valueString[i] !== '8' && valueString[i] !== '9') {
+                    i += 1;
+                }
+                if (i === 0) {
+                    // Handling case when escaped number starts with 8 or 9
+                    elem.reference.value = valueString.charCodeAt(0);
+                    i += 1;
+                }
+                else {
+                    // If the escaped number does not start with 8 or 9, then all
+                    // 0-7 digits before the first 8/9 form the first character code
+                    // see: https://github.com/fent/ret.js/pull/39#discussion_r780747085
+                    elem.reference.value = parseInt(valueString.slice(0, i), 8);
+                }
+                if (valueString.length > i) {
+                    const tail = elem.stack.splice(elem.index + 1);
+                    for (const char of valueString.slice(i)) {
+                        elem.stack.push({
+                            type: types_1.types.CHAR,
+                            value: char.charCodeAt(0),
+                        });
+                    }
+                    elem.stack.push(...tail);
+                }
+            }
+        }
+    }
+}
+//# sourceMappingURL=tokenizer.js.map