Slight improvements on lexer & first tests
@@ -17,27 +17,26 @@ const Tokens = {
     // General macro capture
     Macro: {
        Start: createToken({ name: 'MacroStart', pattern: /\{\{/ }),
+        // Separate macro identifier needed, that is similar to the global identifier, but captures the actual macro "name".
+        // We need this because this token is going to switch lexer mode, while the general identifier does not.
        Identifier: createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ }),
        // CaptureBeforeEnd: createToken({ name: 'MacroCaptureBeforeEnd', pattern: /.*?(?=\}\})/, pop_mode: true/*, group: Lexer.SKIPPED */ }),
        End: createToken({ name: 'MacroEnd', pattern: /\}\}/ }),
    },
 
+    // Captures that only appear inside arguments
     Args: {
-
+        DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
+        Colon: createToken({ name: 'Colon', pattern: /:/ }),
+        Equals: createToken({ name: 'Equals', pattern: /=/ }),
+        Quote: createToken({ name: 'Quote', pattern: /"/ }),
     },
 
     // All tokens that can be captured inside a macro
-    DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
-    Colon: createToken({ name: 'Colon', pattern: /:/ }),
-    Equals: createToken({ name: 'Equals', pattern: /=/ }),
-    Quote: createToken({ name: 'Quote', pattern: /"/ }),
     Identifier: createToken({ name: 'Identifier', pattern: /[a-zA-Z_]\w*/ }),
-    WhiteSpace: createToken({
-        name: 'WhiteSpace',
-        pattern: /\s+/,
-        group: Lexer.SKIPPED,
-    }),
+    WhiteSpace: createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED }),
 
+    // Capture unknown characters one by one, to still allow other tokens being matched once they are there
     Unknown: createToken({ name: 'Unknown', pattern: /[^{}]/ }),
 
     // TODO: Capture-all rest for now, that is not the macro end or opening of a new macro. Might be replaced later down the line.
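The grouping above feeds Chevrotain's multi-mode lexer, where matching a token can push or pop the active mode. As a reference for how the Start and End tokens drive that switching, here is a minimal self-contained sketch; the mode names and wiring are illustrative assumptions, not the actual MacroLexer definition:

    // Illustrative sketch only; mode names and token set are assumptions.
    const { createToken, Lexer } = require('chevrotain');

    const MacroStart = createToken({ name: 'MacroStart', pattern: /\{\{/, push_mode: 'macro_def' });
    const MacroIdentifier = createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ });
    const MacroEnd = createToken({ name: 'MacroEnd', pattern: /\}\}/, pop_mode: true });
    const Plaintext = createToken({ name: 'Plaintext', pattern: /[^{}]+/ });
    const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED });

    const sketchLexer = new Lexer({
        modes: {
            // Outside a macro, only plaintext and '{{' can match.
            plaintext: [MacroStart, Plaintext],
            // Inside a macro, '{{' may push again (nested macros); '}}' pops back out.
            macro_def: [MacroEnd, MacroStart, WhiteSpace, MacroIdentifier],
        },
        defaultMode: 'plaintext',
    });

    const out = sketchLexer.tokenize('Hello, {{user}}!');
    console.log(out.tokens.map(t => `${t.tokenType.name}(${t.image})`).join(' '));
    // -> Plaintext(Hello, ) MacroStart({{) MacroIdentifier(user) MacroEnd(}}) Plaintext(!)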
@@ -69,10 +68,10 @@ const Def = {
     // Macro args allow nested macros
     enter(Tokens.Macro.Start, modes.macro_def),
 
-    using(Tokens.DoubleColon),
-    using(Tokens.Colon),
-    using(Tokens.Equals),
-    using(Tokens.Quote),
+    using(Tokens.Args.DoubleColon),
+    using(Tokens.Args.Colon),
+    using(Tokens.Args.Equals),
+    using(Tokens.Args.Quote),
     using(Tokens.Identifier),
 
     using(Tokens.WhiteSpace),
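A detail worth keeping in mind with this ordering: Chevrotain tries token patterns in array order, so DoubleColon must stay ahead of Colon, otherwise '::' would lex as two Colon tokens. A standalone check of that behavior (illustrative):

    const { createToken, Lexer } = require('chevrotain');

    const DoubleColon = createToken({ name: 'DoubleColon', pattern: /::/ });
    const Colon = createToken({ name: 'Colon', pattern: /:/ });
    const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_]\w*/ });

    // DoubleColon is listed first, so '::' wins over two single-colon matches.
    const lexer = new Lexer([DoubleColon, Colon, Identifier]);
    const { tokens } = lexer.tokenize('setvar::myVar');
    console.log(tokens.map(t => t.tokenType.name));
    // -> [ 'Identifier', 'DoubleColon', 'Identifier' ]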
@@ -134,6 +133,7 @@ instance = MacroLexer.instance;
  * @returns {TokenType} The token again
  */
 function enter(token, mode) {
+    if (!token) throw new Error('Token must not be undefined');
     if (enterModesMap.has(token.name) && enterModesMap.get(token.name) !== mode) {
         throw new Error(`Token ${token.name} is already set to enter mode ${enterModesMap.get(token.name)}. The token definitions are global, so they cannot be used to lead to different modes.`);
     }
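Only the new guard and the conflict check are visible in this hunk; the rest of enter lies outside the diff context. A plausible reconstruction of the whole helper, where everything after the guard is an assumption inferred from Chevrotain's PUSH_MODE token property and the POP_MODE assignment visible in exits below:

    function enter(token, mode) {
        if (!token) throw new Error('Token must not be undefined');
        if (enterModesMap.has(token.name) && enterModesMap.get(token.name) !== mode) {
            throw new Error(`Token ${token.name} is already set to enter mode ${enterModesMap.get(token.name)}.`);
        }
        // Assumed body: remember the mapping and tag the token so the
        // multi-mode lexer pushes `mode` whenever this token is matched.
        enterModesMap.set(token.name, mode);
        token.PUSH_MODE = mode;
        return token;
    }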
@@ -155,6 +155,7 @@ function enter(token, mode) {
  * @returns {TokenType} The token again
  */
 function exits(token, mode) {
+    if (!token) throw new Error('Token must not be undefined');
     token.POP_MODE = !!mode; // Always set to true. We only reference the mode here so the linter sees it used; it is passed in purely for clarity in the definition.
     return token;
 }
@@ -169,6 +170,7 @@ function exits(token, mode) {
  * @returns {TokenType} The token again
  */
 function using(token) {
+    if (!token) throw new Error('Token must not be undefined');
     if (enterModesMap.has(token.name)) {
         throw new Error(`Token ${token.name} is already marked to enter a mode (${enterModesMap.get(token.name)}). The token definitions are global, so a token cannot enter a mode in one place and be used as a regular token in another.`);
     }
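Taken together, the three guards enforce that a globally defined token keeps one consistent mode role. A short sketch of how they would reject a conflicting definition (illustrative, using the helpers above):

    const start = createToken({ name: 'MacroStart', pattern: /\{\{/ });

    enter(start, 'macro_def');      // OK: records MacroStart -> macro_def
    enter(start, 'macro_def');      // OK: the same mode again is harmless
    // enter(start, 'macro_args');  // would throw: token already enters macro_def
    // using(start);                // would also throw: token is already a mode-enterer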
tests/frontend/MacroLexer.test.js (new file, 197 lines)
@@ -0,0 +1,197 @@
+/** @typedef {import('../../public/lib/chevrotain.js').ILexingResult} ILexingResult */
+/** @typedef {{type: string, text: string}} TestableToken */
+
+describe("MacroLexer Tests", () => {
+    beforeAll(async () => {
+        await page.goto(global.ST_URL);
+        await page.waitForFunction('document.getElementById("preloader") === null', { timeout: 0 });
+    });
+
+    it("basic macro tokenization", async () => {
+        const input = "Hello, {{user}}!";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'Plaintext', text: 'Hello, ' },
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'user' },
+            { type: 'MacroEnd', text: '}}' },
+            { type: 'Plaintext', text: '!' },
+        ];
+
+        // Compare the actual result with the expected tokens
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should tokenize plaintext only", async () => {
+        const input = "Just some text here.";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'Plaintext', text: 'Just some text here.' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle macro only", async () => {
+        const input = "{{user}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'user' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle empty macro", async () => {
+        const input = "{{}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+
+    it("should handle nested macros", async () => {
+        const input = "{{outerMacro {{innerMacro}}}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'outerMacro' },
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'innerMacro' },
+            { type: 'MacroEnd', text: '}}' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should tokenize macros with double-colon arguments correctly", async () => {
+        const input = "{{setvar::myVar::This is Sparta!}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'setvar' },
+            { type: 'DoubleColon', text: '::' },
+            { type: 'Identifier', text: 'myVar' },
+            { type: 'DoubleColon', text: '::' },
+            { type: 'Identifier', text: 'This' },
+            { type: 'Identifier', text: 'is' },
+            { type: 'Identifier', text: 'Sparta' },
+            { type: 'Unknown', text: '!' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle named arguments with key=value syntax", async () => {
+        const input = "{{doStuff key=MyValue another=AnotherValue}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'doStuff' },
+            { type: 'Identifier', text: 'key' },
+            { type: 'Equals', text: '=' },
+            { type: 'Identifier', text: 'MyValue' },
+            { type: 'Identifier', text: 'another' },
+            { type: 'Equals', text: '=' },
+            { type: 'Identifier', text: 'AnotherValue' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle named arguments with quotation marks", async () => {
+        const input = '{{getvar key="My variable"}}';
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'getvar' },
+            { type: 'Identifier', text: 'key' },
+            { type: 'Equals', text: '=' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'My' },
+            { type: 'Identifier', text: 'variable' },
+            { type: 'Quote', text: '"' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle multiple unnamed arguments in quotation marks", async () => {
+        const input = '{{random "this" "and that" "and some more"}}';
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'random' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'this' },
+            { type: 'Quote', text: '"' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'and' },
+            { type: 'Identifier', text: 'that' },
+            { type: 'Quote', text: '"' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'and' },
+            { type: 'Identifier', text: 'some' },
+            { type: 'Identifier', text: 'more' },
+            { type: 'Quote', text: '"' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+});
+
+/**
+ * Asynchronously runs the MacroLexer on the given input and returns the tokens.
+ *
+ * @param {string} input - The input string to be tokenized.
+ * @return {Promise<TestableToken[]>} A promise that resolves to an array of tokens.
+ */
+async function runLexerGetTokens(input) {
+    const result = await page.evaluate(async (input) => {
+        /** @type {import('../../public/scripts/macros/MacroLexer.js')} */
+        const { MacroLexer } = await import('./scripts/macros/MacroLexer.js');
+
+        const result = MacroLexer.tokenize(input);
+        return result;
+    }, input);
+
+    const tokens = getTestableTokens(result);
+    return tokens;
+}
+
+/**
+ * Maps a raw lexing result to tokens that are easy to compare in tests.
+ * @param {ILexingResult} result The result from the lexer
+ * @returns {TestableToken[]} The tokens
+ */
+function getTestableTokens(result) {
+    return result.tokens
+        // Filter out the mode popper. We don't care about that for testing
+        .filter(token => token.tokenType.name !== 'EndMode')
+        // Extract relevant properties from tokens for comparison
+        .map(token => ({
+            type: token.tokenType.name,
+            text: token.image,
+        }));
+}