Slight improvements on lexer & first tests

Wolfsblvt
2024-07-28 03:39:07 +02:00
parent dd8537fa18
commit 1f1bd4427b
2 changed files with 213 additions and 14 deletions


@@ -17,27 +17,26 @@ const Tokens = {
// General macro capture
Macro: {
Start: createToken({ name: 'MacroStart', pattern: /\{\{/ }),
// A separate macro identifier is needed; it is similar to the global identifier, but captures the actual macro "name".
// We need this because this token switches the lexer mode, while the general identifier does not.
Identifier: createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ }),
// CaptureBeforeEnd: createToken({ name: 'MacroCaptureBeforeEnd', pattern: /.*?(?=\}\})/, pop_mode: true/*, group: Lexer.SKIPPED */ }),
End: createToken({ name: 'MacroEnd', pattern: /\}\}/ }),
},
+ // Captures that only appear inside arguments
+ Args: {
+ DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
+ Colon: createToken({ name: 'Colon', pattern: /:/ }),
+ Equals: createToken({ name: 'Equals', pattern: /=/ }),
+ Quote: createToken({ name: 'Quote', pattern: /"/ }),
+ },
// All tokens that can be captured inside a macro
- DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
- Colon: createToken({ name: 'Colon', pattern: /:/ }),
- Equals: createToken({ name: 'Equals', pattern: /=/ }),
- Quote: createToken({ name: 'Quote', pattern: /"/ }),
Identifier: createToken({ name: 'Identifier', pattern: /[a-zA-Z_]\w*/ }),
- WhiteSpace: createToken({
-     name: 'WhiteSpace',
-     pattern: /\s+/,
-     group: Lexer.SKIPPED,
- }),
+ WhiteSpace: createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED }),
// Capture unknown characters one by one, so other tokens can still be matched as soon as they appear
Unknown: createToken({ name: 'Unknown', pattern: /[^{}]/ }),
// TODO: Catch-all for now for anything that is not the macro end or the opening of a new macro. Might be replaced later down the line.
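
For context, here is a minimal sketch of how token tables like this are typically assembled into a Chevrotain multi-mode lexer. The mode names, the Plaintext token, and the wiring below are illustrative assumptions; the actual assembly lives elsewhere in MacroLexer.js and is not part of this diff.

import { createToken, Lexer } from 'chevrotain';

// Illustrative tokens mirroring the definitions above
const MacroStart = createToken({ name: 'MacroStart', pattern: /\{\{/, push_mode: 'macro_def' });
const MacroIdentifier = createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ });
const MacroEnd = createToken({ name: 'MacroEnd', pattern: /\}\}/, pop_mode: true });
const Plaintext = createToken({ name: 'Plaintext', pattern: /[^{}]+/ });

// Each mode lists the token types that may be matched while it is active
const lexer = new Lexer({
    modes: {
        plaintext: [MacroStart, Plaintext],
        macro_def: [MacroEnd, MacroStart, MacroIdentifier],
    },
    defaultMode: 'plaintext',
});

const { tokens } = lexer.tokenize('Hello, {{user}}!');
// -> Plaintext('Hello, '), MacroStart('{{'), MacroIdentifier('user'), MacroEnd('}}'), Plaintext('!')
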
@@ -69,10 +68,10 @@ const Def = {
// Macro args allow nested macros
enter(Tokens.Macro.Start, modes.macro_def),
- using(Tokens.DoubleColon),
- using(Tokens.Colon),
- using(Tokens.Equals),
- using(Tokens.Quote),
+ using(Tokens.Args.DoubleColon),
+ using(Tokens.Args.Colon),
+ using(Tokens.Args.Equals),
+ using(Tokens.Args.Quote),
using(Tokens.Identifier),
using(Tokens.WhiteSpace),
@@ -134,6 +133,7 @@ instance = MacroLexer.instance;
* @returns {TokenType} The token again
*/
function enter(token, mode) {
+ if (!token) throw new Error('Token must not be undefined');
if (enterModesMap.has(token.name) && enterModesMap.get(token.name) !== mode) {
throw new Error(`Token ${token.name} is already set to enter mode ${enterModesMap.get(token.name)}. Token definitions are global, so the same token cannot lead to different modes.`);
}
@@ -155,6 +155,7 @@ function enter(token, mode) {
* @returns {TokenType} The token again
*/
function exits(token, mode) {
+ if (!token) throw new Error('Token must not be undefined');
token.POP_MODE = !!mode; // Always true. The mode is only referenced here so the linter counts it as used; it is passed in purely to make the definition read clearly.
return token;
}
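
For reference, Chevrotain can also express these mode switches directly in the token config (the commented-out CaptureBeforeEnd token above already uses pop_mode this way). The exits() helper sets the equivalent POP_MODE property after the fact, and enter() presumably does the same for PUSH_MODE; the mode name string below is an assumption for illustration.

// Config-based equivalent of enter(Tokens.Macro.Start, modes.macro_def):
const MacroStart = createToken({ name: 'MacroStart', pattern: /\{\{/, push_mode: 'macro_def' });
// Config-based equivalent of exits(Tokens.Macro.End, modes.macro_def):
const MacroEnd = createToken({ name: 'MacroEnd', pattern: /\}\}/, pop_mode: true });
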
@@ -169,6 +170,7 @@ function exits(token, mode) {
* @returns {TokenType} The token again
*/
function using(token) {
+ if (!token) throw new Error('Token must not be undefined');
if (enterModesMap.has(token.name)) {
throw new Error(`Token ${token.name} is already marked to enter a mode (${enterModesMap.get(token.name)}). Token definitions are global, so the same token cannot both enter a mode and be used as a plain token.`);
}
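
These guards matter because the token definitions are module-global: registering one token for two different modes would otherwise silently produce an inconsistent lexer definition. A hypothetical misuse that now fails fast (the second mode name is illustrative):

enter(Tokens.Macro.Start, modes.macro_def);
enter(Tokens.Macro.Start, modes.macro_args); // throws: Token MacroStart is already set to enter mode ...
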


@@ -0,0 +1,197 @@
/** @typedef {import('../../public/lib/chevrotain.js').ILexingResult} ILexingResult */
/** @typedef {{type: string, text: string}} TestableToken */
describe("MacroLexer Tests", () => {
beforeAll(async () => {
await page.goto(global.ST_URL);
await page.waitForFunction('document.getElementById("preloader") === null', { timeout: 0 });
});
it("basic macro tokenization", async () => {
const input = "Hello, {{user}}!";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'Plaintext', text: 'Hello, ' },
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'user' },
{ type: 'MacroEnd', text: '}}' },
{ type: 'Plaintext', text: '!' },
];
// Compare the actual result with expected tokens
expect(tokens).toEqual(expectedTokens);
});
it("should tokenize plaintext only", async () => {
const input = "Just some text here.";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'Plaintext', text: 'Just some text here.' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle macro only", async () => {
const input = "{{user}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'user' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle empty macro", async () => {
const input = "{{}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle nested macros", async () => {
const input = "{{outerMacro {{innerMacro}}}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'outerMacro' },
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'innerMacro' },
{ type: 'MacroEnd', text: '}}' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should tokenize macros with double colons arguments correctly", async () => {
const input = "{{setvar::myVar::This is Sparta!}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'setvar' },
{ type: 'DoubleColon', text: '::' },
{ type: 'Identifier', text: 'myVar' },
{ type: 'DoubleColon', text: '::' },
{ type: 'Identifier', text: 'This' },
{ type: 'Identifier', text: 'is' },
{ type: 'Identifier', text: 'Sparta' },
{ type: 'Unknown', text: '!' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle named arguments with key=value syntax", async () => {
const input = "{{doStuff key=MyValue another=AnotherValue}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'doStuff' },
{ type: 'Identifier', text: 'key' },
{ type: 'Equals', text: '=' },
{ type: 'Identifier', text: 'MyValue' },
{ type: 'Identifier', text: 'another' },
{ type: 'Equals', text: '=' },
{ type: 'Identifier', text: 'AnotherValue' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle named arguments with quotation marks", async () => {
const input = '{{getvar key="My variable"}}';
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'getvar' },
{ type: 'Identifier', text: 'key' },
{ type: 'Equals', text: '=' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'My' },
{ type: 'Identifier', text: 'variable' },
{ type: 'Quote', text: '"' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle multiple unnamed arguments in quotation marks", async () => {
const input = '{{random "this" "and that" "and some more"}}';
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'random' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'this' },
{ type: 'Quote', text: '"' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'and' },
{ type: 'Identifier', text: 'that' },
{ type: 'Quote', text: '"' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'and' },
{ type: 'Identifier', text: 'some' },
{ type: 'Identifier', text: 'more' },
{ type: 'Quote', text: '"' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
});
/**
* Asynchronously runs the MacroLexer on the given input and returns the tokens.
*
* @param {string} input - The input string to be tokenized.
* @returns {Promise<TestableToken[]>} A promise that resolves to an array of tokens.
*/
async function runLexerGetTokens(input) {
const result = await page.evaluate(async (input) => {
/** @type {import('../../public/scripts/macros/MacroLexer.js')} */
const { MacroLexer } = await import('./scripts/macros/MacroLexer.js');
const result = MacroLexer.tokenize(input);
return result;
}, input);
const tokens = getTestableTokens(result);
return tokens;
}
/**
* Reduces the raw lexer result to minimal tokens for comparison.
* @param {ILexingResult} result The result from the lexer
* @returns {TestableToken[]} The tokens
*/
function getTestableTokens(result) {
return result.tokens
// Filter out the mode popper. We don't care about that for testing
.filter(token => token.tokenType.name !== 'EndMode')
// Extract relevant properties from tokens for comparison
.map(token => ({
type: token.tokenType.name,
text: token.image
}));
}
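
For reference, getTestableTokens strips a raw Chevrotain token down to the two fields the assertions need. Since page.evaluate() serializes its return value, the tokenType on each token comes back as plain data, but its name survives, which is all the tests use. A sketch of one raw entry in result.tokens for the input "Hello, {{user}}!" (exact offsets and indices depend on the input):

// {
//     image: '{{',                 // kept as `text`
//     startOffset: 7, endOffset: 8,
//     startLine: 1, endLine: 1,
//     startColumn: 8, endColumn: 9,
//     tokenTypeIdx: 3,             // internal index, varies
//     tokenType: { name: 'MacroStart', ... }, // name is kept as `type`
// }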