Compare commits

...

31 Commits

Author SHA1 Message Date
Wolfsblvt
3c5277ded2 More nested macro tests
Add error case tests to enforce macro start position requirements
Include nested macro parsing scenarios and invalid syntax checks
Ensures parser correctly handles edge cases with embedded macros
2025-03-20 02:49:17 +01:00
Wolfsblvt
f9d4deb583 Improve macro argument parsing to allow colons in values
Enhances separator handling by fixing separator type detection and enabling colon characters within argument values
Updates validation to require at least one argument component and adds error cases for empty arguments
Includes expanded test coverage for mixed separator scenarios and edge cases
2025-03-20 02:25:07 +01:00
Wolfsblvt
efa367541a Parser consumes basic macros
- Fix lexer mode names
- Add basic macro parsing (identifier, and arguments)
- Tests: basic macro parsing tests
- Tests: simplifyCstNode supports ignoring nodes, or flattening nodes to just plaintext
2025-03-17 00:12:04 +01:00
Wolfsblvt
6a72369327 macros test case naming + lint 2025-03-08 01:26:15 +01:00
Wolfsblvt
d6dbc19697 Merge branch 'staging' into macros-2.0 2025-03-07 22:42:44 +01:00
Wolfsblvt
d989079fae Add macros stuff to SillyTavern.getContext 2025-03-01 18:28:04 +01:00
Wolfsblvt
6e814b4b47 Merge branch 'staging' into macros-2.0 2025-03-01 18:24:16 +01:00
Wolfsblvt
9a414b9915 Make parser errors testable 2024-08-12 06:13:12 +02:00
Wolfsblvt
559339d2de Basic setup for MacroParser + initial tests 2024-08-12 04:32:32 +02:00
Wolfsblvt
ec09a4e952 Improve lexer, removing warnings 2024-08-12 02:29:56 +02:00
Wolfsblvt
e1797ea13d Test case for legacy single-colon syntax 2024-08-12 01:55:57 +02:00
Wolfsblvt
7654480b6b Allow legacy underscores in macro identifiers 2024-08-12 01:37:35 +02:00
Wolfsblvt
a925fe8d39 Restructure lexer error testcases 2024-08-11 07:31:43 +02:00
Wolfsblvt
2b53774d6f Increase tests default timeout 2024-08-11 00:03:30 +02:00
Wolfsblvt
8e3ca60fc8 Clearer names for lexer tokens 2024-08-11 00:02:34 +02:00
Wolfsblvt
da4c80c398 Add lexing for output modifiers 2024-08-10 08:32:13 +02:00
Wolfsblvt
2b1e83dc07 Rewrote lexer modes/tokens to capture errors better 2024-08-10 02:45:50 +02:00
Wolfsblvt
b7840eb9cd Fix lexing unknown flags - treat as error 2024-08-09 04:15:42 +02:00
Wolfsblvt
ddb317f189 enable eslint for tests and run it 2024-08-01 02:46:34 +02:00
Wolfsblvt
cab03421bf Add macro execution modifiers + more tests
- Added macro flags (execution modifiers) to lexer
- Fixed some lexing issues
- Expanded lexer tests
- Treat lexer errors as failed test
2024-08-01 02:33:05 +02:00
Wolfsblvt
09e2911161 Reorder tests 2024-08-01 00:05:33 +02:00
Wolfsblvt
47e219c494 More edge cases tests 2024-07-28 07:56:05 +02:00
Wolfsblvt
04eb5573a7 Add more lexer tests 2024-07-28 06:19:07 +02:00
Wolfsblvt
1f1bd4427b Slight improvements on lexer & first tests 2024-07-28 03:39:07 +02:00
Wolfsblvt
dd8537fa18 Add jsconfig to tests folder
- Add jsconfig.json to tests folder, to prevent IDE errors on dynamic imports inside the page.evaluate execution.
2024-07-28 03:36:03 +02:00
Wolfsblvt
5bda8b4f54 Readme link to Chevrotain & license 2024-07-27 23:01:47 +02:00
Wolfsblvt
6c1acf7901 Merge branch 'staging' into macros-2.0 2024-07-27 21:40:43 +02:00
Wolfsblvt
99b5b6ea57 Cleaner lexer modes 2024-07-17 05:25:38 +02:00
Wolfsblvt
58481a6382 fix ESLint types loading for chevrotain 2024-07-17 04:44:52 +02:00
Wolfsblvt
f63b875b76 First draft of the macro lexer 2024-07-16 01:24:03 +02:00
Wolfsblvt
7a36901bfc Chevrotain lib and env setup 2024-07-16 00:43:01 +02:00
12 changed files with 4859 additions and 2 deletions

1
.github/readme.md vendored
View File

@@ -390,6 +390,7 @@ GNU Affero General Public License for more details.**
* Portions of CncAnon's TavernAITurbo mod used with permission
* Visual Novel Mode inspired by the work of PepperTaco (<https://github.com/peppertaco/Tavern/>)
* Noto Sans font by Google (OFL license)
* Lexer/Parser by Chevrotain (Apache-2.0 license) <https://github.com/chevrotain/chevrotain>
* Icon theme by Font Awesome <https://fontawesome.com> (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
* Default content by @OtisAlejandro (Seraphina character and lorebook) and @kallmeflocc (10K Discord Users Celebratory Background)
* Docker guide by [@mrguymiah](https://github.com/mrguymiah) and [@Bronya-Rand](https://github.com/Bronya-Rand)

2784
public/lib/chevrotain.d.ts vendored Normal file

File diff suppressed because it is too large Load Diff

165
public/lib/chevrotain.js Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,39 @@
import { MacroLexer } from './MacroLexer.js';
import { MacroParser } from './MacroParser.js';
/**
 * Glue between the MacroLexer and the MacroParser.
 * Currently only feeds tokenized input into the parser; evaluation of the
 * resulting CST is still stubbed out (see the commented sections below).
 */
class MacroEngine {
    /** Shared singleton instance of the engine. */
    static instance = new MacroEngine();

    constructor() {
        // The parser is itself a module-level singleton; keep a handle for the instance methods.
        this.parser = MacroParser;
    }

    /**
     * Tokenizes a whole document and primes the parser with the token stream.
     * @param {string} input - Document text to parse
     */
    parseDocument(input) {
        const { tokens } = MacroLexer.tokenize(input);
        this.parser.input = tokens;
        // const cst = this.parser.document();
        // return cst;
    }

    /**
     * Tokenizes a single macro expression and primes the parser with the token stream.
     * @param {string} input - Macro text to evaluate
     */
    evaluate(input) {
        const { tokens } = MacroLexer.tokenize(input);
        this.parser.input = tokens;
        // const cst = this.parser.macro();
        // if (this.parser.errors.length > 0) {
        //     throw new Error('Parsing errors detected');
        // }
        // return this.execute(cst);
    }

    /**
     * Executes a parsed macro tree.
     * @param {object} cstNode - The parsed syntax tree
     * @returns {string} Placeholder result for now
     */
    execute(cstNode) {
        // Implement execution logic here, traversing the CST and replacing macros with their values
        // For now, we'll just return a placeholder result
        return 'Executed Macro';
    }
}
const macroEngineInstance = MacroEngine.instance;
export { MacroEngine, macroEngineInstance };

View File

@@ -0,0 +1,226 @@
import { createToken, Lexer } from '../../lib/chevrotain.js';
/** @typedef {import('../../lib/chevrotain.js').TokenType} TokenType */
/** @enum {string} Lexer mode names. Modes control which token set is active at a given point. */
const modes = {
    plaintext: 'plaintext_mode',
    macro_def: 'macro_def_mode',
    macro_identifier_end: 'macro_identifier_end_mode',
    macro_args: 'macro_args_mode',
    // NOTE(review): 'modifer' is misspelled; the value is only used internally via this key,
    // but fixing it would need to touch every `modes.macro_filter_modifer` reference — confirm before renaming.
    macro_filter_modifer: 'macro_filter_modifer_mode',
    macro_filter_modifier_end: 'macro_filter_modifier_end_mode',
};
/** @readonly Token definitions for the macro lexer, grouped by where they may appear. */
const Tokens = {
    // General capture-all plaintext without macros
    Plaintext: createToken({ name: 'Plaintext', pattern: /(.+?)(?=\{\{)|(.+)/, line_breaks: true }), // Match everything up till opening brackets. Or to the end.
    // General macro capture
    Macro: {
        Start: createToken({ name: 'Macro.Start', pattern: /\{\{/ }),
        // Separate macro identifier needed, that is similar to the global identifier, but captures the actual macro "name".
        // We need this, because this token is going to switch lexer mode, while the general identifier does not.
        Flags: createToken({ name: 'Macro.Flag', pattern: /[!?#~/.$]/ }),
        Identifier: createToken({ name: 'Macro.Identifier', pattern: /[a-zA-Z][\w-_]*/ }),
        // At the end of an identifier, there has to be whitespace, or it must be directly followed by a colon/double-colon separator, output modifier or closing braces
        EndOfIdentifier: createToken({ name: 'Macro.EndOfIdentifier', pattern: /(?:\s+|(?=:{1,2})|(?=[|}]))/, group: Lexer.SKIPPED }),
        // Zero-width lookahead for the closing braces; lets a mode be left without consuming the '}}' itself
        BeforeEnd: createToken({ name: 'Macro.BeforeEnd', pattern: /(?=\}\})/, group: Lexer.SKIPPED }),
        End: createToken({ name: 'Macro.End', pattern: /\}\}/ }),
    },
    // Captures that only appear inside arguments
    Args: {
        DoubleColon: createToken({ name: 'Args.DoubleColon', pattern: /::/ }),
        Colon: createToken({ name: 'Args.Colon', pattern: /:/ }),
        Equals: createToken({ name: 'Args.Equals', pattern: /=/ }),
        Quote: createToken({ name: 'Args.Quote', pattern: /"/ }),
    },
    // Captures for output-modifier filters (pipe syntax)
    Filter: {
        EscapedPipe: createToken({ name: 'Filter.EscapedPipe', pattern: /\\\|/ }),
        Pipe: createToken({ name: 'Filter.Pipe', pattern: /\|/ }),
        Identifier: createToken({ name: 'Filter.Identifier', pattern: /[a-zA-Z][\w-_]*/ }),
        // At the end of an identifier, there has to be whitespace, or it must be directly followed by a colon/double-colon separator, output modifier or closing braces
        EndOfIdentifier: createToken({ name: 'Filter.EndOfIdentifier', pattern: /(?:\s+|(?=:{1,2})|(?=[|}]))/, group: Lexer.SKIPPED }),
    },
    // All tokens that can be captured inside a macro
    Identifier: createToken({ name: 'Identifier', pattern: /[a-zA-Z][\w-_]*/ }),
    WhiteSpace: createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED }),
    // Capture unknown characters one by one, to still allow other tokens being matched once they are there
    Unknown: createToken({ name: 'Unknown', pattern: /[^{}]/ }),
    // TODO: Capture-all rest for now, that is not the macro end or opening of a new macro. Might be replaced later down the line.
    Text: createToken({ name: 'Text', pattern: /.+(?=\}\}|\{\{)/, line_breaks: true }),
    // DANGER ZONE: Careful with this token. This is used as a way to pop the current mode, if no other token matches.
    // Can be used in modes that don't have a "defined" end really, like when capturing a single argument, argument list, etc.
    // Has to ALWAYS be the last token.
    ModePopper: createToken({ name: 'ModePopper', pattern: () => [''], line_breaks: false, group: Lexer.SKIPPED }),
};
/** @type {Map<string,string>} Saves all token definitions that are marked as entering modes */
const enterModesMap = new Map();
/**
 * The multi-mode lexer definition: for each mode, the tokens that may match there.
 * Order matters — earlier entries win when several patterns match at the same offset.
 * `enter`/`exits`/`using` are declarative helpers defined below (function declarations, so hoisting makes this legal).
 */
const Def = {
    modes: {
        [modes.plaintext]: [
            enter(Tokens.Macro.Start, modes.macro_def),
            using(Tokens.Plaintext),
        ],
        [modes.macro_def]: [
            exits(Tokens.Macro.End, modes.macro_def),
            using(Tokens.Macro.Flags),
            // We allow whitespaces in between flags or in front of the modifier
            using(Tokens.WhiteSpace),
            // Inside a macro, we will match the identifier
            // Enter 'macro_identifier_end' mode automatically at the end of the identifier, so we don't match more than one identifier
            enter(Tokens.Macro.Identifier, modes.macro_identifier_end),
        ],
        [modes.macro_identifier_end]: [
            // Valid options after a macro identifier: whitespace, colon/double-colon (captured), macro end braces, or output modifier pipe.
            exits(Tokens.Macro.BeforeEnd, modes.macro_identifier_end),
            enter(Tokens.Macro.EndOfIdentifier, modes.macro_args, { andExits: modes.macro_identifier_end }),
        ],
        [modes.macro_args]: [
            // Macro args allow nested macros
            enter(Tokens.Macro.Start, modes.macro_def),
            // We allow escaped pipes to not start output modifiers. We need to capture this first, before the pipe
            using(Tokens.Filter.EscapedPipe),
            // If at any place during args writing there is a pipe, we lex it as an output identifier, and then continue lexing its args
            enter(Tokens.Filter.Pipe, modes.macro_filter_modifer),
            using(Tokens.Args.DoubleColon),
            using(Tokens.Args.Colon),
            using(Tokens.Args.Equals),
            using(Tokens.Args.Quote),
            using(Tokens.Identifier),
            using(Tokens.WhiteSpace),
            // Last fallback, before we need to exit the mode, as we might have characters we (wrongly) haven't defined yet
            using(Tokens.Unknown),
            // Args are optional, and we don't know how long, so exit the mode to be able to capture the actual macro end
            exits(Tokens.ModePopper, modes.macro_args),
        ],
        [modes.macro_filter_modifer]: [
            using(Tokens.WhiteSpace),
            enter(Tokens.Filter.Identifier, modes.macro_filter_modifier_end, { andExits: modes.macro_filter_modifer }),
        ],
        [modes.macro_filter_modifier_end]: [
            // Valid options after a filter identifier: whitespace, colon/double-colon (captured), macro end braces, or output modifier pipe.
            // NOTE(review): the mode passed here is macro_identifier_end, not macro_filter_modifier_end. `exits` ignores its
            // mode argument at runtime, so this has no behavioral effect, but it reads like a copy-paste slip — confirm intent.
            exits(Tokens.Macro.BeforeEnd, modes.macro_identifier_end),
            exits(Tokens.Filter.EndOfIdentifier, modes.macro_filter_modifer),
        ],
    },
    defaultMode: modes.plaintext,
};
/**
 * The singleton instance of the MacroLexer.
 *
 * @type {MacroLexer}
 */
let instance;
export { instance as MacroLexer };

/**
 * Lexer for the macro language. Wraps chevrotain's Lexer with the
 * multi-mode token definition above and exposes a lazily-created singleton.
 */
class MacroLexer extends Lexer {
    /** @type {MacroLexer} */ static #instance;
    /** @type {MacroLexer} */ static get instance() { return MacroLexer.#instance ?? (MacroLexer.#instance = new MacroLexer()); }

    // Expose the token/mode definitions both statically and per-instance
    /** @readonly */ static tokens = Tokens;
    /** @readonly */ static def = Def;
    /** @readonly */ tokens = Tokens;
    /** @readonly */ def = MacroLexer.def;

    /** @private */
    constructor() {
        super(MacroLexer.def, {
            traceInitPerf: true,
        });
    }

    /**
     * Tokenizes the input and reshapes the result into an easily inspectable form,
     * with each token carrying its type name as a plain string.
     *
     * @param {string} input - Text to tokenize
     * @returns {{errors: object[], groups: object, tokens: object[]}} The reshaped lexing result
     */
    test(input) {
        const { errors, groups, tokens } = this.tokenize(input);
        const flattened = tokens.map((token) => {
            const { tokenType, ...rest } = token;
            return { type: tokenType.name, ...rest, tokenType: tokenType };
        });
        return { errors, groups, tokens: flattened };
    }
}
instance = MacroLexer.instance;
/**
 * [Utility]
 * Set push mode on the token definition.
 * Can be used inside the token mode definition block.
 *
 * Marks the token to **enter** the following lexer mode.
 *
 * Optionally, you can specify the modes to exit when entering this mode.
 *
 * @param {TokenType} token - The token to modify
 * @param {string} mode - The mode to set
 * @param {object} [options={}] - Additional options
 * @param {string?} [options.andExits=null] - The modes to exit when entering this mode
 * @returns {TokenType} The token again
 */
function enter(token, mode, { andExits = null } = {}) {
    if (!token) throw new Error('Token must not be undefined');
    // A token type is a single global object, so it can only ever push one mode.
    const registeredMode = enterModesMap.get(token.name);
    if (registeredMode !== undefined && registeredMode !== mode) {
        throw new Error(`Token ${token.name} already is set to enter mode ${registeredMode}. The token definition are global, so they cannot be used to lead to different modes.`);
    }
    if (andExits) exits(token, andExits);
    token.PUSH_MODE = mode;
    enterModesMap.set(token.name, mode);
    return token;
}
/**
 * [Utility]
 * Set pop mode on the token definition.
 * Can be used inside the token mode definition block.
 *
 * Marks the token to **exit** the following lexer mode.
 *
 * @param {TokenType} token - The token to modify
 * @param {string} mode - The mode to leave (for clarity in the definition only; chevrotain always pops the current mode)
 * @returns {TokenType} The token again
 * @throws {Error} If token or mode is missing
 */
function exits(token, mode) {
    if (!token) throw new Error('Token must not be undefined');
    // Previously this was `token.POP_MODE = !!mode`, which silently set POP_MODE to false
    // when the mode argument was forgotten — contradicting the stated intent that the token
    // always pops. Validate the argument instead, and always set true.
    if (!mode) throw new Error('Mode must not be undefined');
    token.POP_MODE = true; // The mode argument is only passed in for clarity in the definition block
    return token;
}
/**
 * [Utility]
 * Can be used inside the token mode definition block.
 *
 * Marks the token to just be used/consumed, and not exit or enter a mode.
 *
 * @param {TokenType} token - The token to modify
 * @returns {TokenType} The token again
 */
function using(token) {
    if (!token) throw new Error('Token must not be undefined');
    // Guard against reusing a token that was already registered as a mode-entering token elsewhere.
    const enteredMode = enterModesMap.get(token.name);
    if (enteredMode !== undefined) {
        throw new Error(`Token ${token.name} is already marked to enter a mode (${enteredMode}). The token definition are global, so they cannot be used to lead or stay differently.`);
    }
    return token;
}

View File

@@ -0,0 +1,76 @@
import { CstParser } from '../../lib/chevrotain.js';
import { MacroLexer } from './MacroLexer.js';
/** @typedef {import('../../lib/chevrotain.js').TokenType} TokenType */
/**
 * The singleton instance of the MacroParser.
 *
 * @type {MacroParser}
 */
let instance;
export { instance as MacroParser };

// CST parser for the macro grammar. The grammar rules are declared in the
// constructor and wired up via chevrotain's self-analysis.
class MacroParser extends CstParser {
    /** @type {MacroParser} */ static #instance;
    /** @type {MacroParser} */ static get instance() { return MacroParser.#instance ?? (MacroParser.#instance = new MacroParser()); }

    /** @private */
    constructor() {
        super(MacroLexer.def, {
            traceInitPerf: true,
            nodeLocationTracking: 'full', // needed so consumers (e.g. tests) can slice node text out of the raw input by offset
        });
        const Tokens = MacroLexer.tokens;
        const $ = this;

        // Basic Macro Structure
        // macro := '{{' Identifier [arguments] '}}'
        $.macro = $.RULE('macro', () => {
            $.CONSUME(Tokens.Macro.Start);
            $.CONSUME(Tokens.Macro.Identifier);
            $.OPTION(() => $.SUBRULE($.arguments));
            $.CONSUME(Tokens.Macro.End);
        });

        // Arguments Parsing
        // arguments := (':' | '::') argument ('::' argument)*
        // Both separator alternatives share the 'separator' label so the CST exposes them uniformly.
        $.arguments = $.RULE('arguments', () => {
            $.OR([
                { ALT: () => $.CONSUME(Tokens.Args.DoubleColon, { LABEL: 'separator' }) },
                { ALT: () => $.CONSUME(Tokens.Args.Colon, { LABEL: 'separator' }) },
            ]);
            $.AT_LEAST_ONE_SEP({
                SEP: Tokens.Args.DoubleColon,
                DEF: () => $.SUBRULE($.argument),
            });
        });

        // argument := (macro | Identifier | Unknown | ':')+
        // Single colons are valid inside argument values; only '::' separates arguments.
        $.argument = $.RULE('argument', () => {
            $.AT_LEAST_ONE(() => {
                $.OR([
                    { ALT: () => $.SUBRULE($.macro) }, // Nested Macros
                    { ALT: () => $.CONSUME(Tokens.Identifier) },
                    { ALT: () => $.CONSUME(Tokens.Unknown) },
                    { ALT: () => $.CONSUME(Tokens.Args.Colon) },
                ]);
            });
        });

        this.performSelfAnalysis();
    }

    /**
     * Lexes and parses the input, returning the CST and serializable error objects.
     *
     * @param {string} input - The input string to parse
     * @returns {{cst: object, errors: object[]}} The parse result and flattened errors
     */
    test(input) {
        const lexingResult = MacroLexer.tokenize(input);
        // "input" is a setter which will reset the parser's state.
        this.input = lexingResult.tokens;
        const cst = this.macro();
        // For testing purposes we need to actually persist the error messages in the object,
        // otherwise the test cases cannot read those, as they don't have access to the exception object type.
        const errors = this.errors.map(x => ({ message: x.message, ...x, stack: x.stack }));
        return { cst, errors: errors };
    }
}
instance = MacroParser.instance;

View File

@@ -60,6 +60,9 @@ import { groups, openGroupChat, selected_group, unshallowGroupMembers } from './
import { addLocaleData, getCurrentLocale, t, translate } from './i18n.js';
import { hideLoader, showLoader } from './loader.js';
import { MacrosParser } from './macros.js';
import { MacroEngine } from './macros/MacroEngine.js';
import { MacroLexer } from './macros/MacroLexer.js';
import { MacroParser } from './macros/MacroParser.js';
import { getChatCompletionModel, oai_settings } from './openai.js';
import { callGenericPopup, Popup, POPUP_RESULT, POPUP_TYPE } from './popup.js';
import { power_user, registerDebugFunction } from './power-user.js';
@@ -189,6 +192,11 @@ export function getContext() {
humanizedDateTime,
updateMessageBlock,
appendMediaToMessage,
macros: {
MacroLexer,
MacroParser,
MacroEngine,
},
variables: {
local: {
get: getLocalVariable,

View File

@@ -9,7 +9,7 @@ module.exports = {
env: {
es6: true,
node: true,
"jest/globals": true,
'jest/globals': true,
},
parserOptions: {
ecmaVersion: 'latest',
@@ -17,7 +17,16 @@ module.exports = {
overrides: [
],
ignorePatterns: [
'*.min.js',
'node_modules/**/*',
],
globals: {
browser: 'readonly',
page: 'readonly',
context: 'readonly',
puppeteerConfig: 'readonly',
jestPuppeteer: 'readonly',
},
rules: {
'no-unused-vars': ['error', { args: 'none' }],
'no-control-regex': 'off',
@@ -33,5 +42,9 @@ module.exports = {
'space-infix-ops': 'error',
'no-unused-expressions': ['error', { allowShortCircuit: true, allowTernary: true }],
'no-cond-assign': 'error',
// These rules should eventually be enabled.
'no-async-promise-executor': 'off',
'no-inner-declarations': 'off',
},
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,402 @@
/** @typedef {import('../../public/lib/chevrotain.js').CstNode} CstNode */
/** @typedef {import('../../public/lib/chevrotain.js').IRecognitionException} IRecognitionException */
/** @typedef {{[tokenName: string]: (string|string[]|TestableCstNode|TestableCstNode[])}} TestableCstNode */
/** @typedef {{name: string, message: string}} TestableRecognitionException */

// These tests are evaluated via puppeteer; they need more time to run and finish
jest.setTimeout(10_000);
describe('MacroParser', () => {
    beforeAll(async () => {
        // The parser runs in the browser context; wait until the app has finished loading.
        await page.goto(global.ST_URL);
        await page.waitForFunction('document.getElementById("preloader") === null', { timeout: 0 });
    });

    describe('General Macro', () => {
        // {{user}}
        it('should parse a simple macro', async () => {
            const input = '{{user}}';
            const macroCst = await runParser(input);
            const expectedCst = {
                'Macro.Start': '{{',
                'Macro.Identifier': 'user',
                'Macro.End': '}}',
            };
            expect(macroCst).toEqual(expectedCst);
        });
        // {{ user }}
        it('should generally handle whitespaces', async () => {
            const input = '{{ user }}';
            const macroCst = await runParser(input);
            const expectedCst = {
                'Macro.Start': '{{',
                'Macro.Identifier': 'user',
                'Macro.End': '}}',
            };
            expect(macroCst).toEqual(expectedCst);
        });

        describe('Error Cases (General Macro)', () => {
            // {{}}
            it('[Error] should throw an error for empty macro', async () => {
                const input = '{{}}';
                // NOTE(review): runParserAndGetErrors returns { cst, errors } — 'macroCst' is not a key there,
                // so it destructures to undefined and toBeUndefined() passes vacuously. Likely should be
                // `const { cst: macroCst, errors }` — confirm the parser really yields no CST on these errors.
                const { macroCst, errors } = await runParserAndGetErrors(input);
                const expectedErrors = [
                    { name: 'MismatchedTokenException', message: 'Expecting token of type --> Macro.Identifier <-- but found --> \'}}\' <--' },
                ];
                expect(macroCst).toBeUndefined();
                expect(errors).toEqual(expectedErrors);
            });
            // {{§!#&blah}}
            it('[Error] should throw an error for invalid identifier', async () => {
                const input = '{{§!#&blah}}';
                const { macroCst, errors } = await runParserAndGetErrors(input);
                const expectedErrors = [
                    { name: 'MismatchedTokenException', message: 'Expecting token of type --> Macro.Identifier <-- but found --> \'!\' <--' },
                ];
                expect(macroCst).toBeUndefined();
                expect(errors).toEqual(expectedErrors);
            });
            // {{user
            it('[Error] should throw an error for incomplete macro', async () => {
                const input = '{{user';
                const { macroCst, errors } = await runParserAndGetErrors(input);
                const expectedErrors = [
                    { name: 'MismatchedTokenException', message: 'Expecting token of type --> Macro.End <-- but found --> \'\' <--' },
                ];
                expect(macroCst).toBeUndefined();
                expect(errors).toEqual(expectedErrors);
            });
            // something{{user}}
            it('[Error] for testing purposes, macros need to start at the beginning of the string', async () => {
                const input = 'something{{user}}';
                const { macroCst, errors } = await runParserAndGetErrors(input);
                const expectedErrors = [
                    { name: 'MismatchedTokenException', message: 'Expecting token of type --> Macro.Start <-- but found --> \'something\' <--' },
                ];
                expect(macroCst).toBeUndefined();
                expect(errors).toEqual(expectedErrors);
            });
        });
    });

    describe('Arguments Handling', () => {
        // {{getvar::myvar}}
        it('should parse macros with double-colon argument', async () => {
            const input = '{{getvar::myvar}}';
            const macroCst = await runParser(input, {
                flattenKeys: ['arguments.argument'],
            });
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'getvar',
                'arguments': {
                    'separator': '::',
                    'argument': 'myvar',
                },
                'Macro.End': '}}',
            });
        });
        // {{roll:3d20}}
        it('should parse macros with single colon argument', async () => {
            const input = '{{roll:3d20}}';
            const macroCst = await runParser(input, {
                flattenKeys: ['arguments.argument'],
            });
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'roll',
                'arguments': {
                    'separator': ':',
                    'argument': '3d20',
                },
                'Macro.End': '}}',
            });
        });
        // {{setvar::myvar::value}}
        it('should parse macros with multiple double-colon arguments', async () => {
            const input = '{{setvar::myvar::value}}';
            const macroCst = await runParser(input, {
                flattenKeys: ['arguments.argument'],
                ignoreKeys: ['arguments.Args.DoubleColon'],
            });
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'setvar',
                'arguments': {
                    'separator': '::',
                    'argument': ['myvar', 'value'],
                },
                'Macro.End': '}}',
            });
        });
        // {{something:: spaced }}
        it('should strip spaces around arguments', async () => {
            const input = '{{something:: spaced }}';
            const macroCst = await runParser(input, {
                flattenKeys: ['arguments.argument'],
                ignoreKeys: ['arguments.separator', 'arguments.Args.DoubleColon'],
            });
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'something',
                'arguments': { 'argument': 'spaced' },
                'Macro.End': '}}',
            });
        });
        // {{something::with:single:colons}}
        it('should treat single colons as part of the argument with double-colon separator', async () => {
            const input = '{{something::with:single:colons}}';
            const macroCst = await runParser(input, {
                flattenKeys: ['arguments.argument'],
                ignoreKeys: ['arguments.Args.DoubleColon'],
            });
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'something',
                'arguments': {
                    'separator': '::',
                    'argument': 'with:single:colons',
                },
                'Macro.End': '}}',
            });
        });
        // {{legacy:something:else}}
        it('should treat single colons as part of the argument even with colon separator', async () => {
            const input = '{{legacy:something:else}}';
            const macroCst = await runParser(input, {
                flattenKeys: ['arguments.argument'],
                ignoreKeys: ['arguments.separator', 'arguments.Args.Colon'],
            });
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'legacy',
                'arguments': { 'argument': 'something:else' },
                'Macro.End': '}}',
            });
        });

        describe('Error Cases (Arguments Handling)', () => {
            // {{something::}}
            it('[Error] should throw an error for double-colon without a value', async () => {
                const input = '{{something::}}';
                const { macroCst, errors } = await runParserAndGetErrors(input);
                const expectedErrors = [
                    {
                        name: 'EarlyExitException', message: expect.stringMatching(/^Expecting: expecting at least one iteration which starts with one of these possible Token sequences:/),
                    },
                ];
                expect(macroCst).toBeUndefined();
                expect(errors).toEqual(expectedErrors);
            });
        });
    });

    describe('Nested Macros', () => {
        // {{outer::word {{inner}}}}
        it('should parse nested macros inside arguments', async () => {
            const input = '{{outer::word {{inner}}}}';
            const macroCst = await runParser(input, {});
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'outer',
                'arguments': {
                    'argument': {
                        'Identifier': 'word',
                        'macro': {
                            'Macro.Start': '{{',
                            'Macro.Identifier': 'inner',
                            'Macro.End': '}}',
                        },
                    },
                    'separator': '::',
                },
                'Macro.End': '}}',
            });
        });
        // {{outer::word {{inner1}}{{inner2}}}}
        it('should parse two nested macros next to each other inside an argument', async () => {
            const input = '{{outer::word {{inner1}}{{inner2}}}}';
            const macroCst = await runParser(input, {});
            expect(macroCst).toEqual({
                'Macro.Start': '{{',
                'Macro.Identifier': 'outer',
                'arguments': {
                    'argument': {
                        'Identifier': 'word',
                        'macro': [
                            {
                                'Macro.Start': '{{',
                                'Macro.Identifier': 'inner1',
                                'Macro.End': '}}',
                            },
                            {
                                'Macro.Start': '{{',
                                'Macro.Identifier': 'inner2',
                                'Macro.End': '}}',
                            },
                        ],
                    },
                    'separator': '::',
                },
                'Macro.End': '}}',
            });
        });

        describe('Error Cases (Nested Macros)', () => {
            it('[Error] should throw when there is a nested macro instead of an identifier', async () => {
                const input = '{{{{macroindentifier}}::value}}';
                const { macroCst, errors } = await runParserAndGetErrors(input);
                expect(macroCst).toBeUndefined();
                expect(errors).toHaveLength(1); // error doesn't really matter. Just don't parse it pls.
            });
            it('[Error] should throw when there is a macro inside an identifier', async () => {
                const input = '{{inside{{macro}}me}}';
                const { macroCst, errors } = await runParserAndGetErrors(input);
                expect(macroCst).toBeUndefined();
                expect(errors).toHaveLength(1); // error doesn't really matter. Just don't parse it pls.
            });
        });
    });
});
/**
 * Runs the input through the MacroParser and returns the result.
 *
 * Fails loudly if the parser reported any errors, so error-free parsing is
 * asserted implicitly. Use `runParserAndGetErrors` to test against errors explicitly.
 *
 * @param {string} input - The input string to be parsed.
 * @param {Object} [options={}] Optional arguments
 * @param {string[]} [options.flattenKeys=[]] Optional array of dot-separated keys to flatten
 * @param {string[]} [options.ignoreKeys=[]] Optional array of dot-separated keys to ignore
 * @returns {Promise<TestableCstNode>} A promise that resolves to the result of the MacroParser.
 * @throws {Error} If the parser produced any errors
 */
async function runParser(input, options = {}) {
    const { cst, errors } = await runParserAndGetErrors(input, options);
    // Surface parser errors as a failure even when the resulting structure might look usable.
    if (!errors.length) {
        return cst;
    }
    const details = errors.map((err) => err.message).join('\n');
    throw new Error(`Parser errors found\n${details}`);
}
/**
 * Runs the input through the MacroParser and returns the syntax tree result and any parser errors.
 *
 * Use `runParser` if you don't want to explicitly test against parser errors.
 *
 * @param {string} input - The input string to be parsed.
 * @param {Object} [options={}] Optional arguments
 * @param {string[]} [options.flattenKeys=[]] Optional array of dot-separated keys to flatten
 * @param {string[]} [options.ignoreKeys=[]] Optional array of dot-separated keys to ignore
 * @returns {Promise<{cst: TestableCstNode, errors: TestableRecognitionException[]}>} A promise that resolves to the result of the MacroParser and error list.
 */
async function runParserAndGetErrors(input, options = {}) {
    // Parsing happens inside the browser page; only serializable data comes back.
    const rawResult = await page.evaluate(async (text) => {
        /** @type {import('../../public/scripts/macros/MacroParser.js')} */
        const { MacroParser } = await import('./scripts/macros/MacroParser.js');
        return MacroParser.test(text);
    }, input);
    const cst = simplifyCstNode(rawResult.cst, input, options);
    const errors = simplifyErrors(rawResult.errors);
    return { cst, errors };
}
/**
 * Simplify the parser syntax tree result into an easily testable format.
 *
 * @param {CstNode} cst The result from the parser
 * @param {string} input The original input string, used to slice flattened nodes' raw text by source offsets
 * @param {Object} [options={}] Optional arguments
 * @param {string[]} [options.flattenKeys=[]] Optional array of dot-separated keys whose subtree is flattened to its raw source text
 * @param {string[]} [options.ignoreKeys=[]] Optional array of dot-separated keys to ignore
 * @returns {TestableCstNode} The testable syntax tree
 */
function simplifyCstNode(cst, input, { flattenKeys = [], ignoreKeys = [] } = {}) {
    /** @returns {TestableCstNode} @param {CstNode} node @param {string[]} path */
    function simplifyNode(node, path = []) {
        if (!node) return node;
        if (Array.isArray(node)) {
            // Single-element arrays are converted to a single string
            // NOTE(review): an empty-string .image would be falsy and fall through to the recursive call —
            // presumably tokens always have non-empty images; confirm.
            if (node.length === 1) {
                return node[0].image || simplifyNode(node[0], path.concat('[]'));
            }
            // For multiple elements, return an array of simplified nodes
            return node.map(child => simplifyNode(child, path.concat('[]')));
        }
        if (node.children) {
            const simplifiedChildren = {};
            for (const key in node.children) {
                // Handles one child entry; arrays recurse, scalar nodes get the ignore/flatten treatment.
                function simplifyChildNode(childNode, path) {
                    if (Array.isArray(childNode)) {
                        // Single-element arrays are converted to a single string
                        if (childNode.length === 1) {
                            return simplifyChildNode(childNode[0], path.concat('[]'));
                        }
                        return childNode.map(child => simplifyChildNode(child, path.concat('[]')));
                    }
                    // Dot-separated key path without the array markers, matched against the options
                    const flattenKey = path.filter(x => x !== '[]').join('.');
                    if (ignoreKeys.includes(flattenKey)) {
                        return null;
                    } else if (flattenKeys.includes(flattenKey)) {
                        // Flattened nodes are replaced by the raw input text they span (endOffset is inclusive)
                        const startOffset = childNode.location.startOffset;
                        const endOffset = childNode.location.endOffset;
                        return input.slice(startOffset, endOffset + 1);
                    } else {
                        return simplifyNode(childNode, path);
                    }
                }
                const simplifiedValue = simplifyChildNode(node.children[key], path.concat(key));
                // Falsy simplified values (null from ignoreKeys, empty strings) are deliberately omitted
                simplifiedValue && (simplifiedChildren[key] = simplifiedValue);
            }
            return simplifiedChildren;
        }
        // Leaf token: its raw matched text
        return node.image;
    }
    return simplifyNode(cst);
}
/**
 * Simplifies a recognition exceptions into an easily testable format.
 *
 * Keeps only the `name` and `message` fields of each exception.
 *
 * @param {IRecognitionException[]} errors - The error list containing exceptions to be simplified.
 * @return {TestableRecognitionException[]} - The simplified error list
 */
function simplifyErrors(errors) {
    const simplified = [];
    for (const { name, message } of errors) {
        simplified.push({ name, message });
    }
    return simplified;
}

5
tests/jsconfig.json Normal file
View File

@@ -0,0 +1,5 @@
{
"compilerOptions": {
"baseUrl": ".",
},
}

View File

@@ -3,7 +3,9 @@
"type": "module",
"license": "AGPL-3.0",
"scripts": {
"test": "jest"
"test": "jest",
"lint": "eslint \"**/*.js\" ./*.js",
"lint:fix": "eslint \"**/*.js\" ./*.js --fix"
},
"dependencies": {
"@types/jest": "^29.5.12",