Slight improvements on lexer & first tests

Wolfsblvt
2024-07-28 03:39:07 +02:00
parent dd8537fa18
commit 1f1bd4427b
2 changed files with 213 additions and 14 deletions


@@ -17,27 +17,26 @@ const Tokens = {
// General macro capture
Macro: {
Start: createToken({ name: 'MacroStart', pattern: /\{\{/ }),
// A separate macro identifier is needed; it is similar to the global identifier, but captures the actual macro "name".
// We need this because this token switches the lexer mode, while the general identifier does not.
Identifier: createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ }),
// CaptureBeforeEnd: createToken({ name: 'MacroCaptureBeforeEnd', pattern: /.*?(?=\}\})/, pop_mode: true/*, group: Lexer.SKIPPED */ }),
End: createToken({ name: 'MacroEnd', pattern: /\}\}/ }),
},
+ // Captures that only appear inside arguments
+ Args: {
+ DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
+ Colon: createToken({ name: 'Colon', pattern: /:/ }),
+ Equals: createToken({ name: 'Equals', pattern: /=/ }),
+ Quote: createToken({ name: 'Quote', pattern: /"/ }),
+ },
// All tokens that can be captured inside a macro
- DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
- Colon: createToken({ name: 'Colon', pattern: /:/ }),
- Equals: createToken({ name: 'Equals', pattern: /=/ }),
- Quote: createToken({ name: 'Quote', pattern: /"/ }),
Identifier: createToken({ name: 'Identifier', pattern: /[a-zA-Z_]\w*/ }),
- WhiteSpace: createToken({
-     name: 'WhiteSpace',
-     pattern: /\s+/,
-     group: Lexer.SKIPPED,
- }),
+ WhiteSpace: createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED }),
// Capture unknown characters one by one, so other tokens can still be matched as soon as they appear
Unknown: createToken({ name: 'Unknown', pattern: /[^{}]/ }),
// TODO: Catch-all for now for anything that is not the macro end or the opening of a new macro. Might be replaced later down the line.
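
For context, here is a minimal sketch of how token tables like this are typically assembled into a Chevrotain multi-mode lexer. The mode names, the Plaintext token, and the wiring below are illustrative assumptions; the actual assembly lives elsewhere in MacroLexer.js and is not part of this diff.

import { createToken, Lexer } from 'chevrotain';

// Illustrative tokens mirroring the definitions above
const MacroStart = createToken({ name: 'MacroStart', pattern: /\{\{/, push_mode: 'macro_def' });
const MacroIdentifier = createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ });
const MacroEnd = createToken({ name: 'MacroEnd', pattern: /\}\}/, pop_mode: true });
const Plaintext = createToken({ name: 'Plaintext', pattern: /[^{}]+/ });

// Each mode lists the token types that may be matched while it is active
const lexer = new Lexer({
    modes: {
        plaintext: [MacroStart, Plaintext],
        macro_def: [MacroEnd, MacroStart, MacroIdentifier],
    },
    defaultMode: 'plaintext',
});

const { tokens } = lexer.tokenize('Hello, {{user}}!');
// -> Plaintext('Hello, '), MacroStart('{{'), MacroIdentifier('user'), MacroEnd('}}'), Plaintext('!')
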
@@ -69,10 +68,10 @@ const Def = {
// Macro args allow nested macros
enter(Tokens.Macro.Start, modes.macro_def),
- using(Tokens.DoubleColon),
- using(Tokens.Colon),
- using(Tokens.Equals),
- using(Tokens.Quote),
+ using(Tokens.Args.DoubleColon),
+ using(Tokens.Args.Colon),
+ using(Tokens.Args.Equals),
+ using(Tokens.Args.Quote),
using(Tokens.Identifier),
using(Tokens.WhiteSpace),
@@ -134,6 +133,7 @@ instance = MacroLexer.instance;
* @returns {TokenType} The token again
*/
function enter(token, mode) {
+ if (!token) throw new Error('Token must not be undefined');
if (enterModesMap.has(token.name) && enterModesMap.get(token.name) !== mode) {
throw new Error(`Token ${token.name} is already set to enter mode ${enterModesMap.get(token.name)}. Token definitions are global, so the same token cannot lead to different modes.`);
}
@@ -155,6 +155,7 @@ function enter(token, mode) {
* @returns {TokenType} The token again
*/
function exits(token, mode) {
+ if (!token) throw new Error('Token must not be undefined');
token.POP_MODE = !!mode; // Always true. The mode is only referenced here so the linter counts it as used; it is passed in purely to make the definition read clearly.
return token;
}
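
For reference, Chevrotain can also express these mode switches directly in the token config (the commented-out CaptureBeforeEnd token above already uses pop_mode this way). The exits() helper sets the equivalent POP_MODE property after the fact, and enter() presumably does the same for PUSH_MODE; the mode name string below is an assumption for illustration.

// Config-based equivalent of enter(Tokens.Macro.Start, modes.macro_def):
const MacroStart = createToken({ name: 'MacroStart', pattern: /\{\{/, push_mode: 'macro_def' });
// Config-based equivalent of exits(Tokens.Macro.End, modes.macro_def):
const MacroEnd = createToken({ name: 'MacroEnd', pattern: /\}\}/, pop_mode: true });
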
@@ -169,6 +170,7 @@ function exits(token, mode) {
* @returns {TokenType} The token again
*/
function using(token) {
+ if (!token) throw new Error('Token must not be undefined');
if (enterModesMap.has(token.name)) {
throw new Error(`Token ${token.name} is already marked to enter a mode (${enterModesMap.get(token.name)}). Token definitions are global, so the same token cannot both enter a mode and be used as a plain token.`);
}
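
These guards matter because the token definitions are module-global: registering one token for two different modes would otherwise silently produce an inconsistent lexer definition. A hypothetical misuse that now fails fast (the second mode name is illustrative):

enter(Tokens.Macro.Start, modes.macro_def);
enter(Tokens.Macro.Start, modes.macro_args); // throws: Token MacroStart is already set to enter mode ...
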


@@ -0,0 +1,197 @@
/** @typedef {import('../../public/lib/chevrotain.js').ILexingResult} ILexingResult */
/** @typedef {{type: string, text: string}} TestableToken */
describe("MacroLexer Tests", () => {
beforeAll(async () => {
await page.goto(global.ST_URL);
await page.waitForFunction('document.getElementById("preloader") === null', { timeout: 0 });
});
it("basic macro tokenization", async () => {
const input = "Hello, {{user}}!";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'Plaintext', text: 'Hello, ' },
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'user' },
{ type: 'MacroEnd', text: '}}' },
{ type: 'Plaintext', text: '!' },
];
// Compare the actual result with expected tokens
expect(tokens).toEqual(expectedTokens);
});
it("should tokenize plaintext only", async () => {
const input = "Just some text here.";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'Plaintext', text: 'Just some text here.' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle macro only", async () => {
const input = "{{user}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'user' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle empty macro", async () => {
const input = "{{}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle nested macros", async () => {
const input = "{{outerMacro {{innerMacro}}}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'outerMacro' },
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'innerMacro' },
{ type: 'MacroEnd', text: '}}' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should tokenize macros with double colons arguments correctly", async () => {
const input = "{{setvar::myVar::This is Sparta!}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'setvar' },
{ type: 'DoubleColon', text: '::' },
{ type: 'Identifier', text: 'myVar' },
{ type: 'DoubleColon', text: '::' },
{ type: 'Identifier', text: 'This' },
{ type: 'Identifier', text: 'is' },
{ type: 'Identifier', text: 'Sparta' },
{ type: 'Unknown', text: '!' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle named arguments with key=value syntax", async () => {
const input = "{{doStuff key=MyValue another=AnotherValue}}";
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'doStuff' },
{ type: 'Identifier', text: 'key' },
{ type: 'Equals', text: '=' },
{ type: 'Identifier', text: 'MyValue' },
{ type: 'Identifier', text: 'another' },
{ type: 'Equals', text: '=' },
{ type: 'Identifier', text: 'AnotherValue' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle named arguments with quotation marks", async () => {
const input = '{{getvar key="My variable"}}';
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'getvar' },
{ type: 'Identifier', text: 'key' },
{ type: 'Equals', text: '=' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'My' },
{ type: 'Identifier', text: 'variable' },
{ type: 'Quote', text: '"' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
it("should handle multiple unnamed arguments in quotation marks", async () => {
const input = '{{random "this" "and that" "and some more"}}';
const tokens = await runLexerGetTokens(input);
const expectedTokens = [
{ type: 'MacroStart', text: '{{' },
{ type: 'MacroIdentifier', text: 'random' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'this' },
{ type: 'Quote', text: '"' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'and' },
{ type: 'Identifier', text: 'that' },
{ type: 'Quote', text: '"' },
{ type: 'Quote', text: '"' },
{ type: 'Identifier', text: 'and' },
{ type: 'Identifier', text: 'some' },
{ type: 'Identifier', text: 'more' },
{ type: 'Quote', text: '"' },
{ type: 'MacroEnd', text: '}}' }
];
expect(tokens).toEqual(expectedTokens);
});
});
/**
* Asynchronously runs the MacroLexer on the given input and returns the tokens.
*
* @param {string} input - The input string to be tokenized.
* @returns {Promise<TestableToken[]>} A promise that resolves to an array of tokens.
*/
async function runLexerGetTokens(input) {
const result = await page.evaluate(async (input) => {
/** @type {import('../../public/scripts/macros/MacroLexer.js')} */
const { MacroLexer } = await import('./scripts/macros/MacroLexer.js');
const result = MacroLexer.tokenize(input);
return result;
}, input);
const tokens = getTestableTokens(result);
return tokens;
}
/**
* Reduces the raw lexer result to minimal tokens for comparison.
* @param {ILexingResult} result The result from the lexer
* @returns {TestableToken[]} The tokens
*/
function getTestableTokens(result) {
return result.tokens
// Filter out the mode popper. We don't care about that for testing
.filter(token => token.tokenType.name !== 'EndMode')
// Extract relevant properties from tokens for comparison
.map(token => ({
type: token.tokenType.name,
text: token.image
}));
}
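
For reference, getTestableTokens strips a raw Chevrotain token down to the two fields the assertions need. Since page.evaluate() serializes its return value, the tokenType on each token comes back as plain data, but its name survives, which is all the tests use. A sketch of one raw entry in result.tokens for the input "Hello, {{user}}!" (exact offsets and indices depend on the input):

// {
//     image: '{{',                 // kept as `text`
//     startOffset: 7, endOffset: 8,
//     startLine: 1, endLine: 1,
//     startColumn: 8, endColumn: 9,
//     tokenTypeIdx: 3,             // internal index, varies
//     tokenType: { name: 'MacroStart', ... }, // name is kept as `type`
// }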