mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Slight improvements on lexer & first tests
This commit is contained in:
@@ -17,27 +17,26 @@ const Tokens = {
|
||||
// General macro capture
|
||||
Macro: {
|
||||
Start: createToken({ name: 'MacroStart', pattern: /\{\{/ }),
|
||||
// Separate macro identifier needed, that is similar to the global identifier, but captures the actual macro "name"
|
||||
// We need this, because this token is going to switch lexer mode, while the general identifier does not.
|
||||
Identifier: createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ }),
|
||||
// CaptureBeforeEnd: createToken({ name: 'MacroCaptureBeforeEnd', pattern: /.*?(?=\}\})/, pop_mode: true/*, group: Lexer.SKIPPED */ }),
|
||||
End: createToken({ name: 'MacroEnd', pattern: /\}\}/ }),
|
||||
},
|
||||
|
||||
// Captures that only appear inside arguments
|
||||
Args: {
|
||||
|
||||
DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
|
||||
Colon: createToken({ name: 'Colon', pattern: /:/ }),
|
||||
Equals: createToken({ name: 'Equals', pattern: /=/ }),
|
||||
Quote: createToken({ name: 'Quote', pattern: /"/ }),
|
||||
},
|
||||
|
||||
// All tokens that can be captured inside a macro
|
||||
DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
|
||||
Colon: createToken({ name: 'Colon', pattern: /:/ }),
|
||||
Equals: createToken({ name: 'Equals', pattern: /=/ }),
|
||||
Quote: createToken({ name: 'Quote', pattern: /"/ }),
|
||||
Identifier: createToken({ name: 'Identifier', pattern: /[a-zA-Z_]\w*/ }),
|
||||
WhiteSpace: createToken({
|
||||
name: 'WhiteSpace',
|
||||
pattern: /\s+/,
|
||||
group: Lexer.SKIPPED,
|
||||
}),
|
||||
WhiteSpace: createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED }),
|
||||
|
||||
// Capture unknown characters one by one, to still allow other tokens being matched once they are there
|
||||
Unknown: createToken({ name: 'Unknown', pattern: /[^{}]/ }),
|
||||
|
||||
// TODO: Capture-all rest for now, that is not the macro end or opening of a new macro. Might be replaced later down the line.
|
||||
@@ -69,10 +68,10 @@ const Def = {
|
||||
// Macro args allow nested macros
|
||||
enter(Tokens.Macro.Start, modes.macro_def),
|
||||
|
||||
using(Tokens.DoubleColon),
|
||||
using(Tokens.Colon),
|
||||
using(Tokens.Equals),
|
||||
using(Tokens.Quote),
|
||||
using(Tokens.Args.DoubleColon),
|
||||
using(Tokens.Args.Colon),
|
||||
using(Tokens.Args.Equals),
|
||||
using(Tokens.Args.Quote),
|
||||
using(Tokens.Identifier),
|
||||
|
||||
using(Tokens.WhiteSpace),
|
||||
@@ -134,6 +133,7 @@ instance = MacroLexer.instance;
|
||||
* @returns {TokenType} The token again
|
||||
*/
|
||||
function enter(token, mode) {
|
||||
if (!token) throw new Error('Token must not be undefined');
|
||||
if (enterModesMap.has(token.name) && enterModesMap.get(token.name) !== mode) {
|
||||
throw new Error(`Token ${token.name} already is set to enter mode ${enterModesMap.get(token.name)}. The token definition are global, so they cannot be used to lead to different modes.`);
|
||||
}
|
||||
@@ -155,6 +155,7 @@ function enter(token, mode) {
|
||||
* @returns {TokenType} The token again
|
||||
*/
|
||||
/**
 * Marks a token as one that pops the current lexer mode when matched.
 *
 * The `mode` argument exists purely for readability at the definition site
 * (and to satisfy the linter); the POP_MODE flag is derived from it.
 *
 * @param {TokenType} token The token to flag as a mode exit
 * @param {string} mode The mode being exited (documentation-only)
 * @returns {TokenType} The token again
 */
function exits(token, mode) {
    if (!token) {
        throw new Error('Token must not be undefined');
    }
    // Always set to true. We just use the mode here, so the linter thinks it was used. We just pass it in for clarity in the definition
    token.POP_MODE = Boolean(mode);
    return token;
}
|
||||
@@ -169,6 +170,7 @@ function exits(token, mode) {
|
||||
* @returns {TokenType} The token again
|
||||
*/
|
||||
function using(token) {
|
||||
if (!token) throw new Error('Token must not be undefined');
|
||||
if (enterModesMap.has(token.name)) {
|
||||
throw new Error(`Token ${token.name} is already marked to enter a mode (${enterModesMap.get(token.name)}). The token definition are global, so they cannot be used to lead or stay differently.`);
|
||||
}
|
||||
|
197
tests/frontend/MacroLexer.test.js
Normal file
197
tests/frontend/MacroLexer.test.js
Normal file
@@ -0,0 +1,197 @@
|
||||
/** @typedef {import('../../public/lib/chevrotain.js').ILexingResult} ILexingResult */
|
||||
/** @typedef {{type: string, text: string}} TestableToken */
|
||||
|
||||
// End-to-end tests for the MacroLexer: each case tokenizes an input string in the
// running frontend page and compares the simplified token stream against expectations.
describe("MacroLexer Tests", () => {
    // Load the app once for the whole suite and wait until the preloader is gone,
    // so the lexer module can be imported inside the page. NOTE(review): timeout 0
    // means "wait forever" — presumably intended for slow CI startups; confirm.
    beforeAll(async () => {
        await page.goto(global.ST_URL);
        await page.waitForFunction('document.getElementById("preloader") === null', { timeout: 0 });
    });

    // Plaintext around a simple macro is captured as Plaintext tokens.
    it("basic macro tokenization", async () => {
        const input = "Hello, {{user}}!";
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'Plaintext', text: 'Hello, ' },
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'user' },
            { type: 'MacroEnd', text: '}}' },
            { type: 'Plaintext', text: '!' },
        ];

        // Compare the actual result with expected tokens
        expect(tokens).toEqual(expectedTokens);
    });

    // Input without any macro syntax yields a single Plaintext token.
    it("should tokenize plaintext only", async () => {
        const input = "Just some text here.";
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'Plaintext', text: 'Just some text here.' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });

    // A bare macro produces no surrounding Plaintext tokens.
    it("should handle macro only", async () => {
        const input = "{{user}}";
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'user' },
            { type: 'MacroEnd', text: '}}' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });

    // "{{}}" is lexed as start/end with no identifier in between.
    it("should handle empty macro", async () => {
        const input = "{{}}";
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroEnd', text: '}}' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });


    // A macro opening inside another macro re-enters macro mode (nesting).
    it("should handle nested macros", async () => {
        const input = "{{outerMacro {{innerMacro}}}}";
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'outerMacro' },
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'innerMacro' },
            { type: 'MacroEnd', text: '}}' },
            { type: 'MacroEnd', text: '}}' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });

    // "::"-separated arguments: words split into Identifiers, whitespace is skipped,
    // and characters like "!" fall through to the Unknown token.
    it("should tokenize macros with double colons arguments correctly", async () => {
        const input = "{{setvar::myVar::This is Sparta!}}";
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'setvar' },
            { type: 'DoubleColon', text: '::' },
            { type: 'Identifier', text: 'myVar' },
            { type: 'DoubleColon', text: '::' },
            { type: 'Identifier', text: 'This' },
            { type: 'Identifier', text: 'is' },
            { type: 'Identifier', text: 'Sparta' },
            { type: 'Unknown', text: '!' },
            { type: 'MacroEnd', text: '}}' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });

    // key=value pairs lex as Identifier / Equals / Identifier triples.
    it("should handle named arguments with key=value syntax", async () => {
        const input = "{{doStuff key=MyValue another=AnotherValue}}";
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'doStuff' },
            { type: 'Identifier', text: 'key' },
            { type: 'Equals', text: '=' },
            { type: 'Identifier', text: 'MyValue' },
            { type: 'Identifier', text: 'another' },
            { type: 'Equals', text: '=' },
            { type: 'Identifier', text: 'AnotherValue' },
            { type: 'MacroEnd', text: '}}' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });

    // Quotes are emitted as separate Quote tokens; the quoted value is still
    // split into Identifiers (no string-literal mode yet).
    it("should handle named arguments with quotation marks", async () => {
        const input = '{{getvar key="My variable"}}';
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'getvar' },
            { type: 'Identifier', text: 'key' },
            { type: 'Equals', text: '=' },
            { type: 'Quote', text: '"' },
            { type: 'Identifier', text: 'My' },
            { type: 'Identifier', text: 'variable' },
            { type: 'Quote', text: '"' },
            { type: 'MacroEnd', text: '}}' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });

    // Several quoted unnamed arguments: each quote is its own token, words inside
    // become Identifiers, and inter-argument whitespace is skipped.
    it("should handle multiple unnamed arguments in quotation marks", async () => {
        const input = '{{random "this" "and that" "and some more"}}';
        const tokens = await runLexerGetTokens(input);

        const expectedTokens = [
            { type: 'MacroStart', text: '{{' },
            { type: 'MacroIdentifier', text: 'random' },
            { type: 'Quote', text: '"' },
            { type: 'Identifier', text: 'this' },
            { type: 'Quote', text: '"' },
            { type: 'Quote', text: '"' },
            { type: 'Identifier', text: 'and' },
            { type: 'Identifier', text: 'that' },
            { type: 'Quote', text: '"' },
            { type: 'Quote', text: '"' },
            { type: 'Identifier', text: 'and' },
            { type: 'Identifier', text: 'some' },
            { type: 'Identifier', text: 'more' },
            { type: 'Quote', text: '"' },
            { type: 'MacroEnd', text: '}}' }
        ];

        expect(tokens).toEqual(expectedTokens);
    });

});
|
||||
|
||||
/**
|
||||
* Asynchronously runs the MacroLexer on the given input and returns the tokens.
|
||||
*
|
||||
* @param {string} input - The input string to be tokenized.
|
||||
* @return {Promise<TestableToken[]>} A promise that resolves to an array of tokens.
|
||||
*/
|
||||
/**
 * Asynchronously runs the MacroLexer on the given input and returns the tokens.
 *
 * Tokenization happens inside the browser context (via page.evaluate), where the
 * lexer module is served; the raw result is then reduced to a testable shape.
 *
 * @param {string} input - The input string to be tokenized.
 * @return {Promise<TestableToken[]>} A promise that resolves to an array of tokens.
 */
async function runLexerGetTokens(input) {
    const lexingResult = await page.evaluate(async (text) => {
        /** @type {import('../../public/scripts/macros/MacroLexer.js')} */
        const { MacroLexer } = await import('./scripts/macros/MacroLexer.js');

        return MacroLexer.tokenize(text);
    }, input);

    // Strip lexer internals so tests only compare type/text pairs.
    return getTestableTokens(lexingResult);
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {ILexingResult} result The result from the lexer
|
||||
* @returns {TestableToken[]} The tokens
|
||||
*/
|
||||
/**
 * Reduces a lexing result to a flat list of comparable tokens.
 *
 * @param {ILexingResult} result The result from the lexer
 * @returns {TestableToken[]} The tokens
 */
function getTestableTokens(result) {
    const testable = [];
    for (const token of result.tokens) {
        // Filter out the mode popper. We don't care about that for testing.
        if (token.tokenType.name === 'EndMode') {
            continue;
        }
        // Keep only the properties the tests assert on.
        testable.push({
            type: token.tokenType.name,
            text: token.image,
        });
    }
    return testable;
}
|
Reference in New Issue
Block a user