Slight improvements on lexer & first tests
@@ -17,27 +17,26 @@ const Tokens = {
     // General macro capture
     Macro: {
        Start: createToken({ name: 'MacroStart', pattern: /\{\{/ }),
+        // Separate macro identifier needed, that is similar to the global identifier, but captures the actual macro "name".
+        // We need this because this token is going to switch lexer mode, while the general identifier does not.
        Identifier: createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ }),
        // CaptureBeforeEnd: createToken({ name: 'MacroCaptureBeforeEnd', pattern: /.*?(?=\}\})/, pop_mode: true/*, group: Lexer.SKIPPED */ }),
        End: createToken({ name: 'MacroEnd', pattern: /\}\}/ }),
    },
 
+    // Captures that only appear inside arguments
     Args: {
-
+        DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
+        Colon: createToken({ name: 'Colon', pattern: /:/ }),
+        Equals: createToken({ name: 'Equals', pattern: /=/ }),
+        Quote: createToken({ name: 'Quote', pattern: /"/ }),
     },
 
     // All tokens that can be captured inside a macro
-    DoubleColon: createToken({ name: 'DoubleColon', pattern: /::/ }),
-    Colon: createToken({ name: 'Colon', pattern: /:/ }),
-    Equals: createToken({ name: 'Equals', pattern: /=/ }),
-    Quote: createToken({ name: 'Quote', pattern: /"/ }),
     Identifier: createToken({ name: 'Identifier', pattern: /[a-zA-Z_]\w*/ }),
-    WhiteSpace: createToken({
-        name: 'WhiteSpace',
-        pattern: /\s+/,
-        group: Lexer.SKIPPED,
-    }),
+    WhiteSpace: createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED }),
 
+    // Capture unknown characters one by one, to still allow other tokens being matched once they are there
     Unknown: createToken({ name: 'Unknown', pattern: /[^{}]/ }),
 
     // TODO: Capture-all rest for now, that is not the macro end or opening of a new macro. Might be replaced later down the line.
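The grouping above feeds Chevrotain's multi-mode lexer, where matching a token can push or pop the active mode. As a reference for how the Start and End tokens drive that switching, here is a minimal self-contained sketch; the mode names and wiring are illustrative assumptions, not the actual MacroLexer definition:

    // Illustrative sketch only; mode names and token set are assumptions.
    const { createToken, Lexer } = require('chevrotain');

    const MacroStart = createToken({ name: 'MacroStart', pattern: /\{\{/, push_mode: 'macro_def' });
    const MacroIdentifier = createToken({ name: 'MacroIdentifier', pattern: /[a-zA-Z_]\w*/ });
    const MacroEnd = createToken({ name: 'MacroEnd', pattern: /\}\}/, pop_mode: true });
    const Plaintext = createToken({ name: 'Plaintext', pattern: /[^{}]+/ });
    const WhiteSpace = createToken({ name: 'WhiteSpace', pattern: /\s+/, group: Lexer.SKIPPED });

    const sketchLexer = new Lexer({
        modes: {
            // Outside a macro, only plaintext and '{{' can match.
            plaintext: [MacroStart, Plaintext],
            // Inside a macro, '{{' may push again (nested macros); '}}' pops back out.
            macro_def: [MacroEnd, MacroStart, WhiteSpace, MacroIdentifier],
        },
        defaultMode: 'plaintext',
    });

    const out = sketchLexer.tokenize('Hello, {{user}}!');
    console.log(out.tokens.map(t => `${t.tokenType.name}(${t.image})`).join(' '));
    // -> Plaintext(Hello, ) MacroStart({{) MacroIdentifier(user) MacroEnd(}}) Plaintext(!)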
@@ -69,10 +68,10 @@ const Def = {
     // Macro args allow nested macros
     enter(Tokens.Macro.Start, modes.macro_def),
 
-    using(Tokens.DoubleColon),
-    using(Tokens.Colon),
-    using(Tokens.Equals),
-    using(Tokens.Quote),
+    using(Tokens.Args.DoubleColon),
+    using(Tokens.Args.Colon),
+    using(Tokens.Args.Equals),
+    using(Tokens.Args.Quote),
     using(Tokens.Identifier),
 
     using(Tokens.WhiteSpace),
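A detail worth keeping in mind with this ordering: Chevrotain tries token patterns in array order, so DoubleColon must stay ahead of Colon, otherwise '::' would lex as two Colon tokens. A standalone check of that behavior (illustrative):

    const { createToken, Lexer } = require('chevrotain');

    const DoubleColon = createToken({ name: 'DoubleColon', pattern: /::/ });
    const Colon = createToken({ name: 'Colon', pattern: /:/ });
    const Identifier = createToken({ name: 'Identifier', pattern: /[a-zA-Z_]\w*/ });

    // DoubleColon is listed first, so '::' wins over two single-colon matches.
    const lexer = new Lexer([DoubleColon, Colon, Identifier]);
    const { tokens } = lexer.tokenize('setvar::myVar');
    console.log(tokens.map(t => t.tokenType.name));
    // -> [ 'Identifier', 'DoubleColon', 'Identifier' ]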
@@ -134,6 +133,7 @@ instance = MacroLexer.instance;
  * @returns {TokenType} The token again
  */
 function enter(token, mode) {
+    if (!token) throw new Error('Token must not be undefined');
     if (enterModesMap.has(token.name) && enterModesMap.get(token.name) !== mode) {
         throw new Error(`Token ${token.name} is already set to enter mode ${enterModesMap.get(token.name)}. The token definitions are global, so they cannot be used to lead to different modes.`);
     }
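Only the new guard and the conflict check are visible in this hunk; the rest of enter lies outside the diff context. A plausible reconstruction of the whole helper, where everything after the guard is an assumption inferred from Chevrotain's PUSH_MODE token property and the POP_MODE assignment visible in exits below:

    function enter(token, mode) {
        if (!token) throw new Error('Token must not be undefined');
        if (enterModesMap.has(token.name) && enterModesMap.get(token.name) !== mode) {
            throw new Error(`Token ${token.name} is already set to enter mode ${enterModesMap.get(token.name)}.`);
        }
        // Assumed body: remember the mapping and tag the token so the
        // multi-mode lexer pushes `mode` whenever this token is matched.
        enterModesMap.set(token.name, mode);
        token.PUSH_MODE = mode;
        return token;
    }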
@@ -155,6 +155,7 @@ function enter(token, mode) {
  * @returns {TokenType} The token again
  */
 function exits(token, mode) {
+    if (!token) throw new Error('Token must not be undefined');
     token.POP_MODE = !!mode; // Always set to true. We only reference the mode here so the linter sees it used; it is passed in purely for clarity in the definition.
     return token;
 }
@@ -169,6 +170,7 @@ function exits(token, mode) {
  * @returns {TokenType} The token again
  */
 function using(token) {
+    if (!token) throw new Error('Token must not be undefined');
     if (enterModesMap.has(token.name)) {
         throw new Error(`Token ${token.name} is already marked to enter a mode (${enterModesMap.get(token.name)}). The token definitions are global, so a token cannot enter a mode in one place and be used as a regular token in another.`);
     }
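Taken together, the three guards enforce that a globally defined token keeps one consistent mode role. A short sketch of how they would reject a conflicting definition (illustrative, using the helpers above):

    const start = createToken({ name: 'MacroStart', pattern: /\{\{/ });

    enter(start, 'macro_def');      // OK: records MacroStart -> macro_def
    enter(start, 'macro_def');      // OK: the same mode again is harmless
    // enter(start, 'macro_args');  // would throw: token already enters macro_def
    // using(start);                // would also throw: token is already a mode-enterer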
tests/frontend/MacroLexer.test.js (new file, 197 lines)
@@ -0,0 +1,197 @@
+/** @typedef {import('../../public/lib/chevrotain.js').ILexingResult} ILexingResult */
+/** @typedef {{type: string, text: string}} TestableToken */
+
+describe("MacroLexer Tests", () => {
+    beforeAll(async () => {
+        await page.goto(global.ST_URL);
+        await page.waitForFunction('document.getElementById("preloader") === null', { timeout: 0 });
+    });
+
+    it("basic macro tokenization", async () => {
+        const input = "Hello, {{user}}!";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'Plaintext', text: 'Hello, ' },
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'user' },
+            { type: 'MacroEnd', text: '}}' },
+            { type: 'Plaintext', text: '!' },
+        ];
+
+        // Compare the actual result with the expected tokens
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should tokenize plaintext only", async () => {
+        const input = "Just some text here.";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'Plaintext', text: 'Just some text here.' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle macro only", async () => {
+        const input = "{{user}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'user' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle empty macro", async () => {
+        const input = "{{}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+
+    it("should handle nested macros", async () => {
+        const input = "{{outerMacro {{innerMacro}}}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'outerMacro' },
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'innerMacro' },
+            { type: 'MacroEnd', text: '}}' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should tokenize macros with double-colon arguments correctly", async () => {
+        const input = "{{setvar::myVar::This is Sparta!}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'setvar' },
+            { type: 'DoubleColon', text: '::' },
+            { type: 'Identifier', text: 'myVar' },
+            { type: 'DoubleColon', text: '::' },
+            { type: 'Identifier', text: 'This' },
+            { type: 'Identifier', text: 'is' },
+            { type: 'Identifier', text: 'Sparta' },
+            { type: 'Unknown', text: '!' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle named arguments with key=value syntax", async () => {
+        const input = "{{doStuff key=MyValue another=AnotherValue}}";
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'doStuff' },
+            { type: 'Identifier', text: 'key' },
+            { type: 'Equals', text: '=' },
+            { type: 'Identifier', text: 'MyValue' },
+            { type: 'Identifier', text: 'another' },
+            { type: 'Equals', text: '=' },
+            { type: 'Identifier', text: 'AnotherValue' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle named arguments with quotation marks", async () => {
+        const input = '{{getvar key="My variable"}}';
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'getvar' },
+            { type: 'Identifier', text: 'key' },
+            { type: 'Equals', text: '=' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'My' },
+            { type: 'Identifier', text: 'variable' },
+            { type: 'Quote', text: '"' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+    it("should handle multiple unnamed arguments in quotation marks", async () => {
+        const input = '{{random "this" "and that" "and some more"}}';
+        const tokens = await runLexerGetTokens(input);
+
+        const expectedTokens = [
+            { type: 'MacroStart', text: '{{' },
+            { type: 'MacroIdentifier', text: 'random' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'this' },
+            { type: 'Quote', text: '"' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'and' },
+            { type: 'Identifier', text: 'that' },
+            { type: 'Quote', text: '"' },
+            { type: 'Quote', text: '"' },
+            { type: 'Identifier', text: 'and' },
+            { type: 'Identifier', text: 'some' },
+            { type: 'Identifier', text: 'more' },
+            { type: 'Quote', text: '"' },
+            { type: 'MacroEnd', text: '}}' },
+        ];
+
+        expect(tokens).toEqual(expectedTokens);
+    });
+
+});
+
+/**
+ * Asynchronously runs the MacroLexer on the given input and returns the tokens.
+ *
+ * @param {string} input - The input string to be tokenized.
+ * @return {Promise<TestableToken[]>} A promise that resolves to an array of tokens.
+ */
+async function runLexerGetTokens(input) {
+    const result = await page.evaluate(async (input) => {
+        /** @type {import('../../public/scripts/macros/MacroLexer.js')} */
+        const { MacroLexer } = await import('./scripts/macros/MacroLexer.js');
+
+        const result = MacroLexer.tokenize(input);
+        return result;
+    }, input);
+
+    const tokens = getTestableTokens(result);
+    return tokens;
+}
+
+/**
+ * Maps a raw lexing result to tokens that are easy to compare in tests.
+ * @param {ILexingResult} result The result from the lexer
+ * @returns {TestableToken[]} The tokens
+ */
+function getTestableTokens(result) {
+    return result.tokens
+        // Filter out the mode popper. We don't care about that for testing
+        .filter(token => token.tokenType.name !== 'EndMode')
+        // Extract relevant properties from tokens for comparison
+        .map(token => ({
+            type: token.tokenType.name,
+            text: token.image,
+        }));
+}