Fix lexing unknown flags - treat as error

This commit is contained in:
Wolfsblvt
2024-08-09 04:15:42 +02:00
parent ddb317f189
commit b7840eb9cd
2 changed files with 53 additions and 22 deletions

View File

@@ -82,7 +82,7 @@ const Def = {
using(Tokens.WhiteSpace), using(Tokens.WhiteSpace),
// Last fallback, before we need to exit the mode, as we might have characters we falsely haven't defined yet // Last fallback, before we need to exit the mode, as we might have characters we (wrongly) haven't defined yet
using(Tokens.Unknown), using(Tokens.Unknown),
// Args are optional, and we don't know how long, so exit the mode to be able to capture the actual macro end // Args are optional, and we don't know how long, so exit the mode to be able to capture the actual macro end

View File

@@ -1,4 +1,5 @@
/** @typedef {import('../../public/lib/chevrotain.js').ILexingResult} ILexingResult */ /** @typedef {import('../../public/lib/chevrotain.js').ILexingResult} ILexingResult */
/** @typedef {import('../../public/lib/chevrotain.js').ILexingError} ILexingError */
/** @typedef {{type: string, text: string}} TestableToken */ /** @typedef {{type: string, text: string}} TestableToken */
describe('MacroLexer', () => { describe('MacroLexer', () => {
@@ -583,14 +584,20 @@ describe('MacroLexer', () => {
expect(tokens).toEqual(expectedTokens); expect(tokens).toEqual(expectedTokens);
}); });
// {{ @unknown }} // {{ @unknown }}
it('do not capture unknown special characters as flag', async () => { it('[Error] do not capture unknown special characters as flag', async () => {
const input = '{{ @unknown }}'; const input = '{{ @unknown }}';
const tokens = await runLexerGetTokens(input); const { tokens, errors } = await runLexerGetTokensAndErrors(input);
const expectedErrors = [
{ message: 'unexpected character: ->@<- at offset: 3, skipped 1 characters.' },
];
expect(errors).toMatchObject(expectedErrors);
const expectedTokens = [ const expectedTokens = [
{ type: 'MacroStart', text: '{{' }, { type: 'MacroStart', text: '{{' },
{ type: 'Unknown', text: '@' }, // Do not capture '@' as anything, as it's a lexer error
{ type: 'Identifier', text: 'unknown' }, { type: 'MacroIdentifier', text: 'unknown' },
{ type: 'MacroEnd', text: '}}' }, { type: 'MacroEnd', text: '}}' },
]; ];
@@ -611,14 +618,19 @@ describe('MacroLexer', () => {
expect(tokens).toEqual(expectedTokens); expect(tokens).toEqual(expectedTokens);
}); });
// {{ 2 cents }} // {{ 2 cents }}
it('do not capture numbers as flag - they are also invalid macro identifiers', async () => { it('[Error] do not capture numbers as flag - they are also invalid macro identifiers', async () => {
const input = '{{ 2 cents }}'; const input = '{{ 2 cents }}';
const tokens = await runLexerGetTokens(input); const { tokens, errors } = await runLexerGetTokensAndErrors(input);
const expectedErrors = [
{ message: 'unexpected character: ->2<- at offset: 3, skipped 1 characters.' },
];
expect(errors).toMatchObject(expectedErrors);
const expectedTokens = [ const expectedTokens = [
{ type: 'MacroStart', text: '{{' }, { type: 'MacroStart', text: '{{' },
{ type: 'Unknown', text: '2' }, // Do not capture '2' as anything, as it's a lexer error
{ type: 'Identifier', text: 'cents' }, { type: 'MacroIdentifier', text: 'cents' },
{ type: 'MacroEnd', text: '}}' }, { type: 'MacroEnd', text: '}}' },
]; ];
@@ -677,7 +689,7 @@ describe('MacroLexer', () => {
}); });
}); });
describe('Error Cases in Macro Lexing', () => { describe('"Error" Cases in Macro Lexing', () => {
// this is an unopened_macro}} and will be done // this is an unopened_macro}} and will be done
it('lexer treats unopened macros as simple plaintext', async () => { it('lexer treats unopened macros as simple plaintext', async () => {
const input = 'this is an unopened_macro}} and will be done'; const input = 'this is an unopened_macro}} and will be done';
@@ -690,7 +702,7 @@ describe('MacroLexer', () => {
expect(tokens).toEqual(expectedTokens); expect(tokens).toEqual(expectedTokens);
}); });
// { { not a macro } } // { { not a macro } }
it('treats opening/clasing with whitspaces between brackets as not macros', async () => { it('treats opening/closing with whitespaces between brackets not as macros', async () => {
const input = '{ { not a macro } }'; const input = '{ { not a macro } }';
const tokens = await runLexerGetTokens(input); const tokens = await runLexerGetTokens(input);
@@ -706,10 +718,33 @@ describe('MacroLexer', () => {
/** /**
* Asynchronously runs the MacroLexer on the given input and returns the tokens. * Asynchronously runs the MacroLexer on the given input and returns the tokens.
* *
* Lexer errors will throw an Error. To test and validate lexer errors, use `runLexerGetTokensAndErrors`.
*
* @param {string} input - The input string to be tokenized. * @param {string} input - The input string to be tokenized.
* @return {Promise<TestableToken[]>} A promise that resolves to an array of tokens. * @returns {Promise<TestableToken[]>} A promise that resolves to an array of tokens.
*/ */
async function runLexerGetTokens(input) { async function runLexerGetTokens(input) {
const { tokens, errors } = await runLexerGetTokensAndErrors(input);
// Make sure that lexer errors get correctly marked as errors during testing, even if the resulting tokens might work.
// If we don't test for errors, the test should fail.
if (errors.length > 0) {
throw new Error('Lexer errors found\n' + errors.map(x => x.message).join('\n'));
}
return tokens;
}
/**
* Asynchronously runs the MacroLexer on the given input and returns the tokens and errors.
*
 * Use `runLexerGetTokens` if you don't want to explicitly test against lexer errors.
*
* @param {string} input - The input string to be tokenized.
 * @returns {Promise<{tokens: TestableToken[], errors: ILexingError[]}>} A promise that resolves to an object containing an array of tokens and an array of lexer errors.
*/
async function runLexerGetTokensAndErrors(input) {
const result = await page.evaluate(async (input) => { const result = await page.evaluate(async (input) => {
/** @type {import('../../public/scripts/macros/MacroLexer.js')} */ /** @type {import('../../public/scripts/macros/MacroLexer.js')} */
const { MacroLexer } = await import('./scripts/macros/MacroLexer.js'); const { MacroLexer } = await import('./scripts/macros/MacroLexer.js');
@@ -718,23 +753,17 @@ async function runLexerGetTokens(input) {
return result; return result;
}, input); }, input);
const tokens = getTestableTokens(result); return getTestableTokens(result);
return tokens;
} }
/** /**
* *
* @param {ILexingResult} result The result from the lexer * @param {ILexingResult} result The result from the lexer
* @returns {TestableToken[]} The tokens * @returns {{tokens: TestableToken[], errors: ILexingError[]}} The tokens
*/ */
function getTestableTokens(result) { function getTestableTokens(result) {
// Make sure that lexer errors get correctly marked as errors during testing, even if the resulting tokens might work. const errors = result.errors;
// The lexer should generally be able to parse all kinds of tokens. const tokens = result.tokens
if (result.errors.length > 0) {
throw new Error('Lexer errors found\n' + result.errors.map(x => x.message).join('\n'));
}
return result.tokens
// Filter out the mode popper. We don't care about that for testing // Filter out the mode popper. We don't care about that for testing
.filter(token => token.tokenType.name !== 'EndMode') .filter(token => token.tokenType.name !== 'EndMode')
// Extract relevant properties from tokens for comparison // Extract relevant properties from tokens for comparison
@@ -742,4 +771,6 @@ function getTestableTokens(result) {
type: token.tokenType.name, type: token.tokenType.name,
text: token.image, text: token.image,
})); }));
return { tokens, errors };
} }