2021-06-07 16:55:13 +02:00
|
|
|
// SPDX-License-Identifier: LGPL-3.0-or-later
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \file lexer.c
|
|
|
|
*
|
|
|
|
* Implements C-compatible text lexer.
|
|
|
|
*
|
|
|
|
* \copyright The DoubleFourteen Code Forge (C) All Rights Reserved
|
|
|
|
* \author Lorenzo Cogotti
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "lexer.h"
|
|
|
|
|
|
|
|
#include "sys/con.h"
|
|
|
|
#include "sys/sys.h"
|
|
|
|
#include "sys/vt100.h"
|
|
|
|
#include "utf/utf.h"
|
|
|
|
#include "argv.h"
|
|
|
|
#include "numlib.h"
|
|
|
|
#include "strlib.h"
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
/**
|
|
|
|
 * Readability macro: while a token is being parsed, `subtype` holds the
 * token length; it is overwritten with the actual token subtype once
 * tokenization is complete.
|
|
|
|
*/
|
|
|
|
#define TOKLEN(tok) ((tok)->subtype)
|
|
|
|
|
|
|
|
// Default punctuation list
|
|
|
|
static const Punctuation p_punctuations[] = {
|
|
|
|
// Binary operators
|
|
|
|
{ ">>=", P_RSHIFT_ASSIGN },
|
|
|
|
{ "<<=", P_LSHIFT_ASSIGN },
|
|
|
|
// Elipsis
|
|
|
|
{ "...", P_PARMS },
|
|
|
|
// Define merge operator
|
|
|
|
{ "##", P_PRECOMPMERGE }, // pre-compiler
|
|
|
|
// Logic operators
|
|
|
|
{ "&&", P_LOGIC_AND }, // pre-compiler
|
|
|
|
{ "||", P_LOGIC_OR }, // pre-compiler
|
|
|
|
{ ">=", P_LOGIC_GEQ }, // pre-compiler
|
|
|
|
{ "<=", P_LOGIC_LEQ }, // pre-compiler
|
|
|
|
{ "==", P_LOGIC_EQ }, // pre-compiler
|
|
|
|
{ "!=", P_LOGIC_UNEQ }, // pre-compiler
|
2021-10-15 11:52:12 +02:00
|
|
|
// Arithmetic operators
|
2021-06-07 16:55:13 +02:00
|
|
|
{ "*=", P_MUL_ASSIGN },
|
|
|
|
{ "/=", P_DIV_ASSIGN },
|
|
|
|
{ "%=", P_MOD_ASSIGN },
|
|
|
|
{ "+=", P_ADD_ASSIGN },
|
|
|
|
{ "-=", P_SUB_ASSIGN },
|
|
|
|
{ "++", P_INC },
|
|
|
|
{ "--", P_DEC },
|
|
|
|
// Binary operators
|
|
|
|
{ "&=", P_BIN_AND_ASSIGN },
|
|
|
|
{ "|=", P_BIN_OR_ASSIGN },
|
|
|
|
{ "^=", P_BIN_XOR_ASSIGN },
|
|
|
|
{ ">>", P_RSHIFT }, // pre-compiler
|
|
|
|
{ "<<", P_LSHIFT }, // pre-compiler
|
|
|
|
// Reference operators
|
|
|
|
{ "->", P_POINTERREF },
|
2021-10-15 11:52:12 +02:00
|
|
|
// Arithmetic operators
|
2021-06-07 16:55:13 +02:00
|
|
|
{ "*", P_MUL }, // pre-compiler
|
|
|
|
{ "/", P_DIV }, // pre-compiler
|
|
|
|
{ "%", P_MOD }, // pre-compiler
|
|
|
|
{ "+", P_ADD }, // pre-compiler
|
|
|
|
{ "-", P_SUB }, // pre-compiler
|
|
|
|
{ "=", P_ASSIGN },
|
|
|
|
// Binary operators
|
|
|
|
{ "&", P_BIN_AND }, // pre-compiler
|
|
|
|
{ "|", P_BIN_OR }, // pre-compiler
|
|
|
|
{ "^", P_BIN_XOR }, // pre-compiler
|
|
|
|
{ "~", P_BIN_NOT }, // pre-compiler
|
|
|
|
// Logic operators
|
|
|
|
{ "!", P_LOGIC_NOT }, // pre-compiler
|
|
|
|
{ ">", P_LOGIC_GREATER }, // pre-compiler
|
|
|
|
{ "<", P_LOGIC_LESS }, // pre-compiler
|
|
|
|
// Reference operator
|
|
|
|
{ ".", P_REF },
|
|
|
|
// Seperators
|
|
|
|
{ ",", P_COMMA }, // pre-compiler
|
|
|
|
{ ";", P_SEMICOLON },
|
|
|
|
// Label indication
|
|
|
|
{ ":", P_COLON }, // pre-compiler
|
|
|
|
// Ternary operator
|
|
|
|
{ "?", P_QUESTIONMARK }, // pre-compiler
|
|
|
|
// Embracements
|
|
|
|
{ "(", P_PARENOPEN }, // pre-compiler
|
|
|
|
{ ")", P_PARENCLOSE }, // pre-compiler
|
|
|
|
{ "{", P_BRACEOPEN }, // pre-compiler
|
|
|
|
{ "}", P_BRACECLOSE }, // pre-compiler
|
|
|
|
{ "[", P_SQBRACKETOPEN },
|
|
|
|
{ "]", P_SQBRACKETCLOSE },
|
|
|
|
// Escaping
|
|
|
|
{ "\\",P_BACKSLASH },
|
|
|
|
// Precompiler operator
|
|
|
|
{ "#", P_PRECOMP }, // pre-compiler
|
|
|
|
{ "$", P_DOLLAR },
|
|
|
|
// End of list
|
|
|
|
{ NULL, 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
// Built-in fatal handler (EmitMessage() substitutes it for LEX_QUIT):
// writes `msg` to STDERR followed by a termination notice, optionally
// prefixed by the program name, then exits with EXIT_FAILURE.
// `p` and `obj` are unused.
static NORETURN void Lex_Quit(Lex *p, const char *msg, void *obj)
{
	USED(obj);
	USED(p);

	Sys_Print(STDERR, msg);
	Sys_Print(STDERR, "\n");

	if (com_progName != NULL) {
		Sys_Print(STDERR, com_progName);
		Sys_Print(STDERR, ": ");
	}

	Sys_Print(STDERR, "Terminating on unrecoverable parsing error.\n");
	exit(EXIT_FAILURE);
}
|
|
|
|
|
|
|
|
// Built-in warning handler (EmitMessage() substitutes it for LEX_WARN):
// writes `msg` verbatim to STDERR. `p` and `obj` are unused.
static void Lex_Warn(Lex *p, const char *msg, void *obj)
{
	USED(obj);
	USED(p);

	Sys_Print(STDERR, msg);
}
|
|
|
|
|
|
|
|
// Advance `p` to next rune, handles parsing errors, end of parsing and
|
|
|
|
// makes no assumption over what's next in `p->pos`
|
|
|
|
static Rune NextRune(Lex *p)
|
|
|
|
{
|
|
|
|
if (p->hasError)
|
|
|
|
return (p->nr = '\0'); // refuse any subsequent read requests
|
|
|
|
|
|
|
|
size_t n = p->lim - p->pos;
|
|
|
|
if (n == 0)
|
|
|
|
return (p->nr = '\0');
|
|
|
|
|
|
|
|
if (*(Uint8 *) p->pos < RUNE_SELF) {
|
|
|
|
// fast path for ASCII
|
|
|
|
char c = *p->pos;
|
|
|
|
if (c != '\0') p->pos++;
|
|
|
|
|
|
|
|
return (p->nr = c);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Slow path for UTF8
|
|
|
|
if (!fullrune(p->pos, n)) {
|
|
|
|
LexerError(p, "Incomplete UTF-8 code sequence");
|
|
|
|
return (p->nr = '\0');
|
|
|
|
}
|
|
|
|
|
|
|
|
p->pos += chartorune(&p->nr, p->pos);
|
|
|
|
return p->nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assuming that:
|
|
|
|
// - no error occurred
|
|
|
|
// - next rune is ASCII
|
|
|
|
// - current position is not end of parse
|
|
|
|
//
|
|
|
|
// Advance `p` accordingly using an optimized code path
|
|
|
|
static char NextChar(Lex *p)
|
|
|
|
{
|
|
|
|
assert(!p->hasError);
|
|
|
|
assert( p->pos < p->lim && *p->pos != '\0');
|
|
|
|
assert(*(Uint8 *) p->pos < RUNE_SELF);
|
|
|
|
|
|
|
|
return (p->nr = *p->pos++);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the byte at the current position without consuming it,
// or '\0' when the input is exhausted. Must not be called while the
// lexer is in error state.
static char PeekChar(Lex *p)
{
	assert(!p->hasError);

	if (p->pos >= p->lim)
		return '\0';

	return *p->pos;
}
|
|
|
|
|
|
|
|
// Test whether the unread input begins with the non-empty string `s`,
// without consuming anything. Must not be called while the lexer is in
// error state.
static Boolean PeekString(Lex *p, const char *s)
{
	assert(!p->hasError);
	assert(*s != '\0');

	const size_t want  = strlen(s);
	const size_t avail = p->lim - p->pos;

	// Not enough input left means no match, and no read past `p->lim`
	return avail >= want && strncmp(p->pos, s, want) == 0;
}
|
|
|
|
|
|
|
|
// Move `p->pos` back over the rune stored in `p->nr`.
// Does nothing when `p->nr` is '\0' (end of parse or error).
static void UngetRune(Lex *p)
{
	if (p->nr == '\0')
		return;

	// A decoding error only moves back by a single byte,
	// anything else by its encoded length
	size_t back = (p->nr == RUNE_ERR) ? 1 : runelen(p->nr);

	p->pos -= back;
}
|
|
|
|
|
|
|
|
// Append the rune in `p->nr` to `tok->text`, without NUL-terminating it
// (TOKLEN(tok) tracks the length written so far).
//
// Returns TRUE on success. Returns FALSE if the token has already been
// truncated or the rune doesn't fit; in the latter case TT_TRUNC is set
// and, unless L_ALLOWTRUNC is enabled, a lexer error is raised.
static Boolean AppendDirty(Lex *p, Tok *tok)
{
	if (tok->flags & TT_TRUNC)
		return FALSE;  // already truncated, nothing more gets appended

	size_t need = runelen(p->nr);

	// NOTE: one char is always kept available for the final '\0'
	if (TOKLEN(tok) + need >= MAXTOKLEN) {
		tok->flags |= TT_TRUNC;

		if ((p->flags & L_ALLOWTRUNC) == 0) {
			tok->text[TOKLEN(tok)] = '\0';  // so the text can be printed

			LexerError(p, "'%s...': MAXTOKLEN limit exceeded", tok->text);
		}

		return FALSE;
	}

	tok->subtype += runetochar(tok->text + TOKLEN(tok), p->nr);
	return TRUE;
}
|
|
|
|
|
|
|
|
// Append `n` raw bytes starting at `p->pos` to `tok->text`, without
// NUL-terminating it and without advancing `p->pos`.
//
// Returns TRUE on success. Returns FALSE if the token has already been
// truncated or the bytes don't fit; in the latter case TT_TRUNC is set
// and, unless L_ALLOWTRUNC is enabled, a lexer error is raised.
static Boolean AppendDirtyString(Lex *p, Tok *tok, size_t n)
{
	if (tok->flags & TT_TRUNC)
		return FALSE;  // already truncated, nothing more gets appended

	// NOTE: one char is always kept available for the final '\0'
	if (TOKLEN(tok) + n >= MAXTOKLEN) {
		tok->flags |= TT_TRUNC;

		if ((p->flags & L_ALLOWTRUNC) == 0) {
			tok->text[TOKLEN(tok)] = '\0';  // so the text can be printed

			LexerError(p, "'%s...': MAXTOKLEN limit exceeded", tok->text);
		}

		return FALSE;
	}

	memcpy(tok->text + TOKLEN(tok), p->pos, n);
	tok->subtype += n;
	return TRUE;
}
|
|
|
|
|
|
|
|
// Skip whitespace, `//` comments and `/* */` comments, leaving `p->pos`
// on the first byte that belongs to neither. `p->line` is incremented
// for every newline crossed. Warns on unterminated and nested C-style
// comments.
static void SkipWhitespace(Lex *p)
{
	for (;;) {
		// Fast route: ASCII blanks and control characters
		for (Uint8 b = PeekChar(p); b != '\0' && b <= ' '; b = PeekChar(p)) {
			if (b == '\n')
				p->line++;

			NextChar(p);
		}

		NextRune(p);
		if (isspacerune(p->nr))
			continue;  // ... rare occurrence of UTF8 space

		// Only a '/' may start a comment, look one byte ahead
		char ahead = (p->nr == '/') ? PeekChar(p) : '\0';

		if (p->nr == '/' && ahead == '/') {
			// C++-Style comment, runs up to end of line
			NextChar(p);

			do {
				if (NextRune(p) == '\0')
					return;
			} while (p->nr != '\n');

			p->line++;
			continue;
		}

		if (p->nr == '/' && ahead == '*') {
			// C-Style comment, runs up to the matching "*/"
			NextChar(p);

			for (;;) {
				Rune r = NextRune(p);

				if (r == '\n') {
					p->line++;
					continue;
				}
				if (r == '\0') {
					if (!p->hasError)
						LexerWarning(p, "Unterminated comment");

					return;
				}

				ahead = PeekChar(p);
				if (r == '*' && ahead == '/')
					break;  // end of comment

				if (r == '/' && ahead == '*')
					LexerWarning(p, "Nested comment");
			}

			NextChar(p);  // skip comment termination
			continue;
		}

		// Not a comment: give the rune back and stop
		UngetRune(p);
		return;
	}
}
|
|
|
|
|
|
|
|
// Decode the escape sequence following a backslash inside a string or
// character literal, and append the resulting character to `tok`.
//
// On entry the backslash has already been consumed and `p->pos` points at
// the escape character. On success the whole escape sequence has been
// consumed, so the caller may resume reading right after it.
//
// Supported sequences:
// - single character escapes: \\ \n \r \t \v \b \f \a \' \" \?
// - \x followed by hexadecimal digits
// - \ followed by decimal digits (NOTE: decimal ASCII code, NOT octal)
//
// Values above 0xff trigger a warning and are clamped to 0xff.
//
// Returns TRUE on success, FALSE after raising an error on an unknown
// escape character or on end of parse right after the backslash.
static Boolean ReadEscapeCharacter(Lex *p, Tok *tok)
{
	Rune r;
	Boolean isSingleChar = TRUE;

	// Determine the escape character
	char c = PeekChar(p);

	switch (c) {
	case '\\': r = '\\'; break;
	case 'n':  r = '\n'; break;
	case 'r':  r = '\r'; break;
	case 't':  r = '\t'; break;
	case 'v':  r = '\v'; break;
	case 'b':  r = '\b'; break;
	case 'f':  r = '\f'; break;
	case 'a':  r = '\a'; break;
	case '\'': r = '\''; break;
	case '\"': r = '\"'; break;
	case '\?': r = '\?'; break;
	case 'x':
		isSingleChar = FALSE;

		NextChar(p);  // step over 'x'

		r = 0;
		while (TRUE) {
			int digit;

			c = PeekChar(p);
			if (c >= '0' && c <= '9')
				digit = c - '0';
			else if (c >= 'A' && c <= 'F')
				digit = c - 'A' + 10;
			else if (c >= 'a' && c <= 'f')
				digit = c - 'a' + 10;
			else
				break;  // not a hex digit, also catches end of parse

			NextChar(p);

			// Stop accumulating once out of range (clamped below),
			// so arbitrarily long digit runs cannot overflow `r`
			if (r <= 0xff)
				r = (r << 4) + digit;
		}

		break;

	default:
		isSingleChar = FALSE;

		// NOTE: decimal ASCII code, NOT octal
		if (!Df_isdigit(c)) {
			if (c == '\0')
				LexerError(p, "Unexpected end of parse after backslash");
			else
				LexerError(p, "Unknown escape char");

			return FALSE;
		}

		r = 0;
		do {
			NextChar(p);

			// Same overflow guard as the hexadecimal case
			if (r <= 0xff)
				r = r * 10 + (c - '0');

			c = PeekChar(p);
		} while (Df_isdigit(c));

		break;
	}

	// Single character escapes were only peeked so far: consume the
	// escape character, otherwise the caller would read it again as
	// regular string content (e.g. `\"` would terminate the string)
	if (isSingleChar)
		NextChar(p);

	if (r > 0xff) {
		LexerWarning(p, "Too large value in escape character");
		r = 0xff;
	}

	// Successfully read escape character
	p->nr = r;
	AppendDirty(p, tok);
	return TRUE;
}
|
|
|
|
|
|
|
|
// Read a quoted string (TT_STRING, double quotes) or a character literal
// (TT_LITERAL, single quotes) into `tok`.
//
// On entry the opening quote has been consumed and is stored in `p->nr`.
// Escape sequences are decoded unless L_NOSTRESC is set.
//
// Consecutive strings separated by whitespace are concatenated, unless
// L_NOSTRCAT is set. With L_NOSTRCAT, double quoted strings may still be
// concatenated when L_ALLOWBACKSLASHSTRCAT is set and they are joined by
// a backslash (`"abc" \ "def"`).
//
// For literals `tok->subtype` is set to the first byte of the text, and a
// warning is emitted if the literal is not exactly one byte long, unless
// L_ALLOWMULTICHARLIT is set.
//
// Returns `tok->text`, or NULL on error (missing trailing quote, newline
// inside string, bad escape sequence, dangling backslash concatenation).
static char *ReadString(Lex *p, Tok *tok)
{
	// Leading quote
	Rune quote = p->nr;
	if (quote == '\"')
		tok->type = TT_STRING;
	else
		tok->type = TT_LITERAL;

	// Read string body
	while (TRUE) {
		NextRune(p);

		if (p->nr == '\\' && (p->flags & L_NOSTRESC) == 0) {
			// There is an escape character and escape characters are allowed
			if (!ReadEscapeCharacter(p, tok))
				return NULL;

		} else if (p->nr == quote) {
			// Got a trailing quote

			// If consecutive strings should not be concatenated
			if ((p->flags & L_NOSTRCAT) &&
			    ((p->flags & L_ALLOWBACKSLASHSTRCAT) == 0 || quote != '\"'))
				break;

			// Remember where the string ended, in case no other string follows
			char *tmp_p = p->pos;
			unsigned tmpline = p->line;

			// Read white space between two possibly consecutive strings
			SkipWhitespace(p);

			if (p->flags & L_NOSTRCAT) {
				// Only backslash concatenation is allowed
				if (NextRune(p) != '\\') {  // also catches end of parse
					p->pos = tmp_p;
					p->line = tmpline;
					break;
				}

				SkipWhitespace(p);
				if (NextRune(p) != quote) {
					if (!p->hasError)
						LexerError(p, "Expecting string after '\\' terminated line");

					return NULL;
				}

				// The leading quote of the next string has just been
				// consumed, go straight to reading its body
				continue;
			}

			// If there's no leading quote
			if (NextRune(p) != quote) {  // also catches end of parse
				p->pos = tmp_p;
				p->line = tmpline;
				break;
			}
		} else {
			if (p->nr == '\0') {
				if (!p->hasError)
					LexerError(p, "Missing trailing quote");

				return NULL;
			}
			if (p->nr == '\n') {
				LexerError(p, "Newline inside string");
				return NULL;
			}

			AppendDirty(p, tok);
		}
	}

	tok->text[TOKLEN(tok)] = '\0';

	if (tok->type == TT_LITERAL) {
		if ((p->flags & L_ALLOWMULTICHARLIT) == 0 && TOKLEN(tok) != 1)
			LexerWarning(p, "Literal is not one character long");

		// TOKLEN is no longer needed, subtype now carries the character
		tok->subtype = tok->text[0];
	}

	return tok->text;
}
|
|
|
|
|
|
|
|
// Tell whether `r` may appear inside a name token.
//
// ASCII: alphanumerics and '_' are always accepted; '/', '\\', ':' and
// '.' only when L_ALLOWPATHS is set in `flags`; anything else is refused.
// Non-ASCII: anything that is not a space rune is accepted.
static Boolean IsNameChar(Rune r, unsigned flags)
{
	if (!(r < RUNE_SELF))
		return !isspacerune(r);

	switch (r) {
	case '/':
	case '\\':
	case ':':
	case '.':
		return (flags & L_ALLOWPATHS) != 0;

	case '_':
		return TRUE;

	default:
		return Df_isalnum(r) ? TRUE : FALSE;
	}
}
|
|
|
|
|
|
|
|
// Tell whether `r` may start a name token: an alphabetic character,
// an underscore, or any non-ASCII rune.
static Boolean IsLetterChar(Rune r)
{
	if (r >= RUNE_SELF || r == '_')
		return TRUE;

	return Df_isalpha(r) ? TRUE : FALSE;
}
|
|
|
|
|
|
|
|
// Step over a leading byte order mark, if the input starts with one.
// Leaves `p->pos` untouched otherwise, including when the input does
// not begin with a complete rune.
static void SkipBOM(Lex *p)
{
	size_t avail = p->lim - p->pos;

	if (fullrune(p->pos, avail)) {
		Rune first;
		size_t len = chartorune(&first, p->pos);

		if (first == BOM)
			p->pos += len;
	}
}
|
|
|
|
|
|
|
|
// Read a name token (TT_NAME) into `tok`.
//
// On entry the first rune of the name is stored in `p->nr`. Keeps
// appending runes for as long as IsNameChar() accepts them, then gives
// the first rejected rune back. Returns `tok->text`.
static char *ReadName(Lex *p, Tok *tok)
{
	tok->type = TT_NAME;

	AppendDirty(p, tok);
	while (IsNameChar(NextRune(p), p->flags))
		AppendDirty(p, tok);

	UngetRune(p);  // this one wasn't part of the name

	tok->text[TOKLEN(tok)] = '\0';
	return tok->text;
}
|
|
|
|
|
|
|
|
// Read a numeric token (TT_NUMBER) into `tok`.
//
// On entry the first character of the number (a digit, or a '.' followed
// by a digit) is stored in `p->nr`.
//
// Recognized forms:
// - hexadecimal (0x...), binary (0b...) and octal (0...) integers
// - decimal integers
// - decimal floating point numbers, with optional exponent (e or E)
// - floating point exceptions: 1.#INF, 1.#IND, 1.#NAN, 1.#QNAN, 1.#SNAN
//   (these raise an error unless L_ALLOWFLOATEXC is set)
//
// Format suffixes (f/F, l/L for floats; l/L, ll/LL, u/U for integers) are
// consumed and reflected in the subtype, but are not part of `tok->text`.
//
// On success `tok->subtype`, `tok->intvalue` and `tok->floatvalue` are
// set and `tok->text` is returned. Returns NULL on error.
static char *ReadNumber(Lex *p, Tok *tok)
{
	tok->type = TT_NUMBER;

	char base = PeekChar(p);
	unsigned subtype;

	if (p->nr == '0' && base != '.') {
		if (base == 'x' || base == 'X') {
			// Hexadecimal number
			AppendDirty(p, tok); NextChar(p);
			AppendDirty(p, tok);

			while (Df_ishexdigit(PeekChar(p))) {
				NextChar(p);
				AppendDirty(p, tok);
			}

			subtype = TT_HEX | TT_INT;
		} else if (base == 'b' || base == 'B') {
			// Binary number
			AppendDirty(p, tok); NextChar(p);
			AppendDirty(p, tok);

			while (TRUE) {
				char c = PeekChar(p);
				if (c != '0' && c != '1')
					break;

				NextChar(p);
				AppendDirty(p, tok);
			}

			subtype = TT_BIN | TT_INT;
		} else {
			// Octal number
			AppendDirty(p, tok);

			while (TRUE) {
				char c = PeekChar(p);
				if (c < '0' || c > '7')
					break;

				NextChar(p);
				AppendDirty(p, tok);
			}

			subtype = TT_OCT | TT_INT;
		}
	} else {
		// Decimal integer or floating point number
		unsigned dot = 0;

		char c;
		while (TRUE) {
			if (p->nr == '.')
				dot++;

			AppendDirty(p, tok);
			c = PeekChar(p);
			if (!Df_isdigit(c) && c != '.')
				break;  // next character is not a digit

			NextChar(p);  // consume one more character
		}

		// C accepts both 'e' and 'E' as exponent markers
		Boolean hasExponent = (c == 'e' || c == 'E');
		if (hasExponent && dot == 0)
			dot++;  // scientific notation without a decimal point

		if (dot > 1) {
			LexerError(p, "More than one dot in number");
			return NULL;
		}

		if (dot == 1) {
			// FIXME(micia): How should we deal with stuff like 1.0e<EOF>

			// Floating point number
			subtype = TT_DEC | TT_FLOAT;

			// Check for floating point exponent
			if (hasExponent) {
				NextChar(p);
				AppendDirty(p, tok);

				c = PeekChar(p);
				if (c == '-' || c == '+') {
					NextChar(p);
					AppendDirty(p, tok);

					c = PeekChar(p);
				}
				while (TRUE) {
					if (!Df_isdigit(c))
						break;

					NextChar(p);
					AppendDirty(p, tok);

					c = PeekChar(p);
				}
			} else if (c == '#') {
				// Check for floating point exception infinite 1.#INF or indefinite 1.#IND or NaN

				// Consume the '#' and keep it in the token text
				NextChar(p);
				AppendDirty(p, tok);

				// `len` is the length of the keyword found at `p->pos`.
				// PeekString() guarantees that many bytes are available,
				// so the copy below never reads past `p->lim`
				size_t len;
				if (PeekString(p, "INF")) {
					subtype |= TT_INF;
					len = 3;
				} else if (PeekString(p, "IND")) {
					subtype |= TT_INDEF;
					len = 3;
				} else if (PeekString(p, "NAN")) {
					subtype |= TT_NAN;
					len = 3;
				} else if (PeekString(p, "QNAN") || PeekString(p, "SNAN")) {
					subtype |= TT_NAN;
					len = 4;
				} else {
					LexerError(p, "Unknown floating point exception after '#'");
					return NULL;
				}

				AppendDirtyString(p, tok, len);
				p->pos += len;

				while (Df_isdigit(PeekChar(p))) {
					NextChar(p);
					AppendDirty(p, tok);
				}

				if ((p->flags & L_ALLOWFLOATEXC) == 0) {
					// NOTE: text is not NUL-terminated yet, bound it by precision
					LexerError(p, "Parsed '%.*s'", (int) TOKLEN(tok), tok->text);
					return NULL;
				}
			}
		} else
			subtype = TT_DEC | TT_INT;
	}

	// Check for format suffixes
	char suffix = PeekChar(p);
	if (subtype & TT_FLOAT) {
		if (suffix == 'f' || suffix == 'F') {
			// Single-precision: float
			subtype |= TT_SINGLE_PREC;
			NextChar(p);
		} else if (suffix == 'l' || suffix == 'L') {
			// Extended-precision: long double
			subtype |= TT_EXT_PREC;
			NextChar(p);
		} else {
			// Default is double-precision: double
			subtype |= TT_DOUBLE_PREC;
		}
	} else if (subtype & TT_INT) {
		// Default: signed long
		// At most two suffix groups, in any order: l/ll and u
		for (unsigned i = 0; i < 2; i++) {
			if (suffix == 'l' || suffix == 'L') {
				if (subtype & TT_LONG)
					break;  // duplicate long suffix

				subtype |= TT_LONG;  // long integer
				NextChar(p);

				suffix = PeekChar(p);
				if (suffix == 'l' || suffix == 'L') {
					subtype |= TT_LLONG;  // long long integer
					NextChar(p);
				}

			} else if (suffix == 'u' || suffix == 'U') {
				if (subtype & TT_UNSIGNED)
					break;  // duplicate unsigned suffix

				subtype |= TT_UNSIGNED;  // unsigned integer
				NextChar(p);

			} else
				break;

			suffix = PeekChar(p);
		}
	}

	tok->text[TOKLEN(tok)] = '\0';

	// From now on subtype carries the number kind, not the text length
	tok->subtype = subtype;
	if (subtype & TT_INT) {
		tok->intvalue = Atoll(tok->text, NULL, 0, NULL);
		tok->floatvalue = tok->intvalue;
	} else if (subtype & TT_FLOAT) {
		tok->floatvalue = Atof(tok->text, NULL, NULL);
		tok->intvalue = (long long) tok->floatvalue;
	}

	return tok->text;
}
|
|
|
|
|
|
|
|
// Read an IPv4 address token (TT_NUMBER with TT_IPADDR | TT_IPV4 subtype)
// into `tok`, optionally followed by `:port` (adds TT_IPPORT) unless
// L_PLAINIPADDRONLY is set.
//
// On entry the first digit of the address is stored in `p->nr`.
// Only basic format checks are performed: the address must contain
// exactly 3 dots, none of them consecutive.
//
// Returns `tok->text`, or NULL on error.
static char *ReadIpv4(Lex *p, Tok *tok)
{
	tok->type = TT_NUMBER;

	unsigned subtype = TT_IPADDR | TT_IPV4;

	int dots = 0, mulDots = 0;
	char lastCh = '\0';

	AppendDirty(p, tok);
	while (TRUE) {
		char c = PeekChar(p);
		if (!Df_isdigit(c) && c != '.')
			break;

		if (c == '.') {
			if (lastCh == '.')
				mulDots++;  // don't report error immediately, so bad token may be skipped

			dots++;
		}

		NextChar(p);
		AppendDirty(p, tok);

		lastCh = c;
	}

	if (mulDots > 0) {
		LexerError(p, "Multiple consecutive dots inside IPv4 address");
		return NULL;
	}
	if (dots != 3) {
		LexerError(p, "IPv4 address should have 3 dots");
		return NULL;
	}

	if ((p->flags & L_PLAINIPADDRONLY) == 0 && PeekChar(p) == ':') {
		// Port number available

		// Consume the ':' first, then append it: AppendDirty() appends
		// `p->nr`, which only holds the ':' once it has been read
		NextChar(p);
		AppendDirty(p, tok);

		if (!Df_isdigit(PeekChar(p))) {
			LexerError(p, "Missing port number after ':' inside IPv4 address");
			return NULL;
		}

		do {
			NextChar(p);
			AppendDirty(p, tok);
		} while (Df_isdigit(PeekChar(p)));

		subtype |= TT_IPPORT;
	}

	tok->text[TOKLEN(tok)] = '\0';

	// From now on subtype carries the address kind, not the text length
	tok->subtype = subtype;
	return tok->text;
}
|
|
|
|
|
|
|
|
// Tell whether `r` may appear inside an IPv6 zone id.
// ASCII: alphanumerics and any of "-_.+*"; non-ASCII: anything that is
// not a space rune.
static Boolean IsIpv6ZoneChar(Rune r)
{
	if (!(r < RUNE_SELF))
		return !isspacerune(r);

	return Df_isalnum(r) || r == '-' || r == '_' || r == '.' || r == '+' || r == '*';
}
|
|
|
|
|
|
|
|
// Read an IPv6 address token (TT_NUMBER with TT_IPADDR | TT_IPV6 subtype)
// into `tok`.
//
// On entry the first character of the address is stored in `p->nr`: a
// hexadecimal digit, a ':' or, for address literals, a '['.
//
// Besides the plain address the following are recognized:
// - IPv4-mapped addresses (dotted quad after the colons)
// - `%zone` suffix (adds TT_IPV6ZONE), unless L_PLAINIPADDRONLY is set
// - `[address]` literals (adds TT_IPV6LIT), optionally followed by
//   `:port` (adds TT_IPPORT)
//
// Only basic format checks are performed, they don't ensure the address
// is actually valid. Returns `tok->text`, or NULL on error.
static char *ReadIpv6(Lex *p, Tok *tok)
{
	tok->type = TT_NUMBER;

	unsigned subtype = TT_IPADDR | TT_IPV6;

	int colons = 0, dots = 0, mulDots = 0;
	Boolean isLiteral = FALSE;
	char c, lastCh = p->nr;

	if (p->nr == ':')
		colons++;
	if (p->nr == '[')
		isLiteral = TRUE;

	assert(!isLiteral || (p->flags & L_PLAINIPADDRONLY) == 0);

	AppendDirty(p, tok);
	while (TRUE) {
		c = PeekChar(p);
		if (!Df_ishexdigit(c) && c != '.' && c != ':')
			break;

		if (c == '.') {
			if (colons == 0)
				break;
			if (lastCh == '.')
				mulDots++;  // report error after reading the full token

			dots++;
		}
		if (c == ':') {
			if (dots > 0)
				break;

			colons++;
		}

		// The IPv4 part of a mapped address only takes decimal digits
		if (dots > 0 && ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')))
			break;

		NextChar(p);
		AppendDirty(p, tok);

		lastCh = c;
	}

	// Perform basic format checks (still they don't ensure correct address)
	if (colons < 2) {
		LexerError(p, "IPv6 address should have at least 2 colons");
		return NULL;
	}
	if (dots > 0) {
		if (mulDots > 0) {
			LexerError(p, "IPv4 mapped IPv6 addess has multiple consecutive dots");
			return NULL;
		}
		if (dots != 3) {
			LexerError(p, "IPv4-mapped IPv6 address should have 3 dots");
			return NULL;
		}
		if (colons > 6) {
			LexerError(p, "IPv4-mapped IPv6 address should have at most 6 colons");
			return NULL;
		}
	} else if (colons > 7) {
		LexerError(p, "IPv6 address should have at most 7 colons");
		return NULL;
	}

	if ((p->flags & L_PLAINIPADDRONLY) == 0 && c == '%') {
		// Address contains a zone id
		NextChar(p);
		AppendDirty(p, tok);

		Rune r = NextRune(p);
		if (!IsIpv6ZoneChar(r)) {
			UngetRune(p);

			// NextRune() may have failed on its own, don't report twice
			if (!p->hasError)
				LexerError(p, "Missing IPv6 address zone id after '%%'");

			return NULL;
		}

		do AppendDirty(p, tok); while (IsIpv6ZoneChar(NextRune(p)));

		// Reading or appending the zone id may have raised an error,
		// PeekChar() and NextChar() below must not run in that state
		if (p->hasError)
			return NULL;

		UngetRune(p);

		subtype |= TT_IPV6ZONE;
	}
	if (isLiteral) {
		// Ensure proper IPv6 literal termination
		if (PeekChar(p) != ']') {
			LexerError(p, "IPv6 address literal lacks closing ']'");
			return NULL;
		}

		NextChar(p);
		AppendDirty(p, tok);

		subtype |= TT_IPV6LIT;

		if (PeekChar(p) == ':') {
			// Has a specific port number
			NextChar(p);
			AppendDirty(p, tok);

			if (!Df_isdigit(PeekChar(p))) {
				LexerError(p, "IPv6 address literal lacks port number after ':'");
				return NULL;
			}

			do {
				NextChar(p);
				AppendDirty(p, tok);
			} while (Df_isdigit(PeekChar(p)));

			subtype |= TT_IPPORT;
		}
	}

	tok->text[TOKLEN(tok)] = '\0';

	// From now on subtype carries the address kind, not the text length
	tok->subtype = subtype;
	return tok->text;
}
|
|
|
|
|
|
|
|
// Read a punctuation token (TT_PUNCT) into `tok`.
//
// Scans the lexer's punctuation table (the default one when `p->puncts`
// is NULL) in order and takes the first entry found at the current
// position. `tok->subtype` is set to the punctuation id.
//
// Returns `tok->text`, or NULL when no punctuation matches.
static char *ReadPunct(Lex *p, Tok *tok)
{
	const Punctuation *punc = p->puncts ? p->puncts : p_punctuations;

	for (; punc->p != NULL; punc++) {
		// Check for this punctuation in the script
		if (!PeekString(p, punc->p))
			continue;

		// Punctuation matches, copy into the token
		size_t len = strlen(punc->p);

		tok->type = TT_PUNCT;
		AppendDirtyString(p, tok, len);
		tok->text[len] = '\0';

		tok->subtype = punc->id;

		p->pos += len;
		return tok->text;
	}

	return NULL;
}
|
|
|
|
|
|
|
|
// Start a new lexing session: store the session name in `p->name` and
// set the initial line number, which is never less than 1.
void BeginLexerSession(Lex *p, const char *name, unsigned startLine)
{
	Df_strncpyz(p->name, name, sizeof(p->name));

	p->line = (startLine > 1) ? startLine : 1;
}
|
|
|
|
|
|
|
|
// Make the lexer read from the `n` bytes starting at `text`.
// Clears the error and buffered token state, and skips a leading BOM.
void SetLexerTextn(Lex *p, const char *text, size_t n)
{
	char *start = (char *) text;

	p->pos = start;
	p->lim = start + n;

	p->hasBufferedToken = FALSE;
	p->hasError         = FALSE;

	SkipBOM(p);
}
|
|
|
|
|
|
|
|
// Tell whether the token starting with `p->nr` looks like an IPv4 address.
//
// To unambiguously determine that a token is an IPv4 address, at least
// 2 dots must be found within its first 8 chars (`p->nr` plus up to 7
// more), with nothing but digits in between. This is necessary to
// avoid confusing a double for an IPv4. Nothing is consumed.
static Boolean PeekIpv4(Lex *p)
{
	if (!Df_isdigit(p->nr))
		return FALSE;

	// p->nr was tested above, look at up to 7 more bytes
	size_t avail   = p->lim - p->pos;
	size_t peekLen = (avail < 7) ? avail : 7;

	int dots = 0;
	for (const char *s = p->pos; s < p->pos + peekLen; s++) {
		if (*s == '.') {
			if (++dots == 2)
				return TRUE;
		} else if (!Df_isdigit(*s))
			break;
	}

	return FALSE;
}
|
|
|
|
|
|
|
|
// Tell whether the token starting with `p->nr` looks like an IPv6 address.
//
// In order to unambiguously determine that a token is in fact an IPv6
// address, 2 colons must be found within its first 10 chars, with
// nothing but hexadecimal digits in between (this is needed to avoid
// confusing `(a) ? 100:200` for an IPv6 address). Nothing is consumed.
//
// NOTE: C++ syntax may kill us anyway, `:: function()` would be
//       misinterpreted like `IPv6` `function`...
static Boolean PeekIpv6(Lex *p)
{
	size_t peekLen;
	int colons = 0;

	// p->nr is tested separately, and decides how far to look
	if ((p->flags & L_PLAINIPADDRONLY) == 0 && p->nr == '[') {
		peekLen = 10;  // could be: [address]:port
	} else if (p->nr == ':') {
		colons  = 1;
		peekLen = 5;
	} else if (Df_ishexdigit(p->nr)) {
		peekLen = 9;
	} else {
		return FALSE;
	}

	size_t avail = p->lim - p->pos;
	if (peekLen > avail)
		peekLen = avail;

	for (const char *s = p->pos; s < p->pos + peekLen; s++) {
		if (*s == ':') {
			if (++colons == 2)
				return TRUE;
		} else if (!Df_ishexdigit(*s))
			return FALSE;
	}
	return FALSE;
}
|
|
|
|
|
|
|
|
// Read the next token from the lexer into `tok`.
//
// A token previously pushed back with Lex_UngetToken() is returned
// first. Otherwise whitespace and comments are skipped, `tok` is reset,
// and the token is classified from its first rune as string/literal,
// name, IP address (only with L_ALLOWIPADDR), number or punctuation.
//
// Returns `tok->text`, or NULL on end of parse or error.
char *Lex_ReadToken(Lex *p, Tok *tok)
{
	if (p->hasError)
		return NULL;

	if (p->hasBufferedToken) {
		// Return previously stored token
		memcpy(tok, &p->buf, sizeof(*tok));
		p->hasBufferedToken = FALSE;
		return tok->text;
	}

	// Advance to next token position, keeping track of what gets skipped
	char *startPos = p->pos;
	unsigned startLine = p->line;

	SkipWhitespace(p);

	// Clear token
	tok->type              = 0;
	tok->flags             = 0;
	tok->subtype           = 0;  // used as length during parsing
	tok->linesCrossed      = p->line - startLine;
	tok->spacesBeforeToken = p->pos - startPos;
	tok->line              = p->line;

	tok->intvalue   = 0;
	tok->floatvalue = 0.0;
	tok->text[0]    = '\0';

	// Classify the token by its first rune
	Rune first = NextRune(p);
	if (first == '\0')
		return NULL;

	if (first == '\"' || first == '\'')
		return ReadString(p, tok);

	if (IsLetterChar(first) || (p->flags & L_STRONLY) != 0)
		return ReadName(p, tok);

	if (p->flags & L_ALLOWIPADDR) {
		if (PeekIpv4(p))
			return ReadIpv4(p, tok);
		if (PeekIpv6(p))
			return ReadIpv6(p, tok);
	}

	if (Df_isdigit(first) || (first == '.' && Df_isdigit(PeekChar(p))))
		return ReadNumber(p, tok);

	// Punctuations are matched against the raw text, give the rune back
	UngetRune(p);

	if (ReadPunct(p, tok))
		return tok->text;

	LexerError(p, "Syntax error");
	return NULL;
}
|
|
|
|
|
|
|
|
// Parse an integer number token and return its value.
//
// When `optionalSign` is set, a punctuation token preceding the number
// is inspected: `+` is discarded, `-` negates the result, anything else
// is pushed back with Lex_UngetToken().
//
// Returns 0 if the next token is not an integer number.
long long Lex_ParseInt(Lex *p, Boolean optionalSign)
{
	Tok tok;
	long long sign = 1LL;

	if (optionalSign && Lex_CheckTokenType(p, &tok, TT_PUNCT, 0)) {
		if (tok.subtype == P_SUB)
			sign = -1LL;
		else if (tok.subtype != P_ADD)
			Lex_UngetToken(p, &tok);  // not a sign, give it back
	}

	if (!Lex_MatchTokenType(p, &tok, TT_NUMBER, TT_INT))
		return 0LL;

	return sign * tok.intvalue;
}
|
|
|
|
|
|
|
|
// Parse a boolean value.
//
// When `allowNumeric` is set an integer number is accepted, any non-zero
// value being TRUE. Otherwise the token text must be exactly "true" or
// "false"; anything else raises a lexer error.
//
// Returns the parsed value, FALSE on error.
Boolean Lex_ParseBool(Lex *p, Boolean allowNumeric)
{
	Tok tok;

	if (allowNumeric && Lex_CheckTokenType(p, &tok, TT_NUMBER, TT_INT))
		return tok.intvalue != 0;

	if (!Lex_MatchAnyToken(p, &tok))
		return FALSE;

	Boolean isTrue = (strcmp(tok.text, "true") == 0);
	if (isTrue || strcmp(tok.text, "false") == 0)
		return isTrue;

	LexerError(p, "Read '%s' while expecting boolean value", tok.text);
	return FALSE;
}
|
|
|
|
|
|
|
|
// Parse a floating point number token and return its value.
//
// When `optionalSign` is set, a punctuation token preceding the number
// is inspected: `+` is discarded, `-` negates the result, anything else
// is pushed back with Lex_UngetToken().
//
// Returns 0.0 if the next token is not a floating point number.
double Lex_ParseFloat(Lex *p, Boolean optionalSign)
{
	Tok tok;
	double sign = 1.0;

	if (optionalSign && Lex_CheckTokenType(p, &tok, TT_PUNCT, 0)) {
		if (tok.subtype == P_SUB)
			sign = -1.0;
		else if (tok.subtype != P_ADD)
			Lex_UngetToken(p, &tok);  // not a sign, give it back
	}

	if (!Lex_MatchTokenType(p, &tok, TT_NUMBER, TT_FLOAT))
		return 0.0;

	return sign * tok.floatvalue;
}
|
|
|
|
|
|
|
|
// Push `tok` back to the lexer, so that the next Lex_ReadToken() call
// returns it again. Only one token may be buffered at a time: pushing
// back a second one raises a lexer error. Does nothing if the lexer is
// already in error state.
void Lex_UngetToken(Lex *p, const Tok *tok)
{
	if (p->hasError)
		return;
	if (p->hasBufferedToken)
		LexerError(p, "%s: BUG: Function called twice", __func__);

	// NOTE: a Tok may be partially allocated! Only copy it up to,
	//       and including, the NUL char terminating its text
	size_t textLen = strlen(tok->text);
	size_t used    = offsetof(Tok, text) + textLen + 1;

	memcpy(&p->buf, tok, used);
	p->hasBufferedToken = TRUE;
}
|
|
|
|
|
|
|
|
// Message layouts, plain and VT100 colored. Each one takes, in order:
// a name (%s), a line number (%u) and the message text (%s).
#define WARN_FMT "%s:%u: WARNING: %s\n"
#define COLOR_WARN_FMT                                                           \
	VTSGR(VTBLD) "%s" VTSGR(VTNOBLD) ":" VTSGR(VTBLD) "%u" VTSGR(VTNOBLD) ": "   \
	VTSGR(VTYEL) VTSGR(VTBLD) "WARNING" VTSGR(VTFGDFLT) VTSGR(VTNOBLD) ": "      \
	"%s\n"

#define ERR_FMT "%s:%u: ERROR: %s\n"
#define COLOR_ERR_FMT                                                            \
	VTSGR(VTBLD) "%s" VTSGR(VTNOBLD) ":" VTSGR(VTBLD) "%u" VTSGR(VTNOBLD) ": "   \
	VTSGR(VTRED) VTSGR(VTBLD) "ERROR" VTSGR(VTFGDFLT) VTSGR(VTNOBLD) ": "        \
	"%s\n"
|
|
|
|
|
|
|
|
static void EmitMessage(Lex *p,
|
|
|
|
void (*emitf)(Lex *, const char *, void *),
|
|
|
|
const char *fmt,
|
|
|
|
...)
|
|
|
|
{
|
|
|
|
va_list va;
|
|
|
|
char *buf;
|
|
|
|
int n1, n2;
|
|
|
|
|
|
|
|
if (emitf == LEX_QUIT)
|
|
|
|
emitf = Lex_Quit;
|
|
|
|
if (emitf == LEX_WARN)
|
|
|
|
emitf = Lex_Warn;
|
|
|
|
|
|
|
|
va_start(va, fmt);
|
|
|
|
n1 = vsnprintf(NULL, 0, fmt, va);
|
|
|
|
va_end(va);
|
|
|
|
if (n1 < 0) {
|
|
|
|
buf = "<Lexer: Can't emit message: vsnprintf() failed!>\n";
|
|
|
|
goto do_emit;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf = (char *) alloca(n1 + 1);
|
|
|
|
va_start(va, fmt);
|
|
|
|
n2 = vsnprintf(buf, n1 + 1, fmt, va);
|
|
|
|
va_end(va);
|
|
|
|
if (n2 < 0) {
|
|
|
|
buf = "<Lexer: Can't emit message: vsnprintf() failed!>\n";
|
|
|
|
goto do_emit;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(n1 == n2);
|
|
|
|
|
|
|
|
do_emit:
|
|
|
|
emitf(p, buf, p->obj);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Flag the lexer as failed and report a printf()-style error message.
 *
 * Nothing happens if `L_NOERR` is set in the lexer flags. Otherwise the
 * error flag is raised unconditionally, and the message is delivered to
 * `p->Error` unless that handler is `LEX_IGN`. The message is decorated
 * with the source name and line, using `COLOR_ERR_FMT` when `L_COLORED`
 * is set and `ERR_FMT` otherwise.
 *
 * \param p   lexer that encountered the error
 * \param fmt printf()-style format string, followed by its arguments
 */
void LexerError(Lex *p, const char *fmt, ...)
{
	if (p->flags & L_NOERR)
		return;

	p->hasError = TRUE;  // *ONLY* place where error flag is set!

	void (*errf)(Lex *, const char *, void *) = p->Error;
	if (errf == LEX_IGN)
		return;  // error is recorded, but never reported

	// Fallback text used whenever formatting fails.
	// No trailing newline here: both ERR_FMT and COLOR_ERR_FMT already
	// terminate the line (same convention as LexerWarning()).
	const char *text = "<Lexer: Can't format error: vsnprintf() failed!>";
	va_list     va;
	int         need, written;

	// First pass: only measure the formatted length
	va_start(va, fmt);
	need = vsnprintf(NULL, 0, fmt, va);
	va_end(va);

	if (need >= 0) {
		// Second pass: format for real, memory lives until function return
		char *buf = (char *) alloca(need + 1);

		va_start(va, fmt);
		written = vsnprintf(buf, need + 1, fmt, va);
		va_end(va);

		if (written >= 0) {
			assert(need == written);
			text = buf;
		}
	}

	const char *msgt = (p->flags & L_COLORED) ? COLOR_ERR_FMT : ERR_FMT;

	EmitMessage(p, errf, msgt, p->name, p->line, text);
}
|
|
|
|
|
|
|
|
/**
 * Report a printf()-style warning message.
 *
 * Nothing happens if `L_NOWARN` is set in the lexer flags, or if the
 * `p->Warn` handler is `LEX_IGN`. The message is decorated with the source
 * name and line, using `COLOR_WARN_FMT` when `L_COLORED` is set and
 * `WARN_FMT` otherwise. The lexer error flag is left untouched.
 *
 * \param p   lexer issuing the warning
 * \param fmt printf()-style format string, followed by its arguments
 */
void LexerWarning(Lex *p, const char *fmt, ...)
{
	if (p->flags & L_NOWARN)
		return;

	void (*warnf)(Lex *, const char *, void *) = p->Warn;
	if (warnf == LEX_IGN)
		return;

	// Fallback text used whenever formatting fails
	const char *text = "<Lexer: Can't format warning: vsnprintf() failed!>";
	va_list     va;
	int         need, written;

	// First pass: only measure the formatted length
	va_start(va, fmt);
	need = vsnprintf(NULL, 0, fmt, va);
	va_end(va);

	if (need >= 0) {
		// Second pass: format for real, memory lives until function return
		char *buf = (char *) alloca(need + 1);

		va_start(va, fmt);
		written = vsnprintf(buf, need + 1, fmt, va);
		va_end(va);

		if (written >= 0) {
			assert(need == written);
			text = buf;
		}
	}

	const char *msgt = (p->flags & L_COLORED) ? COLOR_WARN_FMT : WARN_FMT;

	EmitMessage(p, warnf, msgt, p->name, p->line, text);
}
|
|
|
|
|
|
|
|
/**
 * Read the next token and require its text to be exactly `match`.
 *
 * \param p     lexer to read from
 * \param match expected token text
 *
 * \return `match` itself on success, `NULL` otherwise. On mismatch or
 *         missing token an error is raised through LexerError(), unless
 *         the lexer is already in error state.
 */
char *Lex_MatchToken(Lex *p, const char *match)
{
	Tok   tok;
	char *text = Lex_ReadToken(p, &tok);

	if (text == NULL) {
		// Nothing could be read at all
		if (!p->hasError)
			LexerError(p, "Couldn't read expected token '%s'", match);

		return NULL;
	}
	if (strcmp(text, match) == 0)
		return (char *) match;

	if (!p->hasError)
		LexerError(p, "Read unexpected token '%s' while expecting '%s'", text, match);

	return NULL;
}
|
|
|
|
|
|
|
|
char *Lex_MatchTokenType(Lex *p, Tok *tok, int type , unsigned subtype)
|
|
|
|
{
|
|
|
|
char *t = Lex_ReadToken(p, tok);
|
|
|
|
if (t && tok->type == type && (tok->subtype & subtype) == subtype)
|
|
|
|
return t; // success
|
|
|
|
|
|
|
|
if (p->hasError)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
// Make some effort to provide meaningful diagnostic
|
|
|
|
|
|
|
|
char ty[64], sub[256];
|
|
|
|
char *sp;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case TT_STRING: strcpy(ty, "string"); break;
|
|
|
|
case TT_LITERAL: strcpy(ty, "literal"); break;
|
|
|
|
case TT_NUMBER: strcpy(ty, "numeric"); break;
|
|
|
|
case TT_NAME: strcpy(ty, "name"); break;
|
|
|
|
case TT_PUNCT: strcpy(ty, "punctuation"); break;
|
|
|
|
default:
|
|
|
|
sp = strcpy(ty, "<TYPE: 0x"); sp += 9;
|
|
|
|
sp = Xtoa((unsigned) type, sp);
|
|
|
|
*sp++ = '>';
|
|
|
|
*sp = '\0';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!t) {
|
|
|
|
LexerError(p, "Reached end while expecting %s token", ty);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Report type mismatch
|
|
|
|
if (tok->type != type) {
|
|
|
|
LexerError(p, "Read '%s' while expecting %s token", tok->text, ty);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Report subtype mismatch
|
|
|
|
if (type == TT_NUMBER) {
|
|
|
|
sub[0] = '\0';
|
|
|
|
|
|
|
|
if (subtype & TT_DEC)
|
|
|
|
strcat(sub, "decimal ");
|
|
|
|
if (subtype & TT_HEX) {
|
|
|
|
assert((subtype & (TT_OCT|TT_BIN|TT_DEC|TT_FLOAT)) == 0);
|
|
|
|
strcat(sub, "hex ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_OCT) {
|
|
|
|
assert((subtype & (TT_HEX|TT_BIN|TT_DEC|TT_FLOAT)) == 0);
|
|
|
|
strcat(sub, "octal ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_BIN) {
|
|
|
|
assert((subtype & (TT_HEX|TT_OCT|TT_DEC|TT_FLOAT)) == 0);
|
|
|
|
strcat(sub, "binary ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_UNSIGNED) {
|
|
|
|
assert((subtype & (TT_FLOAT|TT_IPADDR)) == 0);
|
|
|
|
strcat(sub, "unsigned ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_LONG) {
|
|
|
|
assert((subtype & (TT_FLOAT|TT_IPADDR)) == 0);
|
|
|
|
strcat(sub, "long ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_LLONG) {
|
|
|
|
assert(subtype & TT_LONG);
|
|
|
|
strcat(sub, "long ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_FLOAT) {
|
|
|
|
assert((subtype & (TT_INT|TT_IPADDR)) == 0);
|
|
|
|
strcat(sub, "float ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_INT) {
|
|
|
|
assert((subtype & (TT_FLOAT|TT_IPADDR)) == 0);
|
|
|
|
strcat(sub, "integer ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_IPV4) {
|
|
|
|
assert((subtype & (TT_FLOAT|TT_INT)) == 0);
|
|
|
|
assert((subtype & TT_IPV6) == 0);
|
|
|
|
strcat(sub, "IPv4 ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_IPV6) {
|
|
|
|
assert((subtype & (TT_FLOAT|TT_INT)) == 0);
|
|
|
|
assert((subtype & TT_IPV4) == 0);
|
|
|
|
strcat(sub, "IPv6 ");
|
|
|
|
}
|
|
|
|
if (subtype & TT_IPADDR) {
|
|
|
|
assert((subtype & (TT_FLOAT|TT_INT)) == 0);
|
|
|
|
strcat(sub, "internet address ");
|
|
|
|
}
|
|
|
|
|
|
|
|
Df_strtrimr(sub);
|
|
|
|
if (sub[0] != '\0') {
|
|
|
|
LexerError(p, "Read '%s' while expecting %s value", tok->text, sub);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
} else if (type == TT_PUNCT) {
|
|
|
|
const Punctuation *puncts = p->puncts;
|
|
|
|
if (!puncts)
|
|
|
|
puncts = p_punctuations;
|
|
|
|
|
|
|
|
for (size_t i = 0; puncts[i].p; i++) {
|
|
|
|
if (puncts[i].id == subtype) {
|
|
|
|
LexerError(p, "Read '%s' while expecting %s", tok->text, puncts[i].p);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
LexerError(p, "'%s': Wrong %s token", tok->text, ty);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Read the next token, whatever it is, treating its absence as an error.
 *
 * \param p   lexer to read from
 * \param tok storage for the token being read
 *
 * \return `tok->text` on success, `NULL` if no token could be read. In the
 *         latter case an error is raised through LexerError(), unless the
 *         lexer is already in error state.
 */
char *Lex_MatchAnyToken(Lex *p, Tok *tok)
{
	if (!Lex_ReadToken(p, tok)) {
		if (!p->hasError)
			LexerError(p, "Couldn't read expected token");

		return NULL;
	}

	return tok->text;
}
|
|
|
|
|
|
|
|
/**
 * Parse a parenthesized list of `n` floating point values: `( v0 v1 ... )`.
 *
 * Every value may carry an optional sign. All `n` elements of `dest` are
 * written even on failure (Lex_ParseFloat() yields 0.0 when it can't match).
 *
 * \param p    lexer to read from
 * \param dest destination array, at least `n` elements
 * \param n    number of values to parse
 *
 * \return `TRUE` if the lexer is not in error state once parsing is done,
 *         `FALSE` otherwise.
 */
Boolean Lex_ParseMatrix1(Lex *p, float *dest, size_t n)
{
	Tok    paren;
	float *out = dest;

	Lex_MatchTokenType(p, &paren, TT_PUNCT, P_PARENOPEN);

	for (size_t left = n; left > 0; left--)
		*out++ = Lex_ParseFloat(p, /*optionalSign=*/TRUE);

	Lex_MatchTokenType(p, &paren, TT_PUNCT, P_PARENCLOSE);

	return !p->hasError;
}
|
|
|
|
|
|
|
|
/**
 * Parse a parenthesized list of `m` rows, each one being a parenthesized
 * list of `n` floating point values as accepted by Lex_ParseMatrix1().
 *
 * \param p    lexer to read from
 * \param dest destination array, row `i` is stored starting at `dest[i * n]`
 * \param n    number of values per row
 * \param m    number of rows
 *
 * \return `TRUE` if the lexer is not in error state once parsing is done,
 *         `FALSE` otherwise.
 */
Boolean Lex_ParseMatrix2(Lex *p, float *dest, size_t n, size_t m)
{
	Tok    paren;
	size_t row = 0;

	Lex_MatchTokenType(p, &paren, TT_PUNCT, P_PARENOPEN);

	while (row < m) {
		Lex_ParseMatrix1(p, dest + row * n, n);
		row++;
	}

	Lex_MatchTokenType(p, &paren, TT_PUNCT, P_PARENCLOSE);

	return !p->hasError;
}
|
|
|
|
|
|
|
|
/**
 * Parse a parenthesized list of `u` matrices, each one having `m` rows of
 * `n` floating point values as accepted by Lex_ParseMatrix2().
 *
 * \param p    lexer to read from
 * \param dest destination array, matrix `i` is stored starting at
 *             `dest[i * n * m]`
 * \param n    number of values per row
 * \param m    number of rows per matrix
 * \param u    number of matrices
 *
 * \return `TRUE` if the lexer is not in error state once parsing is done,
 *         `FALSE` otherwise.
 */
Boolean Lex_ParseMatrix3(Lex *p, float *dest, size_t n, size_t m, size_t u)
{
	Tok          paren;
	const size_t stride = n * m;  // elements taken by each matrix
	size_t       slice  = 0;

	Lex_MatchTokenType(p, &paren, TT_PUNCT, P_PARENOPEN);

	while (slice < u) {
		Lex_ParseMatrix2(p, dest + slice * stride, n, m);
		slice++;
	}

	Lex_MatchTokenType(p, &paren, TT_PUNCT, P_PARENCLOSE);

	return !p->hasError;
}
|
|
|
|
|
|
|
|
/**
 * Snapshot the lexer read state into local variables of the enclosing scope.
 *
 * Expands to declarations, so it must appear where declarations are legal
 * and at most once per scope. Saves: read position, line number, buffered
 * token flag and error flag. The contents of the token buffer itself are
 * not saved.
 *
 * `p` is any expression evaluating to a `Lex *`, it is evaluated multiple
 * times.
 */
#define LEX_SAVE(p)                                    \
	char    *_last_pos_     = (p)->pos;                \
	unsigned _last_line_    = (p)->line;               \
	Boolean  _was_buffered_ = (p)->hasBufferedToken;   \
	Boolean  _had_error_    = (p)->hasError

/**
 * Roll the lexer back to the state captured by a previous LEX_SAVE() in
 * the same (or an enclosing) scope.
 *
 * `p` is any expression evaluating to a `Lex *`, it is evaluated multiple
 * times.
 */
#define LEX_RESTORE(p) do {                            \
		(p)->pos              = _last_pos_;            \
		(p)->line             = _last_line_;           \
		(p)->hasBufferedToken = _was_buffered_;        \
		(p)->hasError         = _had_error_;           \
	} while (0)
|
|
|
|
|
|
|
|
/**
 * Read the next token only if it lies on the current line.
 *
 * \param p   lexer to read from
 * \param tok storage for the token being read
 *
 * \return `tok->text` if a token was read without crossing any line,
 *         `NULL` otherwise; in the latter case the lexer state captured by
 *         LEX_SAVE() is rolled back.
 */
char *Lex_ReadTokenOnLine(Lex *p, Tok *tok)
{
	LEX_SAVE(p);

	if (!Lex_ReadToken(p, tok) || tok->linesCrossed != 0) {
		// No token, or token belongs to a following line
		LEX_RESTORE(p);
		return NULL;
	}

	return tok->text;
}
|
|
|
|
|
|
|
|
/**
 * Discard every remaining token on the current line.
 *
 * Tokens are read until either none is left, or one is found that crossed
 * at least one line; that token is handed back by rolling the lexer state
 * back to where it was right before reading it.
 *
 * \param p lexer to advance
 */
void Lex_SkipLine(Lex *p)
{
	Tok tok;

	for (;;) {
		LEX_SAVE(p);

		if (!Lex_ReadToken(p, &tok))
			return;  // nothing left to read

		if (tok.linesCrossed > 0) {
			// Token belongs to a following line, leave it there
			LEX_RESTORE(p);
			return;
		}
	}
}
|
|
|
|
|
|
|
|
/**
 * Discard tokens up to, and including, the first one whose text is `match`.
 *
 * \param p     lexer to advance
 * \param match token text to stop at
 *
 * \return `match` itself if such token was found, `NULL` if
 *         Lex_MatchAnyToken() failed before finding it.
 */
char *Lex_SkipUntil(Lex *p, const char *match)
{
	Tok tok;

	do {
		if (!Lex_MatchAnyToken(p, &tok))
			return NULL;

	} while (strcmp(tok.text, match) != 0);

	return (char *) match;
}
|
|
|
|
|
|
|
|
/**
 * Discard a whole braced section, nested sections included.
 *
 * \param p               lexer to advance
 * \param parseFirstBrace when true the opening brace is expected to be the
 *                        next token and is consumed here, otherwise the
 *                        section is assumed to be open already
 *
 * \return `TRUE` once the brace closing the section has been consumed,
 *         `FALSE` if the opening brace is missing or tokens ran out first.
 */
Boolean Lex_SkipBracedSection(Lex *p, Boolean parseFirstBrace)
{
	Tok tok;

	if (parseFirstBrace && !Lex_MatchTokenType(p, &tok, TT_PUNCT, P_BRACEOPEN))
		return FALSE;

	// Track nesting depth, we start inside the outermost section
	unsigned depth = 1;

	while (depth > 0) {
		if (!Lex_MatchAnyToken(p, &tok))
			return FALSE;
		if (tok.type != TT_PUNCT)
			continue;

		switch (tok.subtype) {
		case P_BRACEOPEN:  depth++; break;
		case P_BRACECLOSE: depth--; break;
		default:                    break;
		}
	}

	return TRUE;
}
|
|
|
|
|
|
|
|
/**
 * Consume the next token only if its text is exactly `match`.
 *
 * \param p     lexer to read from
 * \param match expected token text
 *
 * \return `match` itself if the token was consumed, `NULL` otherwise; in
 *         the latter case the lexer state captured by LEX_SAVE() is rolled
 *         back.
 */
char *Lex_CheckToken(Lex *p, const char *match)
{
	Tok tok;

	LEX_SAVE(p);

	char *text = Lex_ReadToken(p, &tok);
	if (!text || strcmp(text, match) != 0) {
		LEX_RESTORE(p);
		return NULL;
	}

	return (char *) match;
}
|
|
|
|
|
|
|
|
/**
 * Consume the next token only if it has the given type and subtype.
 *
 * A token matches when its type equals `type` and all the bits in
 * `subtype` are set in its own subtype (a `subtype` of 0 accepts any
 * subtype).
 *
 * \param p       lexer to read from
 * \param tok     storage for the token being read
 * \param type    expected token type (`TT_*`)
 * \param subtype expected token subtype
 *
 * \return The value returned by Lex_ReadToken() if the token was consumed,
 *         `NULL` otherwise; in the latter case the lexer state captured by
 *         LEX_SAVE() is rolled back.
 */
char *Lex_CheckTokenType(Lex *p, Tok *tok, int type, unsigned subtype)
{
	LEX_SAVE(p);

	char *text = Lex_ReadToken(p, tok);
	if (!text || tok->type != type || (tok->subtype & subtype) != subtype) {
		LEX_RESTORE(p);
		return NULL;
	}

	return text;
}
|
|
|
|
|
|
|
|
/**
 * Test whether the text of the next token is exactly `match`.
 *
 * The lexer state captured by LEX_SAVE() is always rolled back after
 * reading, whether the token matches or not.
 *
 * \param p     lexer to inspect
 * \param match expected token text
 *
 * \return `match` itself if the next token matches, `NULL` otherwise.
 */
char *Lex_PeekToken(Lex *p, const char *match)
{
	Tok tok;

	LEX_SAVE(p);
	char *text = Lex_ReadToken(p, &tok);
	LEX_RESTORE(p);

	if (!text)
		return NULL;
	if (strcmp(text, match) != 0)
		return NULL;

	return (char *) match;
}
|
|
|
|
|
|
|
|
/**
 * Test whether the next token has the given type and subtype.
 *
 * The lexer state captured by LEX_SAVE() is always rolled back after
 * reading, whether the token matches or not; `tok` is written regardless.
 * A token matches when its type equals `type` and all the bits in
 * `subtype` are set in its own subtype.
 *
 * \param p       lexer to inspect
 * \param tok     storage for the token being read
 * \param type    expected token type (`TT_*`)
 * \param subtype expected token subtype
 *
 * \return The value returned by Lex_ReadToken() if the token matches,
 *         `NULL` otherwise.
 */
char *Lex_PeekTokenType(Lex *p, Tok *tok, int type, unsigned subtype)
{
	LEX_SAVE(p);
	char *text = Lex_ReadToken(p, tok);
	LEX_RESTORE(p);

	if (!text || tok->type != type)
		return NULL;
	if ((tok->subtype & subtype) != subtype)
		return NULL;

	return text;
}
|