reduce data size by 320 bytes by packing struct lex_state tighter

and reducing the number of them allocated in each “pack” from 31 to 7,
since $(…) is recursive anyway (I had to try hard to make it need even 5)
This commit is contained in:
tg 2011-03-07 20:07:52 +00:00
parent 1f392ab09b
commit bfe34ba110

191
lex.c
View File

@ -22,7 +22,7 @@
#include "sh.h" #include "sh.h"
__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.124 2011/03/06 17:08:12 tg Exp $"); __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.125 2011/03/07 20:07:52 tg Exp $");
/* /*
* states while lexing word * states while lexing word
@ -44,63 +44,44 @@ __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.124 2011/03/06 17:08:12 tg Exp $");
#define SLETARRAY 14 /* inside =( ), just copy */ #define SLETARRAY 14 /* inside =( ), just copy */
#define SADELIM 15 /* like SBASE, looking for delimiter */ #define SADELIM 15 /* like SBASE, looking for delimiter */
#define SHERESTRING 16 /* parsing <<< string */ #define SHERESTRING 16 /* parsing <<< string */
#define SINVALID 255 /* invalid state */
/* /*
* Structure to keep track of the lexing state and the various pieces of info * Structure to keep track of the lexing state and the various pieces of info
* needed for each particular state. * needed for each particular state.
*/ */
typedef struct lex_state Lex_state; typedef struct lex_state {
struct lex_state {
int ls_state;
union { union {
/* $((...)) */ /* point to the next state block */
struct sasparen_info { Lex_state *base;
int nparen; /* count open parenthesis */ /* marks start of $(( in output string */
int start; /* marks start of $(( in output str */ int start;
#define ls_sasparen ls_info.u_sasparen /* SBQUOTE: true if in double quotes: "`...`" */
} u_sasparen; /* SEQUOTE: got NUL, ignore rest of string */
bool abool;
/* ((...)) */ /* SADELIM information */
struct sletparen_info { struct {
int nparen; /* count open parenthesis */ /* SADELIM_BASH, SADELIM_MAKE */
#define ls_sletparen ls_info.u_sletparen unsigned char style;
} u_sletparen; /* character to search for */
unsigned char delimiter;
/* `...` */ /* max. number of delimiters */
struct sbquote_info { unsigned char num;
int indquotes; /* true if in double quotes: "`...`" */ /* ofs. into sadelim_flags[] */
#define ls_sbquote ls_info.u_sbquote unsigned char flags;
} u_sbquote; } adelim;
} u;
#ifndef MKSH_SMALL /* count open parentheses */
/* =(...) */ short nparen;
struct sletarray_info { /* type of this state */
int nparen; /* count open parentheses */ uint8_t type;
#define ls_sletarray ls_info.u_sletarray } Lex_state;
} u_sletarray; #define ls_base u.base
#endif #define ls_start u.start
#define ls_bool u.abool
/* ADELIM */ #define ls_adelim u.adelim
struct sadelim_info {
unsigned char nparen; /* count open parentheses */
#define SADELIM_BASH 0 #define SADELIM_BASH 0
#define SADELIM_MAKE 1 #define SADELIM_MAKE 1
unsigned char style;
unsigned char delimiter;
unsigned char num;
unsigned char flags; /* ofs. into sadelim_flags[] */
#define ls_sadelim ls_info.u_sadelim
} u_sadelim;
/* $'...' */
struct sequote_info {
bool got_NUL; /* ignore rest of string */
#define ls_sequote ls_info.u_sequote
} u_sequote;
Lex_state *base; /* used to point to next state block */
} ls_info;
};
typedef struct { typedef struct {
Lex_state *base; Lex_state *base;
@ -154,18 +135,18 @@ getsc_(void)
#define getsc_() _getsc_() #define getsc_() _getsc_()
#endif #endif
#define STATE_BSIZE 32 #define STATE_BSIZE 8
#define PUSH_STATE(s) do { \ #define PUSH_STATE(s) do { \
if (++statep == state_info.end) \ if (++statep == state_info.end) \
statep = push_state_(&state_info, statep); \ statep = push_state_(&state_info, statep); \
state = statep->ls_state = (s); \ state = statep->type = (s); \
} while (0) } while (0)
#define POP_STATE() do { \ #define POP_STATE() do { \
if (--statep == state_info.base) \ if (--statep == state_info.base) \
statep = pop_state_(&state_info, statep); \ statep = pop_state_(&state_info, statep); \
state = statep->ls_state; \ state = statep->type; \
} while (0) } while (0)
/** /**
@ -187,8 +168,8 @@ yylex(int cf)
char *sp, *dp; char *sp, *dp;
Again: Again:
states[0].ls_state = -1; states[0].type = SINVALID;
states[0].ls_info.base = NULL; states[0].ls_base = NULL;
statep = &states[1]; statep = &states[1];
state_info.base = states; state_info.base = states;
state_info.end = &state_info.base[STATE_BSIZE]; state_info.end = &state_info.base[STATE_BSIZE];
@ -204,11 +185,11 @@ yylex(int cf)
/* enclose arguments in (double) quotes */ /* enclose arguments in (double) quotes */
*wp++ = OQUOTE; *wp++ = OQUOTE;
state = SLETPAREN; state = SLETPAREN;
statep->ls_sletparen.nparen = 0; statep->nparen = 0;
#ifndef MKSH_SMALL #ifndef MKSH_SMALL
} else if (cf&LETARRAY) { } else if (cf&LETARRAY) {
state = SLETARRAY; state = SLETARRAY;
statep->ls_sletarray.nparen = 0; statep->nparen = 0;
#endif #endif
} else { } else {
/* normal lexing */ /* normal lexing */
@ -230,7 +211,7 @@ yylex(int cf)
} }
/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
statep->ls_state = state; statep->type = state;
/* check for here string */ /* check for here string */
if (state == SHEREDELIM) { if (state == SHEREDELIM) {
@ -255,14 +236,14 @@ yylex(int cf)
switch (state) { switch (state) {
case SADELIM: case SADELIM:
if (c == '(') if (c == '(')
statep->ls_sadelim.nparen++; statep->nparen++;
else if (c == ')') else if (c == ')')
statep->ls_sadelim.nparen--; statep->nparen--;
else if (statep->ls_sadelim.nparen == 0 && else if (statep->nparen == 0 &&
(c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) { (c == /*{*/ '}' || c == statep->ls_adelim.delimiter)) {
*wp++ = ADELIM; *wp++ = ADELIM;
*wp++ = c; *wp++ = c;
if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0) if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
POP_STATE(); POP_STATE();
if (c == /*{*/ '}') if (c == /*{*/ '}')
POP_STATE(); POP_STATE();
@ -376,8 +357,8 @@ yylex(int cf)
c = getsc(); c = getsc();
if (c == '(') /*)*/ { if (c == '(') /*)*/ {
PUSH_STATE(SASPAREN); PUSH_STATE(SASPAREN);
statep->ls_sasparen.nparen = 2; statep->nparen = 2;
statep->ls_sasparen.start = statep->ls_start =
Xsavepos(ws, wp); Xsavepos(ws, wp);
*wp++ = EXPRSUB; *wp++ = EXPRSUB;
} else { } else {
@ -407,10 +388,10 @@ yylex(int cf)
*wp++ = ':'; *wp++ = ':';
PUSH_STATE(SBRACE); PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM); PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH; statep->ls_adelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = ':'; statep->ls_adelim.delimiter = ':';
statep->ls_sadelim.num = 1; statep->ls_adelim.num = 1;
statep->ls_sadelim.nparen = 0; statep->nparen = 0;
break; break;
} else if (ksh_isdigit(c) || } else if (ksh_isdigit(c) ||
c == '('/*)*/ || c == ' ' || c == '('/*)*/ || c == ' ' ||
@ -424,10 +405,10 @@ yylex(int cf)
ungetsc(c); ungetsc(c);
PUSH_STATE(SBRACE); PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM); PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH; statep->ls_adelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = ':'; statep->ls_adelim.delimiter = ':';
statep->ls_sadelim.num = 2; statep->ls_adelim.num = 2;
statep->ls_sadelim.nparen = 0; statep->nparen = 0;
break; break;
} }
} else if (c == '/') { } else if (c == '/') {
@ -440,10 +421,10 @@ yylex(int cf)
ungetsc(c); ungetsc(c);
PUSH_STATE(SBRACE); PUSH_STATE(SBRACE);
PUSH_STATE(SADELIM); PUSH_STATE(SADELIM);
statep->ls_sadelim.style = SADELIM_BASH; statep->ls_adelim.style = SADELIM_BASH;
statep->ls_sadelim.delimiter = '/'; statep->ls_adelim.delimiter = '/';
statep->ls_sadelim.num = 1; statep->ls_adelim.num = 1;
statep->ls_sadelim.nparen = 0; statep->nparen = 0;
break; break;
} }
/* /*
@ -485,7 +466,7 @@ yylex(int cf)
*wp++ = OQUOTE; *wp++ = OQUOTE;
ignore_backslash_newline++; ignore_backslash_newline++;
PUSH_STATE(SEQUOTE); PUSH_STATE(SEQUOTE);
statep->ls_sequote.got_NUL = false; statep->ls_bool = false;
break; break;
} else if (c == '"' && (state == SBASE)) { } else if (c == '"' && (state == SBASE)) {
/* XXX which other states are valid? */ /* XXX which other states are valid? */
@ -522,19 +503,19 @@ yylex(int cf)
* literal meaning, except when followed by * literal meaning, except when followed by
* $ ` \."). * $ ` \.").
*/ */
statep->ls_sbquote.indquotes = 0; statep->ls_bool = false;
s2 = statep; s2 = statep;
base = state_info.base; base = state_info.base;
while (1) { while (1) {
for (; s2 != base; s2--) { for (; s2 != base; s2--) {
if (s2->ls_state == SDQUOTE) { if (s2->type == SDQUOTE) {
statep->ls_sbquote.indquotes = 1; statep->ls_bool = true;
break; break;
} }
} }
if (s2 != base) if (s2 != base)
break; break;
if (!(s2 = s2->ls_info.base)) if (!(s2 = s2->ls_base))
break; break;
base = s2-- - STATE_BSIZE; base = s2-- - STATE_BSIZE;
} }
@ -562,8 +543,8 @@ yylex(int cf)
if ((c2 = unbksl(true, s_get, s_put)) == -1) if ((c2 = unbksl(true, s_get, s_put)) == -1)
c2 = s_get(); c2 = s_get();
if (c2 == 0) if (c2 == 0)
statep->ls_sequote.got_NUL = true; statep->ls_bool = true;
if (!statep->ls_sequote.got_NUL) { if (!statep->ls_bool) {
char ts[4]; char ts[4];
if ((unsigned int)c2 < 0x100) { if ((unsigned int)c2 < 0x100) {
@ -578,7 +559,7 @@ yylex(int cf)
} }
} }
} }
} else if (!statep->ls_sequote.got_NUL) { } else if (!statep->ls_bool) {
*wp++ = QCHAR; *wp++ = QCHAR;
*wp++ = c; *wp++ = c;
} }
@ -610,10 +591,10 @@ yylex(int cf)
* (embed "...", $(...), etc.) * (embed "...", $(...), etc.)
*/ */
if (c == '(') if (c == '(')
statep->ls_sasparen.nparen++; statep->nparen++;
else if (c == ')') { else if (c == ')') {
statep->ls_sasparen.nparen--; statep->nparen--;
if (statep->ls_sasparen.nparen == 1) { if (statep->nparen == 1) {
if ((c2 = getsc()) == /*(*/')') { if ((c2 = getsc()) == /*(*/')') {
POP_STATE(); POP_STATE();
/* end of EXPRSUB */ /* end of EXPRSUB */
@ -630,7 +611,7 @@ yylex(int cf)
*/ */
*wp = EOS; *wp = EOS;
wp = Xrestpos(ws, wp, wp = Xrestpos(ws, wp,
statep->ls_sasparen.start); statep->ls_start);
POP_STATE(); POP_STATE();
/* dp = $((blah))\0 */ /* dp = $((blah))\0 */
dp = wdstrip(wp, true, false); dp = wdstrip(wp, true, false);
@ -713,7 +694,7 @@ yylex(int cf)
*wp++ = c; *wp++ = c;
break; break;
case '"': case '"':
if (statep->ls_sbquote.indquotes) { if (statep->ls_bool) {
*wp++ = c; *wp++ = c;
break; break;
} }
@ -737,8 +718,8 @@ yylex(int cf)
/* LETEXPR: (( ... )) */ /* LETEXPR: (( ... )) */
case SLETPAREN: case SLETPAREN:
if (c == /*(*/ ')') { if (c == /*(*/ ')') {
if (statep->ls_sletparen.nparen > 0) if (statep->nparen > 0)
--statep->ls_sletparen.nparen; --statep->nparen;
else if ((c2 = getsc()) == /*(*/ ')') { else if ((c2 = getsc()) == /*(*/ ')') {
c = 0; c = 0;
*wp++ = CQUOTE; *wp++ = CQUOTE;
@ -763,20 +744,20 @@ yylex(int cf)
} }
} else if (c == '(') } else if (c == '(')
/* /*
* parenthesis inside quotes and * parentheses inside quotes and
* backslashes are lost, but AT&T ksh * backslashes are lost, but AT&T ksh
* doesn't count them either * doesn't count them either
*/ */
++statep->ls_sletparen.nparen; ++statep->nparen;
goto Sbase2; goto Sbase2;
#ifndef MKSH_SMALL #ifndef MKSH_SMALL
/* LETARRAY: =( ... ) */ /* LETARRAY: =( ... ) */
case SLETARRAY: case SLETARRAY:
if (c == '('/*)*/) if (c == '('/*)*/)
++statep->ls_sletarray.nparen; ++statep->nparen;
else if (c == /*(*/')') else if (c == /*(*/')')
if (statep->ls_sletarray.nparen-- == 0) { if (statep->nparen-- == 0) {
c = 0; c = 0;
goto Done; goto Done;
} }
@ -799,7 +780,7 @@ yylex(int cf)
} else if (c == '$') { } else if (c == '$') {
if ((c2 = getsc()) == '\'') { if ((c2 = getsc()) == '\'') {
PUSH_STATE(SEQUOTE); PUSH_STATE(SEQUOTE);
statep->ls_sequote.got_NUL = false; statep->ls_bool = false;
goto sherestring_quoted; goto sherestring_quoted;
} else if (c2 == '"') } else if (c2 == '"')
goto sherestring_dquoted; goto sherestring_dquoted;
@ -814,7 +795,7 @@ yylex(int cf)
Xstring(ws, wp)[0] = QCHAR; Xstring(ws, wp)[0] = QCHAR;
} else if (c == '"') { } else if (c == '"') {
sherestring_dquoted: sherestring_dquoted:
state = statep->ls_state = SHEREDQUOTE; state = statep->type = SHEREDQUOTE;
*wp++ = OQUOTE; *wp++ = OQUOTE;
/* just don't IFS split; no quoting mode */ /* just don't IFS split; no quoting mode */
} else { } else {
@ -846,7 +827,7 @@ yylex(int cf)
} else if (c == '$') { } else if (c == '$') {
if ((c2 = getsc()) == '\'') { if ((c2 = getsc()) == '\'') {
PUSH_STATE(SEQUOTE); PUSH_STATE(SEQUOTE);
statep->ls_sequote.got_NUL = false; statep->ls_bool = false;
goto sheredelim_quoted; goto sheredelim_quoted;
} else if (c2 == '"') } else if (c2 == '"')
goto sheredelim_dquoted; goto sheredelim_dquoted;
@ -859,7 +840,7 @@ yylex(int cf)
ignore_backslash_newline++; ignore_backslash_newline++;
} else if (c == '"') { } else if (c == '"') {
sheredelim_dquoted: sheredelim_dquoted:
state = statep->ls_state = SHEREDQUOTE; state = statep->type = SHEREDQUOTE;
*wp++ = OQUOTE; *wp++ = OQUOTE;
} else { } else {
sheredelim_regular: sheredelim_regular:
@ -872,7 +853,7 @@ yylex(int cf)
case SHEREDQUOTE: case SHEREDQUOTE:
if (c == '"') { if (c == '"') {
*wp++ = CQUOTE; *wp++ = CQUOTE;
state = statep->ls_state = state = statep->type =
/* dp[1] == '<' means here string */ /* dp[1] == '<' means here string */
Xstring(ws, wp)[1] == '<' ? Xstring(ws, wp)[1] == '<' ?
SHERESTRING : SHEREDELIM; SHERESTRING : SHEREDELIM;
@ -920,7 +901,7 @@ yylex(int cf)
yyerror("no closing quote\n"); yyerror("no closing quote\n");
#ifndef MKSH_SMALL #ifndef MKSH_SMALL
if (state == SLETARRAY && statep->ls_sletarray.nparen != -1) if (state == SLETARRAY && statep->nparen != -1)
yyerror("%s: %s\n", T_synerr, "missing )"); yyerror("%s: %s\n", T_synerr, "missing )");
#endif #endif
@ -1752,7 +1733,7 @@ push_state_(State_info *si, Lex_state *old_end)
{ {
Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP); Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
news[0].ls_info.base = old_end; news[0].ls_base = old_end;
si->base = &news[0]; si->base = &news[0];
si->end = &news[STATE_BSIZE]; si->end = &news[STATE_BSIZE];
return (&news[1]); return (&news[1]);
@ -1763,8 +1744,8 @@ pop_state_(State_info *si, Lex_state *old_end)
{ {
Lex_state *old_base = si->base; Lex_state *old_base = si->base;
si->base = old_end->ls_info.base - STATE_BSIZE; si->base = old_end->ls_base - STATE_BSIZE;
si->end = old_end->ls_info.base; si->end = old_end->ls_base;
afree(old_base, ATEMP); afree(old_base, ATEMP);