jehanne/sys/src/cmd/rc/lex.c

/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include "rc.h"
#include "exec.h"
#include "io.h"
#include "getflags.h"
#include "fns.h"
int getnext(void);

/*
 * Report whether c may appear inside an unquoted word.
 * Returns 0 for EOF and for any of the shell's special
 * characters listed below, 1 otherwise.
 */
int
wordchr(int c)
{
	static char special[] = "\n \t#;&|^$=`'{}()<>";

	if(c == EOF)
		return 0;
	/* strchr also matches the terminating NUL, so c==0 is not a word character */
	return strchr(special, c) == 0;
}

/*
 * Report whether c may appear in a variable name.
 * Anything above ' ' qualifies unless it is one of the
 * punctuation characters listed below; letters, digits,
 * '_' and '*' (the set accepted by the former, stricter
 * definition) are therefore all still accepted.
 */
int
idchr(int c)
{
	static char punct[] = "!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~";

	if(c <= ' ')
		return 0;
	return strchr(punct, c) == 0;
}
int future = EOF;	/* one-character lookahead filled by nextc(); EOF means the slot is empty */
int doprompt = 1;	/* nonzero: getnext() calls pprompt() before its next read */
int inquote;	/* set by yylex() while reading a quoted word; getnext() then leaves backslashes alone */
int incomm;	/* set by skipwhite() while skipping a comment; getnext() then does not join lines on backslash-newline */
/*
 * Peek at the next input character without consuming it.
 * getnext() is called only when the lookahead slot is
 * empty (future==EOF); otherwise the held character is
 * returned again.
 */

int
nextc(void)
{
	if(future != EOF)
		return future;
	future = getnext();
	return future;
}
/*
 * Consume the lookahead character: record it in lastc,
 * empty the lookahead slot and return the character.
 */

int
advance(void)
{
	int taken;

	taken = nextc();
	lastc = future;
	future = EOF;
	return taken;
}
/*
 * Read one character from the current input (runq->cmdfd).
 *
 * Outside quotes a backslash is looked at together with the
 * character after it: backslash-newline becomes a single space
 * (unless a comment is being skipped), anything else yields the
 * backslash now and the following character on the next call.
 * Returns EOF once the input is exhausted and on every call
 * after that.
 */

int
getnext(void)
{
	static int peekc = EOF;	/* character held back after a lone backslash */
	int c, held;

	if(peekc != EOF){
		held = peekc;
		peekc = EOF;
		return held;
	}
	if(runq->eof)
		return EOF;
	if(doprompt)
		pprompt();

	c = rchr(runq->cmdfd);
	if(c == '\\' && !inquote){
		c = rchr(runq->cmdfd);
		if(c != '\n' || incomm){
			/* not a line continuation: hand out the backslash first */
			peekc = c;
			c = '\\';
		}else{
			/* joined line; prompt again before the next read */
			doprompt = 1;
			c = ' ';
		}
	}

	/* end of line or end of input means the next read starts a new line */
	doprompt = c == EOF || c == '\n' || doprompt;
	if(c == EOF){
		runq->eof++;
		return c;
	}
	/* echo the input when flag V is set, or flag v with ndot>=2 */
	if(flag['V'] || (ndot >= 2 && flag['v']))
		pchr(err, c);
	return c;
}

/*
 * Called by getnext() when doprompt is set, before a new line is read.
 *
 * For interactive input (runq->iflag) it prints the current prompt
 * string on err, writes the working directory to /dev/wdir if newwdir
 * is set, and then points promptstr at the second word of the prompt
 * variable, or at a tab when the variable has fewer than two words.
 * In every case it increments runq->lineno and clears doprompt.
 */
void
pprompt(void)
{
	var *prompt;

	if(runq->iflag){
		pstr(err, promptstr);
		flush(err);
		if(newwdir){
			char dir[4096];
			int fd;

			if((fd=open("/dev/wdir", OWRITE))>=0){
				/*
				 * dir is uninitialised until getwd fills it in;
				 * skip the write if getwd failed rather than
				 * running strlen over stack garbage.
				 */
				if(getwd(dir, sizeof(dir)) != 0)
					write(fd, dir, strlen(dir));
				close(fd);
			}
			newwdir = 0;
		}
		prompt = vlook(ENV_PROMPT);
		if(prompt->val && prompt->val->next)
			promptstr = prompt->val->next->word;
		else
			promptstr="\t";
	}
	runq->lineno++;
	doprompt = 0;
}

/*
 * Skip blanks, tabs and comments in the lookahead stream.
 * A '#' always starts a comment here (inquote is not
 * consulted); the comment runs up to, but does not include,
 * the next newline or EOF.  incomm is set while the comment
 * is being skipped.  On return the lookahead holds the first
 * character that is neither a blank nor a tab.
 */
void
skipwhite(void)
{
	int ch;

	for(;;){
		ch = nextc();
		if(ch == '#'){
			incomm = 1;
			while((ch = nextc()) != '\n' && ch != EOF)
				advance();
			incomm = 0;
		}
		if(ch != ' ' && ch != '\t')
			return;
		advance();
	}
}

/*
 * Skip white space and comments as skipwhite() does, and
 * newlines as well, stopping at the first character that is
 * none of these.
 */
void
skipnl(void)
{
	skipwhite();
	while(nextc() == '\n'){
		advance();
		skipwhite();
	}
}

/*
 * If the lookahead character equals c, consume it and
 * return 1; otherwise leave the input alone and return 0.
 */
int
nextis(int c)
{
	if(nextc() != c)
		return 0;
	advance();
	return 1;
}

/*
 * Store val at p inside the token buffer tok and return the
 * next free position.  A null p (an earlier overflow) is
 * passed straight through.  When p is the last slot of tok
 * the token is terminated there, an error is reported and 0
 * is returned so that later calls become no-ops.
 */
char*
addtok(char *p, int val)
{
	if(p == 0)
		return 0;
	if(p != &tok[NTOK-1]){
		*p = val;
		return p+1;
	}
	*p = 0;
	yyerror("token buffer too short");
	return 0;
}

char*
addutf(char *p, int c)
{
	uint8_t b, m;
	int i;

	p = addtok(p, c);	/* 1-byte UTF runes are special */
	if(onebyte(c))
		return p;

	m = 0xc0;
	b = 0x80;
	for(i=1; i < UTFmax; i++){
		if((c&m) == b)
			break;
		p = addtok(p, advance());
		b = m;
		m = (m >> 1)|0x80;
	}
	return p;
}

int lastdol;	/* was the last token read '$' or '$#' or '"'? */
int lastword;	/* was the last token read a word or compound word terminator? */

int
yylex(void)
{
	int c, d = nextc();
	char *w = tok;
	struct tree *t;
	yylval.tree = 0;
	/*
	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
	 * WORD then we alter the meaning of what follows.  If the next character
	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
	 * if the next character is the first character of a simple or compound word,
	 * we insert a `^' before it.
	 */
	if(lastword){
		lastword = 0;
		if(d=='('){
			advance();
			strcpy(tok, "( [SUB]");
			return SUB;
		}
		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
			strcpy(tok, "^");
			return '^';
		}
	}
	inquote = 0;
	skipwhite();
	switch(c = advance()){
	case EOF:
		lastdol = 0;
		strcpy(tok, "EOF");
		return EOF;
	case '$':
		lastdol = 1;
		if(nextis('#')){
			strcpy(tok, "$#");
			return COUNT;
		}
		if(nextis('"')){
			strcpy(tok, "$\"");
			return '"';
		}
		strcpy(tok, "$");
		return '$';
	case '&':
		lastdol = 0;
		if(nextis('&')){
			skipnl();
			strcpy(tok, "&&");
			return ANDAND;
		}
		strcpy(tok, "&");
		return '&';
	case '|':
		lastdol = 0;
		if(nextis(c)){
			skipnl();
			strcpy(tok, "||");
			return OROR;
		}
	case '<':
	case '>':
		lastdol = 0;
		/*
		 * funny redirection tokens:
		 *	redir:	arrow | arrow '[' fd ']'
		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
		 *	fd:	digit | digit '=' | digit '=' digit
		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
		 * some possibilities are nonsensical and get a message.
		 */
		*w++=c;
		t = newtree();
		switch(c){
		case '|':
			t->type = PIPE;
			t->fd0 = 1;
			t->fd1 = 0;
			break;
		case '>':
			t->type = REDIR;
			if(nextis(c)){
				t->rtype = APPEND;
				*w++=c;
			}
			else t->rtype = WRITE;
			t->fd0 = 1;
			break;
		case '<':
			t->type = REDIR;
			if(nextis(c)){
				t->rtype = HERE;
				*w++=c;
			} else if (nextis('>')){
				t->rtype = RDWR;
				*w++=c;
			} else t->rtype = READ;
			t->fd0 = 0;
			break;
		}
		if(nextis('[')){
			*w++='[';
			c = advance();
			*w++=c;
			if(c<'0' || '9'<c){
			RedirErr:
				*w = 0;
				yyerror(t->type==PIPE?"pipe syntax"
						:"redirection syntax");
				return EOF;
			}
			t->fd0 = 0;
			do{
				t->fd0 = t->fd0*10+c-'0';
				*w++=c;
				c = advance();
			}while('0'<=c && c<='9');
			if(c=='='){
				*w++='=';
				if(t->type==REDIR)
					t->type = DUP;
				c = advance();
				if('0'<=c && c<='9'){
					t->rtype = DUPFD;
					t->fd1 = t->fd0;
					t->fd0 = 0;
					do{
						t->fd0 = t->fd0*10+c-'0';
						*w++=c;
						c = advance();
					}while('0'<=c && c<='9');
				}
				else{
					if(t->type==PIPE)
						goto RedirErr;
					t->rtype = CLOSE;
				}
			}
			if(c!=']'
			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
				goto RedirErr;
			*w++=']';
		}
		*w='\0';
		yylval.tree = t;
		if(t->type==PIPE)
			skipnl();
		return t->type;
	case '\'':
		lastdol = 0;
		lastword = 1;
		inquote = 1;
		for(;;){
			c = advance();
			if(c==EOF)
				break;
			if(c=='\''){
				if(nextc()!='\'')
					break;
				advance();
			}
			w = addutf(w, c);
		}
		if(w!=0)
			*w='\0';
		t = token(tok, WORD);
		t->quoted = 1;
		yylval.tree = t;
		return t->type;
	}
	if(!wordchr(c)){
		lastdol = 0;
		tok[0] = c;
		tok[1]='\0';
		return c;
	}
	for(;;){
		if(c=='*' || c=='[' || c=='?' || c==GLOB)
			w = addtok(w, GLOB);
		w = addutf(w, c);
		c = nextc();
		if(lastdol?!idchr(c):!wordchr(c)) break;
		advance();
	}

	lastword = 1;
	lastdol = 0;
	if(w!=0)
		*w='\0';
	t = klook(tok);
	if(t->type!=WORD)
		lastword = 0;
	t->quoted = 0;
	yylval.tree = t;
	return t->type;
}
first usable version of kernel and commands After an year of hard work, this is a first "usable" version of Jehanne. 2016-11-25 16:18:40 +00:00			`/*`
			`* This file is part of the UCB release of Plan 9. It is subject to the license`
			`* terms in the LICENSE file found in the top-level directory of this`
			`* distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No`
			`* part of the UCB release of Plan 9, including this file, may be copied,`
			`* modified, propagated, or distributed except according to the terms contained`
			`* in the LICENSE file.`
			`*/`

			`#include "rc.h"`
			`#include "exec.h"`
			`#include "io.h"`
			`#include "getflags.h"`
			`#include "fns.h"`
			`int getnext(void);`

			`int`
			`wordchr(int c)`
			`{`
			return !strchr("\n \t#;&\|^$=`'{}()<>", c) && c!=EOF;
			`}`

			`int`
			`idchr(int c)`
			`{`
			`/*`
			`* Formerly:`
			`* return 'a'<=c && c<='z' \|\| 'A'<=c && c<='Z' \|\| '0'<=c && c<='9'`
			`* \|\| c=='_' \|\| c=='*';`
			`*/`
			return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{\|}~", c);
			`}`
			`int future = EOF;`
			`int doprompt = 1;`
			`int inquote;`
			`int incomm;`
			`/*`
			`* Look ahead in the input stream`
			`*/`

			`int`
			`nextc(void)`
			`{`
			`if(future==EOF)`
			`future = getnext();`
			`return future;`
			`}`
			`/*`
			`* Consume the lookahead character.`
			`*/`

			`int`
			`advance(void)`
			`{`
			`int c = nextc();`
			`lastc = future;`
			`future = EOF;`
			`return c;`
			`}`
			`/*`
			`* read a character from the input stream`
			`*/`

			`int`
			`getnext(void)`
			`{`
			`int c;`
			`static int peekc = EOF;`
			`if(peekc!=EOF){`
			`c = peekc;`
			`peekc = EOF;`
			`return c;`
			`}`
			`if(runq->eof)`
			`return EOF;`
			`if(doprompt)`
			`pprompt();`
			`c = rchr(runq->cmdfd);`
			`if(!inquote && c=='\\'){`
			`c = rchr(runq->cmdfd);`
			`if(c=='\n' && !incomm){ /* don't continue a comment */`
			`doprompt = 1;`
			`c=' ';`
			`}`
			`else{`
			`peekc = c;`
			`c='\\';`
			`}`
			`}`
			`doprompt = doprompt \|\| c=='\n' \|\| c==EOF;`
			`if(c==EOF)`
			`runq->eof++;`
			`else if(flag['V'] \|\| ndot>=2 && flag['v']) pchr(err, c);`
			`return c;`
			`}`

			`void`
			`pprompt(void)`
			`{`
			`var *prompt;`
			`if(runq->iflag){`
			`pstr(err, promptstr);`
			`flush(err);`
rc: import 9front's improvements 2017-10-18 01:10:06 +02:00			`if(newwdir){`
			`char dir[4096];`
			`int fd;`
			`if((fd=open("/dev/wdir", OWRITE))>=0){`
			`getwd(dir, sizeof(dir));`
			`write(fd, dir, strlen(dir));`
			`close(fd);`
			`}`
			`newwdir = 0;`
			`}`
rc: define constants for rc variable names 2017-10-18 22:38:30 +02:00			`prompt = vlook(ENV_PROMPT);`
first usable version of kernel and commands After an year of hard work, this is a first "usable" version of Jehanne. 2016-11-25 16:18:40 +00:00			`if(prompt->val && prompt->val->next)`
			`promptstr = prompt->val->next->word;`
			`else`
			`promptstr="\t";`
			`}`
			`runq->lineno++;`
			`doprompt = 0;`
			`}`

			`void`
			`skipwhite(void)`
			`{`
			`int c;`
			`for(;;){`
			`c = nextc();`
			`/* Why did this used to be if(!inquote && c=='#') ?? */`
			`if(c=='#'){`
			`incomm = 1;`
			`for(;;){`
			`c = nextc();`
			`if(c=='\n' \|\| c==EOF) {`
			`incomm = 0;`
			`break;`
			`}`
			`advance();`
			`}`
			`}`
			`if(c==' ' \|\| c=='\t')`
			`advance();`
			`else return;`
			`}`
			`}`

			`void`
			`skipnl(void)`
			`{`
			`int c;`
			`for(;;){`
			`skipwhite();`
			`c = nextc();`
			`if(c!='\n')`
			`return;`
			`advance();`
			`}`
			`}`

			`int`
			`nextis(int c)`
			`{`
			`if(nextc()==c){`
			`advance();`
			`return 1;`
			`}`
			`return 0;`
			`}`

			`char*`
			`addtok(char *p, int val)`
			`{`
			`if(p==0)`
			`return 0;`
rc: import 9front's improvements 2017-10-18 01:10:06 +02:00			`if(p == &tok[NTOK-1]){`
first usable version of kernel and commands After an year of hard work, this is a first "usable" version of Jehanne. 2016-11-25 16:18:40 +00:00			`*p = 0;`
			`yyerror("token buffer too short");`
			`return 0;`
			`}`
			`*p++=val;`
			`return p;`
			`}`

			`char*`
			`addutf(char *p, int c)`
			`{`
			`uint8_t b, m;`
			`int i;`

			`p = addtok(p, c); /* 1-byte UTF runes are special */`
rc: import 9front's improvements 2017-10-18 01:10:06 +02:00			`if(onebyte(c))`
first usable version of kernel and commands After an year of hard work, this is a first "usable" version of Jehanne. 2016-11-25 16:18:40 +00:00			`return p;`

			`m = 0xc0;`
			`b = 0x80;`
			`for(i=1; i < UTFmax; i++){`
			`if((c&m) == b)`
			`break;`
			`p = addtok(p, advance());`
			`b = m;`
			`m = (m >> 1)\|0x80;`
			`}`
			`return p;`
			`}`

			`int lastdol; /* was the last token read '$' or '$#' or '"'? */`
			`int lastword; /* was the last token read a word or compound word terminator? */`

			`int`
			`yylex(void)`
			`{`
			`int c, d = nextc();`
			`char *w = tok;`
			`struct tree *t;`
			`yylval.tree = 0;`
			`/*`
			`* Embarassing sneakiness: if the last token read was a quoted or unquoted`
			`* WORD then we alter the meaning of what follows. If the next character`
			* is `(', we return SUB (a subscript paren) and consume the `('. Otherwise,
			`* if the next character is the first character of a simple or compound word,`
			* we insert a `^' before it.
			`*/`
			`if(lastword){`
			`lastword = 0;`
			`if(d=='('){`
			`advance();`
			`strcpy(tok, "( [SUB]");`
			`return SUB;`
			`}`
			if(wordchr(d) \|\| d=='\'' \|\| d=='`' \|\| d=='$' \|\| d=='"'){
			`strcpy(tok, "^");`
			`return '^';`
			`}`
			`}`
			`inquote = 0;`
			`skipwhite();`
			`switch(c = advance()){`
			`case EOF:`
			`lastdol = 0;`
			`strcpy(tok, "EOF");`
			`return EOF;`
			`case '$':`
			`lastdol = 1;`
			`if(nextis('#')){`
			`strcpy(tok, "$#");`
			`return COUNT;`
			`}`
			`if(nextis('"')){`
			`strcpy(tok, "$\"");`
			`return '"';`
			`}`
			`strcpy(tok, "$");`
			`return '$';`
			`case '&':`
			`lastdol = 0;`
			`if(nextis('&')){`
			`skipnl();`
			`strcpy(tok, "&&");`
			`return ANDAND;`
			`}`
			`strcpy(tok, "&");`
			`return '&';`
			`case '\|':`
			`lastdol = 0;`
			`if(nextis(c)){`
			`skipnl();`
			`strcpy(tok, "\|\|");`
			`return OROR;`
			`}`
			`case '<':`
			`case '>':`
			`lastdol = 0;`
			`/*`
			`* funny redirection tokens:`
			`* redir: arrow \| arrow '[' fd ']'`
			`* arrow: '<' \| '<<' \| '>' \| '>>' \| '\|'`
			`* fd: digit \| digit '=' \| digit '=' digit`
			`* digit: '0'\|'1'\|'2'\|'3'\|'4'\|'5'\|'6'\|'7'\|'8'\|'9'`
			`* some possibilities are nonsensical and get a message.`
			`*/`
			`*w++=c;`
			`t = newtree();`
			`switch(c){`
			`case '\|':`
			`t->type = PIPE;`
			`t->fd0 = 1;`
			`t->fd1 = 0;`
			`break;`
			`case '>':`
			`t->type = REDIR;`
			`if(nextis(c)){`
			`t->rtype = APPEND;`
			`*w++=c;`
			`}`
			`else t->rtype = WRITE;`
			`t->fd0 = 1;`
			`break;`
			`case '<':`
			`t->type = REDIR;`
			`if(nextis(c)){`
			`t->rtype = HERE;`
			`*w++=c;`
			`} else if (nextis('>')){`
			`t->rtype = RDWR;`
			`*w++=c;`
			`} else t->rtype = READ;`
			`t->fd0 = 0;`
			`break;`
			`}`
			`if(nextis('[')){`
			`*w++='[';`
			`c = advance();`
			`*w++=c;`
			`if(c<'0' \|\| '9'<c){`
			`RedirErr:`
			`*w = 0;`
			`yyerror(t->type==PIPE?"pipe syntax"`
			`:"redirection syntax");`
			`return EOF;`
			`}`
			`t->fd0 = 0;`
			`do{`
			`t->fd0 = t->fd0*10+c-'0';`
			`*w++=c;`
			`c = advance();`
			`}while('0'<=c && c<='9');`
			`if(c=='='){`
			`*w++='=';`
			`if(t->type==REDIR)`
			`t->type = DUP;`
			`c = advance();`
			`if('0'<=c && c<='9'){`
			`t->rtype = DUPFD;`
			`t->fd1 = t->fd0;`
			`t->fd0 = 0;`
			`do{`
			`t->fd0 = t->fd0*10+c-'0';`
			`*w++=c;`
			`c = advance();`
			`}while('0'<=c && c<='9');`
			`}`
			`else{`
			`if(t->type==PIPE)`
			`goto RedirErr;`
			`t->rtype = CLOSE;`
			`}`
			`}`
			`if(c!=']'`
			`\|\| t->type==DUP && (t->rtype==HERE \|\| t->rtype==APPEND))`
			`goto RedirErr;`
			`*w++=']';`
			`}`
			`*w='\0';`
			`yylval.tree = t;`
			`if(t->type==PIPE)`
			`skipnl();`
			`return t->type;`
			`case '\'':`
			`lastdol = 0;`
			`lastword = 1;`
			`inquote = 1;`
			`for(;;){`
			`c = advance();`
			`if(c==EOF)`
			`break;`
			`if(c=='\''){`
			`if(nextc()!='\'')`
			`break;`
			`advance();`
			`}`
			`w = addutf(w, c);`
			`}`
			`if(w!=0)`
			`*w='\0';`
			`t = token(tok, WORD);`
			`t->quoted = 1;`
			`yylval.tree = t;`
			`return t->type;`
			`}`
			`if(!wordchr(c)){`
			`lastdol = 0;`
			`tok[0] = c;`
			`tok[1]='\0';`
			`return c;`
			`}`
			`for(;;){`
			`if(c=='*' \|\| c=='[' \|\| c=='?' \|\| c==GLOB)`
			`w = addtok(w, GLOB);`
			`w = addutf(w, c);`
			`c = nextc();`
			`if(lastdol?!idchr(c):!wordchr(c)) break;`
			`advance();`
			`}`

			`lastword = 1;`
			`lastdol = 0;`
			`if(w!=0)`
			`*w='\0';`
			`t = klook(tok);`
			`if(t->type!=WORD)`
			`lastword = 0;`
			`t->quoted = 0;`
			`yylval.tree = t;`
			`return t->type;`
			`}`