From 6240d26beb52747638ed274ff6a60a8b4b8eb5b7 Mon Sep 17 00:00:00 2001 From: Evan Gates Date: Thu, 13 Nov 2014 15:07:15 -0800 Subject: [PATCH] new expr using shunting-yard instead of recursive descent (this time with tabs) --- expr.c | 658 +++++++++++++++++---------------------------------------- 1 file changed, 191 insertions(+), 467 deletions(-) diff --git a/expr.c b/expr.c index 9457d39..28ca387 100644 --- a/expr.c +++ b/expr.c @@ -1,517 +1,241 @@ -/* $OpenBSD: src/bin/expr/expr.c,v 1.19 2013/11/21 15:54:45 deraadt Exp $ */ -/* $NetBSD: expr.c,v 1.3.6.1 1996/06/04 20:41:47 cgd Exp $ */ - -/* - * Written by J.T. Conklin . - * Public domain. - */ - +#include +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include +#include "util.h" -static struct val *make_int(int); -static struct val *make_str(char *); -static void free_value(struct val *); -static int is_integer(struct val *, int *); -static int to_integer(struct val *); -static void to_string(struct val *); -static int is_zero_or_null(struct val *); -static void nexttoken(int); -static void error(void); -static struct val *eval6(void); -static struct val *eval5(void); -static struct val *eval4(void); -static struct val *eval3(void); -static struct val *eval2(void); -static struct val *eval1(void); -static struct val *eval0(void); - -enum token { - OR, AND, EQ, LT, GT, ADD, SUB, MUL, DIV, MOD, MATCH, RP, LP, - NE, LE, GE, OPERAND, EOI +enum { + VAL = CHAR_MAX + 1, GE, LE, NE }; -struct val { - enum { - integer, - string - } type; +typedef struct { + char *s; + intmax_t n; +} Val; - union { - char *s; - int i; - } u; -}; -static enum token token; -static struct val *tokval; -static char **av; +static void doop(int*, int**, Val*, Val**); +static Val match(Val, Val); +static void num(Val); +static int valcmp(Val, Val); +static char *valstr(Val, char*); +static int yylex(void); +static int yyparse(int); -static struct val * -make_int(int i) +static char **args; +static size_t intlen; +static Val yylval; + +// otop points to one past last op +// vtop points to one past last val +// guaranteed otop != ops +// pop two vals, pop op, apply op, push val +static void +doop(int *ops, int **otop, Val *vals, Val **vtop) { - struct val *vp; + Val ret, a, b; + int op; - vp = (struct val *) malloc(sizeof(*vp)); - if (vp == NULL) { - err(3, NULL); + if((*otop)[-1] == '(') + enprintf(2, "syntax error: extra (\n"); + if(*vtop - vals < 2) + enprintf(2, "syntax error: missing expression or extra operator\n"); + + a = (*vtop)[-2]; + b = (*vtop)[-1]; + op = (*otop)[-1]; + + switch (op) { + case '|': + if ( a.s && *a.s) ret = (Val){ a.s , 0 }; + else if(!a.s && a.n) ret = (Val){ NULL, a.n }; + else if( b.s && *b.s) ret = (Val){ b.s , 0 }; + else ret = (Val){ NULL, b.n }; + break; + + case '&': + if(((a.s && *a.s) || a.n) && + ((b.s && *b.s) || b.n)) ret = a; + else ret = (Val){ NULL, 0 }; + break; + + case '=': ret = (Val){ NULL, valcmp(a, b) == 0 }; break; + case '>': ret = (Val){ NULL, valcmp(a, b) > 0 }; break; + case GE : ret = (Val){ NULL, valcmp(a, b) >= 0 }; break; + case '<': ret = (Val){ NULL, valcmp(a, b) < 0 }; break; + case LE : ret = (Val){ NULL, valcmp(a, b) <= 0 }; break; + case NE : ret = (Val){ NULL, valcmp(a, b) != 0 }; break; + + case '+': num(a); num(b); ret = (Val){ NULL, a.n + b.n }; break; + case '-': num(a); num(b); ret = (Val){ NULL, a.n - b.n }; break; + case '*': num(a); num(b); ret = (Val){ NULL, a.n * b.n }; break; + case '/': num(a); num(b); ret = (Val){ NULL, a.n / b.n }; break; + case '%': num(a); num(b); ret = (Val){ NULL, a.n % b.n }; break; + + case ':': ret = match(a, b); break; } - vp->type = integer; - vp->u.i = i; - return vp; + + (*vtop)[-2] = ret; + (*otop)--; + (*vtop)--; } -static struct val * -make_str(char *s) +static Val +match(Val vstr, Val vregx) { - struct val *vp; + char b1[intlen], *str = valstr(vstr , b1); + char b2[intlen], *regx = valstr(vregx, b2); - vp = (struct val *) malloc(sizeof(*vp)); - if (vp == NULL || ((vp->u.s = strdup(s)) == NULL)) { - err(3, NULL); + regex_t re; + regmatch_t matches[2]; + char anchreg[strlen(regx) + 2]; + + sprintf(anchreg, "^%s", regx); + + if(regcomp(&re, anchreg, 0)) + enprintf(3, "regcomp failed\n"); + + if(regexec(&re, str, 2, matches, 0)) + return (Val){ (re.re_nsub ? "" : NULL), 0 }; + + if(re.re_nsub) { + intmax_t d; + char *ret, *p; + regoff_t len = matches[1].rm_eo - matches[1].rm_so + 1; + + if(!(ret = malloc(len))) // FIXME: free + enprintf(3, "malloc failed\n"); + + d = strtoimax(ret, &p, 10); + strlcpy(ret, str + matches[1].rm_so, len); + + if(*ret && !*p) + return (Val){ NULL, d }; + return (Val){ ret, 0 }; } - vp->type = string; - return vp; + return (Val){ NULL, matches[0].rm_eo - matches[0].rm_so }; } static void -free_value(struct val *vp) +num(Val v) { - if (vp->type == string) - free(vp->u.s); - free(vp); -} - -/* determine if vp is an integer; if so, return it's value in *r */ -static int -is_integer(struct val *vp, int *r) -{ - char *s; - int neg; - int i; - - if (vp->type == integer) { - *r = vp->u.i; - return 1; - } - - /* - * POSIX.2 defines an "integer" as an optional unary minus - * followed by digits. - */ - s = vp->u.s; - i = 0; - - neg = (*s == '-'); - if (neg) - s++; - - while (*s) { - if (!isdigit((unsigned char)*s)) - return 0; - - i *= 10; - i += *s - '0'; - - s++; - } - - if (neg) - i *= -1; - - *r = i; - return 1; -} - -/* coerce to vp to an integer */ -static int -to_integer(struct val *vp) -{ - int r; - - if (vp->type == integer) - return 1; - - if (is_integer(vp, &r)) { - free(vp->u.s); - vp->u.i = r; - vp->type = integer; - return 1; - } - - return 0; -} - -/* coerce to vp to an string */ -static void -to_string(struct val *vp) -{ - char *tmp; - - if (vp->type == string) - return; - - if (asprintf(&tmp, "%d", vp->u.i) == -1) - err(3, NULL); - - vp->type = string; - vp->u.s = tmp; + if(v.s) + enprintf(2, "syntax error: expected integer got `%s'\n", v.s); } static int -is_zero_or_null(struct val *vp) +valcmp(Val a, Val b) { - if (vp->type == integer) { - return (vp->u.i == 0); - } else { - return (*vp->u.s == 0 || (to_integer(vp) && vp->u.i == 0)); - } - /* NOTREACHED */ + char b1[intlen], *p = valstr(a, b1); + char b2[intlen], *q = valstr(b, b2); + + if(!a.s && !b.s) + return (a.n > b.n) - (a.n < b.n); + return strcmp(p, q); } -static void -nexttoken(int pat) +static char * +valstr(Val val, char *buf) { - char *p; - - if ((p = *av) == NULL) { - token = EOI; - return; + char *p = val.s; + if(!p) { + sprintf(buf, "%"PRIdMAX, val.n); + p = buf; } - av++; - - if (pat == 0 && p[0] != '\0') { - if (p[1] == '\0') { - const char *x = "|&=<>+-*/%:()"; - char *i; /* index */ - - if ((i = strchr(x, *p)) != NULL) { - token = i - x; - return; - } - } else if (p[1] == '=' && p[2] == '\0') { - switch (*p) { - case '<': - token = LE; - return; - case '>': - token = GE; - return; - case '!': - token = NE; - return; - } - } - } - tokval = make_str(p); - token = OPERAND; - return; + return p; } -static void -error(void) +static int +yylex(void) { - errx(2, "syntax error"); - /* NOTREACHED */ -} + intmax_t d; + char *q, *p, *ops = "|&=><+-*/%():"; -static struct val * -eval6(void) -{ - struct val *v; + if(!(p = *args++)) + return 0; - if (token == OPERAND) { - nexttoken(0); - return tokval; - - } else if (token == RP) { - nexttoken(0); - v = eval0(); - - if (token != LP) { - error(); - /* NOTREACHED */ - } - nexttoken(0); - return v; - } else { - error(); - } - /* NOTREACHED */ - return NULL; -} - -/* Parse and evaluate match (regex) expressions */ -static struct val * -eval5(void) -{ - regex_t rp; - regmatch_t rm[2]; - char errbuf[256]; - int eval; - struct val *l, *r; - struct val *v; - - l = eval6(); - while (token == MATCH) { - nexttoken(1); - r = eval6(); - - /* coerce to both arguments to strings */ - to_string(l); - to_string(r); - - /* compile regular expression */ - if ((eval = regcomp(&rp, r->u.s, 0)) != 0) { - regerror(eval, &rp, errbuf, sizeof(errbuf)); - errx(2, "%s", errbuf); - } - - /* compare string against pattern -- remember that patterns - are anchored to the beginning of the line */ - if (regexec(&rp, l->u.s, 2, rm, 0) == 0 && rm[0].rm_so == 0) { - if (rm[1].rm_so >= 0) { - *(l->u.s + rm[1].rm_eo) = '\0'; - v = make_str(l->u.s + rm[1].rm_so); - - } else { - v = make_int((int)(rm[0].rm_eo - rm[0].rm_so)); - } - } else { - if (rp.re_nsub == 0) { - v = make_int(0); - } else { - v = make_str(""); - } - } - - /* free arguments and pattern buffer */ - free_value(l); - free_value(r); - regfree(&rp); - - l = v; + d = strtoimax(p, &q, 10); + if(*p && !*q) { + yylval = (Val){ NULL, d }; + return VAL; } - return l; + if(*p && !p[1] && strchr(ops, *p)) + return *p; + + if(strcmp(p, ">=") == 0) return GE; + if(strcmp(p, "<=") == 0) return LE; + if(strcmp(p, "!=") == 0) return NE; + + yylval = (Val){ p, 0 }; + return VAL; } -/* Parse and evaluate multiplication and division expressions */ -static struct val * -eval4(void) +static int +yyparse(int argc) { - struct val *l, *r; - enum token op; + Val vals[argc], *vtop = vals; + int ops [argc], *otop = ops; + int type, last = 0; + char prec[] = { + ['|'] = 1, + ['&'] = 2, + ['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3, + ['+'] = 4, ['-'] = 4, + ['*'] = 5, ['/'] = 5, ['%'] = 5, + [':'] = 6, + }; - l = eval5(); - while ((op = token) == MUL || op == DIV || op == MOD) { - nexttoken(0); - r = eval5(); - - if (!to_integer(l) || !to_integer(r)) { - errx(2, "non-numeric argument"); + while((type = yylex())) { + switch (type) { + case VAL: *vtop++ = yylval; break; + case '(': *otop++ = '(' ; break; + case ')': + if(last == '(') + enprintf(2, "syntax error: empty ( )\n"); + while(otop > ops && otop[-1] != '(') + doop(ops, &otop, vals, &vtop); + if(otop == ops) + enprintf(2, "syntax error: extra )\n"); + otop--; + break; + default : + if(prec[last]) + enprintf(2, "syntax error: extra operator\n"); + while(otop > ops && prec[otop[-1]] >= prec[type]) + doop(ops, &otop, vals, &vtop); + *otop++ = type; + break; } - - if (op == MUL) { - l->u.i *= r->u.i; - } else { - if (r->u.i == 0) { - errx(2, "division by zero"); - } - if (op == DIV) { - if (l->u.i != INT_MIN || r->u.i != -1) - l->u.i /= r->u.i; - } else { - if (l->u.i != INT_MIN || r->u.i != -1) - l->u.i %= r->u.i; - else - l->u.i = 0; - } - } - - free_value(r); + last = type; } + while(otop > ops) + doop(ops, &otop, vals, &vtop); - return l; + if(vtop == vals) + enprintf(2, "syntax error: missing expression\n"); + if(vtop - vals > 1) + enprintf(2, "syntax error: extra expression\n"); + + vtop--; + if(vtop->s) printf("%s\n" , vtop->s); + else printf("%"PRIdMAX"\n", vtop->n); + + return (vtop->s && *vtop->s) || vtop->n; } -/* Parse and evaluate addition and subtraction expressions */ -static struct val * -eval3(void) -{ - struct val *l, *r; - enum token op; - - l = eval4(); - while ((op = token) == ADD || op == SUB) { - nexttoken(0); - r = eval4(); - - if (!to_integer(l) || !to_integer(r)) { - errx(2, "non-numeric argument"); - } - - if (op == ADD) { - l->u.i += r->u.i; - } else { - l->u.i -= r->u.i; - } - - free_value(r); - } - - return l; -} - -/* Parse and evaluate comparison expressions */ -static struct val * -eval2(void) -{ - struct val *l, *r; - enum token op; - int v = 0, li, ri; - - l = eval3(); - while ((op = token) == EQ || op == NE || op == LT || op == GT || - op == LE || op == GE) { - nexttoken(0); - r = eval3(); - - if (is_integer(l, &li) && is_integer(r, &ri)) { - switch (op) { - case GT: - v = (li > ri); - break; - case GE: - v = (li >= ri); - break; - case LT: - v = (li < ri); - break; - case LE: - v = (li <= ri); - break; - case EQ: - v = (li == ri); - break; - case NE: - v = (li != ri); - break; - default: - break; - } - } else { - to_string(l); - to_string(r); - - switch (op) { - case GT: - v = (strcoll(l->u.s, r->u.s) > 0); - break; - case GE: - v = (strcoll(l->u.s, r->u.s) >= 0); - break; - case LT: - v = (strcoll(l->u.s, r->u.s) < 0); - break; - case LE: - v = (strcoll(l->u.s, r->u.s) <= 0); - break; - case EQ: - v = (strcoll(l->u.s, r->u.s) == 0); - break; - case NE: - v = (strcoll(l->u.s, r->u.s) != 0); - break; - default: - break; - } - } - - free_value(l); - free_value(r); - l = make_int(v); - } - - return l; -} - -/* Parse and evaluate & expressions */ -static struct val * -eval1(void) -{ - struct val *l, *r; - - l = eval2(); - while (token == AND) { - nexttoken(0); - r = eval2(); - - if (is_zero_or_null(l) || is_zero_or_null(r)) { - free_value(l); - free_value(r); - l = make_int(0); - } else { - free_value(r); - } - } - - return l; -} - -/* Parse and evaluate | expressions */ -static struct val * -eval0(void) -{ - struct val *l, *r; - - l = eval1(); - while (token == OR) { - nexttoken(0); - r = eval1(); - - if (is_zero_or_null(l)) { - free_value(l); - l = r; - } else { - free_value(r); - } - } - - return l; -} - - int -main(int argc, char *argv[]) +main(int argc, char **argv) { - struct val *vp; + if(!(intlen = snprintf(NULL, 0, "%"PRIdMAX, INTMAX_MIN) + 1)) + enprintf(3, "failed to get max digits\n"); - (void) setlocale(LC_ALL, ""); + args = argv + 1; + if(*args && !strcmp("--", *args)) + ++args; - if (argc > 1 && !strcmp(argv[1], "--")) - argv++; - - av = argv + 1; - - nexttoken(0); - vp = eval0(); - - if (token != EOI) { - error(); - /* NOTREACHED */ - } - - if (vp->type == integer) - printf("%d\n", vp->u.i); - else - printf("%s\n", vp->u.s); - - exit(is_zero_or_null(vp)); + return !yyparse(argc); }