Compare commits

...

2 Commits

Author SHA1 Message Date
Randy Palamar 270ca025ce expr: don't evaluate matched substr as a number
POSIX specifies that if the pattern contains a subexpression, then the
string matched by the first subexpression should be returned if it exists.

This fixes things like the following:

./expr 00003 : '\(.*\)'
Before: 3
After: 00003
2024-01-16 09:11:46 +01:00
Randy Palamar e50d533d59 expr: treat expressions as strs until evaluation
Comparison operations (>, <, =, etc.) and matching operations must
operate on the originally provided string, not one that has gone back
and forth through string formatting. This caused operations such as
the following to give incorrect results:

./expr 00003 : '.*'
Before: 1
After: 5

This commit fixes that issue.
2024-01-16 09:11:40 +01:00
1 changed files with 26 additions and 58 deletions

84
expr.c
View File

@ -18,11 +18,13 @@ struct val {
};
static void
enan(struct val *v)
tonum(struct val *v)
{
if (!v->str)
return;
enprintf(2, "syntax error: expected integer, got %s\n", v->str);
const char *errstr;
long long d = strtonum(v->str, LLONG_MIN, LLONG_MAX, &errstr);
if (errstr)
enprintf(2, "error: expected integer, got %s\n", v->str);
v->num = d;
}
static void
@ -37,16 +39,14 @@ static int
valcmp(struct val *a, struct val *b)
{
int ret;
char buf[BUFSIZ];
const char *err1, *err2;
long long d1, d2;
if (!a->str && !b->str) {
ret = (a->num > b->num) - (a->num < b->num);
} else if (a->str && !b->str) {
snprintf(buf, sizeof(buf), "%lld", b->num);
ret = strcmp(a->str, buf);
} else if (!a->str && b->str) {
snprintf(buf, sizeof(buf), "%lld", a->num);
ret = strcmp(buf, b->str);
d1 = strtonum(a->str, LLONG_MIN, LLONG_MAX, &err1);
d2 = strtonum(b->str, LLONG_MIN, LLONG_MAX, &err2);
if (!err1 && !err2) {
ret = (d1 > d2) - (d1 < d2);
} else {
ret = strcmp(a->str, b->str);
}
@ -59,25 +59,9 @@ match(struct val *vstr, struct val *vregx, struct val *ret)
{
regex_t re;
regmatch_t matches[2];
long long d;
size_t anchlen;
char strbuf[BUFSIZ], regxbuf[BUFSIZ],
*s, *p, *anchreg, *str, *regx;
const char *errstr;
if (!vstr->str) {
snprintf(strbuf, sizeof(strbuf), "%lld", vstr->num);
str = strbuf;
} else {
str = vstr->str;
}
if (!vregx->str) {
snprintf(regxbuf, sizeof(regxbuf), "%lld", vregx->num);
regx = regxbuf;
} else {
regx = vregx->str;
}
char *s, *p, *anchreg;
char *str = vstr->str, *regx = vregx->str;
/* anchored regex */
anchlen = strlen(regx) + 1 + 1;
@ -97,15 +81,8 @@ match(struct val *vstr, struct val *vregx, struct val *ret)
s = str + matches[1].rm_so;
p = str + matches[1].rm_eo;
*p = '\0';
d = strtonum(s, LLONG_MIN, LLONG_MAX, &errstr);
if (!errstr) {
ret->num = d;
return;
} else {
ret->str = enstrdup(3, s);
return;
}
ret->str = enstrdup(3, s);
return;
} else {
regfree(&re);
str += matches[0].rm_so;
@ -152,11 +129,11 @@ doop(int *ophead, int *opp, struct val *valhead, struct val *valp)
case LE : ret.num = (valcmp(a, b) <= 0); break;
case NE : ret.num = (valcmp(a, b) != 0); break;
case '+': enan(a); enan(b); ret.num = a->num + b->num; break;
case '-': enan(a); enan(b); ret.num = a->num - b->num; break;
case '*': enan(a); enan(b); ret.num = a->num * b->num; break;
case '/': enan(a); enan(b); ezero(b); ret.num = a->num / b->num; break;
case '%': enan(a); enan(b); ezero(b); ret.num = a->num % b->num; break;
case '+': tonum(a); tonum(b); ret.num = a->num + b->num; break;
case '-': tonum(a); tonum(b); ret.num = a->num - b->num; break;
case '*': tonum(a); tonum(b); ret.num = a->num * b->num; break;
case '/': tonum(a); tonum(b); ezero(b); ret.num = a->num / b->num; break;
case '%': tonum(a); tonum(b); ezero(b); ret.num = a->num % b->num; break;
case ':': match(a, b, &ret); break;
}
@ -167,25 +144,15 @@ doop(int *ophead, int *opp, struct val *valhead, struct val *valp)
static int
lex(char *s, struct val *v)
{
long long d;
int type = VAL;
char *ops = "|&=><+-*/%():";
const char *errstr;
d = strtonum(s, LLONG_MIN, LLONG_MAX, &errstr);
if (!errstr) {
/* integer */
v->num = d;
} else if (s[0] && strchr(ops, s[0]) && !s[1]) {
if (s[0] && strchr(ops, s[0]) && !s[1]) {
/* one-char operand */
type = s[0];
} else if (s[0] && strchr("><!", s[0]) && s[1] == '=' && !s[2]) {
/* two-char operand */
type = (s[0] == '>') ? GE : (s[0] == '<') ? LE : NE;
} else {
/* string */
v->str = s;
}
return type;
@ -211,8 +178,9 @@ parse(char *expr[], int numexpr)
for (; *expr; expr++) {
switch ((type = lex(*expr, &v))) {
case VAL:
valp->str = v.str;
valp->num = v.num;
/* treatment of *expr is not known until
* doop(); treat as a string for now */
valp->str = *expr;
valp++;
break;
case '(':