Audit expr(1)

No bugs found, but I changed intmax_t to long long to make it more
predictable and removed some of the kitchen-sinking.
Don't return structs by value, as this is not very elegant.
Instead, follow the convention of functions such as stat(), which
take a pointer to a struct and fill it in.
This commit is contained in:
FRIGN 2015-03-22 14:32:56 +01:00
parent 1f0f1dd320
commit d49f6f2044
2 changed files with 152 additions and 159 deletions

2
README
View File

@ -30,7 +30,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
=*| echo yes none =*| echo yes none
=*| env yes none =*| env yes none
#*| expand yes none #*| expand yes none
#* expr yes none #*| expr yes none
=*| false yes none =*| false yes none
= find yes none = find yes none
#*| fold yes none #*| fold yes none

303
expr.c
View File

@ -1,205 +1,203 @@
/* See LICENSE file for copyright and license details. */ /* See LICENSE file for copyright and license details. */
#include <inttypes.h> #include <limits.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include "utf.h" #include "utf.h"
#include "util.h" #include "util.h"
/* token types for lexing/parsing /* tokens, one-character operators represent themselves */
* single character operators represent themselves */
enum { enum {
VAL = CHAR_MAX + 1, GE, LE, NE VAL = CHAR_MAX + 1, GE, LE, NE
}; };
struct val { struct val {
char *s; /* iff s is NULL, val is an integer */ char *str;
intmax_t n; long long num;
}; };
static size_t intlen; static size_t maxdigits;
static void static void
enan(struct val v) enan(struct val *v)
{ {
if (v.s) if (!v->str)
enprintf(2, "syntax error: expected integer got `%s'\n", v.s); return;
enprintf(2, "syntax error: expected integer, got %s\n", v->str);
} }
static void static void
ezero(intmax_t n) ezero(struct val *v)
{ {
if (n == 0) if (v->num != 0)
return;
enprintf(2, "division by zero\n"); enprintf(2, "division by zero\n");
} }
static char *
valstr(struct val val, char *buf, size_t bufsiz)
{
if (val.s)
return val.s;
snprintf(buf, bufsiz, "%"PRIdMAX, val.n);
return buf;
}
static int static int
valcmp(struct val a, struct val b) valcmp(struct val *a, struct val *b)
{ {
char buf1[intlen], buf2[intlen]; int ret;
char *astr = valstr(a, buf1, sizeof(buf1)); char buf[maxdigits];
char *bstr = valstr(b, buf2, sizeof(buf2));
if (!a.s && !b.s) if (!a->str && !b->str) {
return (a.n > b.n) - (a.n < b.n); ret = (a->num > b->num) - (a->num < b->num);
return strcmp(astr, bstr); } else if (a->str && !b->str) {
snprintf(buf, sizeof(buf), "%lld", b->num);
ret = strcmp(a->str, buf);
} else if (!a->str && b->str) {
snprintf(buf, sizeof(buf), "%lld", a->num);
ret = strcmp(buf, b->str);
} else {
ret = strcmp(a->str, b->str);
}
return ret;
} }
/* match vstr against BRE vregx (treat both values as strings) static void
* if there is at least one subexpression \(...\) match(struct val *vstr, struct val *vregx, struct val *ret)
* then return the text matched by it \1 (empty string for no match)
* else return number of characters matched (0 for no match)
*/
static struct val
match(struct val vstr, struct val vregx)
{ {
regex_t re; regex_t re;
regmatch_t matches[2]; regmatch_t matches[2];
intmax_t d; long long d;
char *s, *p, buf1[intlen], buf2[intlen]; char strbuf[maxdigits + 1], regxbuf[maxdigits + 1],
char *str = valstr(vstr, buf1, sizeof(buf1)); *s, *p, *anchreg, *str, *regx;
char *regx = valstr(vregx, buf2, sizeof(buf2));; const char *errstr;
char anchreg[strlen(regx) + 2];
/* expr(1p) "all patterns are anchored to the beginning of the string" */ if (!vstr->str) {
snprintf(anchreg, sizeof(anchreg), "^%s", regx); snprintf(strbuf, sizeof(strbuf), "%lld", vstr->num);
str = strbuf;
} else {
str = vstr->str;
}
if (!vregx->str) {
snprintf(regxbuf, sizeof(regxbuf), "%lld", vregx->num);
regx = regxbuf;
} else {
regx = vregx->str;
}
/* anchored regex */
anchreg = emalloc(strlen(regx) + 2);
estrlcpy(anchreg, "^", sizeof(anchreg));
estrlcat(anchreg, regx, sizeof(anchreg));
enregcomp(3, &re, anchreg, 0); enregcomp(3, &re, anchreg, 0);
free(anchreg);
if (regexec(&re, str, 2, matches, 0)) { if (regexec(&re, str, 2, matches, 0)) {
regfree(&re); regfree(&re);
return (struct val){ (re.re_nsub ? "" : NULL), 0 }; ret->str = re.re_nsub ? "" : NULL;
} return;
} else if (re.re_nsub) {
if (re.re_nsub) {
regfree(&re); regfree(&re);
s = str + matches[1].rm_so; s = str + matches[1].rm_so;
p = str + matches[1].rm_eo; p = str + matches[1].rm_eo;
*p = '\0'; *p = '\0';
d = strtoimax(s, &p, 10);
if (*s && !*p) /* string matched by subexpression is an integer */
return (struct val){ NULL, d };
/* FIXME? string is never free()d, worth fixing? d = strtonum(s, LLONG_MIN, LLONG_MAX, &errstr);
* need to allocate as it could be in buf1 instead of vstr.s */ if (!errstr) {
return (struct val){ enstrdup(3, s), 0 }; ret->num = d;
return;
} else {
ret->str = enstrdup(3, s);
return;
} }
} else {
regfree(&re); regfree(&re);
str += matches[0].rm_so; str += matches[0].rm_so;
return (struct val){ NULL, utfnlen(str, matches[0].rm_eo - matches[0].rm_so) }; ret->num = utfnlen(str, matches[0].rm_eo - matches[0].rm_so);
return;
}
} }
/* ops points to a stack of operators, opp points to one past the last op
* vals points to a stack of values , valp points to one past the last val
* guaranteed that opp != ops
* ops is unused here, but still included for parity with vals
* pop operator, pop two values, apply operator, push result
*/
static void static void
doop(int *ops, int **opp, struct val *vals, struct val **valp) doop(int *ophead, int *opp, struct val *valhead, struct val *valp)
{ {
struct val ret, a, b; struct val ret = { .str = NULL, .num = 0 }, *a, *b;
int op; int op;
/* For an operation, we need a valid operator /* an operation "a op b" needs an operator and two values */
* and two values on the stack */ if (opp[-1] == '(')
if ((*opp)[-1] == '(')
enprintf(2, "syntax error: extra (\n"); enprintf(2, "syntax error: extra (\n");
if (*valp - vals < 2) if (valp - valhead < 2)
enprintf(2, "syntax error: missing expression or extra operator\n"); enprintf(2, "syntax error: missing expression or extra operator\n");
a = (*valp)[-2]; a = valp - 2;
b = (*valp)[-1]; b = valp - 1;
op = (*opp)[-1]; op = opp[-1];
switch (op) { switch (op) {
case '|': case '|':
if ( a.s && *a.s) ret = (struct val){ a.s , 0 }; if ( a->str && *a->str) ret.str = a->str;
else if (!a.s && a.n) ret = (struct val){ NULL, a.n }; else if (!a->str && a->num) ret.num = a->num;
else if ( b.s && *b.s) ret = (struct val){ b.s , 0 }; else if ( b->str && *b->str) ret.str = b->str;
else ret = (struct val){ NULL, b.n }; else ret.num = b->num;
break; break;
case '&': case '&':
if (((a.s && *a.s) || a.n) && ((b.s && *b.s) || b.n)) if (((a->str && *a->str) || a->num) &&
ret = a; ((b->str && *b->str) || b->num)) {
else ret.str = a->str;
ret = (struct val){ NULL, 0 }; ret.num = a->num;
}
break; break;
case '=': ret = (struct val){ NULL, valcmp(a, b) == 0 }; break; case '=': ret.num = (valcmp(a, b) == 0); break;
case '>': ret = (struct val){ NULL, valcmp(a, b) > 0 }; break; case '>': ret.num = (valcmp(a, b) > 0); break;
case GE : ret = (struct val){ NULL, valcmp(a, b) >= 0 }; break; case GE : ret.num = (valcmp(a, b) >= 0); break;
case '<': ret = (struct val){ NULL, valcmp(a, b) < 0 }; break; case '<': ret.num = (valcmp(a, b) < 0); break;
case LE : ret = (struct val){ NULL, valcmp(a, b) <= 0 }; break; case LE : ret.num = (valcmp(a, b) <= 0); break;
case NE : ret = (struct val){ NULL, valcmp(a, b) != 0 }; break; case NE : ret.num = (valcmp(a, b) != 0); break;
case '+': enan(a); enan(b); ret = (struct val){ NULL, a.n + b.n }; break; case '+': enan(a); enan(b); ret.num = a->num + b->num; break;
case '-': enan(a); enan(b); ret = (struct val){ NULL, a.n - b.n }; break; case '-': enan(a); enan(b); ret.num = a->num - b->num; break;
case '*': enan(a); enan(b); ret = (struct val){ NULL, a.n * b.n }; break; case '*': enan(a); enan(b); ret.num = a->num * b->num; break;
case '/': enan(a); enan(b); ezero(b.n); ret = (struct val){ NULL, a.n / b.n }; break; case '/': enan(a); enan(b); ezero(b); ret.num = a->num / b->num; break;
case '%': enan(a); enan(b); ezero(b.n); ret = (struct val){ NULL, a.n % b.n }; break; case '%': enan(a); enan(b); ezero(b); ret.num = a->num % b->num; break;
case ':': ret = match(a, b); break; case ':': match(a, b, &ret); break;
} }
(*valp)[-2] = ret; valp[-2] = ret;
(*opp)--;
(*valp)--;
} }
/* return the type of the next token, s
* if it is a value, place the value in v for use by parser
*/
static int static int
lex(char *s, struct val *v) lex(char *s, struct val *v)
{ {
intmax_t d; long long d;
char *p, *ops = "|&=><+-*/%():"; int type = VAL;
char *ops = "|&=><+-*/%():";
const char *errstr;
/* clean integer */ d = strtonum(s, LLONG_MIN, LLONG_MAX, &errstr);
d = strtoimax(s, &p, 10);
if (*s && !*p) { if (!errstr) {
*v = (struct val){ NULL, d }; /* integer */
return VAL; v->num = d;
} else if (s[0] && strchr(ops, s[0]) && !s[1]) {
/* one-char operator */
type = s[0];
} else if (s[0] && strchr("><!", s[0]) && s[1] == '=' && !s[2]) {
/* two-char operator */
type = (s[0] == '>') ? GE : (s[0] == '<') ? LE : NE;
} else {
/* string */
v->str = s;
} }
/* one-char operand */ return type;
if (*s && !s[1] && strchr(ops, *s))
return *s;
/* two-char operand */
if (!strcmp(s, ">=")) return GE;
if (!strcmp(s, "<=")) return LE;
if (!strcmp(s, "!=")) return NE;
/* nothing matched, treat as string */
*v = (struct val){ s, 0 };
return VAL;
} }
/* using shunting-yard to convert from infix to rpn
* https://en.wikipedia.org/wiki/Shunting-yard_algorithm
* instead of creating rpn output to evaluate later, evaluate it immediately as
* it is created
* vals is the value stack, valp points to one past last value on the stack
* ops is the operator stack, opp points to one past last op on the stack
*/
static int static int
parse(char *expr[], int exprlen) parse(char *expr[], int numexpr)
{ {
struct val vals[exprlen], *valp = vals, v; struct val valhead[numexpr], *valp = valhead, v = { .str = NULL, .num = 0 };
int ops[exprlen], *opp = ops; int ophead[numexpr], *opp = ophead, type, lasttype = 0;
int i, type, lasttype = 0; char prec[] = {
char prec[] = { /* precedence of operators */ [ 0 ] = 0, [VAL] = 0, ['('] = 0, [')'] = 0,
['|'] = 1, ['|'] = 1,
['&'] = 2, ['&'] = 2,
['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3, ['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3,
@ -208,70 +206,65 @@ parse(char *expr[], int exprlen)
[':'] = 6, [':'] = 6,
}; };
for (i = 0; i < exprlen; i++) { for (; *expr; expr++) {
switch ((type = lex(expr[i], &v))) { switch ((type = lex(*expr, &v))) {
case VAL: case VAL:
*valp++ = v; (*valp).str = v.str;
(*valp).num = v.num;
valp++;
break; break;
case '(': case '(':
*opp++ = '('; *opp++ = type;
break; break;
case ')': case ')':
if (lasttype == '(') if (lasttype == '(')
enprintf(2, "syntax error: empty ( )\n"); enprintf(2, "syntax error: empty ( )\n");
while (opp > ops && opp[-1] != '(') while (opp > ophead && opp[-1] != '(')
doop(ops, &opp, vals, &valp); doop(ophead, opp--, valhead, valp--);
if (opp == ops) if (opp == ophead)
enprintf(2, "syntax error: extra )\n"); enprintf(2, "syntax error: extra )\n");
opp--; opp--;
break; break;
default: /* operator */ default: /* operator */
if (prec[lasttype]) if (prec[lasttype])
enprintf(2, "syntax error: extra operator\n"); enprintf(2, "syntax error: extra operator\n");
while (opp > ops && prec[opp[-1]] >= prec[type]) while (opp > ophead && prec[opp[-1]] >= prec[type])
doop(ops, &opp, vals, &valp); doop(ophead, opp--, valhead, valp--);
*opp++ = type; *opp++ = type;
break; break;
} }
lasttype = type; lasttype = type;
v.str = NULL;
v.num = 0;
} }
while (opp > ops) while (opp > ophead)
doop(ops, &opp, vals, &valp); doop(ophead, opp--, valhead, valp--);
if (valp == valhead)
if (valp == vals)
enprintf(2, "syntax error: missing expression\n"); enprintf(2, "syntax error: missing expression\n");
if (--valp != vals) if (--valp > valhead)
enprintf(2, "syntax error: extra expression\n"); enprintf(2, "syntax error: extra expression\n");
if (valp->s) if (valp->str)
printf("%s\n", valp->s); puts(valp->str);
else else
printf("%"PRIdMAX"\n", valp->n); printf("%lld\n", valp->num);
return (valp->s && *valp->s) || valp->n; return (valp->str && *valp->str) || valp->num;
} }
/* the only way to get usage() is if the user didn't supply -- and expression
* begins with a -
* expr(1p): "... the conforming application must employ the -- construct ...
* if there is any chance the first operand might be a negative integer (or any
* string with a leading minus"
*/
static void static void
usage(void) usage(void)
{ {
enprintf(3, "usage: %s [--] expression\n" enprintf(3, "usage: %s expression\n", argv0);
"note : the -- is mandatory if expression begins with a -\n", argv0);
} }
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
intmax_t n = INTMAX_MIN; long long n = LLONG_MIN;
/* Get the maximum number of digits (+ sign) */ /* maximum number of digits + sign */
for (intlen = (n < 0); n; n /= 10, ++intlen) for (maxdigits = (n < 0); n; n /= 10, ++maxdigits);
;
ARGBEGIN { ARGBEGIN {
default: default: