Audit expr(1)

No bugs found, but I changed intmax_t to long long to make it more predictable and removed some of the kitchen-sinking. Structs are no longer returned by value, as that is not very elegant; instead, the functions follow the convention of stat(), taking a pointer to a struct to fill.
2015-03-22 14:32:56 +01:00
parent 1f0f1dd320
commit d49f6f2044
2 changed files with 152 additions and 159 deletions
--- a/2
+++ b/2
@@ -30,7 +30,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =*| echo            yes                          none
 =*| env             yes                          none
 #*| expand          yes                          none
-#*  expr            yes                          none
+#*| expr            yes                          none
 =*| false           yes                          none
 =   find            yes                          none
 #*| fold            yes                          none
--- a/expr.c
+++ b/expr.c
@@ -1,205 +1,203 @@
 /* See LICENSE file for copyright and license details. */
-#include <inttypes.h>
+#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "utf.h"
 #include "util.h"
-/* token types for lexing/parsing
+/* tokens, one-character operators represent themselves */
 * single character operators represent themselves */
 enum {
 	VAL = CHAR_MAX + 1, GE, LE, NE
 };
 struct val {
-	char *s; /* iff s is NULL, val is an integer */
+	char *str;
-	intmax_t n;
+	long long num;
 };
-static size_t intlen;
+static size_t maxdigits;
 static void
-enan(struct val v)
+enan(struct val *v)
 {
-	if (v.s)
+	if (!v->str)
-		enprintf(2, "syntax error: expected integer got `%s'\n", v.s);
+		return;
 	enprintf(2, "syntax error: expected integer, got %s\n", v->str);
 }
 static void
-ezero(intmax_t n)
+ezero(struct val *v)
 {
-	if (n == 0)
+	if (v->num != 0)
 		return;
 	enprintf(2, "division by zero\n");
 }
 static char *
 valstr(struct val val, char *buf, size_t bufsiz)
 {
 	if (val.s)
 		return val.s;
 	snprintf(buf, bufsiz, "%"PRIdMAX, val.n);
 	return buf;
 }
 static int
-valcmp(struct val a, struct val b)
+valcmp(struct val *a, struct val *b)
 {
-	char buf1[intlen], buf2[intlen];
+	int ret;
-	char *astr = valstr(a, buf1, sizeof(buf1));
+	char buf[maxdigits];
 	char *bstr = valstr(b, buf2, sizeof(buf2));
-	if (!a.s && !b.s)
+	if (!a->str && !b->str) {
-		return (a.n > b.n) - (a.n < b.n);
+		ret = (a->num > b->num) - (a->num < b->num);
-	return strcmp(astr, bstr);
+	} else if (a->str && !b->str) {
 		snprintf(buf, sizeof(buf), "%lld", b->num);
 		ret = strcmp(a->str, buf);
 	} else if (!a->str && b->str) {
 		snprintf(buf, sizeof(buf), "%lld", a->num);
 		ret = strcmp(buf, b->str);
 	} else {
 		ret = strcmp(a->str, b->str);
 	}
 	return ret;
 }
-/* match vstr against BRE vregx (treat both values as strings)
+static void
- * if there is at least one subexpression \(...\)
+match(struct val *vstr, struct val *vregx, struct val *ret)
 * then return the text matched by it \1 (empty string for no match)
 * else return number of characters matched (0 for no match)
 */
 static struct val
 match(struct val vstr, struct val vregx)
 {
 	regex_t re;
 	regmatch_t matches[2];
-	intmax_t d;
+	long long d;
-	char *s, *p, buf1[intlen], buf2[intlen];
+	char strbuf[maxdigits + 1], regxbuf[maxdigits + 1],
-	char *str = valstr(vstr, buf1, sizeof(buf1));
+	     *s, *p, *anchreg, *str, *regx;
-	char *regx = valstr(vregx, buf2, sizeof(buf2));;
+	const char *errstr;
 	char anchreg[strlen(regx) + 2];
-	/* expr(1p) "all patterns are anchored to the beginning of the string" */
+	if (!vstr->str) {
-	snprintf(anchreg, sizeof(anchreg), "^%s", regx);
+		snprintf(strbuf, sizeof(strbuf), "%lld", vstr->num);
 		str = strbuf;
 	} else {
 		str = vstr->str;
 	}
 	if (!vregx->str) {
 		snprintf(regxbuf, sizeof(regxbuf), "%lld", vregx->num);
 		regx = regxbuf;
 	} else {
 		regx = vregx->str;
 	}
 	/* anchored regex */
 	anchreg = emalloc(strlen(regx) + 2);
 	estrlcpy(anchreg, "^", sizeof(anchreg));
 	estrlcat(anchreg, regx, sizeof(anchreg));
 	enregcomp(3, &re, anchreg, 0);
 	free(anchreg);
 	if (regexec(&re, str, 2, matches, 0)) {
 		regfree(&re);
-		return (struct val){ (re.re_nsub ? "" : NULL), 0 };
+		ret->str = re.re_nsub ? "" : NULL;
-	}
+		return;
-
+	} else if (re.re_nsub) {
 	if (re.re_nsub) {
 		regfree(&re);
 		s = str + matches[1].rm_so;
 		p = str + matches[1].rm_eo;
 		*p = '\0';
 		d = strtoimax(s, &p, 10);
 		if (*s && !*p) /* string matched by subexpression is an integer */
 			return (struct val){ NULL, d };
-		/* FIXME? string is never free()d, worth fixing?
+		d = strtonum(s, LLONG_MIN, LLONG_MAX, &errstr);
-		 * need to allocate as it could be in buf1 instead of vstr.s */
+		if (!errstr) {
-		return (struct val){ enstrdup(3, s), 0 };
+			ret->num = d;
 			return;
 		} else {
 			ret->str = enstrdup(3, s);
 			return;
 		}
 	} else {
 		regfree(&re);
 		str += matches[0].rm_so;
-	return (struct val){ NULL, utfnlen(str, matches[0].rm_eo - matches[0].rm_so) };
+		ret->num = utfnlen(str, matches[0].rm_eo - matches[0].rm_so);
 		return;
 	}
 }
 /* ops  points to a stack of operators, opp  points to one past the last op
 * vals points to a stack of values   , valp points to one past the last val
 * guaranteed that opp != ops
 * ops is unused here, but still included for parity with vals
 * pop operator, pop two values, apply operator, push result
 */
 static void
-doop(int *ops, int **opp, struct val *vals, struct val **valp)
+doop(int *ophead, int *opp, struct val *valhead, struct val *valp)
 {
-	struct val ret, a, b;
+	struct val ret = { .str = NULL, .num = 0 }, *a, *b;
 	int op;
-	/* For an operation, we need a valid operator
+	/* an operation "a op b" needs an operator and two values */
-	 * and two values on the stack */
+	if (opp[-1] == '(')
 	if ((*opp)[-1] == '(')
 		enprintf(2, "syntax error: extra (\n");
-	if (*valp - vals < 2)
+	if (valp - valhead < 2)
 		enprintf(2, "syntax error: missing expression or extra operator\n");
-	a = (*valp)[-2];
+	a = valp - 2;
-	b = (*valp)[-1];
+	b = valp - 1;
-	op = (*opp)[-1];
+	op = opp[-1];
 	switch (op) {
 	case '|':
-		if      ( a.s && *a.s) ret = (struct val){ a.s ,   0 };
+		if      ( a->str && *a->str) ret.str = a->str;
-		else if (!a.s &&  a.n) ret = (struct val){ NULL, a.n };
+		else if (!a->str &&  a->num) ret.num = a->num;
-		else if ( b.s && *b.s) ret = (struct val){ b.s ,   0 };
+		else if ( b->str && *b->str) ret.str = b->str;
-		else                   ret = (struct val){ NULL, b.n };
+		else                         ret.num = b->num;
 		break;
 	case '&':
-		if (((a.s && *a.s) || a.n) && ((b.s && *b.s) || b.n))
+		if (((a->str && *a->str) || a->num) &&
-			ret = a;
+		    ((b->str && *b->str) || b->num)) {
-		else
+			ret.str = a->str;
-			ret = (struct val){ NULL, 0 };
+			ret.num = a->num;
 		}
 		break;
-	case '=': ret = (struct val){ NULL, valcmp(a, b) == 0 }; break;
+	case '=': ret.num = (valcmp(a, b) == 0); break;
-	case '>': ret = (struct val){ NULL, valcmp(a, b) >  0 }; break;
+	case '>': ret.num = (valcmp(a, b) >  0); break;
-	case GE : ret = (struct val){ NULL, valcmp(a, b) >= 0 }; break;
+	case GE : ret.num = (valcmp(a, b) >= 0); break;
-	case '<': ret = (struct val){ NULL, valcmp(a, b) <  0 }; break;
+	case '<': ret.num = (valcmp(a, b) <  0); break;
-	case LE : ret = (struct val){ NULL, valcmp(a, b) <= 0 }; break;
+	case LE : ret.num = (valcmp(a, b) <= 0); break;
-	case NE : ret = (struct val){ NULL, valcmp(a, b) != 0 }; break;
+	case NE : ret.num = (valcmp(a, b) != 0); break;
-	case '+': enan(a); enan(b);             ret = (struct val){ NULL, a.n + b.n }; break;
+	case '+': enan(a); enan(b);           ret.num = a->num + b->num; break;
-	case '-': enan(a); enan(b);             ret = (struct val){ NULL, a.n - b.n }; break;
+	case '-': enan(a); enan(b);           ret.num = a->num - b->num; break;
-	case '*': enan(a); enan(b);             ret = (struct val){ NULL, a.n * b.n }; break;
+	case '*': enan(a); enan(b);           ret.num = a->num * b->num; break;
-	case '/': enan(a); enan(b); ezero(b.n); ret = (struct val){ NULL, a.n / b.n }; break;
+	case '/': enan(a); enan(b); ezero(b); ret.num = a->num / b->num; break;
-	case '%': enan(a); enan(b); ezero(b.n); ret = (struct val){ NULL, a.n % b.n }; break;
+	case '%': enan(a); enan(b); ezero(b); ret.num = a->num % b->num; break;
-	case ':': ret = match(a, b); break;
+	case ':': match(a, b, &ret); break;
 	}
-	(*valp)[-2] = ret;
+	valp[-2] = ret;
 	(*opp)--;
 	(*valp)--;
 }
 /* return the type of the next token, s
 * if it is a value, place the value in v for use by parser
 */
 static int
 lex(char *s, struct val *v)
 {
-	intmax_t d;
+	long long d;
-	char *p, *ops = "|&=><+-*/%():";
+	int type = VAL;
 	char *ops = "|&=><+-*/%():";
 	const char *errstr;
-	/* clean integer */
+	d = strtonum(s, LLONG_MIN, LLONG_MAX, &errstr);
-	d = strtoimax(s, &p, 10);
+
-	if (*s && !*p) {
+	if (!errstr) {
-		*v = (struct val){ NULL, d };
+		/* integer */
-		return VAL;
+		v->num = d;
 	} else if (s[0] && strchr(ops, s[0]) && !s[1]) {
 		/* one-char operand */
 		type = s[0];
 	} else if (s[0] && strchr("><!", s[0]) && s[1] == '=' && !s[2]) {
 		/* two-char operand */
 		type = (s[0] == '>') ? GE : (s[0] == '<') ? LE : NE;
 	} else {
 		/* string */
 		v->str = s;
 	}
-	/* one-char operand */
+	return type;
 	if (*s && !s[1] && strchr(ops, *s))
 		return *s;
 	/* two-char operand */
 	if (!strcmp(s, ">=")) return GE;
 	if (!strcmp(s, "<=")) return LE;
 	if (!strcmp(s, "!=")) return NE;
 	/* nothing matched, treat as string */
 	*v = (struct val){ s, 0 };
 	return VAL;
 }
 /* using shunting-yard to convert from infix to rpn
 * https://en.wikipedia.org/wiki/Shunting-yard_algorithm
 * instead of creating rpn output to evaluate later, evaluate it immediately as
 * it is created
 * vals is the value    stack, valp points to one past last value on the stack
 * ops  is the operator stack, opp  points to one past last op    on the stack
 */
 static int
-parse(char *expr[], int exprlen)
+parse(char *expr[], int numexpr)
 {
-	struct val vals[exprlen], *valp = vals, v;
+	struct val valhead[numexpr], *valp = valhead, v = { .str = NULL, .num = 0 };
-	int ops[exprlen], *opp = ops;
+	int ophead[numexpr], *opp = ophead, type, lasttype = 0;
-	int i, type, lasttype = 0;
+	char prec[] = {
-	char prec[] = { /* precedence of operators */
+		[ 0 ] = 0, [VAL] = 0, ['('] = 0, [')'] = 0,
 		['|'] = 1,
 		['&'] = 2,
 		['='] = 3, ['>'] = 3, [GE] = 3, ['<'] = 3, [LE] = 3, [NE] = 3,
@@ -208,70 +206,65 @@ parse(char *expr[], int exprlen)
 		[':'] = 6,
 	};
-	for (i = 0; i < exprlen; i++) {
+	for (; *expr; expr++) {
-		switch ((type = lex(expr[i], &v))) {
+		switch ((type = lex(*expr, &v))) {
 		case VAL:
-			*valp++ = v;
+			(*valp).str = v.str;
 			(*valp).num = v.num;
 			valp++;
 			break;
 		case '(':
-			*opp++ = '(';
+			*opp++ = type;
 			break;
 		case ')':
 			if (lasttype == '(')
 				enprintf(2, "syntax error: empty ( )\n");
-			while (opp > ops && opp[-1] != '(')
+			while (opp > ophead && opp[-1] != '(')
-				doop(ops, &opp, vals, &valp);
+				doop(ophead, opp--, valhead, valp--);
-			if (opp == ops)
+			if (opp == ophead)
 				enprintf(2, "syntax error: extra )\n");
 			opp--;
 			break;
 		default: /* operator */
 			if (prec[lasttype])
 				enprintf(2, "syntax error: extra operator\n");
-			while (opp > ops && prec[opp[-1]] >= prec[type])
+			while (opp > ophead && prec[opp[-1]] >= prec[type])
-				doop(ops, &opp, vals, &valp);
+				doop(ophead, opp--, valhead, valp--);
 			*opp++ = type;
 			break;
 		}
 		lasttype = type;
 		v.str = NULL;
 		v.num = 0;
 	}
-	while (opp > ops)
+	while (opp > ophead)
-		doop(ops, &opp, vals, &valp);
+		doop(ophead, opp--, valhead, valp--);
-
+	if (valp == valhead)
 	if (valp == vals)
 		enprintf(2, "syntax error: missing expression\n");
-	if (--valp != vals)
+	if (--valp > valhead)
 		enprintf(2, "syntax error: extra expression\n");
-	if (valp->s)
+	if (valp->str)
-		printf("%s\n", valp->s);
+		puts(valp->str);
 	else
-		printf("%"PRIdMAX"\n", valp->n);
+		printf("%lld\n", valp->num);
-	return (valp->s && *valp->s) || valp->n;
+	return (valp->str && *valp->str) || valp->num;
 }
 /* the only way to get usage() is if the user didn't supply -- and expression
 * begins with a -
 * expr(1p): "... the conforming application must employ the -- construct ...
 * if there is any chance the first operand might be a negative integer (or any
 * string with a leading minus"
 */
 static void
 usage(void)
 {
-	enprintf(3, "usage: %s [--] expression\n"
+	enprintf(3, "usage: %s expression\n", argv0);
 	            "note : the -- is mandatory if expression begins with a -\n", argv0);
 }
 int
 main(int argc, char *argv[])
 {
-	intmax_t n = INTMAX_MIN;
+	long long n = LLONG_MIN;
-	/* Get the maximum number of digits (+ sign) */
+	/* maximum number of digits + sign */
-	for (intlen = (n < 0); n; n /= 10, ++intlen)
+	for (maxdigits = (n < 0); n; n /= 10, ++maxdigits);
 		;
 	ARGBEGIN {
 	default: