sbase/tr.c
FRIGN 3c33abc520 Implement mallocarray()
A function used only in the OpenBSD kernel as of now, but it surely
provides a helpful interface when you just don't want to make sure
the incoming pointer to erealloc() is really NULL so it behaves
like malloc, making it a bit safer.

Talking about *allocarray(): It's definitely a major step in code-
hardening. Especially as a system administrator, you should be
able to trust your core tools without having to worry about segfaults
like this, which can easily lead to privilege escalation.

How do the GNU coreutils handle this?
$ strings -n 4611686018427387903
strings: invalid minimum string length -1
$ strings -n 4611686018427387904
strings: invalid minimum string length 0

They silently overflow...

In comparison, sbase:

$ strings -n 4611686018427387903
mallocarray: out of memory
$ strings -n 4611686018427387904
mallocarray: out of memory

The first out of memory is actually a true OOM returned by malloc,
whereas the second one is a detected overflow, which is not marked
in a special way.
Now tell me which diagnostic error-messages are easier to understand.
2015-03-10 22:19:19 +01:00

277 lines
5.7 KiB
C

/* See LICENSE file for copyright and license details. */
#include <stdlib.h>
#include "utf.h"
#include "util.h"
/*
 * Command-line switches.  Objects with static storage start out as 0;
 * main() sets each one to 1 when the matching option is given.
 */
static int cflag; /* -c / -C: work on the complement of set1 */
static int dflag; /* -d: drop matching runes instead of writing them */
static int sflag; /* -s: skip a matching rune equal to the last one written */
/*
 * One entry of a parsed set: the runes start..end (both inclusive),
 * with the length of the entry multiplied by quant (see rangelen()).
 */
struct range {
	Rune   start, end;
	size_t quant;
};
/*
 * Lookup table for [:name:] expressions: the name written between the
 * colons and the rune predicate that decides membership.
 */
static struct {
	char *name;
	int (*check)(Rune);
} classes[] = {
	{ .name = "alnum",  .check = isalnumrune  },
	{ .name = "alpha",  .check = isalpharune  },
	{ .name = "blank",  .check = isblankrune  },
	{ .name = "cntrl",  .check = iscntrlrune  },
	{ .name = "digit",  .check = isdigitrune  },
	{ .name = "graph",  .check = isgraphrune  },
	{ .name = "lower",  .check = islowerrune  },
	{ .name = "print",  .check = isprintrune  },
	{ .name = "punct",  .check = ispunctrune  },
	{ .name = "space",  .check = isspacerune  },
	{ .name = "upper",  .check = isupperrune  },
	{ .name = "xdigit", .check = isxdigitrune },
};
/*
 * Parsed operands, filled in by makeset() from main().  For each set:
 * the range array, the number of ranges stored in it, and the class
 * predicate chosen when the operand contained a [:class:] expression.
 * Static storage leaves the pointers NULL and the counts 0 until then.
 */
static struct range *set1;
static size_t        set1ranges;
static int         (*set1check)(Rune);

static struct range *set2;
static size_t        set2ranges;
static int         (*set2check)(Rune);
/*
 * Number of set positions covered by r: the count of runes from
 * r.start through r.end inclusive, multiplied by r.quant.
 */
static size_t
rangelen(struct range r)
{
	const size_t repeat = r.quant;

	return repeat * (r.end - r.start + 1);
}
/*
 * Total length of a set: the sum of rangelen() over its first
 * setranges entries.  An empty set has length 0.
 */
static size_t
setlen(struct range *set, size_t setranges)
{
	size_t total = 0;
	size_t left = setranges;

	/* unsigned addition is order-independent, so walk from the back */
	while (left-- > 0)
		total += rangelen(set[left]);

	return total;
}
/*
 * Compare the n runes starting at r with the C string s.
 *
 * Returns 1 only if s is exactly n characters long and each rune equals
 * the character at the same position; returns 0 otherwise.  Insisting
 * that s ends right after n characters keeps an abbreviation such as
 * "al" from being accepted as the class name "alnum".
 *
 * r must point to at least n runes; s must be NUL-terminated.
 */
static int
rstrmatch(Rune *r, char *s, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++) {
		/* s is shorter than n: stop before reading past its terminator */
		if (s[i] == '\0' || r[i] != s[i])
			return 0;
	}

	/* all n runes matched; s must not continue beyond them */
	return s[n] == '\0';
}
/*
 * Parse the operand str into an array of ranges.
 *
 * str is unescaped in place, converted to runes and scanned left to
 * right.  The following forms are recognised:
 *
 *   [=c=]     equivalence class, stored as the single rune c
 *   [:name:]  character class: *check is set to the matching predicate
 *             from classes[] and 0 is returned immediately
 *   [c*n]     rune c with quant n; only honoured while set1ranges > 0
 *             (i.e. when set1 has already been parsed).  A count
 *             starting with '0' is read in base 8.  A missing or zero
 *             count uses the length of set1.
 *   a-b       inclusive range, taken when b >= a
 *   c         any other rune, stored as a one-rune range
 *
 * A '[' that does not open one of the bracket forms is a literal.
 *
 * *set receives a newly allocated array with room for one range per
 * rune of the operand.  Returns the number of ranges stored in it.
 */
static size_t
makeset(char *str, struct range **set, int (**check)(Rune))
{
	Rune *rstr;
	size_t len, i, j, m, n;
	size_t q, setranges = 0;
	int factor, base;

	/* rstr defines at most len ranges */
	unescape(str);
	rstr = emallocarray(utflen(str) + 1, sizeof(*rstr));
	len = utftorunestr(str, rstr);
	*set = emallocarray(len, sizeof(**set));

	for (i = 0; i < len; i++) {
		if (rstr[i] == '[') {
			j = i;
nextbrack:
			/* nothing left to search for a closing bracket */
			if (j >= len)
				goto literal;
			for (m = j; m < len; m++)
				if (rstr[m] == ']')
					break;
			/*
			 * No (further) ']' exists, so no bracket expression
			 * can start at i: treat the '[' as a literal rather
			 * than testing a stale candidate position.
			 */
			if (m == len)
				goto literal;
			j = m;

			/* CLASSES [=EQUIV=] (skip) */
			if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
				if (j - i != 4)
					goto literal;
				(*set)[setranges].start = rstr[i + 2];
				(*set)[setranges].end = rstr[i + 2];
				(*set)[setranges].quant = 1;
				setranges++;
				i = j;
				continue;
			}

			/* CLASSES [:CLASS:] */
			if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
				for (n = 0; n < LEN(classes); n++) {
					if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
						*check = classes[n].check;
						/* the rune copy is not needed any more */
						free(rstr);
						return 0;
					}
				}
				eprintf("Invalid character class.\n");
			}

			/* REPEAT [_*n] (only allowed in set2) */
			if (j - i > 2 && rstr[i + 2] == '*' && set1ranges > 0) {
				/*
				 * check if right side of '*' is a number,
				 * accumulating it from the last digit backwards
				 * NOTE(review): q and factor are not checked
				 * for overflow on very long counts.
				 */
				q = 0;
				factor = 1;
				base = (rstr[i + 3] == '0') ? 8 : 10;
				for (n = j - 1; n > i + 2; n--) {
					if (rstr[n] < '0' || rstr[n] > '9') {
						/* n == 0 flags "not a number" */
						n = 0;
						break;
					}
					q += (rstr[n] - '0') * factor;
					factor *= base;
				}
				if (n == 0) {
					/* retry with the next ']' as the end */
					j = m + 1;
					goto nextbrack;
				}
				(*set)[setranges].start = rstr[i + 1];
				(*set)[setranges].end = rstr[i + 1];
				(*set)[setranges].quant = q ? q : setlen(set1, set1ranges);
				setranges++;
				i = j;
				continue;
			}

			/* this ']' closed nothing valid; try the next one */
			j = m + 1;
			goto nextbrack;
		}
literal:
		/* RANGES [_-__-_], _-__-_ */
		/* LITERALS _______ */
		(*set)[setranges].start = rstr[i];
		/* written as i + 2 < len so that len < 2 cannot wrap around */
		if (i + 2 < len && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
			i += 2;
		(*set)[setranges].end = rstr[i];
		(*set)[setranges].quant = 1;
		setranges++;
	}

	free(rstr);
	return setranges;
}
/*
 * Report the accepted command-line syntax through eprintf(), with the
 * program name taken from argv0.  Called from main() for unknown
 * options and for an invalid operand count.
 */
static void
usage(void)
{
eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
}
/*
 * tr: copy stdin to stdout rune by rune, translating, deleting or
 * squeezing runes according to the options and the set operands.
 *
 * After option parsing, set1 (and set2 when given) are parsed with
 * makeset() and the combination is validated.  The main loop is built
 * from two labels: "read" fetches the next rune (returning 0 when
 * efgetrune() reports no more input) and "write" emits the current
 * rune and remembers it in lastrune for the -s comparison.
 */
int
main(int argc, char *argv[])
{
	Rune r = 0, lastrune = 0;
	size_t off1, off2, i, m;

	ARGBEGIN {
	case 'c':
	case 'C':
		cflag = 1;
		break;
	case 'd':
		dflag = 1;
		break;
	case 's':
		sflag = 1;
		break;
	default:
		usage();
	} ARGEND;

	/* a single operand is only accepted with exactly one of -d / -s */
	if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag))
		usage();

	set1ranges = makeset(argv[0], &set1, &set1check);
	if (argc == 2)
		set2ranges = makeset(argv[1], &set2, &set2check);

	if (dflag == sflag && !set2ranges && !set2check)
		eprintf("set2 must be non-empty.\n");
	if (set2check && set2check != islowerrune && set2check != isupperrune)
		eprintf("set2 can only be the 'lower' or 'upper' class.\n");
	if (set2check && cflag && !dflag)
		eprintf("set2 can't be imaged to from a complement.\n");

read:
	if (!efgetrune(&r, stdin, "<stdin>"))
		return 0;
	off1 = off2 = 0;

	/* set1 given as ranges */
	for (i = 0; i < set1ranges; i++) {
		if (set1[i].start <= r && r <= set1[i].end) {
			if (dflag) {
				if (!cflag || (sflag && r == lastrune))
					goto read;
				else
					goto write;
			}
			if (sflag) {
				if (r == lastrune)
					goto read;
				else
					goto write;
			}

			/*
			 * Translation indexes into set2's ranges.  When set2
			 * was given only as a class it has none, and the
			 * arithmetic below would wrap around and read outside
			 * the array; report the mismatch instead (eprintf() is
			 * presumably fatal, as at the other diagnostics).
			 */
			if (!set2ranges)
				eprintf("Misaligned character classes.\n");

			/* off1: position of r counted from the start of set1 */
			for (m = 0; m < i; m++)
				off1 += rangelen(set1[m]);
			off1 += r - set1[i].start;

			/* set1 is longer than set2: use the last rune of set2 */
			if (off1 > setlen(set2, set2ranges) - 1) {
				r = set2[set2ranges - 1].end;
				goto write;
			}

			/* find the set2 range containing position off1 */
			for (m = 0; m < set2ranges; m++) {
				if (off2 + rangelen(set2[m]) > off1) {
					m++;
					break;
				}
				off2 += rangelen(set2[m]);
			}
			m--;
			r = set2[m].start + (off1 - off2) / set2[m].quant;
			goto write;
		}
	}

	/* set1 given as a character class */
	if (set1check && set1check(r)) {
		if (dflag) {
			if (!cflag || (sflag && r == lastrune))
				goto read;
			else
				goto write;
		}
		if (sflag) {
			if (r == lastrune)
				goto read;
			else
				goto write;
		}
		if (set1check == isupperrune && set2check == islowerrune)
			r = tolowerrune(r);
		else if (set1check == islowerrune && set2check == isupperrune)
			r = toupperrune(r);
		else if (set2ranges > 0)
			r = cflag ? r : set2[set2ranges - 1].end;
		else
			eprintf("Misaligned character classes.\n");
	} else if (cflag && set2ranges > 0) {
		/* r is outside set1: with -c it maps to the last rune of set2 */
		r = set2[set2ranges - 1].end;
	}

	if (dflag && cflag)
		goto read;
	if (dflag && sflag && r == lastrune)
		goto read;
write:
	lastrune = r;
	efputrune(&r, stdout, "<stdout>");
	goto read;
}