Compare commits

..

2 Commits

Author SHA1 Message Date
Michael Forney
e6b6f34506 find: Fix buffer overflow in token stack
The stack is used for two purposes: storing operators for the
shunting yard algorithm, and storing primitives when arranging the
operators into a tree. The number of operators is bounded by the
number of arguments, since we only insert at most one extra operator
per primitive. However, the number of primitives may be as high as
argc + 1, since -print may have been added implicitly.

This can be triggered with an empty expression, `find .`, since in
this case argc is 0, but we still try to store -print in the stack.

Detected with musl's WIP allocator, mallocng-draft.
2020-05-12 20:01:43 -07:00
Michael Forney
6ff6bb57ce Add implementation of dd(1) 2020-05-12 19:58:21 -07:00
5 changed files with 329 additions and 3 deletions

1
.gitignore vendored
View File

@ -17,6 +17,7 @@
/cron /cron
/cut /cut
/date /date
/dd
/dirname /dirname
/du /du
/echo /echo

View File

@ -100,6 +100,7 @@ BIN =\
cron\ cron\
cut\ cut\
date\ date\
dd\
dirname\ dirname\
du\ du\
echo\ echo\

91
dd.1 Normal file
View File

@ -0,0 +1,91 @@
.Dd 2020-04-28
.Dt DD 1
.Os sbase
.Sh NAME
.Nm dd
.Nd convert and copy a file
.Sh SYNOPSIS
.Nm
.Op Ar operand Ns ...
.Sh DESCRIPTION
.Nm
copies its input to its output, possibly after conversion, using
the specified block sizes.
.Pp
The following operands are available:
.Bl -tag -width ibs=expr
.It Cm if= Ns Ar file
Read from the file named by
.Ar file
instead of standard input.
.It Cm of= Ns Ar file
Write to the file named by
.Ar file
instead of standard output.
.It Cm ibs= Ns Ar expr
Set the input block size to
.Ar expr
(defaults to 512).
.It Cm obs= Ns Ar expr
Set the output block size to
.Ar expr
(defaults to 512).
.It Cm bs= Ns Ar expr
Set the input and output block sizes to
.Ar expr .
Additionally, if no conversion other than
.Cm noerror ,
.Cm notrunc ,
or
.Cm sync
is specified, input blocks are copied as single output blocks, even
when the input block is short.
.It Cm skip= Ns Ar n
Skip
.Ar n
input blocks before starting to copy.
.It Cm seek= Ns Ar n
Skip
.Ar n
output blocks before starting to copy.
.It Cm count= Ns Ar n
Copy at most
.Ar n
input blocks.
.It Cm conv= Ns Ar value Ns Op , Ns Ar value Ns ...
Apply the conversions specified by
.Ar value .
.Bl -tag -width Ds
.It Cm lcase
Map uppercase characters to the corresponding lowercase character
using
.Fn tolower .
.It Cm ucase
Map lowercase characters to the corresponding uppercase character
using
.Fn toupper .
.It Cm swab
Swap each pair of bytes in the input block.
If there is an odd number of bytes in a block, the last one is
unmodified.
.It Cm noerror
In case of an error reading from the input, do not fail.
Instead, print a diagnostic message and a summary of the current
status.
.It Cm notrunc
Do not truncate the output file.
.It Cm sync
In case of a partial input block, pad with null bytes to form a
complete block.
.El
.El
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification, except that it does not implement the
.Cm block
and
.Cm unblock
conversions.

234
dd.c Normal file
View File

@ -0,0 +1,234 @@
/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include "util.h"
/*
 * Running totals of full and partial blocks read (ifull, ipart) and
 * written (ofull, opart); updated by main() and printed by summary().
 */
static off_t ifull, ofull, ipart, opart;
/*
 * Report the accepted command-line form through eprintf(), using argv0
 * as the program name.
 */
static void
usage(void)
{
eprintf("usage: %s [operand...]\n", argv0);
}
/*
 * Parse a block size expression: one or more decimal numbers joined by
 * 'x' (multiplication), each optionally followed by 'k' (times 1024)
 * or 'b' (times 512).
 *
 * Returns the product of all factors.  An expression that is malformed,
 * evaluates to zero, or does not fit in a size_t is reported through
 * eprintf() as an invalid block size expression.
 */
static size_t
parsesize(char *expr)
{
	char *s = expr;
	uintmax_t factor;
	size_t n = 1;
	int shift;

	for (;;) {
		/*
		 * strtoumax() skips leading blanks and accepts a sign (a
		 * negated value wraps to a huge number), so insist on a
		 * digit here.
		 */
		if (!isdigit((unsigned char)*s))
			goto invalid;
		factor = strtoumax(s, &s, 10);
		/*
		 * A zero factor makes the whole product zero, which is
		 * invalid.  UINTMAX_MAX is what strtoumax() returns when
		 * the number itself is out of range.
		 */
		if (factor == 0 || factor == UINTMAX_MAX || factor > SIZE_MAX / n)
			goto invalid;
		n *= factor;

		switch (*s) {
		case 'k': shift = 10; s++; break;
		case 'b': shift = 9; s++; break;
		default: shift = 0; break;
		}
		/* refuse a suffix that would shift bits out of size_t */
		if (n > (SIZE_MAX >> shift))
			goto invalid;
		n <<= shift;

		/* a trailing 'x' with nothing after it is caught below */
		if (*s != 'x' || !s[1])
			break;
		s++;
	}
	if (*s)
		goto invalid;

	return n;

invalid:
	eprintf("invalid block size expression '%s'\n", expr);
	return n;
}
/*
 * Exchange the two bytes of every complete pair in buf[0..len).
 * When len is odd the final byte is left where it is.
 */
static void
bswap(unsigned char *buf, size_t len)
{
	size_t i, pairs = len / 2;
	unsigned char tmp;

	for (i = 0; i < pairs; i++) {
		tmp = buf[2 * i];
		buf[2 * i] = buf[2 * i + 1];
		buf[2 * i + 1] = tmp;
	}
}
/* Replace each of the first len bytes of buf with its tolower() mapping. */
static void
lcase(unsigned char *buf, size_t len)
{
	unsigned char *end = buf + len;

	while (buf != end) {
		*buf = tolower(*buf);
		buf++;
	}
}
/* Replace each of the first len bytes of buf with its toupper() mapping. */
static void
ucase(unsigned char *buf, size_t len)
{
	unsigned char *end = buf + len;

	while (buf != end) {
		*buf = toupper(*buf);
		buf++;
	}
}
static void
summary(void)
{
fprintf(stderr, "%"PRIdMAX"+%"PRIdMAX" records in\n", (intmax_t)ifull, (intmax_t)ipart);
fprintf(stderr, "%"PRIdMAX"+%"PRIdMAX" records out\n", (intmax_t)ofull, (intmax_t)opart);
}
/*
 * dd: copy the input to the output in blocks, optionally converting
 * each input block on the way.
 *
 * Every argument must have the form name=value, where name is one of
 * if, of, ibs, obs, bs, skip, seek, count or conv.  Anything else is
 * reported through usage().
 *
 * Returns 0 after printing the record summary, or 1 when a read fails
 * and conv=noerror was not requested.
 */
int
main(int argc, char *argv[])
{
	enum {
		LCASE   = 1 << 0,
		UCASE   = 1 << 1,
		SWAB    = 1 << 2,
		NOERROR = 1 << 3,
		NOTRUNC = 1 << 4,
		SYNC    = 1 << 5,
	} conv = 0;
	char *arg, *val, *end;
	const char *iname = "-", *oname = "-";
	int ifd = 0, ofd = 1, eof = 0;
	size_t len, bs = 0, ibs = 512, obs = 512, ipos = 0, opos = 0;
	off_t skip = 0, seek = 0, count = -1;
	ssize_t ret;
	unsigned char *buf;

	argv0 = argc ? (argc--, *argv++) : "dd";
	for (; argc > 0; argc--, argv++) {
		arg = *argv;
		val = strchr(arg, '=');
		if (!val)
			usage();
		/* split "name=value" in place */
		*val++ = '\0';
		if (strcmp(arg, "if") == 0) {
			iname = val;
		} else if (strcmp(arg, "of") == 0) {
			oname = val;
		} else if (strcmp(arg, "ibs") == 0) {
			ibs = parsesize(val);
		} else if (strcmp(arg, "obs") == 0) {
			obs = parsesize(val);
		} else if (strcmp(arg, "bs") == 0) {
			bs = parsesize(val);
		} else if (strcmp(arg, "skip") == 0) {
			skip = estrtonum(val, 0, LLONG_MAX);
		} else if (strcmp(arg, "seek") == 0) {
			seek = estrtonum(val, 0, LLONG_MAX);
		} else if (strcmp(arg, "count") == 0) {
			count = estrtonum(val, 0, LLONG_MAX);
		} else if (strcmp(arg, "conv") == 0) {
			/* comma-separated list of conversion names */
			do {
				end = strchr(val, ',');
				if (end)
					*end++ = '\0';
				if (strcmp(val, "lcase") == 0)
					conv |= LCASE;
				else if (strcmp(val, "ucase") == 0)
					conv |= UCASE;
				else if (strcmp(val, "swab") == 0)
					conv |= SWAB;
				else if (strcmp(val, "noerror") == 0)
					conv |= NOERROR;
				else if (strcmp(val, "notrunc") == 0)
					conv |= NOTRUNC;
				else if (strcmp(val, "sync") == 0)
					conv |= SYNC;
				else
					eprintf("unknown conv flag '%s'\n", val);
				val = end;
			} while (val);
		} else {
			weprintf("unknown operand '%s'\n", arg);
			usage();
		}
	}
	/* bs= overrides both ibs= and obs= */
	if (bs)
		ibs = obs = bs;

	/* "-" (the default) keeps standard input / standard output */
	if (strcmp(iname, "-") != 0) {
		ifd = open(iname, O_RDONLY);
		if (ifd < 0)
			eprintf("open %s:", iname);
	}
	if (strcmp(oname, "-") != 0) {
		/* with seek= the file is truncated to the seek offset below */
		ofd = open(oname, O_WRONLY | O_CREAT | (conv & NOTRUNC || seek ? 0 : O_TRUNC), 0666);
		if (ofd < 0)
			eprintf("open %s:", oname);
	}

	/*
	 * buf[opos..ipos) is input that has been read but not yet
	 * written.  The buffer can hold up to obs pending bytes plus one
	 * more input block.
	 */
	len = MAX(ibs, obs) + ibs;
	buf = emalloc(len);

	/* if the input cannot be repositioned, skip by reading blocks */
	if (skip && lseek(ifd, skip * ibs, SEEK_SET) < 0) {
		while (skip--) {
			ret = read(ifd, buf, ibs);
			if (ret < 0)
				eprintf("read:");
			if (ret == 0) {
				eof = 1;
				break;
			}
		}
	}
	if (seek) {
		/* seek= counts output blocks, so scale by obs */
		if (!(conv & NOTRUNC) && ftruncate(ofd, seek * obs) != 0)
			eprintf("ftruncate:");
		if (lseek(ofd, seek * obs, SEEK_SET) < 0)
			eprintf("lseek:");
		/* XXX: handle non-seekable files */
	}

	while (!eof) {
		/* gather input until at least one output block is pending */
		while (ipos - opos < obs) {
			/*
			 * Enforce count= before every read, not once per
			 * output block; otherwise ibs < obs reads too much.
			 * count is -1 when no limit was given.
			 */
			if (ifull + ipart == count) {
				eof = 1;
				break;
			}
			/*
			 * Make room for a whole input block before each
			 * read.  Fewer than obs bytes are pending here, so
			 * after moving them to the front ipos + ibs <= len.
			 */
			if (len - ipos < ibs) {
				memmove(buf, buf + opos, ipos - opos);
				ipos -= opos;
				opos = 0;
			}
			ret = read(ifd, buf + ipos, ibs);
			if (ret == 0) {
				eof = 1;
				break;
			}
			if (ret < 0) {
				weprintf("read:");
				if (!(conv & NOERROR))
					return 1;
				summary();
				if (!(conv & SYNC))
					continue;
				/* with sync, substitute a block of null bytes */
				ret = 0;
			}
			if (ret < ibs) {
				ipart++;
				if (conv & SYNC) {
					memset(buf + ipos + ret, 0, ibs - ret);
					ret = ibs;
				}
			} else {
				ifull++;
			}
			if (conv & SWAB)
				bswap(buf + ipos, ret);
			if (conv & LCASE)
				lcase(buf + ipos, ret);
			if (conv & UCASE)
				ucase(buf + ipos, ret);
			ipos += ret;
			/*
			 * With bs= and no data conversion each input block
			 * is written as its own output block, even if short.
			 */
			if (bs && !(conv & (SWAB | LCASE | UCASE)))
				break;
		}
		if (ipos == opos)
			break;
		/*
		 * Write whole output blocks.  Once the input is exhausted,
		 * keep going until nothing is pending, so that a short
		 * write cannot leave data behind.
		 */
		do {
			ret = write(ofd, buf + opos, MIN(obs, ipos - opos));
			if (ret < 0)
				eprintf("write:");
			if (ret == 0)
				eprintf("write returned 0\n");
			if (ret < obs)
				opart++;
			else
				ofull++;
			opos += ret;
		} while (ipos - opos >= (eof ? 1 : obs));
	}
	summary();

	return 0;
}

5
find.c
View File

@ -785,12 +785,10 @@ parse(int argc, char **argv)
size_t ntok = 0; size_t ntok = 0;
struct tok and = { .u.oinfo = find_op("-a"), .type = AND }; struct tok and = { .u.oinfo = find_op("-a"), .type = AND };
infix = ereallocarray(NULL, 2 * argc + 1, sizeof(*infix));
stack = ereallocarray(NULL, argc, sizeof(*stack));
gflags.print = 1; gflags.print = 1;
/* convert argv to infix expression of tok, inserting in *tok */ /* convert argv to infix expression of tok, inserting in *tok */
infix = ereallocarray(NULL, 2 * argc + 1, sizeof(*infix));
for (arg = argv, tok = infix; *arg; arg++, tok++) { for (arg = argv, tok = infix; *arg; arg++, tok++) {
pri = find_primary(*arg); pri = find_primary(*arg);
@ -833,6 +831,7 @@ parse(int argc, char **argv)
* read from infix, resulting rpn ends up in rpn, next position in rpn is out * read from infix, resulting rpn ends up in rpn, next position in rpn is out
* push operators onto stack, next position in stack is top */ * push operators onto stack, next position in stack is top */
rpn = ereallocarray(NULL, ntok + gflags.print, sizeof(*rpn)); rpn = ereallocarray(NULL, ntok + gflags.print, sizeof(*rpn));
stack = ereallocarray(NULL, argc + gflags.print, sizeof(*stack));
for (tok = infix, out = rpn, top = stack; tok->type != END; tok++) { for (tok = infix, out = rpn, top = stack; tok->type != END; tok++) {
switch (tok->type) { switch (tok->type) {
case PRIM: *out++ = *tok; break; case PRIM: *out++ = *tok; break;