sbase/uniq.c
FRIGN 0545d32ce9 Handle '-' consistently
In general, POSIX does not define /dev/std{in, out, err} because it
does not want to depend on the dev-filesystem.
For utilities, it thus introduced the '-'-keyword to denote standard
input (and output in some cases) and the programs have to deal with
it accordingly.

Sadly, the design of many tools doesn't allow strict shell-redirections
and many scripts don't even use this feature when possible.

Thus, we made the decision to implement it consistently across all
tools where it makes sense (namely those which read files).

Along the way, I spotted some behavioural bugs in libutil/crypt.c and
others where it was forgotten to fshut the files after use.
2015-05-16 13:34:00 +01:00

147 lines
2.5 KiB
C

/* See LICENSE file for copyright and license details. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"
/* printf format used to prefix each output line with its repeat count;
 * empty string means no prefix (-c switches it to "%7ld ") */
static const char *countfmt = "";
/* -d: do not print lines whose run length is exactly 1 */
static int dflag = 0;
/* -u: do not print lines whose run length is other than 1 */
static int uflag = 0;
/* -f: number of leading fields ignored when comparing lines */
static int fskip = 0;
/* -s: number of characters ignored (after the skipped fields) when comparing */
static int sskip = 0;
/* heap copy of the line that started the current run; grown on demand */
static char *prevline = NULL;
/* points into prevline at the position where comparison starts;
 * NULL while no line is pending output */
static char *prevoffset = NULL;
/* how many consecutive matching lines the current run contains so far */
static long prevlinecount = 0;
/* number of bytes currently allocated for prevline */
static size_t prevlinesiz = 0;
/*
 * Return a pointer into l at which line comparison should begin.
 *
 * First skips up to fskip fields, a field being a run of blanks followed
 * by a run of non-blank characters (a trailing newline is not a blank, so
 * it counts as part of the last field). Then skips up to sskip further
 * characters, stopping early at a newline. The result never points past
 * the terminating NUL of l.
 */
static const char *
uniqskip(const char *l)
{
	const char *lo = l;
	int f = fskip, s = sskip;

	/* Once the NUL is reached further field skips cannot move lo, so
	 * stop instead of spinning through the remaining count. */
	for (; f && *lo; --f) {
		/* <ctype.h> functions are only defined for values
		 * representable as unsigned char (or EOF). */
		while (isblank((unsigned char)*lo))
			lo++;
		while (*lo && !isblank((unsigned char)*lo))
			lo++;
	}
	for (; s && *lo && *lo != '\n'; --s, ++lo)
		;

	return lo;
}
static void
uniqline(FILE *ofp, const char *l, size_t len)
{
const char *loffset = l ? uniqskip(l) : l;
int linesequel = l && prevoffset &&
!strcmp(loffset, prevoffset);
if (linesequel) {
++prevlinecount;
return;
}
if (prevoffset) {
if ((prevlinecount == 1 && !dflag) ||
(prevlinecount != 1 && !uflag)) {
if (*countfmt)
fprintf(ofp, countfmt, prevlinecount);
fputs(prevline, ofp);
}
prevoffset = NULL;
}
if (l) {
if (!prevline || len >= prevlinesiz) {
prevlinesiz = len + 1;
prevline = erealloc(prevline, prevlinesiz);
}
memcpy(prevline, l, len);
prevline[len] = '\0';
prevoffset = prevline + (loffset - l);
}
prevlinecount = 1;
}
/*
 * Read fp line by line with getline() and hand every line to uniqline(),
 * which writes the filtered output to ofp. The last pending line is not
 * flushed here; that happens when uniqline() is called with a NULL line.
 */
static void
uniq(FILE *fp, FILE *ofp)
{
	char *buf = NULL;
	size_t size = 0;
	ssize_t len;

	while ((len = getline(&buf, &size, fp)) > 0)
		uniqline(ofp, buf, (size_t)len);

	/* getline() allocated (or grew) buf; release it when done reading */
	free(buf);
}
/*
 * Flush the pending run to ofp by calling uniqline() with no new line
 * (l == NULL, len == 0).
 */
static void
uniqfinish(FILE *ofp)
{
uniqline(ofp, NULL, 0);
}
/*
 * Report the command-line synopsis through eprintf(), with argv0 as the
 * program name.
 * NOTE(review): eprintf() and argv0 are declared outside this file
 * (presumably via util.h); eprintf() presumably terminates the program —
 * confirm.
 */
static void
usage(void)
{
eprintf("usage: %s [-c] [-d | -u] [-f fields] [-s chars]"
" [input [output]]\n", argv0);
}
/*
 * uniq [-c] [-d | -u] [-f fields] [-s chars] [input [output]]
 *
 * Parses the options, selects the input and output streams (a missing
 * operand or "-" means standard input / standard output), filters the
 * input through uniq(), flushes the last pending line and closes both
 * streams via efshut().
 * NOTE(review): ARGBEGIN/ARGEND, EARGF, estrtonum, eprintf and efshut come
 * from outside this file (presumably util.h); the fopen error paths rely
 * on eprintf() not returning — confirm.
 */
int
main(int argc, char *argv[])
{
	/* Defaults cover the no-operand case, so the streams and their
	 * display names are always initialised before use. */
	FILE *fp = stdin, *ofp = stdout;
	const char *iname = "<stdin>", *oname = "<stdout>";

	ARGBEGIN {
	case 'c':
		countfmt = "%7ld ";
		break;
	case 'd':
		dflag = 1;
		break;
	case 'u':
		uflag = 1;
		break;
	case 'f':
		fskip = estrtonum(EARGF(usage()), 0, INT_MAX);
		break;
	case 's':
		sskip = estrtonum(EARGF(usage()), 0, INT_MAX);
		break;
	default:
		usage();
	} ARGEND;

	if (argc > 2)
		usage();

	/* Only touch argv[i] when that operand actually exists. */
	if (argc >= 1 && strcmp(argv[0], "-") != 0) {
		iname = argv[0];
		if (!(fp = fopen(iname, "r")))
			eprintf("fopen %s:", iname);
	}
	if (argc == 2 && strcmp(argv[1], "-") != 0) {
		oname = argv[1];
		if (!(ofp = fopen(oname, "w")))
			eprintf("fopen %s:", oname);
	}

	uniq(fp, ofp);
	uniqfinish(ofp);

	efshut(fp, iname);
	efshut(ofp, oname);

	return 0;
}