2012-05-21 23:24:28 +00:00
|
|
|
/* See LICENSE file for copyright and license details. */
|
2015-02-14 20:02:41 +00:00
|
|
|
#include <ctype.h>
|
2012-05-21 23:24:28 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2013-03-05 20:35:55 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
#include "text.h"
|
2012-05-21 23:24:28 +00:00
|
|
|
#include "util.h"
|
|
|
|
|
|
|
|
static const char *countfmt = "";
|
2014-11-13 20:24:47 +00:00
|
|
|
static int dflag = 0;
|
|
|
|
static int uflag = 0;
|
2015-02-11 06:02:54 +00:00
|
|
|
static int fskip = 0;
|
|
|
|
static int sskip = 0;
|
2012-05-21 23:24:28 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
static struct line prevl;
|
|
|
|
static ssize_t prevoff = -1;
|
2014-01-20 10:47:46 +00:00
|
|
|
static long prevlinecount = 0;
|
2012-05-21 23:24:28 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
static size_t
|
|
|
|
uniqskip(struct line *l)
|
2015-02-11 06:02:54 +00:00
|
|
|
{
|
2016-03-10 06:02:04 +00:00
|
|
|
size_t i;
|
2015-02-11 06:02:54 +00:00
|
|
|
int f = fskip, s = sskip;
|
2015-02-11 12:02:33 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
for (i = 0; i < l->len && f; --f) {
|
|
|
|
while (isblank(l->data[i]))
|
|
|
|
i++;
|
|
|
|
while (i < l->len && !isblank(l->data[i]))
|
|
|
|
i++;
|
2015-02-11 06:02:54 +00:00
|
|
|
}
|
2016-03-10 06:02:04 +00:00
|
|
|
for (; s && i < l->len && l->data[i] != '\n'; --s, i++)
|
|
|
|
;
|
2015-03-17 22:59:09 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
return i;
|
2015-02-11 06:02:54 +00:00
|
|
|
}
|
|
|
|
|
2014-06-01 12:59:47 +00:00
|
|
|
static void
|
2016-03-10 06:02:04 +00:00
|
|
|
uniqline(FILE *ofp, struct line *l)
|
2012-05-21 23:24:28 +00:00
|
|
|
{
|
2016-03-10 06:02:04 +00:00
|
|
|
size_t loff;
|
2015-02-11 06:02:54 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
if (l) {
|
|
|
|
loff = uniqskip(l);
|
2012-05-21 23:24:28 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
if (prevoff >= 0 && (l->len - loff) == (prevl.len - prevoff) &&
|
|
|
|
!memcmp(l->data + loff, prevl.data + prevoff, l->len - loff)) {
|
|
|
|
++prevlinecount;
|
|
|
|
return;
|
|
|
|
}
|
2012-05-21 23:24:28 +00:00
|
|
|
}
|
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
if (prevoff >= 0) {
|
2014-11-13 17:29:30 +00:00
|
|
|
if ((prevlinecount == 1 && !dflag) ||
|
|
|
|
(prevlinecount != 1 && !uflag)) {
|
2015-03-07 13:04:04 +00:00
|
|
|
if (*countfmt)
|
|
|
|
fprintf(ofp, countfmt, prevlinecount);
|
2016-03-10 06:02:04 +00:00
|
|
|
fwrite(prevl.data, 1, prevl.len, ofp);
|
2012-05-21 23:24:28 +00:00
|
|
|
}
|
2016-03-10 06:02:04 +00:00
|
|
|
prevoff = -1;
|
2012-05-21 23:24:28 +00:00
|
|
|
}
|
|
|
|
|
2015-03-27 21:47:15 +00:00
|
|
|
if (l) {
|
2016-03-10 06:02:04 +00:00
|
|
|
if (!prevl.data || l->len >= prevl.len) {
|
|
|
|
prevl.len = l->len;
|
|
|
|
prevl.data = erealloc(prevl.data, prevl.len);
|
2015-03-27 21:47:15 +00:00
|
|
|
}
|
2016-03-10 06:02:04 +00:00
|
|
|
memcpy(prevl.data, l->data, prevl.len);
|
|
|
|
prevoff = loff;
|
2015-03-27 21:47:15 +00:00
|
|
|
}
|
2014-01-20 10:47:46 +00:00
|
|
|
prevlinecount = 1;
|
2012-05-21 23:24:28 +00:00
|
|
|
}
|
|
|
|
|
2014-06-01 12:59:47 +00:00
|
|
|
static void
|
2015-02-11 14:56:16 +00:00
|
|
|
uniq(FILE *fp, FILE *ofp)
|
2012-05-21 23:24:28 +00:00
|
|
|
{
|
2016-03-10 06:02:04 +00:00
|
|
|
static struct line line;
|
|
|
|
static size_t size;
|
2015-03-08 13:41:05 +00:00
|
|
|
ssize_t len;
|
2012-05-21 23:24:28 +00:00
|
|
|
|
2016-03-10 06:02:04 +00:00
|
|
|
while ((len = getline(&line.data, &size, fp)) > 0) {
|
|
|
|
line.len = len;
|
|
|
|
uniqline(ofp, &line);
|
|
|
|
}
|
2012-05-21 23:24:28 +00:00
|
|
|
}
|
|
|
|
|
2014-06-01 12:59:47 +00:00
|
|
|
/*
 * Flush the line still pending after the last input line by calling
 * uniqline() with no new line.
 */
static void
uniqfinish(FILE *ofp)
{
	struct line *none = NULL;

	uniqline(ofp, none);
}
|
2015-03-07 13:02:04 +00:00
|
|
|
|
|
|
|
static void
|
|
|
|
usage(void)
|
|
|
|
{
|
|
|
|
eprintf("usage: %s [-c] [-d | -u] [-f fields] [-s chars]"
|
|
|
|
" [input [output]]\n", argv0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2015-05-19 13:30:09 +00:00
|
|
|
FILE *fp[2] = { stdin, stdout };
|
2015-05-24 23:33:19 +00:00
|
|
|
int ret = 0, i;
|
2015-05-19 13:30:09 +00:00
|
|
|
char *fname[2] = { "<stdin>", "<stdout>" };
|
2015-03-07 13:02:04 +00:00
|
|
|
|
|
|
|
ARGBEGIN {
|
|
|
|
case 'c':
|
|
|
|
countfmt = "%7ld ";
|
|
|
|
break;
|
|
|
|
case 'd':
|
|
|
|
dflag = 1;
|
|
|
|
break;
|
|
|
|
case 'u':
|
|
|
|
uflag = 1;
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
fskip = estrtonum(EARGF(usage()), 0, INT_MAX);
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
sskip = estrtonum(EARGF(usage()), 0, INT_MAX);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
usage();
|
2015-11-01 10:16:49 +00:00
|
|
|
} ARGEND
|
2015-03-07 13:02:04 +00:00
|
|
|
|
|
|
|
if (argc > 2)
|
|
|
|
usage();
|
|
|
|
|
2015-05-19 13:30:09 +00:00
|
|
|
for (i = 0; i < argc; i++) {
|
|
|
|
if (strcmp(argv[i], "-")) {
|
|
|
|
fname[i] = argv[i];
|
|
|
|
if (!(fp[i] = fopen(argv[i], (i == 0) ? "r" : "w")))
|
|
|
|
eprintf("fopen %s:", argv[i]);
|
2015-05-15 11:28:39 +00:00
|
|
|
}
|
2015-03-17 22:59:09 +00:00
|
|
|
}
|
2015-03-07 13:02:04 +00:00
|
|
|
|
2015-05-19 13:30:09 +00:00
|
|
|
uniq(fp[0], fp[1]);
|
|
|
|
uniqfinish(fp[1]);
|
|
|
|
|
2015-05-24 23:33:19 +00:00
|
|
|
ret |= fshut(fp[0], fname[0]) | fshut(fp[1], fname[1]);
|
|
|
|
|
|
|
|
return ret;
|
2015-03-07 13:02:04 +00:00
|
|
|
}
|