Finish up wc(1)

Use size_t for all counts, fix the manpage and refactor the code.
Here's yet another place where GNU coreutils fail:

sbase:
$ echo "GNU/Turd sucks" | wc -cm
    15

coreutils:
$ echo "GNU/Turd sucks" | wc -cm
     15      15

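Take a bloody guess which behaviour is correct[0]: the POSIX synopsis is `wc [-c|-m] [-lw] [file...]`, so -c and -m are alternatives that select a single count column.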

[0]: http://pubs.opengroup.org/onlinepubs/009604499/utilities/wc.html
This commit is contained in:
FRIGN
2015-02-01 03:01:11 +01:00
parent d75cc2e556
commit 8ab096d2a4
3 changed files with 66 additions and 68 deletions

97
wc.c
View File

@@ -6,18 +6,58 @@
#include "util.h"
/*
 * Forward declarations. The `long` parameters here match the long-based
 * definition of output() that appears later in this listing.
 */
static void output(const char *, long, long, long);
static void wc(FILE *, const char *);
/*
 * Output selection state read by output(): lflag enables the line column,
 * wflag the word column, and a non-zero cmode the character column.
 * wc() additionally compares cmode against 'm' when counting.
 * NOTE(review): where these are set is not visible here (presumably option
 * parsing in main) — confirm.
 */
static int lflag = 0;
static int wflag = 0;
static char cmode = 0;
/* Running sums of the per-file counts; wc() adds nc/nl/nw into them. */
static size_t tc = 0, tl = 0, tw = 0;
/*
 * NOTE(review): the four declarations below repeat the ones above, with the
 * totals typed `long` instead of `size_t`. This looks like the pre-change
 * side of the diff hunk (the commit message says all counts move to size_t);
 * only one of the two sets can exist in a compilable file — confirm.
 */
static int lflag = 0;
static int wflag = 0;
static char cmode = 0;
static long tc = 0, tl = 0, tw = 0;
void
output(const char *str, size_t nc, size_t nl, size_t nw)
{
int noflags = !cmode && !lflag && !wflag;
if (lflag || noflags)
printf(" %5zu", nl);
if (wflag || noflags)
printf(" %5zu", nw);
if (cmode || noflags)
printf(" %5zu", nc);
if (str)
printf(" %s", str);
putchar('\n');
}
void
wc(FILE *fp, const char *str)
{
int word = 0;
int c;
size_t nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) {
if (cmode != 'm' || UTF8_POINT(c))
nc++;
if (c == '\n')
nl++;
if (!isspace(c))
word = 1;
else if (word) {
word = 0;
nw++;
}
}
if (word)
nw++;
tc += nc;
tl += nl;
tw += nw;
output(str, nc, nl, nw);
}
/*
 * Report the command-line synopsis through eprintf(), using argv0 as the
 * program name.
 *
 * NOTE(review): two different synopsis strings are emitted back to back.
 * The first ("[-clmw] [files...]") looks like the pre-change line of the
 * diff and the second ("[-c | -m] [-lw] [file ...]") like its replacement;
 * only one is presumably intended to remain. eprintf() is defined outside
 * this view — if it terminates the process, the second call is never
 * reached. Confirm before relying on either.
 */
static void
usage(void)
{
eprintf("usage: %s [-clmw] [files...]\n", argv0);
eprintf("usage: %s [-c | -m] [-lw] [file ...]\n", argv0);
}
int
@@ -59,46 +99,3 @@ main(int argc, char *argv[])
}
return 0;
}
void
output(const char *str, long nc, long nl, long nw)
{
int noflags = !cmode && !lflag && !wflag;
if (lflag || noflags)
printf(" %5ld", nl);
if (wflag || noflags)
printf(" %5ld", nw);
if (cmode || noflags)
printf(" %5ld", nc);
if (str)
printf(" %s", str);
putchar('\n');
}
void
wc(FILE *fp, const char *str)
{
int word = 0;
int c;
long nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) {
if (cmode != 'm' || UTF8_POINT(c))
nc++;
if (c == '\n')
nl++;
if (!isspace(c))
word = 1;
else if (word) {
word = 0;
nw++;
}
}
if (word)
nw++;
tc += nc;
tl += nl;
tw += nw;
output(str, nc, nl, nw);
}