From 8ab096d2a4213d9676f9531474afda0c73a7c04a Mon Sep 17 00:00:00 2001
From: FRIGN
Date: Sun, 1 Feb 2015 03:01:11 +0100
Subject: [PATCH] Finish up wc(1)

Use size_t for all counts, fix the manpage and refactor the code.
Here's yet another place where GNU coreutils fail:

sbase:
$ echo "GNU/Turd sucks" | wc -cm
15

coreutils:
$ echo "GNU/Turd sucks" | wc -cm
15 15

Take a bloody guess which behaviour is correct[0].

[0]: http://pubs.opengroup.org/onlinepubs/009604499/utilities/wc.html
---
 README |  2 +-
 wc.1   | 35 +++++++++++----------
 wc.c   | 97 ++++++++++++++++++++++++++++------------------------------
 3 files changed, 66 insertions(+), 68 deletions(-)

diff --git a/README b/README
index f465574..12ffd9c 100644
--- a/README
+++ b/README
@@ -82,7 +82,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =  sha1sum         non-posix                none
 =  sha256sum       non-posix                none
 =  sha512sum       non-posix                none
-   wc              yes                      none
+#* wc              yes                      none
 =  xargs           no                       -I, -L, -p, -s, -t, -x
 =* yes             non-posix                none
 
diff --git a/wc.1 b/wc.1
index 772d814..c18b210 100644
--- a/wc.1
+++ b/wc.1
@@ -1,4 +1,4 @@
-.Dd January 30, 2015
+.Dd February 1, 2015
 .Dt WC 1
 .Os sbase
 .Sh NAME
@@ -6,26 +6,27 @@
 .Nd word count
 .Sh SYNOPSIS
 .Nm
-.Op Fl clmw
+.Op Fl c | Fl m
+.Op Fl lw
 .Op Ar file ...
 .Sh DESCRIPTION
 .Nm
-prints the number of lines, words, and bytes in each file. If any flags are
-given,
+prints the number of lines, words and bytes in each
+.Ar file ,
+unless set differently with flags.
+If no
+.Ar file
+is given
 .Nm
-will print only the requested information. If no
-.Ar files
-are given,
-.Nm
-reads stdin.
+reads from stdin.
 .Sh OPTIONS
 .Bl -tag -width Ds
-.It Fl c
-print the number of bytes.
-.It Fl l
-print the number of lines.
-.It Fl m
-print the number of characters, not bytes.
-.It Fl w
-print the number of words.
+.It Fl c | Fl l | Fl m | Fl w
+Print the number of bytes | lines | characters | words.
 .El
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
diff --git a/wc.c b/wc.c
index f2a7206..f283e1b 100644
--- a/wc.c
+++ b/wc.c
@@ -6,18 +6,58 @@
 
 #include "util.h"
 
-static void output(const char *, long, long, long);
-static void wc(FILE *, const char *);
+static int lflag = 0;
+static int wflag = 0;
+static char cmode = 0;
+static size_t tc = 0, tl = 0, tw = 0;
 
-static int lflag = 0;
-static int wflag = 0;
-static char cmode = 0;
-static long tc = 0, tl = 0, tw = 0;
+void
+output(const char *str, size_t nc, size_t nl, size_t nw)
+{
+	int noflags = !cmode && !lflag && !wflag;
+
+	if (lflag || noflags)
+		printf(" %5zu", nl);
+	if (wflag || noflags)
+		printf(" %5zu", nw);
+	if (cmode || noflags)
+		printf(" %5zu", nc);
+	if (str)
+		printf(" %s", str);
+	putchar('\n');
+}
+
+void
+wc(FILE *fp, const char *str)
+{
+	int word = 0;
+	int c;
+	size_t nc = 0, nl = 0, nw = 0;
+
+	while ((c = getc(fp)) != EOF) {
+		if (cmode != 'm' || UTF8_POINT(c))
+			nc++;
+		if (c == '\n')
+			nl++;
+		if (!isspace(c))
+			word = 1;
+		else if (word) {
+			word = 0;
+			nw++;
+		}
+	}
+	if (word)
+		nw++;
+	tc += nc;
+	tl += nl;
+	tw += nw;
+	output(str, nc, nl, nw);
+}
 
 static void
 usage(void)
 {
-	eprintf("usage: %s [-clmw] [files...]\n", argv0);
+	eprintf("usage: %s [-c | -m] [-lw] [file ...]\n", argv0);
 }
 
 int
@@ -59,46 +99,3 @@ main(int argc, char *argv[])
 	}
 	return 0;
 }
-
-void
-output(const char *str, long nc, long nl, long nw)
-{
-	int noflags = !cmode && !lflag && !wflag;
-
-	if (lflag || noflags)
-		printf(" %5ld", nl);
-	if (wflag || noflags)
-		printf(" %5ld", nw);
-	if (cmode || noflags)
-		printf(" %5ld", nc);
-	if (str)
-		printf(" %s", str);
-	putchar('\n');
-}
-
-void
-wc(FILE *fp, const char *str)
-{
-	int word = 0;
-	int c;
-	long nc = 0, nl = 0, nw = 0;
-
-	while ((c = getc(fp)) != EOF) {
-		if (cmode != 'm' || UTF8_POINT(c))
-			nc++;
-		if (c == '\n')
-			nl++;
-		if (!isspace(c))
-			word = 1;
-		else if (word) {
-			word = 0;
-			nw++;
-		}
-	}
-	if (word)
-		nw++;
-	tc += nc;
-	tl += nl;
-	tw += nw;
-	output(str, nc, nl, nw);
-}