Add even stricter UTF-8 support to wc(1)

using readrune() and iswspace().
musl, for instance, doesn't differentiate between iswspace() and
isspace(), but when it does, the code will be ready.
It goes without saying that GNU coreutils don't use iswspace()[0].

[0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c
This commit is contained in:
FRIGN 2015-02-01 04:06:06 +01:00
parent 696bb992c3
commit 986a9de51a

14
wc.c
View File

@ -3,7 +3,9 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
#include <wctype.h>
#include "utf.h"
#include "util.h" #include "util.h"
static int lflag = 0; static int lflag = 0;
@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
void void
wc(FILE *fp, const char *str) wc(FILE *fp, const char *str)
{ {
int word = 0; int word = 0, read;
int c; Rune c;
size_t nc = 0, nl = 0, nw = 0; size_t nc = 0, nl = 0, nw = 0;
while ((c = getc(fp)) != EOF) { while ((read = readrune(str, fp, &c))) {
if (cmode != 'm' || UTF8_POINT(c)) nc += (cmode == 'c') ? read :
nc++; (c != Runeerror) ? 1 : 0;
if (c == '\n') if (c == '\n')
nl++; nl++;
if (!isspace(c)) if (!iswspace(c))
word = 1; word = 1;
else if (word) { else if (word) {
word = 0; word = 0;