Add even stricter UTF-8 support to wc(1)
using readrune() and iswspace(). musl, for instance, doesn't differentiate between iswspace() and isspace(), but when it does, the code will be ready. It goes without saying that GNU coreutils don't use iswspace()[0]. [0]: http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/wc.c
This commit is contained in:
parent
696bb992c3
commit
986a9de51a
14
wc.c
14
wc.c
|
@ -3,7 +3,9 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <wctype.h>
|
||||||
|
|
||||||
|
#include "utf.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
static int lflag = 0;
|
static int lflag = 0;
|
||||||
|
@ -30,16 +32,16 @@ output(const char *str, size_t nc, size_t nl, size_t nw)
|
||||||
void
|
void
|
||||||
wc(FILE *fp, const char *str)
|
wc(FILE *fp, const char *str)
|
||||||
{
|
{
|
||||||
int word = 0;
|
int word = 0, read;
|
||||||
int c;
|
Rune c;
|
||||||
size_t nc = 0, nl = 0, nw = 0;
|
size_t nc = 0, nl = 0, nw = 0;
|
||||||
|
|
||||||
while ((c = getc(fp)) != EOF) {
|
while ((read = readrune(str, fp, &c))) {
|
||||||
if (cmode != 'm' || UTF8_POINT(c))
|
nc += (cmode == 'c') ? read :
|
||||||
nc++;
|
(c != Runeerror) ? 1 : 0;
|
||||||
if (c == '\n')
|
if (c == '\n')
|
||||||
nl++;
|
nl++;
|
||||||
if (!isspace(c))
|
if (!iswspace(c))
|
||||||
word = 1;
|
word = 1;
|
||||||
else if (word) {
|
else if (word) {
|
||||||
word = 0;
|
word = 0;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user