Add Unicode character class support

Thinking about it long enough, the solution seems almost trivial.
This commit is contained in:
FRIGN 2015-01-11 20:26:20 +01:00 committed by sin
parent 369bb01eb1
commit 09704afc24
1 changed file with 43 additions and 34 deletions

77
tr.c
View File

@ -1,3 +1,4 @@
#include <wctype.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -14,34 +15,31 @@ struct range {
size_t quant; size_t quant;
}; };
#define DIGIT "0-9"
#define UPPER "A-Z"
#define LOWER "a-z"
#define PUNCT "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
#define ALNUM DIGIT UPPER LOWER
static struct { static struct {
char *name; char *name;
char *str; int (*check)(wint_t);
} classes[] = { } classes[] = {
{ "alnum", ALNUM }, { "alnum", iswalnum },
{ "alpha", UPPER LOWER }, { "alpha", iswalpha },
{ "blank", " \t" }, { "blank", iswblank },
{ "cntrl", "\000-\037\177" }, { "cntrl", iswcntrl },
{ "digit", DIGIT }, { "digit", iswdigit },
{ "graph", ALNUM PUNCT }, { "graph", iswgraph },
{ "lower", LOWER }, { "lower", iswlower },
{ "print", ALNUM PUNCT " " }, { "print", iswprint },
{ "punct", PUNCT }, { "punct", iswpunct },
{ "space", "\t\n\v\f\r" }, { "space", iswspace },
{ "upper", UPPER }, { "upper", iswupper },
{ "xdigit", DIGIT "A-Fa-f" }, { "xdigit", iswxdigit },
}; };
static struct range *set1 = NULL; static struct range *set1 = NULL;
static size_t set1ranges = 0; static size_t set1ranges = 0;
static struct range *set2 = NULL; static int (*set1check)(wint_t) = NULL;
static size_t set2ranges = 0; static struct range *set2 = NULL;
static size_t set2ranges = 0;
static int (*set2check)(wint_t) = NULL;
static size_t static size_t
rangelen(struct range r) rangelen(struct range r)
@ -72,16 +70,13 @@ rstrmatch(Rune *r, char *s, size_t n)
} }
static size_t static size_t
makeset(char *str, struct range **set) makeset(char *str, struct range **set, int (**check)(wint_t))
{ {
Rune *rstr; Rune *rstr;
size_t len, i, j, m, n; size_t len, i, j, m, n;
size_t q, setranges; size_t q, setranges = 0;
int factor, base; int factor, base;
reset:
setranges = 0;
/* rstr defines at most len ranges */ /* rstr defines at most len ranges */
len = chartorunearr(str, &rstr); len = chartorunearr(str, &rstr);
*set = emalloc(len * sizeof(**set)); *set = emalloc(len * sizeof(**set));
@ -111,8 +106,8 @@ nextbrack:
if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') { if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
for (n = 0; n < LEN(classes); n++) { for (n = 0; n < LEN(classes); n++) {
if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) { if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
str = classes[n].str; *check = classes[n].check;
goto reset; return 0;
} }
} }
eprintf("Invalid character class\n"); eprintf("Invalid character class\n");
@ -193,10 +188,10 @@ main(int argc, char *argv[])
if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag)) if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag))
usage(); usage();
set1ranges = makeset(argv[0], &set1); set1ranges = makeset(argv[0], &set1, &set1check);
if (argc == 2) if (argc == 2)
set2ranges = makeset(argv[1], &set2); set2ranges = makeset(argv[1], &set2, &set2check);
if (!dflag && !set2ranges) if (dflag == sflag && !set2ranges && !set2check)
eprintf("set2 must be non-empty\n"); eprintf("set2 must be non-empty\n");
read: read:
if (!readrune("<stdin>", stdin, &r)) if (!readrune("<stdin>", stdin, &r))
@ -232,6 +227,20 @@ read:
goto write; goto write;
} }
} }
if (set1check && set1check(r)) {
if (dflag && !cflag)
goto read;
if (sflag) {
if (r == lastrune)
goto read;
else
goto write;
}
if (set1check == iswupper && set2check == iswlower)
r = towlower(r);
if (set1check == iswlower && set2check == iswupper)
r = towupper(r);
}
if (dflag && cflag) if (dflag && cflag)
goto read; goto read;
if (dflag && sflag && r == lastrune) if (dflag && sflag && r == lastrune)