Make sort(1) utf-compliant and update README

Make it clear that <blank> characters are just spaces or tabs, not a
special group that needs special treatment for wide characters.

Also, and this was the only actual problem here, correctly calculate the
offsets given by the key definitions for the start and end characters
using the libutf utility functions.

Mark the progress in the README and put parentheses around the missing
flags, which are insane to implement for no real gain.
This commit is contained in:
FRIGN 2015-08-03 17:35:01 +02:00 committed by sin
parent 1622089a21
commit e153447657
2 changed files with 19 additions and 8 deletions

2
README
View File

@ -69,7 +69,7 @@ The following tools are implemented:
=*|x sha256sum . =*|x sha256sum .
=*|x sha512sum . =*|x sha512sum .
=*|o sleep . =*|o sleep .
sort -d, -f, -i # sort (-d, -f, -i)
=*|o split . =*|o split .
=*|x sponge . =*|x sponge .
#*|o strings . #*|o strings .

25
sort.c
View File

@ -6,6 +6,7 @@
#include "queue.h" #include "queue.h"
#include "text.h" #include "text.h"
#include "utf.h"
#include "util.h" #include "util.h"
struct keydef { struct keydef {
@ -43,7 +44,7 @@ static size_t col1siz, col2siz;
static char * static char *
skipblank(char *s) skipblank(char *s)
{ {
while (isblank(*s)) while (*s == ' ' || *s == '\t')
s++; s++;
return s; return s;
} }
@ -51,7 +52,7 @@ skipblank(char *s)
static char * static char *
skipnonblank(char *s) skipnonblank(char *s)
{ {
while (*s && *s != '\n' && !isblank(*s)) while (*s && *s != '\n' && *s != ' ' && *s != '\t')
s++; s++;
return s; return s;
} }
@ -74,25 +75,35 @@ skipcolumn(char *s, char *eol, int next_col)
static size_t static size_t
columns(char *line, const struct keydef *kd, char **col, size_t *colsiz) columns(char *line, const struct keydef *kd, char **col, size_t *colsiz)
{ {
Rune r;
char *start, *end, *eol = strchr(line, '\n'); char *start, *end, *eol = strchr(line, '\n');
size_t len; size_t len, utflen, rlen;
int i; int i;
for (i = 1, start = line; i < kd->start_column; i++) for (i = 1, start = line; i < kd->start_column; i++)
start = skipcolumn(start, eol, 1); start = skipcolumn(start, eol, 1);
if (kd->flags & MOD_STARTB) if (kd->flags & MOD_STARTB)
start = skipblank(start); start = skipblank(start);
start = MIN(eol, start + kd->start_char - 1); for (utflen = 0; start < eol && utflen < kd->start_char - 1;) {
rlen = chartorune(&r, start);
start += rlen;
utflen++;
}
if (kd->end_column) { if (kd->end_column) {
for (i = 1, end = line; i < kd->end_column; i++) for (i = 1, end = line; i < kd->end_column; i++)
end = skipcolumn(end, eol, 1); end = skipcolumn(end, eol, 1);
if (kd->flags & MOD_ENDB) if (kd->flags & MOD_ENDB)
end = skipblank(end); end = skipblank(end);
if (kd->end_char) if (kd->end_char) {
end = MIN(eol, end + kd->end_char); for (utflen = 0; end < eol && utflen < kd->end_char;) {
else rlen = chartorune(&r, end);
end += rlen;
utflen++;
}
} else {
end = skipcolumn(end, eol, 0); end = skipcolumn(end, eol, 0);
}
} else { } else {
end = eol; end = eol;
} }