Make sort(1) utf-compliant and update README

Make it clear that <blank> characters are just spaces or tabs and not a special group which needs special treatment for wide characters. Also, and this was the only real problem here, correctly calculate the offsets given by the key definitions for the start and end characters using the libutf utility functions. Mark the progress in the README and put parentheses around the missing flags, which are insane to implement for no real gain.
2015-08-03 17:35:01 +02:00
parent 1622089a21
commit e153447657
2 changed files with 19 additions and 8 deletions
@@ -69,7 +69,7 @@ The following tools are implemented:
 =*|x sha256sum   .
 =*|x sha512sum   .
 =*|o sleep       .
-     sort        -d, -f, -i
+#    sort        (-d, -f, -i)
 =*|o split       .
 =*|x sponge      .
 #*|o strings     .
@@ -6,6 +6,7 @@
 #include "queue.h"
 #include "text.h"
 #include "utf.h"
 #include "util.h"
 struct keydef {
@@ -43,7 +44,7 @@ static size_t col1siz, col2siz;
 static char *
 skipblank(char *s)
 {
-	while (isblank(*s))
+	while (*s == ' ' || *s == '\t')
 		s++;
 	return s;
 }
@@ -51,7 +52,7 @@ skipblank(char *s)
 static char *
 skipnonblank(char *s)
 {
-	while (*s && *s != '\n' && !isblank(*s))
+	while (*s && *s != '\n' && *s != ' ' && *s != '\t')
 		s++;
 	return s;
 }
@@ -74,25 +75,35 @@ skipcolumn(char *s, char *eol, int next_col)
 static size_t
 columns(char *line, const struct keydef *kd, char **col, size_t *colsiz)
 {
 	Rune r;
 	char *start, *end, *eol = strchr(line, '\n');
-	size_t len;
+	size_t len, utflen, rlen;
 	int i;
 	for (i = 1, start = line; i < kd->start_column; i++)
 		start = skipcolumn(start, eol, 1);
 	if (kd->flags & MOD_STARTB)
 		start = skipblank(start);
-	start = MIN(eol, start + kd->start_char - 1);
+	for (utflen = 0; start < eol && utflen < kd->start_char - 1;) {
 		rlen = chartorune(&r, start);
 		start += rlen;
 		utflen++;
 	}
 	if (kd->end_column) {
 		for (i = 1, end = line; i < kd->end_column; i++)
 			end = skipcolumn(end, eol, 1);
 		if (kd->flags & MOD_ENDB)
 			end = skipblank(end);
-		if (kd->end_char)
+		if (kd->end_char) {
-			end = MIN(eol, end + kd->end_char);
+			for (utflen = 0; end < eol && utflen < kd->end_char;) {
-		else
+				rlen = chartorune(&r, end);
 				end += rlen;
 				utflen++;
 			}
 		} else {
 			end = skipcolumn(end, eol, 0);
 		}
 	} else {
 		end = eol;
 	}