Add tablist support and a mandoc-manpage to expand(1)

and mark it as finished in the README.

This is another example showing how broken the GNU coreutils are:

$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö

This is because they are still not UTF-8 aware and actually see "ä"
as two separate characters (its two UTF-8 bytes), expanding the "äää"
with 4 spaces to a tab of length 10.
The correct way, however, is to expand the "äää" with 2 spaces to a
tab of length 5.
One can only imagine how this silently breaks a lot of code around
the world.
WHAT WERE THEY THINKING?
This commit is contained in:
FRIGN
2015-01-25 14:31:02 +01:00
parent 48bf88851a
commit 692c11bf2b
3 changed files with 143 additions and 77 deletions

143
expand.c
View File

@@ -1,89 +1,86 @@
/* See LICENSE file for copyright and license details. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "utf.h"
#include "util.h"
static int expand(const char *, FILE *, int);
static int iflag = 0;
static size_t *tablist = NULL;
static size_t tablistlen = 0;
static int iflag = 0;
static void
usage(void)
/*
 * NOTE(review): this span comes from a diff rendered without +/- markers, so
 * lines of the previous usage()/main() are interleaved with the new
 * parselist(). The comments added here annotate the parselist() lines only.
 *
 * parselist: split the tab list s (slen bytes, fields separated by a comma
 * or a blank) into the global tablist array and return the number of fields.
 * Malformed lists are reported through eprintf().
 */
static size_t
parselist(const char *s, size_t slen)
{
eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
}
size_t i, m, len;
char *sep;
int
main(int argc, char *argv[])
{
FILE *fp;
int tabstop = 8;
int ret = 0;
/* a separator may neither start nor end the list */
if (s[0] == ',' || s[0] == ' ')
eprintf("expand: tablist can't begin with a ',' or ' '.\n");
if (s[slen - 1] == ',' || s[slen - 1] == ' ')
eprintf("expand: tablist can't end with a ',' or ' '.\n");
ARGBEGIN {
case 'i':
iflag = 1;
break;
case 't':
tabstop = estrtol(EARGF(usage()), 0);
if (!tabstop)
eprintf("tab size cannot be zero\n");
break;
default:
usage();
} ARGEND;
if (argc == 0) {
expand("<stdin>", stdin, tabstop);
} else {
for (; argc > 0; argc--, argv++) {
if (!(fp = fopen(argv[0], "r"))) {
weprintf("fopen %s:", argv[0]);
ret = 1;
continue;
}
expand(argv[0], fp, tabstop);
fclose(fp);
/*
 * count the fields: one more than the number of separators; two adjacent
 * separators would enclose an empty field and are rejected
 */
len = 1;
for (i = 0; i < slen; i++) {
if (s[i] == ',' || s[i] == ' ') {
if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
eprintf("expand: empty field in tablist.\n");
len++;
}
}
return ret;
/* one slot per field plus one for the sentinel stored after the loop */
tablist = emalloc((len + 1) * sizeof(size_t));
m = 0;
/*
 * convert one field per iteration; strtol() leaves sep on the first
 * character after the number, so the increment skips the digits just
 * consumed plus the separator that follows them
 */
for (i = 0; i < slen; i += sep - (s + i) + 1) {
/*
 * NOTE(review): the long returned by strtol() is stored in a size_t
 * without a range check, so a negative field wraps to a large value
 */
tablist[m++] = strtol(s + i, &sep, 0);
if (tablist[m - 1] == 0)
eprintf("expand: tab size can't be zero.\n");
/* the number must be followed by a separator or the end of the string */
if (*sep && *sep != ',' && *sep != ' ')
eprintf("expand: invalid number in tablist.\n");
/* a stop smaller than its predecessor is rejected; equal stops pass */
if (m > 1 && tablist[m - 1] < tablist[m - 2])
eprintf("expand: tablist must be ascending.\n");
}
/* tab length = 1 for the overflowing case later in the matcher */
tablist[len] = 1;
return len;
}
static int
expand(const char *file, FILE *fp, int tabstop)
expand(const char *file, FILE *fp)
{
int col = 0;
size_t bol = 1, col = 0, i;
Rune r;
int bol = 1;
for (;;) {
if (!readrune(file, fp, &r))
break;
while (readrune(file, fp, &r)) {
switch (r) {
case '\t':
if (tablistlen == 1)
i = 0;
else for (i = 0; i < tablistlen; i++)
if (col < tablist[i])
break;
if (bol || !iflag) {
do {
col++;
putchar(' ');
} while (col % tabstop);
} while (col % tablist[i]);
} else {
putchar('\t');
col += tabstop - col % tabstop;
col = tablist[i];
}
break;
case '\b':
bol = 0;
if (col)
col--;
bol = 0;
writerune("<stdout>", stdout, &r);
putchar('\b');
break;
case '\n':
col = 0;
bol = 1;
writerune("<stdout>", stdout, &r);
col = 0;
putchar('\n');
break;
default:
col++;
@@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
return 0;
}
/*
 * Report the command-line synopsis of expand through eprintf(), using the
 * program name stored in argv0.
 */
static void
usage(void)
{
	eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
}
/*
 * Entry point: parse the options, build the tab list and expand every
 * operand to stdout.
 *
 * Options:
 *   -i          set iflag (expand() then leaves tabs that are not at the
 *               beginning of a line untouched)
 *   -t tablist  tab stop list handed to parselist(); defaults to "8"
 *
 * Without file operands stdin is expanded. A file that cannot be opened is
 * reported with weprintf() and skipped; the remaining files are still
 * processed.
 *
 * Returns 0 on success and 1 if a file could not be opened or if writing
 * to stdout failed.
 */
int
main(int argc, char *argv[])
{
	FILE *fp;
	char *tl = "8";
	int ret = 0;

	ARGBEGIN {
	case 'i':
		iflag = 1;
		break;
	case 't':
		tl = EARGF(usage());
		if (!*tl)
			eprintf("expand: tablist cannot be empty.\n");
		break;
	default:
		usage();
	} ARGEND;

	tablistlen = parselist(tl, strlen(tl));

	if (argc == 0) {
		expand("<stdin>", stdin);
	} else {
		for (; argc > 0; argc--, argv++) {
			if (!(fp = fopen(argv[0], "r"))) {
				weprintf("fopen %s:", argv[0]);
				ret = 1;
				continue;
			}
			expand(argv[0], fp);
			fclose(fp);
		}
	}

	/*
	 * The output goes through stdio, so a failed write (full disk, closed
	 * pipe, ...) may only surface when the buffer is flushed. Flush
	 * explicitly and check the stream's error flag so that lost output is
	 * reflected in the exit status instead of being silently ignored.
	 */
	if (fflush(stdout) == EOF || ferror(stdout)) {
		weprintf("write <stdout>:");
		ret = 1;
	}

	return ret;
}