Add tablist support and a mandoc-manpage to expand(1)
and mark it as finished in the README.

This is another example showing how broken the GNU coreutils are:

$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö

This is due to the fact that they are still not UTF-8-aware and actually see "ä" as two single characters, expanding the "äää" with 4 spaces to the tab stop at column 10. The correct way, however, is to expand the "äää" with 2 spaces to the tab stop at column 5.

One can only imagine how this silently breaks a lot of code around the world. WHAT WERE THEY THINKING?
This commit is contained in:
143
expand.c
143
expand.c
@@ -1,89 +1,86 @@
|
||||
/* See LICENSE file for copyright and license details. */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "utf.h"
|
||||
#include "util.h"
|
||||
|
||||
static int expand(const char *, FILE *, int);
|
||||
static int iflag = 0;
|
||||
static size_t *tablist = NULL;
|
||||
static size_t tablistlen = 0;
|
||||
|
||||
static int iflag = 0;
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
static size_t
|
||||
parselist(const char *s, size_t slen)
|
||||
{
|
||||
eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
|
||||
}
|
||||
size_t i, m, len;
|
||||
char *sep;
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
FILE *fp;
|
||||
int tabstop = 8;
|
||||
int ret = 0;
|
||||
if (s[0] == ',' || s[0] == ' ')
|
||||
eprintf("expand: tablist can't begin with a ',' or ' '.\n");
|
||||
if (s[slen - 1] == ',' || s[slen - 1] == ' ')
|
||||
eprintf("expand: tablist can't end with a ',' or ' '.\n");
|
||||
|
||||
ARGBEGIN {
|
||||
case 'i':
|
||||
iflag = 1;
|
||||
break;
|
||||
case 't':
|
||||
tabstop = estrtol(EARGF(usage()), 0);
|
||||
if (!tabstop)
|
||||
eprintf("tab size cannot be zero\n");
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
} ARGEND;
|
||||
|
||||
if (argc == 0) {
|
||||
expand("<stdin>", stdin, tabstop);
|
||||
} else {
|
||||
for (; argc > 0; argc--, argv++) {
|
||||
if (!(fp = fopen(argv[0], "r"))) {
|
||||
weprintf("fopen %s:", argv[0]);
|
||||
ret = 1;
|
||||
continue;
|
||||
}
|
||||
expand(argv[0], fp, tabstop);
|
||||
fclose(fp);
|
||||
len = 1;
|
||||
for (i = 0; i < slen; i++) {
|
||||
if (s[i] == ',' || s[i] == ' ') {
|
||||
if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
|
||||
eprintf("expand: empty field in tablist.\n");
|
||||
len++;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
tablist = emalloc((len + 1) * sizeof(size_t));
|
||||
|
||||
m = 0;
|
||||
for (i = 0; i < slen; i += sep - (s + i) + 1) {
|
||||
tablist[m++] = strtol(s + i, &sep, 0);
|
||||
if (tablist[m - 1] == 0)
|
||||
eprintf("expand: tab size can't be zero.\n");
|
||||
if (*sep && *sep != ',' && *sep != ' ')
|
||||
eprintf("expand: invalid number in tablist.\n");
|
||||
if (m > 1 && tablist[m - 1] < tablist[m - 2])
|
||||
eprintf("expand: tablist must be ascending.\n");
|
||||
}
|
||||
|
||||
/* tab length = 1 for the overflowing case later in the matcher */
|
||||
tablist[len] = 1;
|
||||
return len;
|
||||
}
|
||||
|
||||
static int
|
||||
expand(const char *file, FILE *fp, int tabstop)
|
||||
expand(const char *file, FILE *fp)
|
||||
{
|
||||
int col = 0;
|
||||
size_t bol = 1, col = 0, i;
|
||||
Rune r;
|
||||
int bol = 1;
|
||||
|
||||
for (;;) {
|
||||
if (!readrune(file, fp, &r))
|
||||
break;
|
||||
|
||||
while (readrune(file, fp, &r)) {
|
||||
switch (r) {
|
||||
case '\t':
|
||||
if (tablistlen == 1)
|
||||
i = 0;
|
||||
else for (i = 0; i < tablistlen; i++)
|
||||
if (col < tablist[i])
|
||||
break;
|
||||
if (bol || !iflag) {
|
||||
do {
|
||||
col++;
|
||||
putchar(' ');
|
||||
} while (col % tabstop);
|
||||
} while (col % tablist[i]);
|
||||
} else {
|
||||
putchar('\t');
|
||||
col += tabstop - col % tabstop;
|
||||
col = tablist[i];
|
||||
}
|
||||
break;
|
||||
case '\b':
|
||||
bol = 0;
|
||||
if (col)
|
||||
col--;
|
||||
bol = 0;
|
||||
writerune("<stdout>", stdout, &r);
|
||||
putchar('\b');
|
||||
break;
|
||||
case '\n':
|
||||
col = 0;
|
||||
bol = 1;
|
||||
writerune("<stdout>", stdout, &r);
|
||||
col = 0;
|
||||
putchar('\n');
|
||||
break;
|
||||
default:
|
||||
col++;
|
||||
@@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
FILE *fp;
|
||||
char *tl = "8";
|
||||
int ret = 0;
|
||||
|
||||
ARGBEGIN {
|
||||
case 'i':
|
||||
iflag = 1;
|
||||
break;
|
||||
case 't':
|
||||
tl = EARGF(usage());
|
||||
if (!*tl)
|
||||
eprintf("expand: tablist cannot be empty.\n");
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
} ARGEND;
|
||||
|
||||
tablistlen = parselist(tl, strlen(tl));
|
||||
|
||||
if (argc == 0)
|
||||
expand("<stdin>", stdin);
|
||||
else {
|
||||
for (; argc > 0; argc--, argv++) {
|
||||
if (!(fp = fopen(argv[0], "r"))) {
|
||||
weprintf("fopen %s:", argv[0]);
|
||||
ret = 1;
|
||||
continue;
|
||||
}
|
||||
expand(argv[0], fp);
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
Reference in New Issue
Block a user