Add tablist support and a mandoc-manpage to expand(1)

and mark it as finished in the README.

This is another example showing how broken the GNU coreutils are:

$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö

This is because GNU expand is still not UTF-8-aware: it counts "ä" as
two characters (its two UTF-8 bytes), so it sees "äää" as 6 columns
wide and pads it with 4 spaces up to the tabstop at column 10.
The correct behaviour, however, is to count "äää" as 3 columns and pad
it with 2 spaces up to the tabstop at column 5.
One can only imagine how this silently breaks a lot of code around
the world.
WHAT WERE THEY THINKING?
This commit is contained in:
FRIGN 2015-01-25 14:31:02 +01:00
parent 48bf88851a
commit 692c11bf2b
3 changed files with 143 additions and 77 deletions

2
README
View File

@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
= du no -H, -L, (-x)
=* echo yes none
=* env yes none
# expand yes none
#* expand yes none
expr yes none
=* false yes none
fold yes none

View File

@ -1,25 +1,50 @@
.TH EXPAND 1 sbase\-VERSION
.SH NAME
expand \- expand tabs to spaces
.SH SYNOPSIS
.B expand
.RB [ \-t
.IR n ]
.RI [ file ...]
.SH DESCRIPTION
expand processes the named files or the standard input, writing the
standard output with tabs changed into spaces. Backspace characters
are preserved into the output and decrement the column count for tab
calculations.
.SH OPTIONS
.TP
.BI \-i
Only change tabs to spaces at the start of lines.
.TP
.BI \-t " n"
Expand tabs to
.I n
spaces. We currently support only a single numerical argument.
.SH SEE ALSO
.IR unexpand (1),
.IR fold (1)
.Dd January 25, 2015
.Dt EXPAND 1 sbase\-VERSION
.Sh NAME
.Nm expand
.Nd expand tabs to spaces
.Sh SYNOPSIS
.Nm expand
.Op Fl i
.Op Fl t Ar tablist
.Op Ar file ...
.Sh DESCRIPTION
.Nm
converts tabs to spaces in each
.Ar file
as specified in
.Ar tablist .
If no file is given,
.Nm
reads from stdin.
.Pp
Backspace characters are preserved and decrement the column count
for tab calculations.
.Sh OPTIONS
.Bl -tag -width Ds
.It Fl i
Only expand tabs at the beginning of lines, i.e. expand each
line until a character different from '\et' and ' ' is reached.
.It Fl t Ar tablist
Specify tab size or tabstops.
.Ar tablist
is a list of one (in the former case) or multiple (in the latter case)
strictly positive integers separated by ' ' or ','.
.Pp
The default
.Ar tablist
is "8".
.El
.Sh SEE ALSO
.Xr unexpand 1 ,
.Xr fold 1
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification.
.Pp
The
.Op Fl i
flag is an extension to that specification.

141
expand.c
View File

@ -1,89 +1,86 @@
/* See LICENSE file for copyright and license details. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "utf.h"
#include "util.h"
static int expand(const char *, FILE *, int);
static int iflag = 0;
static size_t *tablist = NULL;
static size_t tablistlen = 0;
static void
usage(void)
static size_t
parselist(const char *s, size_t slen)
{
eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
}
size_t i, m, len;
char *sep;
int
main(int argc, char *argv[])
{
FILE *fp;
int tabstop = 8;
int ret = 0;
if (s[0] == ',' || s[0] == ' ')
eprintf("expand: tablist can't begin with a ',' or ' '.\n");
if (s[slen - 1] == ',' || s[slen - 1] == ' ')
eprintf("expand: tablist can't end with a ',' or ' '.\n");
ARGBEGIN {
case 'i':
iflag = 1;
break;
case 't':
tabstop = estrtol(EARGF(usage()), 0);
if (!tabstop)
eprintf("tab size cannot be zero\n");
break;
default:
usage();
} ARGEND;
if (argc == 0) {
expand("<stdin>", stdin, tabstop);
} else {
for (; argc > 0; argc--, argv++) {
if (!(fp = fopen(argv[0], "r"))) {
weprintf("fopen %s:", argv[0]);
ret = 1;
continue;
}
expand(argv[0], fp, tabstop);
fclose(fp);
len = 1;
for (i = 0; i < slen; i++) {
if (s[i] == ',' || s[i] == ' ') {
if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
eprintf("expand: empty field in tablist.\n");
len++;
}
}
return ret;
tablist = emalloc((len + 1) * sizeof(size_t));
m = 0;
for (i = 0; i < slen; i += sep - (s + i) + 1) {
tablist[m++] = strtol(s + i, &sep, 0);
if (tablist[m - 1] == 0)
eprintf("expand: tab size can't be zero.\n");
if (*sep && *sep != ',' && *sep != ' ')
eprintf("expand: invalid number in tablist.\n");
if (m > 1 && tablist[m - 1] < tablist[m - 2])
eprintf("expand: tablist must be ascending.\n");
}
/* tab length = 1 for the overflowing case later in the matcher */
tablist[len] = 1;
return len;
}
static int
expand(const char *file, FILE *fp, int tabstop)
expand(const char *file, FILE *fp)
{
int col = 0;
size_t bol = 1, col = 0, i;
Rune r;
int bol = 1;
for (;;) {
if (!readrune(file, fp, &r))
break;
while (readrune(file, fp, &r)) {
switch (r) {
case '\t':
if (tablistlen == 1)
i = 0;
else for (i = 0; i < tablistlen; i++)
if (col < tablist[i])
break;
if (bol || !iflag) {
do {
col++;
putchar(' ');
} while (col % tabstop);
} while (col % tablist[i]);
} else {
putchar('\t');
col += tabstop - col % tabstop;
col = tablist[i];
}
break;
case '\b':
bol = 0;
if (col)
col--;
bol = 0;
writerune("<stdout>", stdout, &r);
putchar('\b');
break;
case '\n':
col = 0;
bol = 1;
writerune("<stdout>", stdout, &r);
col = 0;
putchar('\n');
break;
default:
col++;
@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
return 0;
}
/*
 * Report the command-line synopsis via eprintf(), with argv0 filling in
 * the program name.
 * NOTE(review): eprintf() is presumably fatal (it comes from util.h, which
 * is not visible here) -- confirm before relying on usage() not returning.
 */
static void
usage(void)
{
eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
}
/*
 * Entry point: handle the -i and -t options, turn the tablist string into
 * the global tablist/tablistlen via parselist(), then run expand() over
 * stdin (no operands) or over each named file in order.
 *
 * Returns 1 if at least one file could not be opened, 0 otherwise.
 */
int
main(int argc, char *argv[])
{
	FILE *in;
	char *spec = "8";	/* tablist used when -t is not given */
	int status = 0;
	int n;

	ARGBEGIN {
	case 'i':
		iflag = 1;
		break;
	case 't':
		spec = EARGF(usage());
		/* reject an empty argument before parselist() looks at it */
		if (*spec == '\0')
			eprintf("expand: tablist cannot be empty.\n");
		break;
	default:
		usage();
	} ARGEND;

	tablistlen = parselist(spec, strlen(spec));

	/* no operands: filter standard input and finish */
	if (argc == 0) {
		expand("<stdin>", stdin);
		return status;
	}

	for (n = 0; n < argc; n++) {
		in = fopen(argv[n], "r");
		if (in == NULL) {
			/* keep going with the remaining files, but remember the failure */
			weprintf("fopen %s:", argv[n]);
			status = 1;
			continue;
		}
		expand(argv[n], in);
		fclose(in);
	}

	return status;
}