Add tablist support and a mandoc-manpage to expand(1)
and mark it as finished in the README.

This is another example showing how broken the GNU coreutils are:

$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö

This is due to the fact that they are still not UTF-8-aware and actually see "ä" as two single characters, expanding the "äää" with 4 spaces to a tab of length 10. The correct way, however, is to expand the "äää" with 2 spaces to a tab of length 5. One can only imagine how this silently breaks a lot of code around the world. WHAT WERE THEY THINKING?
This commit is contained in:
parent
48bf88851a
commit
692c11bf2b
2
README
2
README
|
@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
||||||
= du no -H, -L, (-x)
|
= du no -H, -L, (-x)
|
||||||
=* echo yes none
|
=* echo yes none
|
||||||
=* env yes none
|
=* env yes none
|
||||||
# expand yes none
|
#* expand yes none
|
||||||
expr yes none
|
expr yes none
|
||||||
=* false yes none
|
=* false yes none
|
||||||
fold yes none
|
fold yes none
|
||||||
|
|
75
expand.1
75
expand.1
|
@ -1,25 +1,50 @@
|
||||||
.TH EXPAND 1 sbase\-VERSION
|
.Dd January 25, 2015
|
||||||
.SH NAME
|
.Dt EXPAND 1 sbase\-VERSION
|
||||||
expand \- expand tabs to spaces
|
.Sh NAME
|
||||||
.SH SYNOPSIS
|
.Nm expand
|
||||||
.B expand
|
.Nd expand tabs to spaces
|
||||||
.RB [ \-t
|
.Sh SYNOPSIS
|
||||||
.IR n ]
|
.Nm expand
|
||||||
.RI [ file ...]
|
.Op Fl i
|
||||||
.SH DESCRIPTION
|
.Op Fl t Ar tablist
|
||||||
expand processes the named files or the standard input, writing the
|
.Op Ar file ...
|
||||||
standard output with tabs changed into spaces. Backspace characters
|
.Sh DESCRIPTION
|
||||||
are preserved into the output and decrement the column count for tab
|
.Nm
|
||||||
calculations.
|
converts tabs to spaces in each
|
||||||
.SH OPTIONS
|
.Ar file
|
||||||
.TP
|
as specified in
|
||||||
.BI \-i
|
.Ar tablist .
|
||||||
Only change tabs to spaces at the start of lines.
|
If no file is given,
|
||||||
.TP
|
.Nm
|
||||||
.BI \-t " n"
|
reads from stdin.
|
||||||
Expand tabs to
|
.Pp
|
||||||
.I n
|
Backspace characters are preserved and decrement the column count
|
||||||
spaces. We currently support only a single numerical argument.
|
for tab calculations.
|
||||||
.SH SEE ALSO
|
.Sh OPTIONS
|
||||||
.IR unexpand (1),
|
.Bl -tag -width Ds
|
||||||
.IR fold (1)
|
.It Fl i
|
||||||
|
Only expand tabs at the beginning of lines, i.e. expand each
|
||||||
|
line until a character different from '\et' and ' ' is reached.
|
||||||
|
.It Fl t Ar tablist
|
||||||
|
Specify tab size or tabstops.
|
||||||
|
.Ar tablist
|
||||||
|
is a list of one (in the former case) or multiple (in the latter case)
|
||||||
|
strictly positive integers separated by ' ' or ','.
|
||||||
|
.Pp
|
||||||
|
The default
|
||||||
|
.Ar tablist
|
||||||
|
is "8".
|
||||||
|
.El
|
||||||
|
.Sh SEE ALSO
|
||||||
|
.Xr unexpand 1 ,
|
||||||
|
.Xr fold 1
|
||||||
|
.Sh STANDARDS
|
||||||
|
The
|
||||||
|
.Nm
|
||||||
|
utility is compliant with the
|
||||||
|
.St -p1003.1-2008
|
||||||
|
specification.
|
||||||
|
.Pp
|
||||||
|
The
|
||||||
|
.Op Fl i
|
||||||
|
flag is an extension to that specification.
|
||||||
|
|
141
expand.c
141
expand.c
|
@ -1,89 +1,86 @@
|
||||||
/* See LICENSE file for copyright and license details. */
|
/* See LICENSE file for copyright and license details. */
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "utf.h"
|
#include "utf.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
static int expand(const char *, FILE *, int);
|
|
||||||
|
|
||||||
static int iflag = 0;
|
static int iflag = 0;
|
||||||
|
static size_t *tablist = NULL;
|
||||||
|
static size_t tablistlen = 0;
|
||||||
|
|
||||||
static void
|
static size_t
|
||||||
usage(void)
|
parselist(const char *s, size_t slen)
|
||||||
{
|
{
|
||||||
eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
|
size_t i, m, len;
|
||||||
}
|
char *sep;
|
||||||
|
|
||||||
int
|
if (s[0] == ',' || s[0] == ' ')
|
||||||
main(int argc, char *argv[])
|
eprintf("expand: tablist can't begin with a ',' or ' '.\n");
|
||||||
{
|
if (s[slen - 1] == ',' || s[slen - 1] == ' ')
|
||||||
FILE *fp;
|
eprintf("expand: tablist can't end with a ',' or ' '.\n");
|
||||||
int tabstop = 8;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
ARGBEGIN {
|
len = 1;
|
||||||
case 'i':
|
for (i = 0; i < slen; i++) {
|
||||||
iflag = 1;
|
if (s[i] == ',' || s[i] == ' ') {
|
||||||
break;
|
if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
|
||||||
case 't':
|
eprintf("expand: empty field in tablist.\n");
|
||||||
tabstop = estrtol(EARGF(usage()), 0);
|
len++;
|
||||||
if (!tabstop)
|
|
||||||
eprintf("tab size cannot be zero\n");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
usage();
|
|
||||||
} ARGEND;
|
|
||||||
|
|
||||||
if (argc == 0) {
|
|
||||||
expand("<stdin>", stdin, tabstop);
|
|
||||||
} else {
|
|
||||||
for (; argc > 0; argc--, argv++) {
|
|
||||||
if (!(fp = fopen(argv[0], "r"))) {
|
|
||||||
weprintf("fopen %s:", argv[0]);
|
|
||||||
ret = 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
expand(argv[0], fp, tabstop);
|
|
||||||
fclose(fp);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
tablist = emalloc((len + 1) * sizeof(size_t));
|
||||||
|
|
||||||
|
m = 0;
|
||||||
|
for (i = 0; i < slen; i += sep - (s + i) + 1) {
|
||||||
|
tablist[m++] = strtol(s + i, &sep, 0);
|
||||||
|
if (tablist[m - 1] == 0)
|
||||||
|
eprintf("expand: tab size can't be zero.\n");
|
||||||
|
if (*sep && *sep != ',' && *sep != ' ')
|
||||||
|
eprintf("expand: invalid number in tablist.\n");
|
||||||
|
if (m > 1 && tablist[m - 1] < tablist[m - 2])
|
||||||
|
eprintf("expand: tablist must be ascending.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* tab length = 1 for the overflowing case later in the matcher */
|
||||||
|
tablist[len] = 1;
|
||||||
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
expand(const char *file, FILE *fp, int tabstop)
|
expand(const char *file, FILE *fp)
|
||||||
{
|
{
|
||||||
int col = 0;
|
size_t bol = 1, col = 0, i;
|
||||||
Rune r;
|
Rune r;
|
||||||
int bol = 1;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
if (!readrune(file, fp, &r))
|
|
||||||
break;
|
|
||||||
|
|
||||||
|
while (readrune(file, fp, &r)) {
|
||||||
switch (r) {
|
switch (r) {
|
||||||
case '\t':
|
case '\t':
|
||||||
|
if (tablistlen == 1)
|
||||||
|
i = 0;
|
||||||
|
else for (i = 0; i < tablistlen; i++)
|
||||||
|
if (col < tablist[i])
|
||||||
|
break;
|
||||||
if (bol || !iflag) {
|
if (bol || !iflag) {
|
||||||
do {
|
do {
|
||||||
col++;
|
col++;
|
||||||
putchar(' ');
|
putchar(' ');
|
||||||
} while (col % tabstop);
|
} while (col % tablist[i]);
|
||||||
} else {
|
} else {
|
||||||
putchar('\t');
|
putchar('\t');
|
||||||
col += tabstop - col % tabstop;
|
col = tablist[i];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case '\b':
|
case '\b':
|
||||||
|
bol = 0;
|
||||||
if (col)
|
if (col)
|
||||||
col--;
|
col--;
|
||||||
bol = 0;
|
putchar('\b');
|
||||||
writerune("<stdout>", stdout, &r);
|
|
||||||
break;
|
break;
|
||||||
case '\n':
|
case '\n':
|
||||||
col = 0;
|
|
||||||
bol = 1;
|
bol = 1;
|
||||||
writerune("<stdout>", stdout, &r);
|
col = 0;
|
||||||
|
putchar('\n');
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
col++;
|
col++;
|
||||||
|
@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
usage(void)
|
||||||
|
{
|
||||||
|
eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
FILE *fp;
|
||||||
|
char *tl = "8";
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
ARGBEGIN {
|
||||||
|
case 'i':
|
||||||
|
iflag = 1;
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
tl = EARGF(usage());
|
||||||
|
if (!*tl)
|
||||||
|
eprintf("expand: tablist cannot be empty.\n");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage();
|
||||||
|
} ARGEND;
|
||||||
|
|
||||||
|
tablistlen = parselist(tl, strlen(tl));
|
||||||
|
|
||||||
|
if (argc == 0)
|
||||||
|
expand("<stdin>", stdin);
|
||||||
|
else {
|
||||||
|
for (; argc > 0; argc--, argv++) {
|
||||||
|
if (!(fp = fopen(argv[0], "r"))) {
|
||||||
|
weprintf("fopen %s:", argv[0]);
|
||||||
|
ret = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
expand(argv[0], fp);
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user