Refactor unexpand(1) code and manpage, adding tablist support

as already seen for expand(1), only twice as complicated.
This commit is contained in:
FRIGN 2015-02-08 21:24:22 +01:00
parent 28b4b25076
commit 1513c2b766
3 changed files with 154 additions and 91 deletions

2
README
View File

@ -73,7 +73,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
=* true yes none =* true yes none
=* tty yes none =* tty yes none
=* uname yes none =* uname yes none
# unexpand yes none #* unexpand yes none
= uniq no -f, -s = uniq no -f, -s
= unlink yes none = unlink yes none
= uudecode no -o = uudecode no -o

View File

@ -1,30 +1,45 @@
.Dd January 30, 2015 .Dd February 8, 2015
.Dt UNEXPAND 1 .Dt UNEXPAND 1
.Os sbase .Os sbase
.Sh NAME .Sh NAME
.Nm unexpand .Nm unexpand
.Nd convert blanks to tabs .Nd unexpand spaces to tabs
.Sh SYNOPSIS .Sh SYNOPSIS
.Nm .Nm
.Op Fl a .Op Fl a
.Op Fl t Ar n .Op Fl t Ar tablist
.Op Ar file ... .Op Ar file ...
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm .Nm
processes the named converts spaces to tabs in each
.Ar files .Ar file
or the standard input, writing the as specified in
standard output with consecutive blanks (spaces and tabs) converted .Ar tablist .
into tabs. Backspace characters are preserved into the output and If no file is given,
decrement the column count for tab calculations. .Nm
reads from stdin.
.Pp
Backspace characters are preserved and decrement the column count
for tab calculations.
.Sh OPTIONS .Sh OPTIONS
.Bl -tag -width Ds .Bl -tag -width Ds
.It Fl a .It Fl a
Convert blanks to tabs everywhere, not just at the start of lines. Convert spaces to tabs everywhere, not just at the start of lines.
.It Fl t Ar n .It Fl t Ar tablist
Set tab size to Specify tab size or tabstops.
.Ar n .Ar tablist
spaces (default: 8). is a list of one (in the former case) or multiple (in the latter case)
strictly positive integers separated by ' ' or ','.
.Pp
The default
.Ar tablist
is "8".
.El .El
.Sh SEE ALSO .Sh SEE ALSO
.Xr expand 1 .Xr expand 1
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification.

View File

@ -1,5 +1,5 @@
/* See LICENSE file for copyright and license details. */ /* See LICENSE file for copyright and license details. */
#include <limits.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <wchar.h> #include <wchar.h>
@ -7,15 +7,126 @@
#include "utf.h" #include "utf.h"
#include "util.h" #include "util.h"
static void unexpand(const char *, FILE *);
static int aflag = 0; static int aflag = 0;
static int tabsize = 8; static size_t *tablist = NULL;
static int tablistlen = 8;
/*
 * Parse the tab stop specification s into the global tablist array.
 *
 * s is split on ' ' and ','. Every field is converted with estrtonum()
 * (minimum value 1) and stored in order; an empty field or a value that
 * is not strictly greater than its predecessor is reported via eprintf()
 * (presumably fatal, as is usual for sbase's eprintf -- confirm).
 *
 * Returns the number of tab stops stored. The array is grown by one
 * extra trailing element which is NOT assigned here, so callers must
 * not read tablist[<return value>].
 */
static size_t
parselist(const char *s)
{
	size_t i;
	char *p, *tmp, *buf;

	/*
	 * strsep() advances tmp and leaves it NULL once the string is
	 * exhausted, so keep the base pointer around to release the copy.
	 */
	buf = tmp = estrdup(s);
	for (i = 0; (p = strsep(&tmp, " ,")); i++) {
		if (*p == '\0')
			eprintf("empty field in tablist\n");
		tablist = erealloc(tablist, (i + 1) * sizeof(*tablist));
		tablist[i] = estrtonum(p, 1, MIN(LLONG_MAX, SIZE_MAX));
		if (i > 0 && tablist[i - 1] >= tablist[i])
			eprintf("tablist must be ascending\n");
	}
	tablist = erealloc(tablist, (i + 1) * sizeof(*tablist));

	/* all fields have been converted; nothing points into buf anymore */
	free(buf);

	return i;
}
/*
 * Write the blank run that covers columns [last, col) to stdout,
 * using tabs wherever a tab stop lies inside the run and spaces for
 * whatever remains.
 *
 * With a single tablist entry the entry is used as a repeating tab
 * width; with several entries each one is an absolute tab stop.
 */
static void
unexpandspan(size_t last, size_t col)
{
	size_t width, rem, stop, from;
	Rune tab = '\t', space = ' ';

	if (tablistlen == 1) {
		width = tablist[0];
		rem = last % width;
		/*
		 * If the run reaches the next stop, rewind to the previous
		 * stop so that the loop below counts whole tab widths.
		 */
		if (last < col && (col - last) + rem >= width)
			last -= rem;
		while (last + width <= col) {
			writerune("<stdout>", stdout, &tab);
			last += width;
		}
	} else {
		/* index of the first stop strictly beyond col */
		for (stop = 0; stop < tablistlen && col >= tablist[stop]; stop++)
			;
		/* index of the first stop strictly beyond last */
		for (from = 0; from < tablistlen && last >= tablist[from]; from++)
			;
		/* one tab for every stop inside the run */
		while (from < stop) {
			writerune("<stdout>", stdout, &tab);
			last = tablist[from++];
		}
	}

	/* pad the rest of the run with spaces */
	while (last < col) {
		writerune("<stdout>", stdout, &space);
		last++;
	}
}
/*
 * Copy fp to stdout rune by rune, replacing runs of blanks with tabs
 * according to the global tablist. file is the name handed to
 * readrune() (presumably for diagnostics -- confirm).
 *
 * col is the current column and last the column at which the pending
 * blank run started. While blanks are being collected (at the start
 * of a line, or anywhere when aflag is set) they are held back and
 * emitted through unexpandspan() once a non-blank rune, a backspace,
 * a newline or end of input is seen. Otherwise blanks are passed
 * through unchanged and last simply follows col.
 */
static void
unexpand(const char *file, FILE *fp)
{
	Rune r;
	size_t last = 0, col = 0, i, next;
	int bol = 1;

	while (readrune(file, fp, &r)) {
		switch (r) {
		case ' ':
			if (!bol && !aflag)
				last++;
			col++;
			break;
		case '\t':
			if (tablistlen == 1) {
				if (!bol && !aflag)
					last += tablist[0] - col % tablist[0];
				col += tablist[0] - col % tablist[0];
			} else {
				for (i = 0; i < tablistlen; i++)
					if (col < tablist[i])
						break;
				/*
				 * Past the last tab stop there is no stop to
				 * jump to; count the tab as a single column
				 * instead of reading beyond the parsed list.
				 */
				next = (i < tablistlen) ? tablist[i] : col + 1;
				if (!bol && !aflag)
					last = next;
				col = next;
			}
			break;
		case '\b':
			if (bol || aflag)
				unexpandspan(last, col);
			/* a backspace moves one column back, never below 0 */
			col -= (col > 0);
			last = col;
			bol = 0;
			break;
		case '\n':
			if (bol || aflag)
				unexpandspan(last, col);
			last = col = 0;
			bol = 1;
			break;
		default:
			if (bol || aflag)
				unexpandspan(last, col);
			last = ++col;
			bol = 0;
			break;
		}
		/*
		 * Non-blanks are always copied; blanks are copied verbatim
		 * only when they are not being collected for conversion.
		 */
		if ((r != ' ' && r != '\t') || (!aflag && !bol))
			writerune("<stdout>", stdout, &r);
	}
	/* flush a blank run that is still pending at end of input */
	if (last < col && (bol || aflag))
		unexpandspan(last, col);
}
static void static void
usage(void) usage(void)
{ {
eprintf("usage: %s [-a] [-t n] [file ...]\n", argv0); eprintf("usage: %s [-a] [-t tablist] [file ...]\n", argv0);
} }
int int
@ -23,12 +134,13 @@ main(int argc, char *argv[])
{ {
FILE *fp; FILE *fp;
int ret = 0; int ret = 0;
char *tl = "8";
ARGBEGIN { ARGBEGIN {
case 't': case 't':
tabsize = estrtonum(EARGF(usage()), 0, INT_MAX); tl = EARGF(usage());
if (tabsize <= 0) if (!*tl)
eprintf("unexpand: invalid tabsize\n"); eprintf("tablist cannot be empty\n");
/* Fallthrough: -t implies -a */ /* Fallthrough: -t implies -a */
case 'a': case 'a':
aflag = 1; aflag = 1;
@ -37,9 +149,11 @@ main(int argc, char *argv[])
usage(); usage();
} ARGEND; } ARGEND;
if (argc == 0) { tablistlen = parselist(tl);
if (argc == 0)
unexpand("<stdin>", stdin); unexpand("<stdin>", stdin);
} else { else {
for (; argc > 0; argc--, argv++) { for (; argc > 0; argc--, argv++) {
if (!(fp = fopen(argv[0], "r"))) { if (!(fp = fopen(argv[0], "r"))) {
weprintf("fopen %s:", argv[0]); weprintf("fopen %s:", argv[0]);
@ -52,69 +166,3 @@ main(int argc, char *argv[])
} }
return ret; return ret;
} }
/*
 * Write a run of n blank columns ending at column col to stdout,
 * using tabs of width tabsize where possible and spaces for the rest.
 */
static void
unexpandspan(unsigned int n, unsigned int col)
{
	unsigned int width = tabsize;
	/* offset of the run's first column from the preceding tab stop */
	unsigned int lead = (col - n) % width;
	Rune tab = '\t', space = ' ';

	/* a run that reaches a tab stop is counted from the stop before it */
	if (n > 1 && n + lead >= width)
		n += lead;

	while (n >= width) {
		writerune("<stdout>", stdout, &tab);
		n -= width;
	}
	for (; n > 0; n--)
		writerune("<stdout>", stdout, &space);
}
/*
 * Copy fp to stdout rune by rune, converting blank runs to tabs of
 * width tabsize. n counts the blank columns collected so far and col
 * tracks the current column. Blanks are collected at the start of a
 * line, or everywhere when aflag is set; elsewhere they are copied
 * through unchanged.
 */
static void
unexpand(const char *file, FILE *fp)
{
	unsigned int n = 0, col = 0, step;
	Rune r;
	int bol = 1, blank, collect;

	while (readrune(file, fp, &r)) {
		blank = (r == ' ' || r == '\t');
		/* sampled before this rune has a chance to change bol */
		collect = bol || aflag;

		if (r == ' ') {
			if (collect)
				n++;
			col++;
		} else if (r == '\t') {
			/* distance to the next tab stop */
			step = tabsize - col % tabsize;
			if (collect)
				n += step;
			col += step;
		} else {
			/* any other rune ends the pending blank run */
			if (collect)
				unexpandspan(n, col);
			n = 0;
			if (r == '\n') {
				col = 0;
				bol = 1;
			} else {
				if (r == '\b')
					col -= (col > 0);
				else
					col++;
				bol = 0;
			}
		}

		/* collected blanks are held back; everything else is copied */
		if (!blank || (!aflag && !bol))
			writerune("<stdout>", stdout, &r);
	}

	/* flush a blank run that is still pending at end of input */
	if (n > 0 && (bol || aflag))
		unexpandspan(n, col);
}