diff --git a/Makefile b/Makefile index 6efa7f0..42f723f 100644 --- a/Makefile +++ b/Makefile @@ -82,6 +82,7 @@ BIN =\ cksum\ cmp\ cols\ + col\ comm\ cp\ cron\ diff --git a/README b/README index 7b36203..a88478b 100644 --- a/README +++ b/README @@ -19,6 +19,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support, =* cksum yes none =* cmp yes none #* cols non-posix none + col yes none =* comm yes none =*| cp yes none (-i) =* cron non-posix none diff --git a/col.1 b/col.1 new file mode 100644 index 0000000..6b4dfba --- /dev/null +++ b/col.1 @@ -0,0 +1,90 @@ +.TH COL 1 col +.SH NAME +col - filter reverse line-feeds +.SH SYNOPSIS +.B col +.RB [ \-p ] +.RB [ \-b ] +.RB [ \-f ] +.RB [ \-x ] +.RB [ \-l +.IR num ] +.SH DESCRIPTION +.PP +The +.B col +utility filters all the reverse (and half reverse) line feeds, +as they are produced by nroff with .2C of ms(6) or by tbl(1). +.B col +also replaces spaces by tabs when it is possible. +The control sequences managed by +.B col +are: +.TP +.B ESC-7 +reverse line-feed +.TP +.B ESC-8 +reverse half-line-feed +.TP +.B ESC-9 +forward half-line-feed +.TP +.B VT +vertical-tab +.TP +.B SP +Space +.TP +.B TAB +horizontal tab +.TP +.B RETURN +Carriage return +.TP +.B NL +New line +.PP +All the other control codes and escape sequences are removed. +.B Col +transforms all the spaces into tabulators. +.SH OPTIONS +.TP +.B \-p +Print unknown escape sequences to the output. +.TP +.B \-b +Do not print backspaces in output, +and print only the last overstruck character in the output. +.TP +.B \-f +Allow forward half line feeds in the output. +.TP +.B \-x +Do not convert spaces into tabs. +.TP +.BI \-l " num" +Set to +.I num +the number of lines buffered for +.B col. +.SH BUGS +.PP +.B Col +only processes text with a maximum of 256 lines with 800 characters per line, +although the number of lines can be modified with +.B \-l +option.
+When the number of lines is bigger,
+the buffer is flushed to the output,
+so new reverse line feeds can not operate in the flushed lines.
+This implementation ignores SI and SO selection character sets,
+because it is supposed it will work only with UTF-8 strings,
+although the UTF-8 support is missing.
+.SH SEE ALSO
+.BR nroff (1),
+.BR ms (6),
+.BR tbl (1).
+
+
+
diff --git a/col.c b/col.c
new file mode 100644
index 0000000..f7f24e4
--- /dev/null
+++ b/col.c
@@ -0,0 +1,311 @@
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+#define NLINES 256	/* default number of buffered rows */
+#define NCOLS 800	/* bytes per row; the last one always stays '\0' */
+
+/* page buffer: pagsize rows of NCOLS bytes, '\0' marks an unwritten cell */
+char **buff;
+
+/* option flags, set by -b, -x, -f and -p respectively */
+int obackspace, onotabs, ohalfline, oescape;
+/*
+ * Cursor state.  nline is the current row, ncol the display column and
+ * nchar the byte index in that row (they differ once the row holds
+ * overstrike sequences).  nspaces counts blanks read but not stored yet,
+ * maxline is the highest row reached and bs is set right after a
+ * backspace.
+ */
+unsigned nline, ncol, nchar, nspaces, maxline, bs;
+size_t pagsize = NLINES;	/* number of rows in buff, changed by -l */
+
+/* Report the synopsis through enprintf() with status 2. */
+static void
+usage(void)
+{
+	enprintf(2, "usage: %s [-p][-l num][-b][-f][-x]\n", argv0);
+}
+
+/*
+ * Print the first n rows of the page, one per output line, then blank
+ * every row that was in use and home the cursor to row 0, column 0.
+ * Callers pass n <= maxline + 1.
+ */
+static void
+flushrows(size_t n)
+{
+	size_t i, j;
+
+	for (i = 0; i < pagsize && i <= maxline; ++i) {
+		if (i < n) {
+			for (j = 0; j < NCOLS && buff[i][j] != '\0'; ++j)
+				putchar(buff[i][j]);
+			putchar('\n');
+		}
+		/* stale text must not show up again in the next page */
+		memset(buff[i], '\0', NCOLS);
+	}
+	bs = nchar = nline = ncol = maxline = 0;
+}
+
+/*
+ * Write out everything buffered so far.  Rows below maxline were left
+ * through a line feed and are always printed; the row at maxline has
+ * not been terminated yet, so it is printed only when it holds text.
+ */
+static void
+flush(void)
+{
+	size_t n = maxline;
+
+	if (buff[n][0] != '\0')
+		++n;
+	flushrows(n);
+}
+
+/*
+ * Move the cursor n display columns to the right on the current row,
+ * turning unwritten cells into blanks on the way.  A stored backspace
+ * takes one column back, so an overstrike sequence counts as a single
+ * column.  The walk stops at the last usable cell of the row.
+ */
+static void
+forward(unsigned n)
+{
+	unsigned lim;
+
+	for (lim = ncol + n; ncol != lim && nchar < NCOLS-1; ++nchar) {
+		switch (buff[nline][nchar]) {
+		case '\b':
+			--ncol;
+			break;
+		case '\0':
+			buff[nline][nchar] = ' ';
+			/* FALLTHROUGH */
+		default:
+			++ncol;
+			break;
+		}
+	}
+}
+
+/*
+ * Move the cursor one row down (up > 0) or one row up (otherwise, never
+ * above row 0).  Going down from the last row of the page prints the
+ * whole page and restarts at row 0, column 0.  A non-zero rcarriage
+ * returns the cursor to column 0.  Pending blanks are dropped.
+ */
+static void
+linefeed(int up, int rcarriage)
+{
+	nspaces = 0;
+	if (up > 0) {
+		if (nline == pagsize-1)
+			flushrows(pagsize);
+		else if (++nline > maxline)
+			maxline = nline;
+	} else if (nline > 0) {
+		--nline;
+	}
+	bs = 0;
+	/* the new row is not padded here: that would leave stray blanks */
+	if (rcarriage)
+		nchar = ncol = 0;
+}
+
+/*
+ * Handle one character at the cursor.  Blank, tab, carriage return and
+ * backspace only move the cursor; anything else is written into the
+ * row.  Writing over a visible character right after a backspace keeps
+ * both as "old, backspace, new", unless -b was given or the row is
+ * full, in which case the new character replaces the old one.
+ */
+static void
+newchar(int c)
+{
+	char *row = buff[nline], *cp;
+
+	forward(nspaces);
+	nspaces = 0;
+
+	switch (c) {
+	case ' ':
+		forward(1);
+		break;
+	case '\r':
+		nchar = ncol = 0;
+		break;
+	case '\t':
+		forward(8 - ncol%8);
+		break;
+	case '\b':
+		if (ncol > 0)
+			--ncol;
+		if (nchar > 0)
+			--nchar;
+		bs = 1;
+		break;
+	default:
+		cp = &row[nchar];
+		/*
+		 * An overstrike takes two extra cells.  Make room only when
+		 * the new character still lands before the final '\0' of
+		 * the row: the tail moves right and loses its last two
+		 * bytes.
+		 */
+		if (*cp != '\0' && *cp != ' ' && bs && !obackspace &&
+		    nchar < NCOLS-3) {
+			memmove(cp + 3, cp + 1, NCOLS - nchar - 4);
+			cp[1] = '\b';
+			nchar += 2;
+		}
+		if (nchar < NCOLS-1) {
+			/* no '\0' may precede the text or flushing stops there */
+			for (cp = row; cp < &row[nchar]; ++cp) {
+				if (*cp == '\0')
+					*cp = ' ';
+			}
+			row[nchar++] = c;
+			++ncol;
+		}
+		bs = 0;
+	}
+}
+
+/*
+ * Read standard input to its end and apply every character to the
+ * page.  ESC-7, ESC-8 and VT move one row up and newline moves one row
+ * down with a carriage return.  ESC-9 moves one row down, unless -f
+ * was given: then it is handled like the remaining escape sequences,
+ * which are dropped unless -p was given.  Control characters without
+ * a case of their own are discarded.
+ */
+static void
+col(void)
+{
+	int c;
+
+	while ((c = getchar()) != EOF) {
+		switch (c) {
+		case '\x1b':
+			switch (c = getchar()) {
+			case '8': /* reverse half-line-feed */
+			case '7': /* reverse line-feed */
+				linefeed(-1, 0);
+				continue;
+			case '9': /* forward half-line-feed */
+				if (ohalfline)
+					break;
+				linefeed(1, 0);
+				continue;
+			default:
+				break;
+			}
+			if (!oescape)
+				continue;
+			newchar('\x1b');
+			if (c != EOF)
+				newchar(c);
+			break;
+		case '\v':
+			linefeed(-1, 0);
+			break;
+		case ' ':
+			if (!onotabs) {
+				if (++nspaces != 8)
+					continue;
+				/*
+				 * Eight blanks are eight columns wherever
+				 * they start, unlike a tab: leave seven
+				 * pending and let newchar() add this one.
+				 */
+				--nspaces;
+			}
+			/* FALLTHROUGH */
+		case '\r':
+		case '\b':
+		case '\t':
+			newchar(c);
+			break;
+		case '\n':
+			linefeed(1, 1);
+			break;
+		default:
+			if (!iscntrl(c))
+				newchar(c);
+			break;
+		}
+	}
+}
+
+/*
+ * Allocate the page: pagsize rows of NCOLS bytes, all set to '\0'
+ * because the rest of the file reads '\0' as "cell never written".
+ */
+static void
+allocbuf(void)
+{
+	char **bp;
+
+	buff = emalloc(sizeof(*buff) * pagsize);
+	for (bp = buff; bp < &buff[pagsize]; ++bp) {
+		*bp = emalloc(NCOLS);
+		memset(*bp, '\0', NCOLS);
+	}
+}
+
+/*
+ * Parse the options, filter standard input to standard output and
+ * report stream errors.  Operands are not accepted.
+ */
+int
+main(int argc, char *argv[])
+{
+	ARGBEGIN {
+	case 'b':
+		obackspace = 1;
+		break;
+	case 'f':
+		ohalfline = 1;
+		break;
+	case 'l':
+		/*
+		 * At least one row is needed, and the size of the row
+		 * table must not overflow.  NOTE(review): assumes that
+		 * estrtonum() takes signed bounds like strtonum(), which
+		 * SIZE_MAX itself would not fit - confirm in util.h.
+		 */
+		pagsize = estrtonum(EARGF(usage()), 1,
+		                    SIZE_MAX / sizeof(*buff));
+		break;
+	case 'p':
+		oescape = 1;
+		break;
+	case 'x':
+		onotabs = 1;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if (argc > 0)
+		usage();
+
+	allocbuf();
+	col();
+	flush();
+
+	if (ferror(stdin))
+		enprintf(1, "error reading input\n");
+	/* push buffered output now so that a failed write is noticed */
+	if (fflush(stdout) == EOF || ferror(stdout))
+		enprintf(2, "error writing output\n");
+
+	return 0;
+}