diff --git a/Makefile b/Makefile index 81dfaf6..ee84221 100644 --- a/Makefile +++ b/Makefile @@ -81,6 +81,7 @@ SRC = \ tee.c \ test.c \ touch.c \ + tr.c \ true.c \ tty.c \ uname.c \ diff --git a/tr.1 b/tr.1 new file mode 100644 index 0000000..662b4f8 --- /dev/null +++ b/tr.1 @@ -0,0 +1,52 @@ +.TH TR 1 sbase\-VERSION +.SH NAME +tr \- translate characters +.SH SYNOPSIS +.B tr +.RB set1 +.RI [ set2 ] +.SH DESCRIPTION +.B tr +reads input from stdin replacing every character in +.B set1 +with the character at the same index in +.BR set2 . +If set2 is not given +.B tr +deletes the characters in set1 from the input. + +Sets are specified as strings of characters. Almost all represent themselves. The following ones will be interpreted: +.TP +\e\e +backslash +.TP +\ea +audible BEL +.TP +\eb +backspace +.TP +\ef +form feed +.TP +\en +new line +.TP +\er +return +.TP +\et +horizontal tab +.TP +\ev +vertical tab +.PP +If set1 is longer than set2 +.B tr +will map all the remaining characters to the last one in set2. In case set2 is longer than set1, the remaining characters from set2 will be ignored. +.SH NOTES +.B tr +is Unicode-aware but does not yet handle character classes (e.g. [:alnum:] or [:digit:]). +.SH SEE ALSO +.IR sed (1), +.IR awk (1) diff --git a/tr.c b/tr.c new file mode 100644 index 0000000..8cca430 --- /dev/null +++ b/tr.c @@ -0,0 +1,151 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include +#include +#include +#include +#include +#include +#include +#include "text.h" +#include "util.h" + +static void +usage(void) +{ + eprintf("usage: %s set1 [set2]\n", argv0); +} + +void +handleescapes(char *s) +{ + switch(*s) { + case 'n': + *s = '\n'; + break; + case 't': + *s = '\t'; + break; + case '\\': + *s = '\\'; + break; + case 'r': + *s = '\r'; + break; + case 'f': + *s = '\f'; + break; + case 'a': + *s = '\a'; + break; + case 'b': + *s = '\b'; + break; + case 'v': + *s = '\v'; + break; + } +} + +void +parsemapping(const char *set1, const char *set2, wchar_t *mappings) +{ + char *s; + wchar_t runeleft; + wchar_t runeright; + int leftbytes; + int rightbytes; + size_t n = 0; + size_t lset2; + + if(set2) { + lset2 = strnlen(set2, 255 * sizeof(wchar_t)); + } else { + set2 = &set1[0]; + lset2 = 0; + } + + s = (char *)set1; + while(*s) { + if(*s == '\\') + handleescapes(++s); + leftbytes = mbtowc(&runeleft, s, 4); + if(set2[n] != '\0') + rightbytes = mbtowc(&runeright, set2 + n, 4); + mappings[runeleft] = runeright; + s += leftbytes; + if(n < lset2) + n += rightbytes; + } +} + +void +maptonull(const wchar_t *mappings, char *in) +{ + const char *s; + wchar_t runeleft; + int leftbytes = 0; + + s = in; + while(*s) { + leftbytes = mbtowc(&runeleft, s, 4); + if(!mappings[runeleft]) + putwchar(runeleft); + s += leftbytes; + } +} + +void +maptoset(const wchar_t *mappings, char *in) +{ + const char *s; + wchar_t runeleft; + int leftbytes = 0; + + s = in; + while(*s) { + leftbytes = mbtowc(&runeleft, s, 4); + if(!mappings[runeleft]) + putwchar(runeleft); + else + putwchar(mappings[runeleft]); + s += leftbytes; + } +} + +int +main(int argc, char *argv[]) +{ + wchar_t *mappings; + char *buf = NULL; + size_t size = 0; + void (*mapfunc)(const wchar_t*, char*); + + setlocale(LC_ALL, ""); + + mappings = (wchar_t *)mmap(NULL, 0x110000 * sizeof(wchar_t), + PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); + + ARGBEGIN { + default: + usage(); + } ARGEND; + + if(argc 
== 0) + usage(); + + if(argc >= 2) { + parsemapping(argv[0], argv[1], mappings); + mapfunc = maptoset; + } else { + parsemapping(argv[0], NULL, mappings); + mapfunc = maptonull; + } + + while(afgets(&buf, &size, stdin)) + mapfunc(mappings, buf); + free(buf); + if(ferror(stdin)) + eprintf(": read error:"); + + return EXIT_SUCCESS; +}