eee98ed3a4
It was about damn time. Consistency is very important in such a big codebase.
358 lines
6.3 KiB
C
358 lines
6.3 KiB
C
/* See LICENSE file for copyright and license details. */
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <locale.h>
|
|
#include <wchar.h>
|
|
|
|
#include "text.h"
|
|
#include "util.h"
|
|
|
|
static void
|
|
usage(void)
|
|
{
|
|
eprintf("usage: %s [-d] [-c] set1 [set2]\n", argv0);
|
|
}
|
|
|
|
/* Option flags, set in main() when -d or -c is given. */
static int dflag;
static int cflag;

/*
 * Lookup table indexed by character value, filled by is_mapping_wide().
 * A zero entry means the character is not part of set1.
 */
static wchar_t mappings[0x110000];
|
|
|
|
/* Parsing cursor over a set argument that is decoded as wide characters. */
struct wset_state {
	/* current position in the set string */
	char *s;
	/* next character to hand out from the pending c-c range */
	wchar_t rfirst;
	/* last character of the pending c-c range */
	wchar_t rlast;
	/* character returned by the previous call */
	wchar_t prev;
	/* was the previous character part of a c-c range? */
	int prev_was_range;
};
|
|
|
|
/* Parsing cursor over a set argument that is processed byte by byte. */
struct set_state {
	/* current position in the set string */
	char *s;
	/* next byte to hand out from the pending c-c range */
	char rfirst;
	/* last byte of the pending c-c range */
	char rlast;
	/* last byte returned that was not written as an octal escape */
	char prev;
	/* was the previous returned character written in octal? */
	int prev_was_octal;
};
|
|
|
|
static void
|
|
set_state_defaults(struct set_state *s)
|
|
{
|
|
s->rfirst = 1;
|
|
s->rlast = 0;
|
|
s->prev_was_octal = 1;
|
|
}
|
|
|
|
static void
|
|
wset_state_defaults(struct wset_state *s)
|
|
{
|
|
s->rfirst = 1;
|
|
s->rlast = 0;
|
|
s->prev_was_range = 1;
|
|
}
|
|
|
|
/*
 * Resolve the escape sequence whose text starts at s (the byte right
 * after the backslash) and overwrite *s with the character it denotes.
 *
 * Returns the number of octal digits that made up the sequence, i.e.
 * how far the caller has to advance s to skip it, when the escape is
 * an octal one; returns 0 for every single-letter escape.
 * Malformed sequences are reported through eprintf().
 */
static int
resolve_escape(char *s)
{
	/* letters[i] is the escape name, values[i] the byte it stands for */
	static const char letters[] = "ntrfabv\\";
	static const char values[] = "\n\t\r\f\a\b\v\\";
	const char *hit;
	unsigned char byte = 0;
	int ndigits = 0;

	if (*s == '\0') {
		/* tested first: strchr() would match the table's terminator */
		eprintf("stray '\\' at end of input:");
	} else if ((hit = strchr(letters, *s)) != NULL) {
		*s = values[hit - letters];
		return 0;
	}

	/* anything else has to be one to three octal digits */
	if (*s < '0' || *s > '7')
		eprintf("invalid character after '\\':");
	while (s[ndigits] >= '0' && s[ndigits] <= '7' && ndigits < 3) {
		byte = (byte << 3) + (s[ndigits] - '0');
		ndigits++;
	}
	/* three digits with a leading digit above 3 exceed one byte */
	if (*s > '3' && ndigits == 3)
		eprintf("octal byte cannot be bigger than 377:");
	*s = byte;
	return ndigits;
}
|
|
|
|
/* mbtowc() limited to examining at most 4 bytes of the input. */
#define embtowc(a, b) mbtowc((a), (b), 4)
|
|
|
|
/*
 * Decode the multibyte character at s into *unicodep with embtowc().
 * A negative result is reported through eprintf(); the value obtained
 * from embtowc() is handed back to the caller unchanged.
 */
static int
xmbtowc(wchar_t *unicodep, const char *s)
{
	int len = embtowc(unicodep, s);

	if (len < 0)
		eprintf("mbtowc: invalid input sequence:");
	return len;
}
|
|
|
|
/*
 * Return 1 if s contains a backslash escape that starts with an octal
 * digit, 0 otherwise.
 *
 * The character following a backslash is consumed together with it, so
 * an escaped backslash followed by a digit ("\\1") is not mistaken for
 * an octal escape.  A lone backslash at the end of the string is not an
 * octal escape either.
 */
static int
has_octal_escapes(const char *s)
{
	for (; *s; s++) {
		if (*s != '\\')
			continue;
		/* look at the escaped character and skip over it */
		s++;
		if (*s >= '0' && *s <= '7')
			return 1;
		if (!*s)
			break;
	}
	return 0;
}
|
|
|
|
static char
|
|
get_next_char(struct set_state *s)
|
|
{
|
|
char c;
|
|
int nchars;
|
|
|
|
start:
|
|
if (s->rfirst <= s->rlast) {
|
|
c = s->rfirst;
|
|
s->rfirst++;
|
|
return c;
|
|
}
|
|
|
|
if (*s->s == '-' && !s->prev_was_octal) {
|
|
s->s++;
|
|
if (!*s->s)
|
|
return '-';
|
|
if (*s->s == '\\' && (nchars = resolve_escape(++(s->s))))
|
|
goto char_is_octal;
|
|
s->rlast = *(s->s)++;
|
|
if (!s->rlast)
|
|
return '\0';
|
|
s->prev_was_octal = 1;
|
|
s->rfirst = ++(s->prev);
|
|
goto start;
|
|
}
|
|
if (*s->s == '\\' && (nchars = resolve_escape(++(s->s))))
|
|
goto char_is_octal;
|
|
|
|
s->prev_was_octal = 0;
|
|
c = *(s->s)++;
|
|
s->prev = c;
|
|
return c;
|
|
|
|
char_is_octal:
|
|
s->prev_was_octal = 1;
|
|
c = *s->s;
|
|
s->s += nchars;
|
|
return c;
|
|
}
|
|
|
|
static wchar_t
|
|
get_next_wchar(struct wset_state *s)
|
|
{
|
|
start:
|
|
if (s->rfirst <= s->rlast) {
|
|
s->prev = s->rfirst;
|
|
s->rfirst++;
|
|
return s->prev;
|
|
}
|
|
|
|
if (*s->s == '-' && !s->prev_was_range) {
|
|
s->s++;
|
|
if (!*s->s)
|
|
return '-';
|
|
if (*s->s == '\\')
|
|
resolve_escape(++(s->s));
|
|
s->s += xmbtowc(&s->rlast, s->s);
|
|
if (!s->rlast)
|
|
return '\0';
|
|
s->rfirst = ++(s->prev);
|
|
s->prev_was_range = 1;
|
|
goto start;
|
|
}
|
|
|
|
if (*s->s == '\\')
|
|
resolve_escape(++(s->s));
|
|
s->s += xmbtowc(&s->prev, s->s);
|
|
s->prev_was_range = 0;
|
|
return s->prev;
|
|
}
|
|
|
|
static int
|
|
is_mapping_wide(const char *set1, const char *set2)
|
|
{
|
|
struct set_state ss1, ss2;
|
|
struct wset_state wss1, wss2;
|
|
wchar_t wc1, wc2, last_wc2;
|
|
|
|
if (has_octal_escapes(set1)) {
|
|
set_state_defaults(&ss1);
|
|
ss1.s = (char *) set1;
|
|
if (set2) {
|
|
set_state_defaults(&ss2);
|
|
ss2.s = (char *) set2;
|
|
/* if the character returned is from an octal triplet, it might be null
|
|
* and still need to continue */
|
|
while ((wc1 = (unsigned char) get_next_char(&ss1)) || ss1.prev_was_octal ) {
|
|
if (!(wc2 = (unsigned char) get_next_char(&ss2)))
|
|
wc2 = last_wc2;
|
|
mappings[wc1] = wc2;
|
|
last_wc2 = wc2;
|
|
}
|
|
} else {
|
|
while ((wc1 = (unsigned char) get_next_char(&ss1)) || ss1.prev_was_octal)
|
|
mappings[wc1] = 1;
|
|
}
|
|
return 0;
|
|
} else {
|
|
wset_state_defaults(&wss1);
|
|
wss1.s = (char *) set1;
|
|
if (set2) {
|
|
wset_state_defaults(&wss2);
|
|
wss2.s = (char *) set2;
|
|
while ((wc1 = get_next_wchar(&wss1))) {
|
|
if (!(wc2 = get_next_wchar(&wss2)))
|
|
wc2 = last_wc2;
|
|
mappings[wc1] = wc2;
|
|
last_wc2 = wc2;
|
|
}
|
|
} else {
|
|
while ((wc1 = get_next_wchar(&wss1)))
|
|
mappings[wc1] = 1;
|
|
}
|
|
return 1;
|
|
}
|
|
return 0; /* unreachable */
|
|
}
|
|
|
|
static void
|
|
wmap_null(char *in, ssize_t nbytes)
|
|
{
|
|
char *s;
|
|
wchar_t rune;
|
|
int parsed_bytes = 0;
|
|
|
|
s = in;
|
|
while (nbytes) {
|
|
parsed_bytes = embtowc(&rune, s);
|
|
if (parsed_bytes < 0) {
|
|
rune = *s;
|
|
parsed_bytes = 1;
|
|
}
|
|
if (((!mappings[rune])&1) ^ cflag)
|
|
putwchar(rune);
|
|
s += parsed_bytes;
|
|
nbytes -= parsed_bytes;
|
|
}
|
|
}
|
|
|
|
static void
|
|
wmap_set(char *in, ssize_t nbytes)
|
|
{
|
|
char *s;
|
|
wchar_t rune;
|
|
int parsed_bytes = 0;
|
|
|
|
s = in;
|
|
while (nbytes) {
|
|
parsed_bytes = embtowc(&rune, s);
|
|
if (parsed_bytes < 0) {
|
|
rune = *s;
|
|
parsed_bytes = 1;
|
|
}
|
|
if (!mappings[rune])
|
|
putwchar(rune);
|
|
else
|
|
putwchar(mappings[rune]);
|
|
nbytes -= parsed_bytes;
|
|
s += parsed_bytes;
|
|
}
|
|
}
|
|
|
|
static void
|
|
map_null(char *in, ssize_t nbytes)
|
|
{
|
|
char *s;
|
|
|
|
for (s = in; nbytes; s++, nbytes--)
|
|
if (((!mappings[(unsigned char)*s])&1) ^ cflag)
|
|
putchar(*s);
|
|
}
|
|
|
|
static void
|
|
map_set(char *in, ssize_t nbytes)
|
|
{
|
|
char *s;
|
|
|
|
for (s = in; nbytes; s++, nbytes--)
|
|
if (!mappings[(unsigned char)*s])
|
|
putchar(*s);
|
|
else
|
|
putchar(mappings[(unsigned char)*s]);
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
char *buf = NULL;
|
|
size_t size = 0;
|
|
ssize_t nbytes;
|
|
void (*mapfunc)(char*, ssize_t);
|
|
|
|
setlocale(LC_ALL, "");
|
|
dflag = cflag = 0;
|
|
|
|
ARGBEGIN {
|
|
case 'd':
|
|
dflag = 1;
|
|
break;
|
|
case 'c':
|
|
cflag = 1;
|
|
break;
|
|
default:
|
|
usage();
|
|
} ARGEND;
|
|
|
|
if (argc == 0)
|
|
usage();
|
|
|
|
if (dflag) {
|
|
if (argc != 1)
|
|
usage();
|
|
if (is_mapping_wide(argv[0], NULL))
|
|
mapfunc = wmap_null;
|
|
else
|
|
mapfunc = map_null;
|
|
} else if (cflag) {
|
|
usage();
|
|
} else if (argc == 2) {
|
|
if (is_mapping_wide(argv[0], argv[1]))
|
|
mapfunc = wmap_set;
|
|
else
|
|
mapfunc = map_set;
|
|
} else {
|
|
usage();
|
|
}
|
|
|
|
while ((nbytes = agetline(&buf, &size, stdin)) != -1)
|
|
mapfunc(buf, nbytes);
|
|
free(buf);
|
|
if (ferror(stdin))
|
|
eprintf("<stdin>: read error:");
|
|
|
|
return 0;
|
|
}
|