/* See LICENSE file for copyright and license details. */ #include #include #include #include #include #include #include "text.h" #include "util.h" static void usage(void) { eprintf("usage: %s [-d] [-c] set1 [set2]\n", argv0); } static int dflag, cflag; static wchar_t mappings[0x110000]; struct wset_state { char *s; /* current character */ wchar_t rfirst, rlast; /* first and last in range */ wchar_t prev; /* previous returned character */ int prev_was_range; /* was the previous character part of a c-c range? */ }; struct set_state { char *s, rfirst, rlast, prev; int prev_was_octal; /* was the previous returned character written in octal? */ }; static void set_state_defaults(struct set_state *s) { s->rfirst = 1; s->rlast = 0; s->prev_was_octal = 1; } static void wset_state_defaults(struct wset_state *s) { s->rfirst = 1; s->rlast = 0; s->prev_was_range = 1; } /* sets *s to the char that was intended to be written. * returns how many bytes the s pointer has to advance to skip the * escape sequence if it was an octal, always zero otherwise. */ static int resolve_escape(char *s) { int i; unsigned char c; switch(*s) { case 'n': *s = '\n'; return 0; case 't': *s = '\t'; return 0; case 'r': *s = '\r'; return 0; case 'f': *s = '\f'; return 0; case 'a': *s = '\a'; return 0; case 'b': *s = '\b'; return 0; case 'v': *s = '\v'; return 0; case '\\': *s = '\\'; return 0; case '\0': eprintf("stray '\\' at end of input:"); default: ; } if(*s<'0' || *s>'7') eprintf("invalid character after '\\':"); for(i=0, c=0; s[i]>='0' && s[i]<='7' && i<3; i++) { c <<= 3; c += s[i]-'0'; } if(*s>'3' && i==3) eprintf("octal byte cannot be bigger than 377:"); *s = c; return i; } #define embtowc(a, b) mbtowc(a, b, 4) static int xmbtowc(wchar_t *unicodep, const char *s) { int rv; rv = embtowc(unicodep, s); if (rv < 0) eprintf("mbtowc: invalid input sequence:"); return rv; } static int has_octal_escapes(const char *s) { while(*s) if(*s++ == '\\' && *s >= '0' && *s <= '7') return 1; return 0; } static char get_next_char(struct set_state *s) { char c; int nchars; start: if(s->rfirst <= s->rlast) { c = s->rfirst; s->rfirst++; return c; } if(*s->s == '-' && !s->prev_was_octal) { s->s++; if(!*s->s) return '-'; if(*s->s == '\\' && (nchars = resolve_escape(++(s->s)))) goto char_is_octal; s->rlast = *(s->s)++; if(!s->rlast) return '\0'; s->prev_was_octal = 1; s->rfirst = ++(s->prev); goto start; } if(*s->s == '\\' && (nchars = resolve_escape(++(s->s)))) goto char_is_octal; s->prev_was_octal = 0; c = *(s->s)++; s->prev = c; return c; char_is_octal: s->prev_was_octal = 1; c = *s->s; s->s += nchars; return c; } static wchar_t get_next_wchar(struct wset_state *s) { start: if(s->rfirst <= s->rlast) { s->prev = s->rfirst; s->rfirst++; return s->prev; } if(*s->s == '-' && !s->prev_was_range) { s->s++; if(!*s->s) return '-'; if(*s->s == '\\') resolve_escape(++(s->s)); s->s += xmbtowc(&s->rlast, s->s); if(!s->rlast) return '\0'; s->rfirst = ++(s->prev); s->prev_was_range = 1; goto start; } if(*s->s == '\\') resolve_escape(++(s->s)); s->s += xmbtowc(&s->prev, s->s); s->prev_was_range = 0; return s->prev; } static int is_mapping_wide(const char *set1, const char *set2) { struct set_state ss1, ss2; struct wset_state wss1, wss2; wchar_t wc1, wc2, last_wc2; if(has_octal_escapes(set1)) { set_state_defaults(&ss1); ss1.s = (char *) set1; if(set2) { set_state_defaults(&ss2); ss2.s = (char *) set2; /* if the character returned is from an octal triplet, it might be null * and still need to continue */ while((wc1 = (unsigned char) get_next_char(&ss1)) || ss1.prev_was_octal ) { if(!(wc2 = (unsigned char) get_next_char(&ss2))) wc2 = last_wc2; mappings[wc1] = wc2; last_wc2 = wc2; } } else { while((wc1 = (unsigned char) get_next_char(&ss1)) || ss1.prev_was_octal) mappings[wc1] = 1; } return 0; } else { wset_state_defaults(&wss1); wss1.s = (char *) set1; if(set2) { wset_state_defaults(&wss2); wss2.s = (char *) set2; while((wc1 = get_next_wchar(&wss1))) { if(!(wc2 = get_next_wchar(&wss2))) wc2 = last_wc2; mappings[wc1] = wc2; last_wc2 = wc2; } } else { while((wc1 = get_next_wchar(&wss1))) mappings[wc1] = 1; } return 1; } return 0; /* unreachable */ } static void wmap_null(char *in, ssize_t nbytes) { char *s; wchar_t rune; int parsed_bytes = 0; s = in; while(nbytes) { parsed_bytes = embtowc(&rune, s); if(parsed_bytes < 0) { rune = *s; parsed_bytes = 1; } if(((!mappings[rune])&1) ^ cflag) putwchar(rune); s += parsed_bytes; nbytes -= parsed_bytes; } } static void wmap_set(char *in, ssize_t nbytes) { char *s; wchar_t rune; int parsed_bytes = 0; s = in; while(nbytes) { parsed_bytes = embtowc(&rune, s); if(parsed_bytes < 0) { rune = *s; parsed_bytes = 1; } if(!mappings[rune]) putwchar(rune); else putwchar(mappings[rune]); nbytes -= parsed_bytes; s += parsed_bytes; } } static void map_null(char *in, ssize_t nbytes) { char *s; for(s=in; nbytes; s++, nbytes--) if(((!mappings[(unsigned char)*s])&1) ^ cflag) putchar(*s); } static void map_set(char *in, ssize_t nbytes) { char *s; for(s=in; nbytes; s++, nbytes--) if(!mappings[(unsigned char)*s]) putchar(*s); else putchar(mappings[(unsigned char)*s]); } int main(int argc, char *argv[]) { char *buf = NULL; size_t size = 0; ssize_t nbytes; void (*mapfunc)(char*, ssize_t); setlocale(LC_ALL, ""); dflag = cflag = 0; ARGBEGIN { case 'd': dflag = 1; break; case 'c': cflag = 1; break; default: usage(); } ARGEND; if(argc == 0) usage(); if(dflag) { if(argc != 1) usage(); if(is_mapping_wide(argv[0], NULL)) mapfunc = wmap_null; else mapfunc = map_null; } else if(cflag) { usage(); } else if(argc == 2) { if(is_mapping_wide(argv[0], argv[1])) mapfunc = wmap_set; else mapfunc = map_set; } else { usage(); } while((nbytes = agetline(&buf, &size, stdin)) != -1) mapfunc(buf, nbytes); free(buf); if(ferror(stdin)) eprintf(": read error:"); return 0; }