Add support for arbitrary length delimiters in cut(1)
Having multibyte delimiters is not enough. For full flexibility, the possibility of cutting input lines with arbitrary-length delimiters is the real deal. Given this functionality, it only sounds reasonable to also add support to resolve escapes. Thanks to Truls Becken for making the suggestion and designing such a flexible cut(1)-implementation!
This commit is contained in:
parent
37d30f2bd8
commit
2277b619be
2
cut.1
2
cut.1
|
@ -67,4 +67,4 @@ utility is compliant with the
|
||||||
specification.
|
specification.
|
||||||
.Pp
|
.Pp
|
||||||
The possibility of separating numbers and ranges with a space
|
The possibility of separating numbers and ranges with a space
|
||||||
and specifying multibyte delimiters is an extension to that specification.
|
and specifying multibyte delimiters of arbitrary length is an extension to that specification.
|
||||||
|
|
59
cut.c
59
cut.c
|
@ -14,7 +14,7 @@ typedef struct Range {
|
||||||
|
|
||||||
static Range *list = NULL;
|
static Range *list = NULL;
|
||||||
static char mode = 0;
|
static char mode = 0;
|
||||||
static Rune delim = '\t';
|
static char *delim = "\t";
|
||||||
static size_t delimlen = 1;
|
static size_t delimlen = 1;
|
||||||
static int nflag = 0;
|
static int nflag = 0;
|
||||||
static int sflag = 0;
|
static int sflag = 0;
|
||||||
|
@ -73,7 +73,6 @@ seek(const char *s, size_t pos, size_t *prev, size_t count)
|
||||||
{
|
{
|
||||||
const char *t;
|
const char *t;
|
||||||
size_t n = pos - *prev, i;
|
size_t n = pos - *prev, i;
|
||||||
Rune r;
|
|
||||||
|
|
||||||
if (mode == 'b') {
|
if (mode == 'b') {
|
||||||
if ((t = memchr(s, '\0', n)))
|
if ((t = memchr(s, '\0', n)))
|
||||||
|
@ -89,12 +88,13 @@ seek(const char *s, size_t pos, size_t *prev, size_t count)
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
for (t = (count < delimlen + 1) ? s : s + delimlen; n && *t; ) {
|
for (t = (count < delimlen + 1) ? s : s + delimlen; n && *t; ) {
|
||||||
for (i = 1; t[i]; i++)
|
if (!strncmp(t, delim, delimlen)) {
|
||||||
if (fullrune(t, i))
|
if (!--n && count)
|
||||||
break;
|
|
||||||
charntorune(&r, t, i);
|
|
||||||
if (r == delim && !--n && count)
|
|
||||||
break;
|
break;
|
||||||
|
t += delimlen;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (i = 1; !fullrune(t, i); i++);
|
||||||
t += i;
|
t += i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -116,7 +116,7 @@ cut(FILE *fp)
|
||||||
while ((len = getline(&buf, &size, fp)) != -1) {
|
while ((len = getline(&buf, &size, fp)) != -1) {
|
||||||
if (len && buf[len - 1] == '\n')
|
if (len && buf[len - 1] == '\n')
|
||||||
buf[len - 1] = '\0';
|
buf[len - 1] = '\0';
|
||||||
if (mode == 'f' && !utfrune(buf, delim)) {
|
if (mode == 'f' && !utfutf(buf, delim)) {
|
||||||
if (!sflag)
|
if (!sflag)
|
||||||
puts(buf);
|
puts(buf);
|
||||||
continue;
|
continue;
|
||||||
|
@ -139,6 +139,36 @@ cut(FILE *fp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Resolve backslash escape sequences in s in place.
 *
 * s   - buffer holding len bytes of text; modified in place.
 * len - number of bytes in s to process (terminator not counted).
 *
 * Recognised sequences are \\ \a \b \f \n \r \t and \v; each two-byte
 * sequence is replaced by the single byte it denotes.  Any other
 * backslash sequence, and a backslash that is the very last byte, is
 * copied through unchanged.
 *
 * Returns the new length.  When at least one sequence was resolved the
 * result is shorter than len and a '\0' is stored right after it; when
 * nothing was resolved no byte beyond s[len - 1] is read or written.
 */
static size_t
resolveescapes(char *s, size_t len)
{
	size_t r, w;
	char c;

	/*
	 * r is the read cursor, w the write cursor; w never overtakes r,
	 * so the rewrite is safe in place.  All bounds are written as
	 * "r + 1 < len" rather than "r < len - 1" because len is unsigned
	 * and len - 1 would wrap around for an empty string.
	 */
	for (r = 0, w = 0; r < len; r++, w++) {
		c = s[r];
		if (c == '\\' && r + 1 < len) {
			switch (s[r + 1]) {
			case '\\': c = '\\'; r++; break;
			case 'a':  c = '\a'; r++; break;
			case 'b':  c = '\b'; r++; break;
			case 'f':  c = '\f'; r++; break;
			case 'n':  c = '\n'; r++; break;
			case 'r':  c = '\r'; r++; break;
			case 't':  c = '\t'; r++; break;
			case 'v':  c = '\v'; r++; break;
			default:
				/* unknown escape: keep the backslash literally */
				break;
			}
		}
		s[w] = c;
	}

	/* only terminate when the string actually shrank */
	if (w < len)
		s[w] = '\0';

	return w;
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
|
@ -151,24 +181,17 @@ int
|
||||||
main(int argc, char *argv[])
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
int i;
|
|
||||||
char *m, *d;
|
|
||||||
|
|
||||||
ARGBEGIN {
|
ARGBEGIN {
|
||||||
case 'b':
|
case 'b':
|
||||||
case 'c':
|
case 'c':
|
||||||
case 'f':
|
case 'f':
|
||||||
mode = ARGC();
|
mode = ARGC();
|
||||||
m = EARGF(usage());
|
parselist(EARGF(usage()));
|
||||||
parselist(m);
|
|
||||||
break;
|
break;
|
||||||
case 'd':
|
case 'd':
|
||||||
d = EARGF(usage());
|
delim = EARGF(usage());
|
||||||
for (i = 1; i <= strlen(d); i++)
|
delimlen = resolveescapes(delim, strlen(delim));
|
||||||
if (fullrune(d, i))
|
|
||||||
break;
|
|
||||||
charntorune(&delim, d, i);
|
|
||||||
delimlen = i;
|
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
nflag = 1;
|
nflag = 1;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user