sort: add support for "per-keydef" flags

This commit is contained in:
Jakob Kramer 2014-05-06 16:07:05 +02:00 committed by sin
parent 109e8963f5
commit 6f7e9a5078
2 changed files with 78 additions and 31 deletions

27
sort.1
View File

@ -1,4 +1,4 @@
.TH NL 1 sbase\-VERSION .TH SORT 1 sbase\-VERSION
.SH NAME .SH NAME
sort \- sort lines sort \- sort lines
.SH SYNOPSIS .SH SYNOPSIS
@ -27,15 +27,26 @@ reverses the sort.
prints equal lines only once. prints equal lines only once.
.TP .TP
.B \-k key .B \-k key
specifies a key definition of the form \fBS\fR[.\fBs\fR][,\fBE\fR[.\fBe\fR]], specifies a key definition of the form
.BR S [. s ][ f ][, E [. e ][ f ]]
where where
.B S, .BR S ,
.B s, .BR s ,
.B E, .BR E ,
and and
.B e .B e
are the starting column, starting character in that column, ending column and are the starting column, starting character in that column, ending column and
the ending character of that column respectively. If they are not specified, the ending character of that column respectively. If they are not specified,
s refers to the first character of the specified starting column, E refers to .B s
the last column of every line, and e refers to the last character of that last refers to the first character of the specified starting column,
column. .B E
refers to the last column of every line, and
.B e
refers to the last character of that last column.
.B f
can be used to specify options
.RB ( n ,
.BR r ,
.BR b )
that only apply to this key definition.
.B b
is special in that it only applies to the column that it was specified after.

80
sort.c
View File

@ -13,6 +13,14 @@ struct keydef {
int end_column; int end_column;
int start_char; int start_char;
int end_char; int end_char;
int flags;
};
/* Per-key modifier bits stored in keydef.flags. */
enum {
MOD_N = 1 << 1, /* compare this key numerically (strtol) instead of with strcmp */
MOD_STARTB = 1 << 2, /* 'b' given for the start position of the key */
MOD_ENDB = 1 << 3, /* 'b' given for the end position of the key */
MOD_R = 1 << 4 /* negate the comparison result for this key */
}; };
struct kdlist { struct kdlist {
@ -23,20 +31,18 @@ struct kdlist {
static struct kdlist *head = NULL; static struct kdlist *head = NULL;
static struct kdlist *curr = NULL; static struct kdlist *curr = NULL;
static void addkeydef(char *); static void addkeydef(char *, int);
static void freelist(void); static void freelist(void);
static int linecmp(const char **, const char **); static int linecmp(const char **, const char **);
static char *next_nonblank(char *); static char *next_nonblank(char *);
static char *next_blank(char *); static char *next_blank(char *);
static int parse_keydef(struct keydef *, char *); static int parse_flags(char **, int *, int);
static char *skip_columns(char *, size_t); static int parse_keydef(struct keydef *, char *, int);
static char *skip_columns(char *, size_t, bool);
static char *end_column(char *); static char *end_column(char *);
static char *columns(char *, const struct keydef *); static char *columns(char *, const struct keydef *);
static bool rflag = false;
static bool uflag = false; static bool uflag = false;
static bool nflag = false;
static bool bflag = false;
static void static void
usage(void) usage(void)
@ -50,28 +56,31 @@ main(int argc, char *argv[])
long i; long i;
FILE *fp; FILE *fp;
struct linebuf linebuf = EMPTY_LINEBUF; struct linebuf linebuf = EMPTY_LINEBUF;
int global_flags = 0;
ARGBEGIN { ARGBEGIN {
case 'n': case 'n':
nflag = true; global_flags |= MOD_N;
break; break;
case 'r': case 'r':
rflag = true; global_flags |= MOD_R;
break; break;
case 'u': case 'u':
uflag = true; uflag = true;
break; break;
case 'b': case 'b':
bflag = true; global_flags |= MOD_STARTB | MOD_ENDB;
break; break;
case 'k': case 'k':
addkeydef(EARGF(usage())); addkeydef(EARGF(usage()), global_flags);
break; break;
default: default:
usage(); usage();
} ARGEND; } ARGEND;
addkeydef("1"); if(!head && global_flags)
addkeydef("1", global_flags);
addkeydef("1", global_flags & MOD_R);
if(argc == 0) { if(argc == 0) {
getlines(stdin, &linebuf); getlines(stdin, &linebuf);
@ -98,7 +107,7 @@ main(int argc, char *argv[])
} }
static void static void
addkeydef(char *def) addkeydef(char *def, int flags)
{ {
struct kdlist *node; struct kdlist *node;
@ -107,7 +116,7 @@ addkeydef(char *def)
enprintf(2, "malloc:"); enprintf(2, "malloc:");
if(!head) if(!head)
head = node; head = node;
if(parse_keydef(&node->keydef, def)) if(parse_keydef(&node->keydef, def, flags))
enprintf(2, "faulty key definition\n"); enprintf(2, "faulty key definition\n");
if(curr) if(curr)
curr->next = node; curr->next = node;
@ -145,19 +154,42 @@ linecmp(const char **a, const char **b)
res = 0; res = 0;
else if(!(node == head) && !node->next) else if(!(node == head) && !node->next)
res = strcmp(s1, s2); res = strcmp(s1, s2);
else if(nflag) else if(node->keydef.flags & MOD_N)
res = strtol(s1, 0, 10) - strtol(s2, 0, 10); res = strtol(s1, 0, 10) - strtol(s2, 0, 10);
else else
res = strcmp(s1, s2); res = strcmp(s1, s2);
if(node->keydef.flags & MOD_R)
res = -res;
free(s1); free(s1);
free(s2); free(s2);
} }
return rflag ? -res : res; return res;
} }
static int static int
parse_keydef(struct keydef *kd, char *s) parse_flags(char **s, int *flags, int bflag)
{
while(isalpha(**s))
switch(*((*s)++)) {
case 'b':
*flags |= bflag;
break;
case 'n':
*flags |= MOD_N;
break;
case 'r':
*flags |= MOD_R;
break;
default:
return -1;
}
return 0;
}
static int
parse_keydef(struct keydef *kd, char *s, int flags)
{ {
char *rest = s; char *rest = s;
@ -166,6 +198,7 @@ parse_keydef(struct keydef *kd, char *s)
/* 0 means end of line */ /* 0 means end of line */
kd->end_column = 0; kd->end_column = 0;
kd->end_char = 0; kd->end_char = 0;
kd->flags = flags;
kd->start_column = strtol(rest, &rest, 10); kd->start_column = strtol(rest, &rest, 10);
if(kd->start_column < 1) if(kd->start_column < 1)
@ -174,6 +207,8 @@ parse_keydef(struct keydef *kd, char *s)
kd->start_char = strtol(rest+1, &rest, 10); kd->start_char = strtol(rest+1, &rest, 10);
if(kd->start_char < 1) if(kd->start_char < 1)
return -1; return -1;
if(parse_flags(&rest, &kd->flags, MOD_STARTB) == -1)
return -1;
if(*rest == ',') { if(*rest == ',') {
kd->end_column = strtol(rest+1, &rest, 10); kd->end_column = strtol(rest+1, &rest, 10);
if(kd->end_column && kd->end_column < kd->start_column) if(kd->end_column && kd->end_column < kd->start_column)
@ -183,6 +218,8 @@ parse_keydef(struct keydef *kd, char *s)
if(kd->end_char < 1) if(kd->end_char < 1)
return -1; return -1;
} }
if(parse_flags(&rest, &kd->flags, MOD_ENDB) == -1)
return -1;
} }
if(*rest != '\0') if(*rest != '\0')
return -1; return -1;
@ -206,7 +243,7 @@ next_blank(char *s)
} }
static char * static char *
skip_columns(char *s, size_t n) skip_columns(char *s, size_t n, bool bflag)
{ {
size_t i; size_t i;
@ -223,10 +260,9 @@ skip_columns(char *s, size_t n)
static char * static char *
end_column(char *s) end_column(char *s)
{ {
if(bflag) if(isblank(*s))
s = next_nonblank(s);
return next_blank(s); return next_blank(s);
else
return next_blank(next_nonblank(s));
} }
static char * static char *
@ -235,11 +271,11 @@ columns(char *line, const struct keydef *kd)
char *start, *end; char *start, *end;
char *res; char *res;
start = skip_columns(line, kd->start_column); start = skip_columns(line, kd->start_column, kd->flags & MOD_STARTB);
start += MIN(kd->start_char, end_column(start) - start) - 1; start += MIN(kd->start_char, end_column(start) - start) - 1;
if(kd->end_column) { if(kd->end_column) {
end = skip_columns(line, kd->end_column); end = skip_columns(line, kd->end_column, kd->flags & MOD_ENDB);
if(kd->end_char) if(kd->end_char)
end += MIN(kd->end_char, end_column(end) - end); end += MIN(kd->end_char, end_column(end) - end);
else else