| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | /* See LICENSE file for copyright and license details. */ | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							|  |  |  | #include <string.h>
 | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | #include "text.h"
 | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | #include "utf.h"
 | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | #include "util.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct Range { | 
					
						
							|  |  |  | 	size_t min, max; | 
					
						
							|  |  |  | 	struct Range *next; | 
					
						
							|  |  |  | } Range; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | static Range *list     = NULL; | 
					
						
							|  |  |  | static char   mode     = 0; | 
					
						
							| 
									
										
										
										
											2015-01-22 20:19:48 +01:00
										 |  |  | static char  *delim    = "\t"; | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | static size_t delimlen = 1; | 
					
						
							|  |  |  | static int    nflag    = 0; | 
					
						
							|  |  |  | static int    sflag    = 0; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | insert(Range *r) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	Range *l, *p, *t; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 	for (p = NULL, l = list; l; p = l, l = l->next) { | 
					
						
							|  |  |  | 		if (r->max && r->max + 1 < l->min) { | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 			r->next = l; | 
					
						
							|  |  |  | 			break; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 		} else if (!l->max || r->min < l->max + 2) { | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 			l->min = MIN(r->min, l->min); | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 			for (p = l, t = l->next; t; p = t, t = t->next) | 
					
						
							|  |  |  | 				if (r->max && r->max + 1 < t->min) | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 					break; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 			l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0; | 
					
						
							|  |  |  | 			l->next = t; | 
					
						
							|  |  |  | 			return; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 	if (p) | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 		p->next = r; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		list = r; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | parselist(char *str) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	char *s; | 
					
						
							|  |  |  | 	size_t n = 1; | 
					
						
							|  |  |  | 	Range *r; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 	for (s = str; *s; s++) { | 
					
						
							|  |  |  | 		if (*s == ' ') | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 			*s = ','; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 		if (*s == ',') | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 			n++; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-11-16 11:07:26 +01:00
										 |  |  | 	r = emalloc(n * sizeof(Range)); | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 	for (s = str; n; n--, s++) { | 
					
						
							| 
									
										
										
										
											2013-10-10 23:03:15 +01:00
										 |  |  | 		r->min = (*s == '-') ? 1 : strtoul(s, &s, 10); | 
					
						
							|  |  |  | 		r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		r->next = NULL; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 		if (!r->min || (r->max && r->max < r->min) || (*s && *s != ',')) | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 			eprintf("cut: bad list value\n"); | 
					
						
							|  |  |  | 		insert(r++); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static size_t | 
					
						
							|  |  |  | seek(const char *s, size_t pos, size_t *prev, size_t count) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const char *t; | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 	size_t n = pos - *prev, i; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 	if (mode == 'b') { | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 		if ((t = memchr(s, '\0', n))) | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 			return t - s; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 		if (nflag) | 
					
						
							|  |  |  | 			while (n && !UTF8_POINT(s[n])) | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 				n--; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		*prev += n; | 
					
						
							|  |  |  | 		return n; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 	} else if (mode == 'c') { | 
					
						
							|  |  |  | 		for (n++, t = s; *t; t++) | 
					
						
							|  |  |  | 			if (UTF8_POINT(*t) && !--n) | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 	} else { | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 		for (t = (count < delimlen + 1) ? s : s + delimlen; n && *t; ) { | 
					
						
							| 
									
										
										
										
											2015-01-22 20:19:48 +01:00
										 |  |  | 			if (!strncmp(t, delim, delimlen)) { | 
					
						
							|  |  |  | 				if (!--n && count) | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 					break; | 
					
						
							| 
									
										
										
										
											2015-01-22 20:19:48 +01:00
										 |  |  | 				t += delimlen; | 
					
						
							|  |  |  | 				continue; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			for (i = 1; !fullrune(t, i); i++); | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 			t += i; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	*prev = pos; | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 	return t - s; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | cut(FILE *fp) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2014-12-16 19:46:59 +00:00
										 |  |  | 	static char *buf = NULL; | 
					
						
							|  |  |  | 	static size_t size = 0; | 
					
						
							|  |  |  | 	char *s; | 
					
						
							|  |  |  | 	size_t i, n, p; | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 	ssize_t len; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 	Range *r; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-18 12:49:30 -08:00
										 |  |  | 	while ((len = getline(&buf, &size, fp)) != -1) { | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 		if (len && buf[len - 1] == '\n') | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 			buf[len - 1] = '\0'; | 
					
						
							| 
									
										
										
										
											2015-01-22 20:19:48 +01:00
										 |  |  | 		if (mode == 'f' && !utfutf(buf, delim)) { | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 			if (!sflag) | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 				puts(buf); | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 		for (i = 0, p = 1, s = buf, r = list; r; r = r->next, s += n) { | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 			s += seek(s, r->min, &p, i); | 
					
						
							|  |  |  | 			i += (mode == 'f') ? delimlen : 1; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 			if (!*s) | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 			if (!r->max) { | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 				fputs(s, stdout); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2015-01-22 12:32:50 +01:00
										 |  |  | 			n = seek(s, r->max + 1, &p, i); | 
					
						
							|  |  |  | 			i += (mode == 'f') ? delimlen : 1; | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 			if (fwrite(s, 1, n, stdout) != n) | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 				eprintf("write error:"); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		putchar('\n'); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-22 20:19:48 +01:00
										 |  |  | static size_t | 
					
						
							|  |  |  | resolveescapes(char *s, size_t len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	size_t i, off, m; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < len - 1; i++) { | 
					
						
							|  |  |  | 		if (s[i] != '\\') | 
					
						
							|  |  |  | 			continue; | 
					
						
							|  |  |  | 		off = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		switch (s[i + 1]) { | 
					
						
							|  |  |  |                 case '\\': s[i] = '\\'; off++; break; | 
					
						
							|  |  |  |                 case 'a':  s[i] = '\a'; off++; break; | 
					
						
							|  |  |  |                 case 'b':  s[i] = '\b'; off++; break; | 
					
						
							|  |  |  |                 case 'f':  s[i] = '\f'; off++; break; | 
					
						
							|  |  |  |                 case 'n':  s[i] = '\n'; off++; break; | 
					
						
							|  |  |  |                 case 'r':  s[i] = '\r'; off++; break; | 
					
						
							|  |  |  |                 case 't':  s[i] = '\t'; off++; break; | 
					
						
							|  |  |  |                 case 'v':  s[i] = '\v'; off++; break; | 
					
						
							|  |  |  | 		default:   continue; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		for (m = i + 1; m <= len - off; m++) | 
					
						
							|  |  |  | 			s[m] = s[m + off]; | 
					
						
							|  |  |  | 		len -= off; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return len; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-18 11:30:31 +01:00
										 |  |  | static void | 
					
						
							|  |  |  | usage(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	eprintf("usage: cut -b list [-n] [file ...]\n" | 
					
						
							|  |  |  | 	        "       cut -c list [file ...]\n" | 
					
						
							|  |  |  | 	        "       cut -f list [-d delim] [-s] [file ...]\n"); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | int | 
					
						
							|  |  |  | main(int argc, char *argv[]) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	FILE *fp; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	ARGBEGIN { | 
					
						
							|  |  |  | 	case 'b': | 
					
						
							|  |  |  | 	case 'c': | 
					
						
							|  |  |  | 	case 'f': | 
					
						
							|  |  |  | 		mode = ARGC(); | 
					
						
							| 
									
										
										
										
											2015-01-22 20:19:48 +01:00
										 |  |  | 		parselist(EARGF(usage())); | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		break; | 
					
						
							|  |  |  | 	case 'd': | 
					
						
							| 
									
										
										
										
											2015-01-22 20:19:48 +01:00
										 |  |  | 		delim = EARGF(usage()); | 
					
						
							|  |  |  | 		delimlen = resolveescapes(delim, strlen(delim)); | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		break; | 
					
						
							|  |  |  | 	case 'n': | 
					
						
							| 
									
										
										
										
											2014-11-13 21:24:47 +01:00
										 |  |  | 		nflag = 1; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		break; | 
					
						
							|  |  |  | 	case 's': | 
					
						
							| 
									
										
										
										
											2014-11-13 21:24:47 +01:00
										 |  |  | 		sflag = 1; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		break; | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 		usage(); | 
					
						
							|  |  |  | 	} ARGEND; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-11-13 18:29:30 +01:00
										 |  |  | 	if (!mode) | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		usage(); | 
					
						
							| 
									
										
										
										
											2015-01-18 11:30:31 +01:00
										 |  |  | 	if (!argc) | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 		cut(stdin); | 
					
						
							| 
									
										
										
										
											2015-01-18 11:30:31 +01:00
										 |  |  | 	else for (; argc--; argv++) { | 
					
						
							|  |  |  | 		if (!strcmp(*argv, "-")) | 
					
						
							| 
									
										
										
										
											2014-12-17 21:14:14 +01:00
										 |  |  | 			cut(stdin); | 
					
						
							| 
									
										
										
										
											2015-01-18 11:30:31 +01:00
										 |  |  | 		else { | 
					
						
							| 
									
										
										
										
											2014-12-17 21:14:14 +01:00
										 |  |  | 			if (!(fp = fopen(*argv, "r"))) { | 
					
						
							| 
									
										
										
										
											2014-06-01 14:39:34 +02:00
										 |  |  | 				weprintf("fopen %s:", *argv); | 
					
						
							|  |  |  | 				continue; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			cut(fp); | 
					
						
							| 
									
										
										
										
											2014-12-17 21:14:14 +01:00
										 |  |  | 			fclose(fp); | 
					
						
							| 
									
										
										
										
											2013-11-13 11:39:24 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2014-10-02 23:46:04 +01:00
										 |  |  | 	return 0; | 
					
						
							| 
									
										
										
										
											2013-10-08 20:39:08 +01:00
										 |  |  | } |