tail: Process bytes with -c option, and add -m option for runes
POSIX says that -c specifies a number of bytes, not characters. This flag is commonly used by scripts that operate on binary files to do things like extract a header. Treating the offsets as character offsets will break things in mysterious ways. Instead, add a -m option (chosen to match `wc -m`, which also operates on characters) to handle character offsets.
This commit is contained in:
		
				
					committed by
					
						 Anselm R Garbe
						Anselm R Garbe
					
				
			
			
				
	
			
			
			
						parent
						
							1ab4ac60ff
						
					
				
				
					commit
					ea8622a4ce
				
			
							
								
								
									
										6
									
								
								tail.1
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								tail.1
									
									
									
									
									
								
							| @@ -7,7 +7,7 @@ | |||||||
| .Sh SYNOPSIS | .Sh SYNOPSIS | ||||||
| .Nm | .Nm | ||||||
| .Op Fl f | .Op Fl f | ||||||
| .Op Fl c Ar num | Fl n Ar num | Fl Ns Ar num | .Op Fl c Ar num | Fl m Ar num | Fl n Ar num | Fl Ns Ar num | ||||||
| .Op Ar file ... | .Op Ar file ... | ||||||
| .Sh DESCRIPTION | .Sh DESCRIPTION | ||||||
| .Nm | .Nm | ||||||
| @@ -20,10 +20,10 @@ is given, | |||||||
| reads from stdin. | reads from stdin. | ||||||
| .Sh OPTIONS | .Sh OPTIONS | ||||||
| .Bl -tag -width Ds | .Bl -tag -width Ds | ||||||
| .It Fl c Ar num | Fl n Ar num | Fl Ns Ar num | .It Fl c Ar num | Fl m Ar num | Fl n Ar num | Fl Ns Ar num | ||||||
| Display final | Display final | ||||||
| .Ar num | .Ar num | ||||||
| characters | lines | | bytes | characters | lines | | ||||||
| lines. If | lines. If | ||||||
| .Ar num | .Ar num | ||||||
| begins with '+' | begins with '+' | ||||||
|   | |||||||
							
								
								
									
										29
									
								
								tail.c
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								tail.c
									
									
									
									
									
								
							| @@ -26,12 +26,23 @@ dropinit(int fd, const char *fname, size_t count) | |||||||
| 		goto copy; | 		goto copy; | ||||||
| 	count--;  /* numbering starts at 1 */ | 	count--;  /* numbering starts at 1 */ | ||||||
| 	while (count && (n = read(fd, buf, sizeof(buf))) > 0) { | 	while (count && (n = read(fd, buf, sizeof(buf))) > 0) { | ||||||
| 		if (mode == 'n') { | 		switch (mode) { | ||||||
|  | 		case 'n':  /* lines */ | ||||||
| 			for (p = buf; count && n > 0; p++, n--) { | 			for (p = buf; count && n > 0; p++, n--) { | ||||||
| 				if (*p == '\n') | 				if (*p == '\n') | ||||||
| 					count--; | 					count--; | ||||||
| 			} | 			} | ||||||
| 		} else { | 			break; | ||||||
|  | 		case 'c':  /* bytes */ | ||||||
|  | 			if (count > n) { | ||||||
|  | 				count -= n; | ||||||
|  | 			} else { | ||||||
|  | 				p = buf + count; | ||||||
|  | 				n -= count; | ||||||
|  | 				count = 0; | ||||||
|  | 			} | ||||||
|  | 			break; | ||||||
|  | 		case 'm':  /* runes */ | ||||||
| 			for (p = buf; count && n > 0; p += nr, n -= nr, count--) { | 			for (p = buf; count && n > 0; p += nr, n -= nr, count--) { | ||||||
| 				nr = charntorune(&r, p, n); | 				nr = charntorune(&r, p, n); | ||||||
| 				if (!nr) { | 				if (!nr) { | ||||||
| @@ -42,6 +53,7 @@ dropinit(int fd, const char *fname, size_t count) | |||||||
| 					break; | 					break; | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  | 			break; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	if (count) { | 	if (count) { | ||||||
| @@ -90,7 +102,8 @@ taketail(int fd, const char *fname, size_t count) | |||||||
| 		if (n == 0) | 		if (n == 0) | ||||||
| 			break; | 			break; | ||||||
| 		len += n; | 		len += n; | ||||||
| 		if (mode == 'n') { | 		switch (mode) { | ||||||
|  | 		case 'n':  /* lines */ | ||||||
| 			/* ignore the last character; if it is a newline, it | 			/* ignore the last character; if it is a newline, it | ||||||
| 			 * ends the last line */ | 			 * ends the last line */ | ||||||
| 			for (p = buf + len - 2, left = count; p >= buf; p--) { | 			for (p = buf + len - 2, left = count; p >= buf; p--) { | ||||||
| @@ -102,7 +115,11 @@ taketail(int fd, const char *fname, size_t count) | |||||||
| 					break; | 					break; | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		} else { | 			break; | ||||||
|  | 		case 'c':  /* bytes */ | ||||||
|  | 			p = count < len ? buf + len - count : buf; | ||||||
|  | 			break; | ||||||
|  | 		case 'm':  /* runes */ | ||||||
| 			for (p = buf + len - 1, left = count; p >= buf; p--) { | 			for (p = buf + len - 1, left = count; p >= buf; p--) { | ||||||
| 				/* skip utf-8 continuation bytes */ | 				/* skip utf-8 continuation bytes */ | ||||||
| 				if ((*p & 0xc0) == 0x80) | 				if ((*p & 0xc0) == 0x80) | ||||||
| @@ -111,6 +128,7 @@ taketail(int fd, const char *fname, size_t count) | |||||||
| 				if (!left) | 				if (!left) | ||||||
| 					break; | 					break; | ||||||
| 			} | 			} | ||||||
|  | 			break; | ||||||
| 		} | 		} | ||||||
| 		if (p > buf) { | 		if (p > buf) { | ||||||
| 			len -= p - buf; | 			len -= p - buf; | ||||||
| @@ -125,7 +143,7 @@ taketail(int fd, const char *fname, size_t count) | |||||||
| static void | static void | ||||||
| usage(void) | usage(void) | ||||||
| { | { | ||||||
| 	eprintf("usage: %s [-f] [-c num | -n num | -num] [file ...]\n", argv0); | 	eprintf("usage: %s [-f] [-c num | -m num | -n num | -num] [file ...]\n", argv0); | ||||||
| } | } | ||||||
|  |  | ||||||
| int | int | ||||||
| @@ -143,6 +161,7 @@ main(int argc, char *argv[]) | |||||||
| 		fflag = 1; | 		fflag = 1; | ||||||
| 		break; | 		break; | ||||||
| 	case 'c': | 	case 'c': | ||||||
|  | 	case 'm': | ||||||
| 	case 'n': | 	case 'n': | ||||||
| 		mode = ARGC(); | 		mode = ARGC(); | ||||||
| 		numstr = EARGF(usage()); | 		numstr = EARGF(usage()); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user