Add UTF-8-support to strings(1), add t-flag and refactor code
Previously, the string length was limited to BUFSIZ, which is an obvious deficiency. Now the buffer only needs to be as long as the minimal string length specified by the user. I added UTF-8 support, because that's what POSIX requires and there are cases where you need it. It doesn't add ELF-barf compared to the previous implementation. The t-flag is also pretty important for POSIX compliance, so I added it. The only trouble previously was the a-flag, but given that POSIX leaves undefined what the a-flag actually does, we make it the default behavior and don't bother parsing ELF headers, which has already turned out to be a security issue in GNU binutils[0]. [0]: http://lcamtuf.blogspot.ro/2014/10/psa-dont-run-strings-on-untrusted-files.html
This commit is contained in:
		
							
								
								
									
										65
									
								
								strings.c
									
									
									
									
									
								
							
							
						
						
									
										65
									
								
								strings.c
									
									
									
									
									
								
							@@ -1,50 +1,75 @@
 | 
			
		||||
/* See LICENSE file for copyright and license details. */
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <stdlib.h>
 | 
			
		||||
 | 
			
		||||
#include "utf.h"
 | 
			
		||||
#include "util.h"
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
strings(FILE *fp, const char *fname, int len)
 | 
			
		||||
{
 | 
			
		||||
	unsigned char buf[BUFSIZ];
 | 
			
		||||
	int c, i = 0;
 | 
			
		||||
	off_t offset = 0;
 | 
			
		||||
static char *format = "";
 | 
			
		||||
 | 
			
		||||
	do {
 | 
			
		||||
		offset++;
 | 
			
		||||
		if (isprint(c = getc(fp)))
 | 
			
		||||
			buf[i++] = c;
 | 
			
		||||
		if ((!isprint(c) && i >= len) || i == sizeof(buf) - 1) {
 | 
			
		||||
			buf[i] = '\0';
 | 
			
		||||
			printf("%8ld: %s\n", (long)offset - i - 1, buf);
 | 
			
		||||
static void
 | 
			
		||||
strings(FILE *fp, const char *fname, size_t len)
 | 
			
		||||
{
 | 
			
		||||
	Rune r, *rbuf;
 | 
			
		||||
	size_t i, bread;
 | 
			
		||||
	off_t off;
 | 
			
		||||
 | 
			
		||||
	rbuf = emalloc(len * sizeof(*rbuf));
 | 
			
		||||
 | 
			
		||||
	for (off = 0, i = 0; (bread = efgetrune(&r, fp, fname)); ) {
 | 
			
		||||
		off += bread;
 | 
			
		||||
		if (r == Runeerror)
 | 
			
		||||
			continue;
 | 
			
		||||
		else if (!isprintrune(r)) {
 | 
			
		||||
			if (i > len)
 | 
			
		||||
				putchar('\n');
 | 
			
		||||
			i = 0;
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
	} while (c != EOF);
 | 
			
		||||
	if (ferror(fp))
 | 
			
		||||
		eprintf("%s: read error:", fname);
 | 
			
		||||
		if (i < len) {
 | 
			
		||||
			rbuf[i++] = r;
 | 
			
		||||
			continue;
 | 
			
		||||
		} else if (i > len) {
 | 
			
		||||
			efputrune(&r, stdout, "<stdout>");
 | 
			
		||||
			continue;
 | 
			
		||||
		}
 | 
			
		||||
		printf(format, (long)off - i);
 | 
			
		||||
		for (i = 0; i < len; i++) {
 | 
			
		||||
			efputrune(rbuf + i, stdout, "<stdout>");
 | 
			
		||||
		}
 | 
			
		||||
		i++;
 | 
			
		||||
	}
 | 
			
		||||
	free(rbuf);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
usage(void)
 | 
			
		||||
{
 | 
			
		||||
	eprintf("usage: %s [-a] [-n len] [file ...]\n", argv0);
 | 
			
		||||
	eprintf("usage: %s [-a] [-n num] [-t format] [file ...]\n", argv0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int
 | 
			
		||||
main(int argc, char *argv[])
 | 
			
		||||
{
 | 
			
		||||
	FILE *fp;
 | 
			
		||||
	size_t len = 4;
 | 
			
		||||
	int ret = 0;
 | 
			
		||||
	int len = 4;
 | 
			
		||||
	char f;
 | 
			
		||||
 | 
			
		||||
	ARGBEGIN {
 | 
			
		||||
	case 'a':
 | 
			
		||||
		break;
 | 
			
		||||
	case 'n':
 | 
			
		||||
		len = estrtonum(EARGF(usage()), 1, INT_MAX);
 | 
			
		||||
		len = estrtonum(EARGF(usage()), 1, LLONG_MAX);
 | 
			
		||||
		break;
 | 
			
		||||
	case 't':
 | 
			
		||||
		format = estrdup("%8l#: ");
 | 
			
		||||
		f = *EARGF(usage());
 | 
			
		||||
		if (f == 'd' || f == 'o' || f == 'x')
 | 
			
		||||
			format[3] = f;
 | 
			
		||||
		else
 | 
			
		||||
			usage();
 | 
			
		||||
		break;
 | 
			
		||||
	default:
 | 
			
		||||
		usage();
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user