Add UTF-8-support to strings(1), add t-flag and refactor code
Previously, the string length was limited to BUFSIZ, which is an obvious deficiency. Now the buffer only needs to be as long as the minimal string length the user specifies. I added UTF-8 support, because that's how POSIX wants it and there are cases where you need this. It doesn't add ELF-barf compared to the previous implementation. The t-flag is also pretty important for POSIX compliance, so I added it. The only trouble previously was the a-flag, but given that POSIX leaves undefined what the a-flag actually does, we set it as default and don't care about parsing ELF headers, which has already turned out to be a security issue in GNU binutils[0]. [0]: http://lcamtuf.blogspot.ro/2014/10/psa-dont-run-strings-on-untrusted-files.html
This commit is contained in:
parent
949dafc171
commit
e5b5497773
2
README
2
README
|
@ -67,7 +67,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
|||
sort no -m, -o, -d, -f, -i
|
||||
=* split yes none
|
||||
=* sponge non-posix none
|
||||
strings no -t
|
||||
#* strings yes none
|
||||
=* sync non-posix none
|
||||
=* tail yes none
|
||||
=* tar non-posix none
|
||||
|
|
48
strings.1
48
strings.1
|
@ -1,32 +1,52 @@
|
|||
.Dd November 23, 2014
|
||||
.Dd February 17, 2015
|
||||
.Dt STRINGS 1
|
||||
.Os sbase
|
||||
.Sh NAME
|
||||
.Nm strings
|
||||
.Nd print the strings of printable characters in files
|
||||
.Nd print strings of printable characters in files
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl a
|
||||
.Op Fl n Ar len
|
||||
.Op Fl n Ar num
|
||||
.Op Fl t Ar format
|
||||
.Op Ar file ...
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
prints the printable character sequences that are at least 4 characters
|
||||
long. If no
|
||||
.Ar files
|
||||
are given,
|
||||
writes sequences of at least 4 printable characters in each
|
||||
.Ar file
|
||||
to stdout.
|
||||
If no
|
||||
.Ar file
|
||||
is given,
|
||||
.Nm
|
||||
reads from stdin.
|
||||
.Sh OPTIONS
|
||||
.Bl -tag -width Ds
|
||||
.It Fl a
|
||||
Scan files in their entirety. This is the default.
|
||||
.It Fl n Ar len
|
||||
Only print sequences that are at least
|
||||
.Ar len
|
||||
characters. The default is 4 characters.
|
||||
Scan each
|
||||
.Ar file
|
||||
entirely. This is the default.
|
||||
.It Fl n Ar num
|
||||
Print sequences of at least
|
||||
.Ar num
|
||||
characters. The default is 4.
|
||||
.It Fl t Ar format
|
||||
Prefix each string with its byte offset, with
|
||||
.Ar format
|
||||
being one of
|
||||
.Sy d , o , x
|
||||
for decimal, octal or hexadecimal numbers.
|
||||
.El
|
||||
.Sh STANDARDS
|
||||
The
|
||||
.Nm
|
||||
mirrors the semantics of Plan9
|
||||
.Xr strings 1 .
|
||||
utility is compliant with the
|
||||
.St -p1003.1-2008
|
||||
specification.
|
||||
.Pp
|
||||
The
|
||||
.Op Fl t
|
||||
output format has been changed from "%F %s" to "%8lF: %s", with
|
||||
.Sy F
|
||||
being one of
|
||||
.Sy d , o , x .
|
||||
|
|
65
strings.c
65
strings.c
|
@ -1,50 +1,75 @@
|
|||
/* See LICENSE file for copyright and license details. */
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "utf.h"
|
||||
#include "util.h"
|
||||
|
||||
static void
|
||||
strings(FILE *fp, const char *fname, int len)
|
||||
{
|
||||
unsigned char buf[BUFSIZ];
|
||||
int c, i = 0;
|
||||
off_t offset = 0;
|
||||
static char *format = "";
|
||||
|
||||
do {
|
||||
offset++;
|
||||
if (isprint(c = getc(fp)))
|
||||
buf[i++] = c;
|
||||
if ((!isprint(c) && i >= len) || i == sizeof(buf) - 1) {
|
||||
buf[i] = '\0';
|
||||
printf("%8ld: %s\n", (long)offset - i - 1, buf);
|
||||
static void
|
||||
strings(FILE *fp, const char *fname, size_t len)
|
||||
{
|
||||
Rune r, *rbuf;
|
||||
size_t i, bread;
|
||||
off_t off;
|
||||
|
||||
rbuf = emalloc(len * sizeof(*rbuf));
|
||||
|
||||
for (off = 0, i = 0; (bread = efgetrune(&r, fp, fname)); ) {
|
||||
off += bread;
|
||||
if (r == Runeerror)
|
||||
continue;
|
||||
else if (!isprintrune(r)) {
|
||||
if (i > len)
|
||||
putchar('\n');
|
||||
i = 0;
|
||||
continue;
|
||||
}
|
||||
} while (c != EOF);
|
||||
if (ferror(fp))
|
||||
eprintf("%s: read error:", fname);
|
||||
if (i < len) {
|
||||
rbuf[i++] = r;
|
||||
continue;
|
||||
} else if (i > len) {
|
||||
efputrune(&r, stdout, "<stdout>");
|
||||
continue;
|
||||
}
|
||||
printf(format, (long)off - i);
|
||||
for (i = 0; i < len; i++) {
|
||||
efputrune(rbuf + i, stdout, "<stdout>");
|
||||
}
|
||||
i++;
|
||||
}
|
||||
free(rbuf);
|
||||
}
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
eprintf("usage: %s [-a] [-n len] [file ...]\n", argv0);
|
||||
eprintf("usage: %s [-a] [-n num] [-t format] [file ...]\n", argv0);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
FILE *fp;
|
||||
size_t len = 4;
|
||||
int ret = 0;
|
||||
int len = 4;
|
||||
char f;
|
||||
|
||||
ARGBEGIN {
|
||||
case 'a':
|
||||
break;
|
||||
case 'n':
|
||||
len = estrtonum(EARGF(usage()), 1, INT_MAX);
|
||||
len = estrtonum(EARGF(usage()), 1, LLONG_MAX);
|
||||
break;
|
||||
case 't':
|
||||
format = estrdup("%8l#: ");
|
||||
f = *EARGF(usage());
|
||||
if (f == 'd' || f == 'o' || f == 'x')
|
||||
format[3] = f;
|
||||
else
|
||||
usage();
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
|
|
Loading…
Reference in New Issue
Block a user