Add UTF-8-support to strings(1), add t-flag and refactor code
Previously, the string length was limited to BUFSIZ, which is an obvious deficiency. Now the buffer only needs to be as long as the minimal string length specified by the user. I added UTF-8 support, because that's how POSIX wants it and there are cases where you need this. It doesn't add ELF-barf compared to the previous implementation. The t-flag is also pretty important for POSIX-compliance, so I added it. The only trouble previously was the a-flag, but given that POSIX leaves undefined what the a-flag actually does, we set it as default and don't care about parsing ELF-headers, which has already turned out to be a security issue in GNU binutils[0]. [0]: http://lcamtuf.blogspot.ro/2014/10/psa-dont-run-strings-on-untrusted-files.html
This commit is contained in:
parent
949dafc171
commit
e5b5497773
2
README
2
README
|
@ -67,7 +67,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
|
||||||
sort no -m, -o, -d, -f, -i
|
sort no -m, -o, -d, -f, -i
|
||||||
=* split yes none
|
=* split yes none
|
||||||
=* sponge non-posix none
|
=* sponge non-posix none
|
||||||
strings no -t
|
#* strings yes none
|
||||||
=* sync non-posix none
|
=* sync non-posix none
|
||||||
=* tail yes none
|
=* tail yes none
|
||||||
=* tar non-posix none
|
=* tar non-posix none
|
||||||
|
|
48
strings.1
48
strings.1
|
@ -1,32 +1,52 @@
|
||||||
.Dd November 23, 2014
|
.Dd February 17, 2015
|
||||||
.Dt STRINGS 1
|
.Dt STRINGS 1
|
||||||
.Os sbase
|
.Os sbase
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
.Nm strings
|
.Nm strings
|
||||||
.Nd print the strings of printable characters in files
|
.Nd print strings of printable characters in files
|
||||||
.Sh SYNOPSIS
|
.Sh SYNOPSIS
|
||||||
.Nm
|
.Nm
|
||||||
.Op Fl a
|
.Op Fl a
|
||||||
.Op Fl n Ar len
|
.Op Fl n Ar num
|
||||||
|
.Op Fl t Ar format
|
||||||
.Op Ar file ...
|
.Op Ar file ...
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
.Nm
|
.Nm
|
||||||
prints the printable character sequences that are at least 4 characters
|
writes sequences of at least 4 printable characters in each
|
||||||
long. If no
|
.Ar file
|
||||||
.Ar files
|
to stdout.
|
||||||
are given,
|
If no
|
||||||
|
.Ar file
|
||||||
|
is given,
|
||||||
.Nm
|
.Nm
|
||||||
reads from stdin.
|
reads from stdin.
|
||||||
.Sh OPTIONS
|
.Sh OPTIONS
|
||||||
.Bl -tag -width Ds
|
.Bl -tag -width Ds
|
||||||
.It Fl a
|
.It Fl a
|
||||||
Scan files in their entirety. This is the default.
|
Scan each
|
||||||
.It Fl n Ar len
|
.Ar file
|
||||||
Only print sequences that are at least
|
entirely. This is the default.
|
||||||
.Ar len
|
.It Fl n Ar num
|
||||||
characters. The default is 4 characters.
|
Print sequences of at least
|
||||||
|
.Ar num
|
||||||
|
characters. The default is 4.
|
||||||
|
.It Fl t Ar format
|
||||||
|
Prepend each string with its byte offset, with
|
||||||
|
.Ar format
|
||||||
|
being one of
|
||||||
|
.Sy d , o , x
|
||||||
|
for decimal, octal or hexadecimal numbers.
|
||||||
.El
|
.El
|
||||||
.Sh STANDARDS
|
.Sh STANDARDS
|
||||||
|
The
|
||||||
.Nm
|
.Nm
|
||||||
mirrors the semantics of Plan9
|
utility is compliant with the
|
||||||
.Xr strings 1 .
|
.St -p1003.1-2008
|
||||||
|
specification.
|
||||||
|
.Pp
|
||||||
|
The
|
||||||
|
.Op Fl t
|
||||||
|
output format has been changed from "%F %s" to "%8lF: %s", with
|
||||||
|
.Sy F
|
||||||
|
being one of
|
||||||
|
.Sy d , o , x .
|
||||||
|
|
65
strings.c
65
strings.c
|
@ -1,50 +1,75 @@
|
||||||
/* See LICENSE file for copyright and license details. */
|
/* See LICENSE file for copyright and license details. */
|
||||||
#include <ctype.h>
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "utf.h"
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
|
|
||||||
static void
|
static char *format = "";
|
||||||
strings(FILE *fp, const char *fname, int len)
|
|
||||||
{
|
|
||||||
unsigned char buf[BUFSIZ];
|
|
||||||
int c, i = 0;
|
|
||||||
off_t offset = 0;
|
|
||||||
|
|
||||||
do {
|
static void
|
||||||
offset++;
|
strings(FILE *fp, const char *fname, size_t len)
|
||||||
if (isprint(c = getc(fp)))
|
{
|
||||||
buf[i++] = c;
|
Rune r, *rbuf;
|
||||||
if ((!isprint(c) && i >= len) || i == sizeof(buf) - 1) {
|
size_t i, bread;
|
||||||
buf[i] = '\0';
|
off_t off;
|
||||||
printf("%8ld: %s\n", (long)offset - i - 1, buf);
|
|
||||||
|
rbuf = emalloc(len * sizeof(*rbuf));
|
||||||
|
|
||||||
|
for (off = 0, i = 0; (bread = efgetrune(&r, fp, fname)); ) {
|
||||||
|
off += bread;
|
||||||
|
if (r == Runeerror)
|
||||||
|
continue;
|
||||||
|
else if (!isprintrune(r)) {
|
||||||
|
if (i > len)
|
||||||
|
putchar('\n');
|
||||||
i = 0;
|
i = 0;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
} while (c != EOF);
|
if (i < len) {
|
||||||
if (ferror(fp))
|
rbuf[i++] = r;
|
||||||
eprintf("%s: read error:", fname);
|
continue;
|
||||||
|
} else if (i > len) {
|
||||||
|
efputrune(&r, stdout, "<stdout>");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
printf(format, (long)off - i);
|
||||||
|
for (i = 0; i < len; i++) {
|
||||||
|
efputrune(rbuf + i, stdout, "<stdout>");
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
free(rbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
eprintf("usage: %s [-a] [-n len] [file ...]\n", argv0);
|
eprintf("usage: %s [-a] [-n num] [-t format] [file ...]\n", argv0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char *argv[])
|
main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
|
size_t len = 4;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
int len = 4;
|
char f;
|
||||||
|
|
||||||
ARGBEGIN {
|
ARGBEGIN {
|
||||||
case 'a':
|
case 'a':
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
len = estrtonum(EARGF(usage()), 1, INT_MAX);
|
len = estrtonum(EARGF(usage()), 1, LLONG_MAX);
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
format = estrdup("%8l#: ");
|
||||||
|
f = *EARGF(usage());
|
||||||
|
if (f == 'd' || f == 'o' || f == 'x')
|
||||||
|
format[3] = f;
|
||||||
|
else
|
||||||
|
usage();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
usage();
|
usage();
|
||||||
|
|
Loading…
Reference in New Issue
Block a user