Refactor cmp(1) code and manpage

Several parts of the algorithm had potential for improvement.
The refactored version should make cmp(1) faster.
There have been changes to behaviour as well:

1) If argv[0] and argv[1] are the same, cmp(1) returns Same.
2) POSIX specifies the format of the difference-message to be:
      "%s %s differ: char %d, line %d\n", file1, file2,
      <byte number>, <line number>
   However, as cmp(1) operates on bytes, not characters, I changed
   it to
      "%s %s differ: byte %d, line %d\n", file1, file2,
      <byte number>, <line number>
   This is one example where the standard just keeps the old format
   for backwards-compatibility. As this is harmful, this change
   makes sense for the sake of consistency (and because we take
   the difference of char and byte very seriously in sbase, as
   opposed to GNU coreutils).

The manpage has been annotated, reflecting the second change, and
sections shortened where possible.
Thus I marked cmp(1) as finished in README.
This commit is contained in:
FRIGN 2015-02-07 21:05:33 +01:00
parent d5d686e9f6
commit 79e45395e5
3 changed files with 48 additions and 43 deletions

2
README
View File

@ -17,7 +17,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
= chown no -h, -H, -L, -P = chown no -h, -H, -L, -P
=* chroot non-posix none =* chroot non-posix none
=* cksum yes none =* cksum yes none
cmp yes none =* cmp yes none
#* cols non-posix none #* cols non-posix none
=* comm yes none =* comm yes none
= cp no -H, (-i), -L = cp no -H, (-i), -L

27
cmp.1
View File

@ -1,4 +1,4 @@
.Dd November 21, 2014 .Dd February 7, 2015
.Dt CMP 1 .Dt CMP 1
.Os sbase .Os sbase
.Sh NAME .Sh NAME
@ -10,23 +10,32 @@
.Ar file1 file2 .Ar file1 file2
.Sh DESCRIPTION .Sh DESCRIPTION
.Nm .Nm
compares two files byte by byte. If the files differ, compares
.Ar file1
and
.Ar file2
byte by byte. If they differ,
.Nm .Nm
prints the byte and writes the first differing byte- and line-number to stdout.
line number at which the difference occurred.
.Sh OPTIONS .Sh OPTIONS
.Bl -tag -width Ds .Bl -tag -width Ds
.It Fl l .It Fl l
Print the byte number, and the differing bytes (in octal), for each difference. Print byte-number and bytes (in octal) for each difference.
.It Fl s .It Fl s
Print nothing, only returns status. Print nothing and only return status.
.El .El
.Sh EXIT STATUS .Sh EXIT STATUS
.Bl -tag -width Ds .Bl -tag -width Ds
.It 0 .It 0
The files are identical. .Ar file1
and
.Ar file2
are identical.
.It 1 .It 1
The files are different. .Ar file1
and
.Ar file2
are different.
.It > 1 .It > 1
An error occurred. An error occurred.
.El .El
@ -39,3 +48,5 @@ The
utility is compliant with the utility is compliant with the
.St -p1003.1-2008 .St -p1003.1-2008
specification. specification.
.Pp
The "char" in the default result format has been replaced with "byte".

62
cmp.c
View File

@ -1,6 +1,7 @@
/* See LICENSE file for copyright and license details. */ /* See LICENSE file for copyright and license details. */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <unistd.h> #include <unistd.h>
#include "util.h" #include "util.h"
@ -16,12 +17,9 @@ usage(void)
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
int lflag = 0;
int sflag = 0;
int same = 1;
int b[2], i;
long line = 1, n = 1;
FILE *fp[2]; FILE *fp[2];
size_t i, line = 1, n = 1;
int lflag = 0, sflag = 0, same = 1, b[2];
ARGBEGIN { ARGBEGIN {
case 'l': case 'l':
@ -37,45 +35,41 @@ main(int argc, char *argv[])
if (argc != 2 || (lflag && sflag)) if (argc != 2 || (lflag && sflag))
usage(); usage();
if (argv[0][0] == '-' && !argv[0][1]) if (!strcmp(argv[0], argv[1]))
argv[0] = "/dev/fd/0"; return Same;
fp[0] = fopen(argv[0], "r");
if (!fp[0]) {
if (!sflag)
weprintf("fopen %s:", argv[0]);
exit(Error);
}
if (argv[1][0] == '-' && !argv[1][1]) for (i = 0; i < 2; i++) {
argv[1] = "/dev/fd/0"; if (argv[i][0] == '-' && !argv[i][1])
fp[1] = fopen(argv[1], "r"); argv[i] = "/dev/fd/0";
if (!fp[1]) { fp[i] = fopen(argv[i], "r");
if (!sflag) if (!fp[i]) {
weprintf("fopen %s:", argv[1]); if (!sflag)
exit(Error); weprintf("fopen %s:", argv[i]);
exit(Error);
}
} }
for (n = 1; ; n++) { for (n = 1; ; n++) {
b[0] = getc(fp[0]); b[0] = getc(fp[0]);
b[1] = getc(fp[1]); b[1] = getc(fp[1]);
if (b[0] == EOF && b[1] == EOF)
break; if (b[0] == b[1]) {
if (b[0] == '\n' && b[1] == '\n') if (b[0] == EOF)
line++; break;
if (b[0] == b[1]) else if (b[0] == '\n')
line++;
continue; continue;
for (i = 0; i < 2; i++) { }
if (b[i] == EOF) { if (b[0] == EOF || b[1] == EOF) {
if (!sflag) if (!sflag)
fprintf(stderr, "cmp: EOF on %s\n", fprintf(stderr, "cmp: EOF on %s\n",
!argv[i] ? "<stdin>" : argv[i]); argv[(b[0] == EOF) ? 0 : 1]);
exit(Diff); exit(Diff);
}
} }
if (!lflag) { if (!lflag) {
if (!sflag) if (!sflag)
printf("%s %s differ: char %ld, line %ld\n", printf("%s %s differ: byte %ld, line %ld\n",
argv[0], !argv[1] ? "<stdin>" : argv[1], n, line); argv[0], argv[1], n, line);
exit(Diff); exit(Diff);
} else { } else {
printf("%ld %o %o\n", n, b[0], b[1]); printf("%ld %o %o\n", n, b[0], b[1]);