sbase/cmp.c

/* See LICENSE file for copyright and license details. */
#include <stdio.h>
#include <stdlib.h>

#include "util.h"

/*
 * Process exit statuses used by this program: Same (0) when no difference
 * was found, Diff (1) when the inputs differ, and Error (2) for usage
 * mistakes and I/O failures such as a file that cannot be opened.
 */
enum { Same = 0, Diff = 1, Error = 2 };

/*
 * Report the command synopsis through enprintf() together with the Error
 * status. argv0 supplies the program name.
 * NOTE(review): enprintf() comes from util.h and presumably terminates the
 * process with the given status; callers below rely on that -- confirm.
 */
static void
usage(void)
{
	enprintf(Error, "usage: %s [-l | -s] file1 file2\n", argv0);
}

/*
 * Compare two files byte by byte.
 *
 * Options:
 *   -l  for every differing byte print its byte number (decimal) and the
 *       two byte values (octal) instead of stopping at the first one
 *   -s  print nothing; the result is reported by the exit status only
 * The two options are mutually exclusive. An operand of "-" selects
 * standard input.
 *
 * Returns Same when no difference was found and Diff otherwise. Exits
 * with Error when a file cannot be opened or read; usage() is called for
 * invalid arguments.
 */
int
main(int argc, char *argv[])
{
	FILE *fp[2];
	size_t line = 1, n;
	int lflag = 0, sflag = 0, same = 1, b[2], i;

	ARGBEGIN {
	case 'l':
		lflag = 1;
		break;
	case 's':
		sflag = 1;
		break;
	default:
		usage();
	} ARGEND;

	if (argc != 2 || (lflag && sflag))
		usage();

	/* Open both operands; "-" maps to stdin and is renamed for messages. */
	for (n = 0; n < 2; n++) {
		if (argv[n][0] == '-' && !argv[n][1]) {
			argv[n] = "<stdin>";
			fp[n] = stdin;
		} else {
			fp[n] = fopen(argv[n], "r");
			if (!fp[n]) {
				if (!sflag)
					weprintf("fopen %s:", argv[n]);
				exit(Error);
			}
		}
	}

	/* n is reused as the 1-based number of the byte being compared. */
	for (n = 1; ; n++) {
		b[0] = getc(fp[0]);
		b[1] = getc(fp[1]);

		/*
		 * getc() returns EOF for read errors as well as for end of
		 * file. Tell the two apart so that an I/O failure is not
		 * reported as a short or identical file. Diagnostics follow
		 * the fopen failure path above and are suppressed by -s.
		 */
		if (b[0] == EOF || b[1] == EOF) {
			for (i = 0; i < 2; i++) {
				if (b[i] == EOF && ferror(fp[i])) {
					if (!sflag)
						weprintf("read %s:", argv[i]);
					exit(Error);
				}
			}
		}

		if (b[0] == b[1]) {
			if (b[0] == EOF)
				break;
			else if (b[0] == '\n')
				line++;
			continue;
		}
		/* One input ended before the other: name the shorter one. */
		if (b[0] == EOF || b[1] == EOF) {
			if (!sflag)
				fprintf(stderr, "cmp: EOF on %s\n",
				        argv[(b[0] == EOF) ? 0 : 1]);
			exit(Diff);
		}
		if (!lflag) {
			/* n and line are size_t, hence %zu. */
			if (!sflag)
				printf("%s %s differ: byte %zu, line %zu\n",
				       argv[0], argv[1], n, line);
			exit(Diff);
		} else {
			printf("%zu %o %o\n", n, b[0], b[1]);
			same = 0;
		}
	}
	fclose(fp[0]);
	/* Both operands may be "-"; never close the same stream twice. */
	if (fp[1] != fp[0])
		fclose(fp[1]);

	return same ? Same : Diff;
}
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`/* See LICENSE file for copyright and license details. */`
			`#include <stdio.h>`
			`#include <stdlib.h>`
Fix coding style It was about damn time. Consistency is very important in such a big codebase. 2014-11-13 17:29:30 +00:00
update cmp, grep 2011-06-18 05:42:24 +00:00			`#include "util.h"`

			`enum { Same = 0, Diff = 1, Error = 2 };`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`static void`
			`usage(void)`
			`{`
Update cmp(1) manpage to new style and do not allow both -l and -s to be set 2014-11-21 12:03:27 +00:00			`enprintf(Error, "usage: %s [-l \| -s] file1 file2\n", argv0);`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`}`

add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`int`
			`main(int argc, char *argv[])`
			`{`
			`FILE *fp[2];`
Reuse 'n' in cmp(1) Factors out one more local variable. 2015-02-07 20:13:54 +00:00			`size_t line = 1, n;`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00			`int lflag = 0, sflag = 0, same = 1, b[2];`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`ARGBEGIN {`
			`case 'l':`
Un-boolify sbase It actually makes the binaries smaller, the code easier to read (gems like "val == true", "val == false" are gone) and actually predictable in the sense of that we actually know what we're working with (one bitwise operator was quite adventurous and should now be fixed). This is also more consistent with the other suckless projects around which don't use boolean types. 2014-11-13 20:24:47 +00:00			`lflag = 1;`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`break;`
			`case 's':`
Un-boolify sbase It actually makes the binaries smaller, the code easier to read (gems like "val == true", "val == false" are gone) and actually predictable in the sense of that we actually know what we're working with (one bitwise operator was quite adventurous and should now be fixed). This is also more consistent with the other suckless projects around which don't use boolean types. 2014-11-13 20:24:47 +00:00			`sflag = 1;`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`break;`
			`default:`
			`usage();`
			`} ARGEND;`

Update cmp(1) manpage to new style and do not allow both -l and -s to be set 2014-11-21 12:03:27 +00:00			`if (argc != 2 \|\| (lflag && sflag))`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`usage();`

Reuse 'n' in cmp(1) Factors out one more local variable. 2015-02-07 20:13:54 +00:00			`for (n = 0; n < 2; n++) {`
Handle stdin properly in cmp(1) and remove path comparison Same paths are undefined behaviour, let's not endorse it. 2015-02-07 20:36:36 +00:00			`if (argv[n][0] == '-' && !argv[n][1]) {`
			`argv[n] = "<stdin>";`
			`fp[n] = stdin;`
			`} else {`
			`fp[n] = fopen(argv[n], "r");`
			`if (!fp[n]) {`
			`if (!sflag)`
			`weprintf("fopen %s:", argv[n]);`
			`exit(Error);`
			`}`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00			`}`
cmp: fix missing braces just a style fix, this doesn't change any behaviour since fp[1] is set to stdin above. Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org> 2014-04-01 14:55:37 +00:00			`}`
We should not require both files to be present for cmp(1) If the second file is not present, read from standard input. 2014-03-04 10:33:51 +00:00
Fix coding style It was about damn time. Consistency is very important in such a big codebase. 2014-11-13 17:29:30 +00:00			`for (n = 1; ; n++) {`
cmp: make EOF check more clear Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org> 2014-04-01 13:06:42 +00:00			`b[0] = getc(fp[0]);`
			`b[1] = getc(fp[1]);`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00
			`if (b[0] == b[1]) {`
			`if (b[0] == EOF)`
			`break;`
			`else if (b[0] == '\n')`
			`line++;`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`continue;`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00			`}`
			`if (b[0] == EOF \|\| b[1] == EOF) {`
			`if (!sflag)`
			`fprintf(stderr, "cmp: EOF on %s\n",`
			`argv[(b[0] == EOF) ? 0 : 1]);`
			`exit(Diff);`
cmp: don't output EOF difference if sflag is set Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org> 2014-04-01 13:14:21 +00:00			`}`
Fix coding style It was about damn time. Consistency is very important in such a big codebase. 2014-11-13 17:29:30 +00:00			`if (!lflag) {`
			`if (!sflag)`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00			`printf("%s %s differ: byte %ld, line %ld\n",`
			`argv[0], argv[1], n, line);`
update cmp, grep 2011-06-18 05:42:24 +00:00			`exit(Diff);`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`} else {`
cmp(1) is POSIX compliant so mention it in the manpage Update format specifiers for -l as specified by POSIX. 2014-11-21 12:31:23 +00:00			`printf("%ld %o %o\n", n, b[0], b[1]);`
Un-boolify sbase It actually makes the binaries smaller, the code easier to read (gems like "val == true", "val == false" are gone) and actually predictable in the sense of that we actually know what we're working with (one bitwise operator was quite adventurous and should now be fixed). This is also more consistent with the other suckless projects around which don't use boolean types. 2014-11-13 20:24:47 +00:00			`same = 0;`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`}`
			`}`
No need to check for stdin on fclose() as we are not trying to read from it afterwards. 2015-02-07 20:42:41 +00:00			`fclose(fp[0]);`
			`fclose(fp[1]);`
Handle stdin properly in cmp(1) and remove path comparison Same paths are undefined behaviour, let's not endorse it. 2015-02-07 20:36:36 +00:00
update cmp, grep 2011-06-18 05:42:24 +00:00			`return same ? Same : Diff;`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`}`