sbase/cmp.c

/* See LICENSE file for copyright and license details. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"

/*
 * Emit the command-line synopsis through enprintf() with status 2.
 * NOTE(review): main() continues as if this call never returns, so
 * enprintf() presumably terminates the process with that status --
 * confirm against util.h.
 */
static void
usage(void)
{
	const int status = 2;

	enprintf(status, "usage: %s [-l | -s] file1 file2\n", argv0);
}

/*
 * cmp: compare two files byte by byte.
 *
 * Options:
 *   -l  print "<byte number> <octal> <octal>" for every differing byte
 *       instead of stopping at the first difference
 *   -s  print nothing; only the exit status tells the result
 * Giving both -l and -s, or not exactly two operands, ends in usage().
 * An operand of "-" selects standard input.
 *
 * Returns 0 when the inputs are identical, 1 when they differ and 2 when
 * a file cannot be opened or fshut() reports a failure.
 */
int
main(int argc, char *argv[])
{
	FILE *in[2];
	size_t idx, byte, lineno = 1;
	int lflag = 0, sflag = 0, differ = 0, failed, status, ch[2];

	ARGBEGIN {
	case 'l':
		lflag = 1;
		break;
	case 's':
		sflag = 1;
		break;
	default:
		usage();
	} ARGEND

	if ((lflag && sflag) || argc != 2)
		usage();

	/* Resolve both operands to streams; "-" stands for stdin. */
	for (idx = 0; idx < 2; idx++) {
		if (strcmp(argv[idx], "-") == 0) {
			/* Rename the operand so later messages show a readable name. */
			argv[idx] = "<stdin>";
			in[idx] = stdin;
			continue;
		}

		in[idx] = fopen(argv[idx], "r");
		if (in[idx])
			continue;

		if (!sflag)
			weprintf("fopen %s:", argv[idx]);
		return 2;
	}

	/* byte is the 1-based offset of the byte pair being compared. */
	for (byte = 1; ; byte++) {
		ch[0] = getc(in[0]);
		ch[1] = getc(in[1]);

		if (ch[0] != ch[1]) {
			differ = 1;

			/*
			 * Exactly one side ran dry: report the shorter input.
			 * NOTE(review): getc() also yields EOF on a read error;
			 * the two cases are not told apart here.
			 */
			if (ch[0] == EOF || ch[1] == EOF) {
				if (!sflag)
					weprintf("cmp: EOF on %s\n", argv[ch[0] == EOF ? 0 : 1]);
				break;
			}

			/* -l lists every mismatch and keeps scanning. */
			if (lflag) {
				printf("%zu %o %o\n", byte, ch[0], ch[1]);
				continue;
			}

			/* Default mode stops at the first mismatch. */
			if (!sflag)
				printf("%s %s differ: byte %zu, line %zu\n",
				       argv[0], argv[1], byte, lineno);
			break;
		}

		/* Equal values: both at EOF means the comparison is complete. */
		if (ch[0] == EOF)
			break;
		if (ch[0] == '\n')
			lineno++;
	}

	status = differ;

	/*
	 * Run fshut() on every stream even when an earlier call failed, so
	 * no stream is skipped; the second input is left out when both
	 * operands refer to the same stream. Any failure forces status 2.
	 */
	failed = fshut(in[0], argv[0]);
	if (in[0] != in[1])
		failed |= fshut(in[1], argv[1]);
	failed |= fshut(stdout, "<stdout>");
	if (failed)
		status = 2;

	return status;
}
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`/* See LICENSE file for copyright and license details. */`
			`#include <stdio.h>`
			`#include <stdlib.h>`
Remove handrolled strcmp()'s Favor readability over bare-metal. 2015-05-19 15:44:15 +00:00			`#include <string.h>`
Fix coding style It was about damn time. Consistency is very important in such a big codebase. 2014-11-13 17:29:30 +00:00
update cmp, grep 2011-06-18 05:42:24 +00:00			`#include "util.h"`

Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`static void`
			`usage(void)`
			`{`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`enprintf(2, "usage: %s [-l \| -s] file1 file2\n", argv0);`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`}`

add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`int`
			`main(int argc, char *argv[])`
			`{`
			`FILE *fp[2];`
Reuse 'n' in cmp(1) Factors out one more local variable. 2015-02-07 20:13:54 +00:00			`size_t line = 1, n;`
Simplify return & fshut() logic Get rid of the !!()-constructs and use ret where available (or introduce it). In some cases, there would be an "abort" on the first fshut-error, but we want to close all files and report all warnings and then quit, not just the warning for the first file. 2015-05-24 23:33:19 +00:00			`int ret = 0, lflag = 0, sflag = 0, same = 1, b[2];`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`ARGBEGIN {`
			`case 'l':`
Un-boolify sbase It actually makes the binaries smaller, the code easier to read (gems like "val == true", "val == false" are gone) and actually predictable in the sense of that we actually know what we're working with (one bitwise operator was quite adventurous and should now be fixed). This is also more consistent with the other suckless projects around which don't use boolean types. 2014-11-13 20:24:47 +00:00			`lflag = 1;`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`break;`
			`case 's':`
Un-boolify sbase It actually makes the binaries smaller, the code easier to read (gems like "val == true", "val == false" are gone) and actually predictable in the sense of that we actually know what we're working with (one bitwise operator was quite adventurous and should now be fixed). This is also more consistent with the other suckless projects around which don't use boolean types. 2014-11-13 20:24:47 +00:00			`sflag = 1;`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`break;`
			`default:`
			`usage();`
No need for semicolon after ARGEND This is also the style used in Plan 9. 2015-11-01 10:16:49 +00:00			`} ARGEND`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00
Print usage() when -l and -s are both given in cmp(1) This is unspecified behaviour, not undefined by POSIX. So we break out here. 2015-03-11 12:31:08 +00:00			`if (argc != 2 \|\| (lflag && sflag))`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`usage();`

Reuse 'n' in cmp(1) Factors out one more local variable. 2015-02-07 20:13:54 +00:00			`for (n = 0; n < 2; n++) {`
Remove handrolled strcmp()'s Favor readability over bare-metal. 2015-05-19 15:44:15 +00:00			`if (!strcmp(argv[n], "-")) {`
Handle stdin properly in cmp(1) and remove path comparison Same paths are undefined behaviour, let's not endorse it. 2015-02-07 20:36:36 +00:00			`argv[n] = "<stdin>";`
			`fp[n] = stdin;`
			`} else {`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`if (!(fp[n] = fopen(argv[n], "r"))) {`
Handle stdin properly in cmp(1) and remove path comparison Same paths are undefined behaviour, let's not endorse it. 2015-02-07 20:36:36 +00:00			`if (!sflag)`
			`weprintf("fopen %s:", argv[n]);`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`return 2;`
Handle stdin properly in cmp(1) and remove path comparison Same paths are undefined behaviour, let's not endorse it. 2015-02-07 20:36:36 +00:00			`}`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00			`}`
cmp: fix missing braces just a style fix, this doesn't change any behaviour since fp[1] is set to stdin above. Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org> 2014-04-01 14:55:37 +00:00			`}`
We should not require both files to be present for cmp(1) If the second file is not present, read from standard input. 2014-03-04 10:33:51 +00:00
Fix coding style It was about damn time. Consistency is very important in such a big codebase. 2014-11-13 17:29:30 +00:00			`for (n = 1; ; n++) {`
cmp: make EOF check more clear Signed-off-by: Hiltjo Posthuma <hiltjo@codemadness.org> 2014-04-01 13:06:42 +00:00			`b[0] = getc(fp[0]);`
			`b[1] = getc(fp[1]);`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00
			`if (b[0] == b[1]) {`
			`if (b[0] == EOF)`
			`break;`
			`else if (b[0] == '\n')`
			`line++;`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`continue;`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`} else if (b[0] == EOF \|\| b[1] == EOF) {`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00			`if (!sflag)`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`weprintf("cmp: EOF on %s\n", argv[(b[0] != EOF)]);`
			`same = 0;`
			`break;`
			`} else if (!lflag) {`
Fix coding style It was about damn time. Consistency is very important in such a big codebase. 2014-11-13 17:29:30 +00:00			`if (!sflag)`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`printf("%s %s differ: byte %zu, line %zu\n",`
Refactor cmp(1) code and manpage The algorithm had some areas which had potential for improvement. This should make cmp(1) faster. There have been changes to behaviour as well: 1) If argv[0] and argv[1] are the same, cmp(1) returns Same. 2) POSIX specifies the format of the difference-message to be: "%s %s differ: char %d, line %d\n", file1, file2, <byte number>, <line number> However, as cmp(1) operates on bytes, not characters, I changed it to "%s %s differ: byte %d, line %d\n", file1, file2, <byte number>, <line number> This is one example where the standard just keeps the old format for backwards-compatibility. As this is harmful, this change makes sense in the sense of consistentcy (and because we take the difference of char and byte very seriously in sbase, as opposed to GNU coreutils). The manpage has been annotated, reflecting the second change, and sections shortened where possible. Thus I marked cmp(1) as finished in README. 2015-02-07 20:05:33 +00:00			`argv[0], argv[1], n, line);`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`same = 0;`
			`break;`
Eliminating the getopt disgrace. 2013-06-14 18:20:47 +00:00			`} else {`
Audit cmp(1) 1) Remove the return-value-enum, which is not necessary for a simple program like this. 2) Don't disallow both l and s to be specified. This is undefined behaviour defined by POSIX, so we don't start demanding things from the user. 3) Replace exit() with return (we are in main). 4) Refactor main loop to never return in the loop, but actually set the same-value and break, which increases readability. 5) Remove the final fclose()'s. The OS will take care of them, no need to become cleansy here. 6) Use idiomatic return-value using same. This concludes the increase of readability in the main-loop. 2015-03-11 10:16:40 +00:00			`printf("%zu %o %o\n", n, b[0], b[1]);`
Un-boolify sbase It actually makes the binaries smaller, the code easier to read (gems like "val == true", "val == false" are gone) and actually predictable in the sense of that we actually know what we're working with (one bitwise operator was quite adventurous and should now be fixed). This is also more consistent with the other suckless projects around which don't use boolean types. 2014-11-13 20:24:47 +00:00			`same = 0;`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`}`
			`}`
Handle stdin properly in cmp(1) and remove path comparison Same paths are undefined behaviour, let's not endorse it. 2015-02-07 20:36:36 +00:00
Simplify return & fshut() logic Get rid of the !!()-constructs and use ret where available (or introduce it). In some cases, there would be an "abort" on the first fshut-error, but we want to close all files and report all warnings and then quit, not just the warning for the first file. 2015-05-24 23:33:19 +00:00			`if (!ret)`
			`ret = !same;`
			`if (fshut(fp[0], argv[0]) \| (fp[0] != fp[1] && fshut(fp[1], argv[1])) \|`
			`fshut(stdout, "<stdout>"))`
			`ret = 2;`

			`return ret;`
add cmp, thanks stateless 2011-06-10 00:55:12 +00:00			`}`