aboutsummaryrefslogtreecommitdiff
path: root/ch7/7-07_pattern-with-file.c
blob: 46732c4928b2d4fbfa7a1610eba37ff902e2550c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <stdio.h>
#include <string.h>

/* The C Programming Language: 2nd Edition
 *
 * Exercise 7-7: Modify the pattern finding program of Chapter 5 to take its
 * input from a set of named files or, if no files are named as arguments, from
 * the standard input. Should the file name be printed when a matching line is
 * found?
 *
 * Notes: The program in question is on page 117 of the book. Note from various
 * errata documents (found around the Web) that the line:
 *
 * while (c = *++argv[0])
 *
 * is not correct, since it's attempting to modify something that the standard
 * makes no guarantee to be modifiable. So to monkey-patch it, I used the
 * typical 'i is the iterator' variable.
 *
 * The biggest obstacle to this exercise was correcting the original code from
 * page 117.
 *
 * A gotcha that I came across is figuring out how to make use of or save the
 * pattern that the user will be searching for. I could have allocated a
 * character array (string) and stored it directly, but it made more sense to
 * create another character pointer and leverage the storage that argv was
 * already making use of. So I ended up with two pointers to the same data,
 * like hardlinks in a *nix filesystem.
 *
 * The filename that a pattern is found in *should* be present, because it's
 * trivial to match text in multiple files. Generally when one is looking for
 * text, they want to *edit* that text, and editing it is fruitless if you
 * don't have the file name handy. That said, putting it behind an option is a
 * great way to add extensibility to it.
 */

/* Size in bytes of the buffer used to hold one input line, including the
 * terminating '\0'. */
#define MAXLINE 1000

/* Reads one line from f into line, storing at most max - 1 characters plus a
 * terminating '\0'; returns the number of characters stored, which is 0 at
 * end of input. */
int get_line(char *line, int max, FILE *f);

/*
 * search_stream: scan every line of `in` and print the ones selected by
 * `pattern`.
 *
 * in        - open stream to read lines from
 * name      - label printed in front of each selected line when `showfiles`
 * pattern   - substring to look for (plain text, matched with strstr)
 * except    - when non-zero, select the lines that do NOT contain `pattern`
 * number    - when non-zero, prefix each selected line with its line number
 * showfiles - when non-zero, prefix each selected line with `name`
 *
 * Returns the number of lines printed. Lines longer than MAXLINE - 1
 * characters are read (and counted) in MAXLINE - 1 sized pieces.
 */
static int search_stream(FILE *in, const char *name, const char *pattern,
		int except, int number, int showfiles) {
	char line[MAXLINE];
	long lineno = 0;
	int found = 0;
	while (get_line(line, MAXLINE, in) > 0) {
		lineno++;
		if ((strstr(line, pattern) != NULL) != except) {
			if (showfiles) {
				printf("%s:", name);
			}
			if (number) {
				printf("%ld:", lineno);
			}
			printf("%s", line);
			found++;
		}
	}
	return found;
}

/*
 * pwf: print lines matching PATTERN from the named files, or from standard
 * input when no files are named.
 *
 * Options (may be combined, e.g. -nf):
 *   -x  print the lines that do not match
 *   -n  prefix each printed line with its line number
 *   -f  prefix each printed line with the name of its file
 *   -h  print a usage summary
 *
 * Returns the total number of lines printed, 1 when no pattern was given, or
 * -1 on an illegal option. Note that the host environment may truncate the
 * count when it is used as the process exit status.
 */
int main(int argc, char *argv[]) {
	int c, i;
	int except = 0, number = 0, showfiles = 0, help = 0, found = 0;
	const char *pattern;
	if (argc < 2) {
		fprintf(stderr, "pwf: please specify a pattern to search for\n");
		return 1;
	}
	/* Consume leading option arguments. The characters are walked with an
	 * index so that the argv[] pointers themselves are never modified. */
	while (--argc > 0 && (*++argv)[0] == '-') {
		for (i = 1; (c = (*argv)[i]) != '\0'; i++) {
			switch (c) {
				case 'x':
					except = 1;
					break;
				case 'n':
					number = 1;
					break;
				case 'f':
					showfiles = 1;
					break;
				case 'h':
					printf("Usage: pwf [-{xnfh}] PATTERN [file] [file] ...\n");
					help = 1;
					break;
				default:
					fprintf(stderr, "pwf: illegal option %c\n", c);
					return -1;
			}
		}
	}
	/* argc now counts the pattern plus any file names. If it is 0 every
	 * argument was an option and *argv is still the last option, which must
	 * not be mistaken for the pattern. */
	if (argc < 1) {
		if (help) {
			return 0;
		}
		fprintf(stderr, "pwf: please specify a pattern to search for\n");
		return 1;
	}
	pattern = *argv;
	/* No file names given: search standard input. */
	if (argc == 1) {
		return search_stream(stdin, "stdin", pattern, except, number, showfiles);
	}
	/* One pass per named file. A file that cannot be opened is reported and
	 * skipped so that the remaining files are still searched. */
	while (--argc > 0) {
		const char *filename = *++argv;
		FILE *infile = fopen(filename, "r");
		if (infile == NULL) {
			fprintf(stderr, "pwf: file %s could not be opened; skipping it\n", filename);
			continue;
		}
		found += search_stream(infile, filename, pattern, except, number, showfiles);
		fclose(infile);
	}
	return found;
}

/*
 * get_line: read the next line from f into s, keeping the trailing newline.
 *
 * s   - destination buffer with room for at least lim bytes
 * lim - capacity of s; at most lim - 1 characters are stored before the '\0'
 * f   - stream to read from
 *
 * Returns the number of characters stored, not counting the terminating
 * '\0'; this is 0 at end of input. A line longer than lim - 1 characters is
 * delivered in pieces over successive calls. When lim is less than 1 nothing
 * is read from f and nothing is written to s.
 */
int get_line(char *s, int lim, FILE *f) {
	int c = EOF;	/* stays EOF if the loop never reads (lim == 1) */
	int i = 0;
	if (lim < 1) {
		/* No room even for the terminator. */
		return 0;
	}
	while (i < lim - 1 && (c = getc(f)) != EOF && c != '\n') {
		s[i++] = (char)c;
	}
	/* A newline is only ever read while i < lim - 1, so there is still room
	 * for it and for the terminator. */
	if (c == '\n') {
		s[i++] = (char)c;
	}
	s[i] = '\0';
	return i;
}