/*
 * Provenance (recovered from the patch header this file was extracted from):
 *
 *   Commit:  0947afc6a1338a0408c8d6fe12cda08f87cca2bb
 *   Author:  zlg
 *   Date:    Mon, 21 Nov 2016 00:07:37 -0800
 *   Subject: Solve Exercise 7-7: Match pattern in files
 *
 *       This one was tricky, but the solution was rather simple.
 *
 *   File:    ch7/7-07_pattern-with-file.c
 *   Source:  cgit v1.2.3-54-g00ecf patch view
 */

#include <stdio.h>
#include <string.h>

/* The C Programming Language: 2nd Edition
 *
 * Exercise 7-7: Modify the pattern finding program of Chapter 5 to take its
 * input from a set of named files or, if no files are named as arguments, from
 * the standard input. Should the file name be printed when a matching line is
 * found?
 *
 * Notes: The program in question is on page 117 of the book. Note from various
 * errata documents (found around the Web) that the line:
 *
 *     while (c = *++argv[0])
 *
 * is not correct, since it's attempting to modify something that the standard
 * makes no guarantee to be modifiable. So to monkey-patch it, I used the
 * typical 'i is the iterator' variable.
 *
 * The biggest obstacle to this exercise was correcting the original code from
 * page 117.
 *
 * A gotcha that I came across is figuring out how to make use of or save the
 * pattern that the user will be searching for. I could have allocated a
 * character array (string) and stored it directly, but it made more sense to
 * create another character pointer and leverage the storage that argv was
 * already making use of. So I ended up with two pointers to the same data,
 * like hardlinks in a *nix filesystem.
 *
 * The filename that a pattern is found in *should* be present, because it's
 * trivial to match text in multiple files. Generally when one is looking for
 * text, they want to *edit* that text, and editing it is fruitless if you
 * don't have the file name handy. That said, putting it behind an option is a
 * great way to add extensibility to it.
 *
 * Known limitations:
 *   - A pattern that begins with '-' is parsed as an option cluster.
 *   - Lines longer than MAXLINE - 1 characters are processed in chunks; each
 *     chunk is matched and counted as if it were a separate line.
 */

#define MAXLINE 1000

int get_line(char *s, int lim, FILE *f);

/*
 * Scan one already-open stream and print every line selected by the pattern.
 *
 * stream    - stream to read lines from (not closed here)
 * name      - label printed before a selected line when showfiles is set
 * pattern   - plain substring to look for (strstr semantics, no regex)
 * except    - non-zero inverts the selection: print lines WITHOUT the pattern
 * number    - non-zero prefixes each printed line with its 1-based line number
 * showfiles - non-zero prefixes each printed line with "name:"
 *
 * Returns the number of lines printed.
 */
static int search_stream(FILE *stream, const char *name, const char *pattern,
                         int except, int number, int showfiles) {
	char line[MAXLINE];
	long lineno = 0;
	int matches = 0;

	while (get_line(line, MAXLINE, stream) > 0) {
		lineno++;
		/* (found?) != except: select matches normally, non-matches with -x */
		if ((strstr(line, pattern) != NULL) != except) {
			if (showfiles) {
				printf("%s:", name);
			}
			if (number) {
				printf("%ld:", lineno);
			}
			printf("%s", line);
			matches++;
		}
	}
	return matches;
}

/*
 * pwf [-{xnfh}] PATTERN [file] [file] ...
 *
 *   -x  print lines that do NOT contain PATTERN
 *   -n  prefix printed lines with their line number
 *   -f  prefix printed lines with the name of the file they came from
 *   -h  print a usage summary
 *
 * With no file arguments, standard input is searched. A file that cannot be
 * opened is reported on stderr and standard input is searched in its place;
 * the remaining file arguments are still processed afterwards.
 *
 * Returns the total number of lines printed, 1 when no pattern was supplied,
 * or -1 when an illegal option was seen.
 */
int main(int argc, char *argv[]) {
	int c, i;
	int except = 0, number = 0, showfiles = 0, help = 0, found = 0;
	const char *pattern = NULL;
	const char *filename = NULL;
	FILE *infile = NULL;

	if (argc < 2) {
		fprintf(stderr, "pwf: please specify a pattern to search for\n");
		return 1;
	}

	/* Consume leading option clusters such as "-xn". The characters are
	 * walked with an index so that argv's own pointers are never modified. */
	while (--argc > 0 && (*++argv)[0] == '-') {
		for (i = 1; (c = (*argv)[i]) != '\0'; i++) {
			switch (c) {
			case 'x':
				except = 1;
				break;
			case 'n':
				number = 1;
				break;
			case 'f':
				showfiles = 1;
				break;
			case 'h':
				help = 1;
				printf("Usage: pwf [-{xnfh}] PATTERN [file] [file] ...\n");
				break;
			default:
				fprintf(stderr, "pwf: illegal option %c\n", c);
				argc = 0;   /* stops the outer loop after this cluster */
				found = -1; /* becomes the exit status */
				break;
			}
		}
	}

	/* argc < 1 here means every argument was an option (or parsing was
	 * aborted), so *argv is not a pattern and there is nothing to search. */
	if (argc < 1) {
		if (found == 0 && !help) {
			fprintf(stderr, "pwf: please specify a pattern to search for\n");
			return 1;
		}
		return found;
	}

	/* The first argument _after_ options is the pattern. It is only borrowed
	 * from argv; no copy is made. */
	pattern = *argv;

	/* No file names given: search standard input once. */
	if (argc == 1) {
		return search_stream(stdin, "stdin", pattern,
		                     except, number, showfiles);
	}

	/* One pass per named file. Each file gets its own fopen/fclose so that a
	 * failure on one name cannot affect how the following names are handled. */
	while (--argc > 0) {
		filename = *++argv;
		infile = fopen(filename, "r");
		if (infile == NULL) {
			fprintf(stderr,
			        "File %s could not be opened; falling back to stdin...\n",
			        filename);
			found += search_stream(stdin, "stdin", pattern,
			                       except, number, showfiles);
			continue;
		}
		found += search_stream(infile, filename, pattern,
		                       except, number, showfiles);
		fclose(infile);
	}
	return found;
}

/*
 * Read at most lim - 1 characters of one line from f into s.
 *
 * Reading stops after a newline (which is stored), at end of file, or when
 * the buffer is full. s is always NUL-terminated when lim >= 1.
 *
 * s   - destination buffer with room for at least lim characters
 * lim - capacity of s, including the terminating NUL
 * f   - stream to read from
 *
 * Returns the number of characters stored, not counting the NUL; 0 means end
 * of file (or no usable buffer space).
 */
int get_line(char *s, int lim, FILE *f) {
	int c = EOF; /* initialised so the newline test below is defined even
	              * when the loop body never runs (lim == 1) */
	int i = 0;

	if (lim <= 0) {
		return 0; /* no room even for the terminator */
	}
	while (i < lim - 1 && (c = getc(f)) != EOF && c != '\n') {
		s[i++] = (char)c;
	}
	if (c == '\n') {
		s[i++] = (char)c;
	}
	s[i] = '\0';
	return i;
}