/*
 * ch6/6-01_getword-enhanced.c
 *
 * Origin: commit e458edffa1e1cb3a7aa2c695e833e717bbd4efe4 by zlg,
 * Tue, 3 Mar 2015 03:35:37 -0800 -- "Solve Exercise 6-01: Enhanced `getword`".
 * This exercise improved my understanding of identifying comments and quotes
 * by focusing on edge cases.
 */

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* The C Programming Language: 2nd Edition
 *
 * Exercise 6-1: Our version of `getword` does not properly handle underscores,
 * string constants, comments, or preprocessor control lines. Write a better
 * version.
 *
 * Notes: Preprocessor lines and one-line comments are treated the same way, by
 * ignoring everything after their beginning. Underscores need to be treated
 * like part of the word, otherwise identifiers such as 'if_else' would be
 * counted as keywords. Anything inside quotes and multi-line comments needs to
 * be ignored; `getword` consumes each such construct in full before it
 * returns, so no state has to be carried from one call to the next.
 */

#define MAXWORD 100
#define BUFSIZE 1000

/* Keyword table. binsearch() requires the entries to stay in strcmp() order. */
struct key {
    const char *word;
    int count;
} keytab[] = {
    { "auto", 0 },
    { "break", 0 },
    { "case", 0 },
    { "char", 0 },
    { "const", 0 },
    { "continue", 0 },
    { "default", 0 },
    { "do", 0 },
    { "double", 0 },
    { "else", 0 },
    { "enum", 0 },
    { "extern", 0 },
    { "float", 0 },
    { "for", 0 },
    { "goto", 0 },
    { "if", 0 },
    { "int", 0 },
    { "long", 0 },
    { "register", 0 },
    { "return", 0 },
    { "short", 0 },
    { "signed", 0 },
    { "sizeof", 0 },
    { "static", 0 },
    { "struct", 0 },
    { "switch", 0 },
    { "typedef", 0 },
    { "union", 0 },
    { "unsigned", 0 },
    { "void", 0 },
    { "volatile", 0 },
    { "while", 0 }
};

#define NKEYS (sizeof keytab / sizeof keytab[0])

/*
 * Push-back stack shared by getch() and ungetch(). The elements are int, not
 * char, so that EOF and bytes >= 0x80 come back out with the same value that
 * was pushed, whatever the signedness of plain char.
 */
int buf[BUFSIZE];
int bufp = 0;

int getch(void);
void ungetch(int c);
int getword(char *word, int lim);
int binsearch(const char *word, const struct key *tab, int n);

/*
 * Count the C keywords found on standard input and print every keyword that
 * occurred at least once, together with its count.
 */
int main(void)
{
    char word[MAXWORD];
    size_t i;

    while (getword(word, MAXWORD) != EOF) {
        /* The cast keeps the <ctype.h> argument in the valid range. */
        if (isalpha((unsigned char)word[0])) {
            int n = binsearch(word, keytab, (int)NKEYS);

            if (n >= 0) {
                keytab[n].count++;
            }
        }
    }
    for (i = 0; i < NKEYS; i++) {
        if (keytab[i].count > 0) {
            printf("%4d %s\n", keytab[i].count, keytab[i].word);
        }
    }
    return 0;
}

/* Return the most recently pushed-back character, or read one from stdin. */
int getch(void)
{
    return (bufp > 0) ? buf[--bufp] : getchar();
}

/* Push c back so that the next getch() returns it. */
void ungetch(int c)
{
    if (bufp >= BUFSIZE) {
        /* Diagnostic goes to stderr so it cannot mix with the report. */
        fprintf(stderr, "ungetch: Too many characters.\n");
    } else {
        buf[bufp++] = c;
    }
}

/*
 * Binary search for word in tab[0..n-1], which must be sorted by strcmp().
 * Returns the index of the matching entry, or -1 when there is none.
 */
int binsearch(const char *word, const struct key tab[], int n)
{
    int low = 0;
    int high = n - 1;

    while (low <= high) {
        int mid = low + (high - low) / 2;
        int cond = strcmp(word, tab[mid].word);

        if (cond < 0) {
            high = mid - 1;
        } else if (cond > 0) {
            low = mid + 1;
        } else {
            return mid;
        }
    }
    return -1;
}

/* Discard the rest of a block comment; the opening slash-star is already read. */
static void skip_block_comment(void)
{
    int c;
    int prev = 0;

    while ((c = getch()) != EOF) {
        if (prev == '*' && c == '/') {
            return;
        }
        prev = c;
    }
}

/*
 * Discard the rest of a one-line comment, including its newline. A newline
 * directly preceded by a backslash does not end the comment.
 */
static void skip_line_comment(void)
{
    int c;
    int prev = 0;

    while ((c = getch()) != EOF) {
        if (c == '\n' && prev != '\\') {
            return;
        }
        prev = c;
    }
}

/*
 * Discard a string literal or character constant whose opening quote has
 * already been read. A backslash hides the character after it, so an escaped
 * quote does not end the literal. An unescaped newline means the literal was
 * never closed; it is pushed back so the caller still sees the line end.
 */
static void skip_quoted(int quote)
{
    int c;

    while ((c = getch()) != EOF && c != quote) {
        if (c == '\n') {
            ungetch(c);
            return;
        }
        if (c == '\\' && getch() == EOF) {
            return;
        }
    }
}

/*
 * Discard a preprocessor line whose '#' has already been read. Backslash
 * continuations, quoted text and comments are recognised so that the whole
 * directive is consumed, even when a block comment inside it spans lines.
 */
static void skip_directive(void)
{
    int c;

    while ((c = getch()) != EOF && c != '\n') {
        if (c == '\\') {
            /* Swallow the next character; a newline here continues the line. */
            if (getch() == EOF) {
                return;
            }
        } else if (c == '"' || c == '\'') {
            skip_quoted(c);
        } else if (c == '/') {
            int next = getch();

            if (next == '*') {
                skip_block_comment();
            } else if (next == '/') {
                skip_line_comment();    /* also consumes the line end */
                return;
            } else {
                ungetch(next);
            }
        }
    }
}

/*
 * Return the first character of the next token, after discarding white space,
 * comments, string literals, character constants and preprocessor lines.
 * Returns EOF at end of input.
 */
static int next_significant(void)
{
    int c;

    while ((c = getch()) != EOF) {
        if (isspace(c)) {
            continue;
        }
        if (c == '#') {
            skip_directive();
        } else if (c == '"' || c == '\'') {
            skip_quoted(c);
        } else if (c == '/') {
            int next = getch();

            if (next == '*') {
                skip_block_comment();
            } else if (next == '/') {
                skip_line_comment();
            } else {
                ungetch(next);          /* a plain '/' operator */
                return c;
            }
        } else {
            return c;
        }
    }
    return EOF;
}

/*
 * Fetch the next token from the input into word, which has room for lim
 * chars including the terminating '\0'.
 *
 * A token is either a word (a letter or underscore followed by letters,
 * digits and underscores) or any other single character. Comments, string
 * literals, character constants and preprocessor lines are skipped and never
 * returned. A word longer than lim - 1 characters is truncated, and its
 * remaining characters are consumed rather than returned as a second word.
 *
 * Returns the first character of the token, or EOF at end of input, in which
 * case word is the empty string. word is always terminated when lim > 0.
 */
int getword(char *word, int lim)
{
    int len = 0;
    int first;
    int c;

    first = next_significant();
    if (first == EOF) {
        if (lim > 0) {
            word[0] = '\0';
        }
        return EOF;
    }

    if (len + 1 < lim) {
        word[len++] = (char)first;
    }
    if (isalpha(first) || first == '_') {
        /* c stays an int until it has been classified, so <ctype.h> never
         * sees a negative char value. */
        while ((c = getch()) != EOF && (isalnum(c) || c == '_')) {
            if (len + 1 < lim) {
                word[len++] = (char)c;
            }
        }
        ungetch(c);     /* first character after the word, possibly EOF */
    }
    if (lim > 0) {
        word[len] = '\0';
    }
    return first;
}