diff options
author | zlg <zlg@zlg.space> | 2015-01-30 00:55:59 -0800 |
---|---|---|
committer | zlg <zlg@zlg.space> | 2015-01-30 00:55:59 -0800 |
commit | 1f92ea2a90d98ef150a8546cf5695a73ba68312d (patch) | |
tree | def01f396ff869a6c8ab99a9c838ba57b6de4a64 /ch5 | |
parent | Ensure 1-02's solution compiles (diff) | |
download | knr-1f92ea2a90d98ef150a8546cf5695a73ba68312d.tar.gz knr-1f92ea2a90d98ef150a8546cf5695a73ba68312d.tar.bz2 knr-1f92ea2a90d98ef150a8546cf5695a73ba68312d.tar.xz knr-1f92ea2a90d98ef150a8546cf5695a73ba68312d.zip |
Solve Exercise 5-18: error-recovering `dcl`
This exercise was a real pain in the ass. The original use of
getch() and ungetch() was inferior to a line-based approach. It
really doesn't seem like the way a parser should be built, but
it taught me a little about the order of recursion. I'm convinced
that a debugger is necessary if you want to build a good parser.
This implementation doesn't recurse in a natural way, or even
enough to stack data type prefixes. Hopefully by the time I get to
5-20 I'll have enough ideas to implement something that *does*
recurse well.
Diffstat (limited to '')
-rw-r--r-- | ch5/5-18_dcl-error-recovery.c | 172 |
1 files changed, 172 insertions, 0 deletions
diff --git a/ch5/5-18_dcl-error-recovery.c b/ch5/5-18_dcl-error-recovery.c new file mode 100644 index 0000000..9a22764 --- /dev/null +++ b/ch5/5-18_dcl-error-recovery.c @@ -0,0 +1,172 @@ +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +/* The C Programming Language: 2nd Edition + * + * Exercise 5-18: Make `dcl` recover from input errors. + * + * Notes: The book does a poor job of describing what the reader should + * do here. Given that 5-20 calls for a refactor of `dcl`, my solution + * will simply complain at missing parens and brackets, and silently + * accept semicolons and comments at the end of the input line. Those are + * considered "errors" to the book's version of the program, and so this + * solution will fix them. + * + * This solution does not handle more complex data types such as const, + * static, long ("long long ..."), {un}signed, etc. It also won't handle + * arbitrary parentheses. Really simple stuff. + */ + +#define MAXTOKEN 100 +#define BUFSIZE 100 + +enum { NAME, PARENS, BRACKETS, TYPE }; + +void dcl(void); +void dirdcl(void); +int gettoken(void); +char buf[BUFSIZE]; +int bufp = 0; +int tokentype; + +char token[MAXTOKEN]; +char name[MAXTOKEN]; +char datatype[MAXTOKEN]; +char out[1000]; +int get_line(char *s, int lim); + +void dcl(void) { + int ns; + for (ns = 0; gettoken() == '*'; ns++) { + } + dirdcl(); + while (ns-- > 0) { + strcat(out, " pointer to"); + } +} + +void dirdcl(void) { + int type; + if (tokentype == '(') { + dcl(); + if (tokentype != ')') { + printf("error: missing )\n"); + } + } else if (tokentype == TYPE) { + strcat(datatype, " "); + strcat(datatype, token); + // check again for another type + gettoken(); + } else if (tokentype == NAME) { + strcpy(name, token); + } else { + printf("error: expected name or (dcl)\n"); + } + while ((type = gettoken()) == PARENS || type == BRACKETS) { + if (type == PARENS) { + strcat(out, " function returning"); + } else { + strcat(out, " array"); + strcat(out, token); + strcat(out, " of"); + } + } +} + +int gettoken(void) { + char *p = token; + while (buf[bufp] == ' ' || buf[bufp] == '\t') { + bufp++; + } + if (buf[bufp] == '/') { + if (buf[bufp+1] == '/' || buf[bufp+1] == '*') { + while (buf[bufp] != '\n') { + bufp++; + } + } + return tokentype = buf[bufp]; + } else if (buf[bufp] == '(') { + if (buf[++bufp] == ')') { + strcpy(token, "()"); + bufp++; + return tokentype = PARENS; + } else { + return tokentype = '('; + } + } else if (buf[bufp] == '[') { + while (buf[bufp] != ']') { + if (buf[bufp + 1] == '[') { + return tokentype = buf[bufp + 1]; + } + if (isspace(buf[bufp])) { + bufp++; + continue; + } + *p++ = buf[bufp++]; + } + *p++ = buf[bufp++]; + *p = '\0'; + return tokentype = BRACKETS; + } else if (isalpha(buf[bufp])) { + *p++ = buf[bufp++]; + while (isalnum(buf[bufp])) { + *p++ = buf[bufp++]; + } + *p = '\0'; + /* check types */ + if (strcmp(token, "int") == 0 || + strcmp(token, "double") == 0 || + strcmp(token, "char") == 0) { + return tokentype = TYPE; + } + return tokentype = NAME; + } else { + return tokentype = buf[bufp++]; + } +} + +/* Returns length of line including \n + */ +int get_line(char *s, int lim) { + int c, i; + for (i = 0; i < lim - 1 && (c=getchar()) != EOF && c != '\n'; ++i) { + *s++ = c; + } + if (c == '\n') { + *s++ = c; + i++; + } + *s = '\0'; + return i; +} + +int main(void) { + int len = 0; + while ((len = get_line(buf, BUFSIZE)) > 0 && buf[len - 1] != EOF) { + if (len == 1 && buf[bufp] == '\n') { + continue; + } + bufp = 0; + datatype[0] = 0; + gettoken(); + if (tokentype == TYPE) { + strcpy(datatype, token); + } + out[0] = '\0'; + name[0] = '\0'; + dcl(); + if (tokentype == ')') { + printf("error: too many closing parens\n\n"); + } else if (tokentype == ']') { + printf("error: too many closing brackets\n\n"); + } else if (tokentype == '(') { + printf("error: too many opening parens\n\n"); + } else if (tokentype == '[') { + printf("error: too many opening brackets\n\n"); + } else { + printf("%s: %s %s\n\n", name, out, datatype); + } + } + return 0; +} |