diff options
author | zlg <zlg@zlg.space> | 2015-11-07 02:41:42 -0800 |
---|---|---|
committer | zlg <zlg@zlg.space> | 2015-11-07 02:41:42 -0800 |
commit | 87761a45eb70cd22cf0b64fdcc0545006e92a8f9 (patch) | |
tree | a52e991021cdbee938d40e2446ee1177b6873b28 /ch6 | |
parent | Solve Exercise 6-03: Word cross-referencing (diff) | |
download | knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.gz knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.bz2 knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.xz knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.zip |
Solve Exercise 6-4: Highest Word Frequency
This exercise was good practice to get back into the mindset of
programming. It's not a pretty solution, but it works. I may end
up coming back to this solution, because I feel like I've missed
something that would make this exercise simpler.
Diffstat (limited to 'ch6')
-rw-r--r-- | ch6/6-04_top-words.c | 177 |
1 files changed, 177 insertions, 0 deletions
diff --git a/ch6/6-04_top-words.c b/ch6/6-04_top-words.c new file mode 100644 index 0000000..796dac1 --- /dev/null +++ b/ch6/6-04_top-words.c @@ -0,0 +1,177 @@ +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdlib.h> + +/* The C Programming Language: 2nd Edition + * + * Exercise 6-4: Write a program that prints the distinct words in its input + * sorted into decreasing order of frequency of occurrence. Precede each word + * by its count. + * + * Notes: The approach I took builds a separate sorted tree instead of + * modifying the tree in-place, like a lot of sorting functions tend to do. + * This meant doubling the amount of RAM needed to solve the exercise. There + * is probably a more elegant way to do this -- even within Category-0 + * standards -- but I couldn't seem to figure it out. I'm disappointed that + * freq_sort and addsorted couldn't seem to be combined. + */ + +#define BUFSIZE 1000 +#define MAXWORD 100 +#define DEF_PRE_LEN 4 + +struct tnode { + char *word; + int count; + struct tnode *left; + struct tnode *right; +}; + +char buf[BUFSIZE]; +int bufp = 0; +int minlen; + +int getch(void); +void ungetch(int); +int getword(char *, int); +struct tnode *addtree(struct tnode *, char *); +struct tnode *freq_sort(struct tnode *, struct tnode *); +struct tnode *addsorted(struct tnode *, struct tnode *); +void treeprint(struct tnode *); +struct tnode *talloc(void); +char *mystrdup(char *); + +int main(int argc, char *argv[]) { + switch (argc) { + case 1: + minlen = DEF_PRE_LEN; + break; + case 2: + minlen = strtol(argv[1], NULL, 10); + break; + default: + printf("Too many arguments.\n"); + return 1; + } + struct tnode *root; + struct tnode *sorted; + char word[MAXWORD]; + root = NULL; + sorted = NULL; + while (getword(word, MAXWORD) != EOF) { + /* Only add the words we need to the tree */ + if (isalpha(word[0]) && strlen(word) > minlen) { + root = addtree(root, word); + } + } + /* Sort them in order of occurrence */ + if (root == NULL) { + printf("No words at that length or greater were found.\n"); + return 1; + } else { + sorted = freq_sort(root, sorted); + treeprint(sorted); + return 0; + } +} + +/* Iterate over the "tree" and create a new, sorted tree */ +struct tnode *freq_sort(struct tnode *p, struct tnode *s) { + for (; p != NULL; p = p->right) { + s = addsorted(p, s); + } + return s; +} + +/* Add entries to the new tree, ordered by word frequency. I feel + * that this function and freq_sort() could somehow be consolidated + * and that this could be done better. Patches or PRs welcome. + */ +struct tnode *addsorted(struct tnode *p, struct tnode *s) { + if (s != NULL) { + if (p->count > s->count) { + s->left = addsorted(p, s->left); + } else { + s->right = addsorted(p, s->right); + } + } else { + s = talloc(); + s->word = mystrdup(p->word); + s->count = p->count; + } + return s; +} + +int getword(char *word, int lim) { + int c; + char *w = word; + while (isspace(c = getch())) { + } + if (c == EOF) { + return EOF; + } + if (c != EOF) { + *w++ = c; + } + for ( ; --lim > 0; w++) { + if (!isalpha(*w = getch())) { + ungetch(*w); + break; + } + } + *w = '\0'; + return word[0]; +} + +int getch(void) { + return (bufp > 0) ? buf[--bufp] : getchar(); +} + +void ungetch(int c) { + if (bufp >= BUFSIZE) { + printf("ungetch: Too many characters.\n"); + } else { + buf[bufp++] = c; + } +} + +struct tnode *addtree(struct tnode *p, char *w) { + if (p != NULL) { + /* We're putting all new things to the right side to dumb our tree struct + * down to a list. It's technically one address larger than a linked list, + * but there's no harm in re-purposing a structure. + */ + if (strcasecmp(p->word, w) == 0) { + p->count++; + } else { + p->right = addtree(p->right, w); + } + } else { + p = talloc(); + p->word = mystrdup(w); + p->count = 1; + } + return p; +} + +void treeprint(struct tnode *p) { + if (p != NULL) { + treeprint(p->left); + printf("%4d %-20s\n", p->count, p->word); + treeprint(p->right); + } +} + +struct tnode *talloc(void) { + return (struct tnode *) malloc(sizeof(struct tnode)); +} + +char *mystrdup(char *s) { + char *p; + p = (char *) malloc(strlen(s)+1); + if (p != NULL) { + strcpy(p , s); + } + return p; +} |