From 87761a45eb70cd22cf0b64fdcc0545006e92a8f9 Mon Sep 17 00:00:00 2001 From: zlg Date: Sat, 7 Nov 2015 02:41:42 -0800 Subject: Solve Exercise 6-4: Highest Word Frequency This exercise was good practice to get back into the mindset of programming. It's not a pretty solution, but it works. I may end up coming back to this solution, because I feel like I've missed something that would make this exercise simpler. --- ch6/6-04_top-words.c | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 ch6/6-04_top-words.c (limited to 'ch6/6-04_top-words.c') diff --git a/ch6/6-04_top-words.c b/ch6/6-04_top-words.c new file mode 100644 index 0000000..796dac1 --- /dev/null +++ b/ch6/6-04_top-words.c @@ -0,0 +1,177 @@ +#include +#include +#include +#include + +/* The C Programming Language: 2nd Edition + * + * Exercise 6-4: Write a program that prints the distinct words in its input + * sorted into decreasing order of frequency of occurrence. Precede each word + * by its count. + * + * Notes: The approach I took builds a separate sorted tree instead of + * modifying the tree in-place, like a lot of sorting functions tend to do. + * This meant doubling the amount of RAM needed to solve the exercise. There + * is probably a more elegant way to do this -- even within Category-0 + * standards -- but I couldn't seem to figure it out. I'm disappointed that + * freq_sort and addsorted couldn't seem to be combined. + */ + +#define BUFSIZE 1000 +#define MAXWORD 100 +#define DEF_PRE_LEN 4 + +struct tnode { + char *word; + int count; + struct tnode *left; + struct tnode *right; +}; + +char buf[BUFSIZE]; +int bufp = 0; +int minlen; + +int getch(void); +void ungetch(int); +int getword(char *, int); +struct tnode *addtree(struct tnode *, char *); +struct tnode *freq_sort(struct tnode *, struct tnode *); +struct tnode *addsorted(struct tnode *, struct tnode *); +void treeprint(struct tnode *); +struct tnode *talloc(void); +char *mystrdup(char *); + +int main(int argc, char *argv[]) { + switch (argc) { + case 1: + minlen = DEF_PRE_LEN; + break; + case 2: + minlen = strtol(argv[1], NULL, 10); + break; + default: + printf("Too many arguments.\n"); + return 1; + } + struct tnode *root; + struct tnode *sorted; + char word[MAXWORD]; + root = NULL; + sorted = NULL; + while (getword(word, MAXWORD) != EOF) { + /* Only add the words we need to the tree */ + if (isalpha(word[0]) && strlen(word) > minlen) { + root = addtree(root, word); + } + } + /* Sort them in order of occurrence */ + if (root == NULL) { + printf("No words at that length or greater were found.\n"); + return 1; + } else { + sorted = freq_sort(root, sorted); + treeprint(sorted); + return 0; + } +} + +/* Iterate over the "tree" and create a new, sorted tree */ +struct tnode *freq_sort(struct tnode *p, struct tnode *s) { + for (; p != NULL; p = p->right) { + s = addsorted(p, s); + } + return s; +} + +/* Add entries to the new tree, ordered by word frequency. I feel + * that this function and freq_sort() could somehow be consolidated + * and that this could be done better. Patches or PRs welcome. + */ +struct tnode *addsorted(struct tnode *p, struct tnode *s) { + if (s != NULL) { + if (p->count > s->count) { + s->left = addsorted(p, s->left); + } else { + s->right = addsorted(p, s->right); + } + } else { + s = talloc(); + s->word = mystrdup(p->word); + s->count = p->count; + } + return s; +} + +int getword(char *word, int lim) { + int c; + char *w = word; + while (isspace(c = getch())) { + } + if (c == EOF) { + return EOF; + } + if (c != EOF) { + *w++ = c; + } + for ( ; --lim > 0; w++) { + if (!isalpha(*w = getch())) { + ungetch(*w); + break; + } + } + *w = '\0'; + return word[0]; +} + +int getch(void) { + return (bufp > 0) ? buf[--bufp] : getchar(); +} + +void ungetch(int c) { + if (bufp >= BUFSIZE) { + printf("ungetch: Too many characters.\n"); + } else { + buf[bufp++] = c; + } +} + +struct tnode *addtree(struct tnode *p, char *w) { + if (p != NULL) { + /* We're putting all new things to the right side to dumb our tree struct + * down to a list. It's technically one address larger than a linked list, + * but there's no harm in re-purposing a structure. + */ + if (strcasecmp(p->word, w) == 0) { + p->count++; + } else { + p->right = addtree(p->right, w); + } + } else { + p = talloc(); + p->word = mystrdup(w); + p->count = 1; + } + return p; +} + +void treeprint(struct tnode *p) { + if (p != NULL) { + treeprint(p->left); + printf("%4d %-20s\n", p->count, p->word); + treeprint(p->right); + } +} + +struct tnode *talloc(void) { + return (struct tnode *) malloc(sizeof(struct tnode)); +} + +char *mystrdup(char *s) { + char *p; + p = (char *) malloc(strlen(s)+1); + if (p != NULL) { + strcpy(p , s); + } + return p; +} -- cgit v1.2.3-54-g00ecf