aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzlg <zlg@zlg.space>2015-11-07 02:41:42 -0800
committerzlg <zlg@zlg.space>2015-11-07 02:41:42 -0800
commit87761a45eb70cd22cf0b64fdcc0545006e92a8f9 (patch)
treea52e991021cdbee938d40e2446ee1177b6873b28
parentSolve Exercise 6-03: Word cross-referencing (diff)
downloadknr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.gz
knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.bz2
knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.xz
knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.zip
Solve Exercise 6-4: Highest Word Frequency
This exercise was good practice to get back into the mindset of programming. It's not a pretty solution, but it works. I may end up coming back to this solution, because I feel like I've missed something that would make this exercise simpler.
-rw-r--r--ch6/6-04_top-words.c177
1 files changed, 177 insertions, 0 deletions
diff --git a/ch6/6-04_top-words.c b/ch6/6-04_top-words.c
new file mode 100644
index 0000000..796dac1
--- /dev/null
+++ b/ch6/6-04_top-words.c
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/* The C Programming Language: 2nd Edition
+ *
+ * Exercise 6-4: Write a program that prints the distinct words in its input
+ * sorted into decreasing order of frequency of occurrence. Precede each word
+ * by its count.
+ *
+ * Notes: The approach I took builds a separate sorted tree instead of
+ * modifying the tree in-place, like a lot of sorting functions tend to do.
+ * This meant doubling the amount of RAM needed to solve the exercise. There
+ * is probably a more elegant way to do this -- even within Category-0
+ * standards -- but I couldn't seem to figure it out. I'm disappointed that
+ * freq_sort and addsorted couldn't seem to be combined.
+ */
+
+#define BUFSIZE 1000
+#define MAXWORD 100
+#define DEF_PRE_LEN 4
+
+struct tnode {
+ char *word;
+ int count;
+ struct tnode *left;
+ struct tnode *right;
+};
+
+char buf[BUFSIZE];
+int bufp = 0;
+int minlen;
+
+int getch(void);
+void ungetch(int);
+int getword(char *, int);
+struct tnode *addtree(struct tnode *, char *);
+struct tnode *freq_sort(struct tnode *, struct tnode *);
+struct tnode *addsorted(struct tnode *, struct tnode *);
+void treeprint(struct tnode *);
+struct tnode *talloc(void);
+char *mystrdup(char *);
+
+int main(int argc, char *argv[]) {
+ switch (argc) {
+ case 1:
+ minlen = DEF_PRE_LEN;
+ break;
+ case 2:
+ minlen = strtol(argv[1], NULL, 10);
+ break;
+ default:
+ printf("Too many arguments.\n");
+ return 1;
+ }
+ struct tnode *root;
+ struct tnode *sorted;
+ char word[MAXWORD];
+ root = NULL;
+ sorted = NULL;
+ while (getword(word, MAXWORD) != EOF) {
+ /* Only add the words we need to the tree */
+ if (isalpha(word[0]) && strlen(word) > minlen) {
+ root = addtree(root, word);
+ }
+ }
+ /* Sort them in order of occurrence */
+ if (root == NULL) {
+ printf("No words at that length or greater were found.\n");
+ return 1;
+ } else {
+ sorted = freq_sort(root, sorted);
+ treeprint(sorted);
+ return 0;
+ }
+}
+
+/* Iterate over the "tree" and create a new, sorted tree */
+struct tnode *freq_sort(struct tnode *p, struct tnode *s) {
+ for (; p != NULL; p = p->right) {
+ s = addsorted(p, s);
+ }
+ return s;
+}
+
+/* Add entries to the new tree, ordered by word frequency. I feel
+ * that this function and freq_sort() could somehow be consolidated
+ * and that this could be done better. Patches or PRs welcome.
+ */
+struct tnode *addsorted(struct tnode *p, struct tnode *s) {
+ if (s != NULL) {
+ if (p->count > s->count) {
+ s->left = addsorted(p, s->left);
+ } else {
+ s->right = addsorted(p, s->right);
+ }
+ } else {
+ s = talloc();
+ s->word = mystrdup(p->word);
+ s->count = p->count;
+ }
+ return s;
+}
+
+int getword(char *word, int lim) {
+ int c;
+ char *w = word;
+ while (isspace(c = getch())) {
+ }
+ if (c == EOF) {
+ return EOF;
+ }
+ if (c != EOF) {
+ *w++ = c;
+ }
+ for ( ; --lim > 0; w++) {
+ if (!isalpha(*w = getch())) {
+ ungetch(*w);
+ break;
+ }
+ }
+ *w = '\0';
+ return word[0];
+}
+
+int getch(void) {
+ return (bufp > 0) ? buf[--bufp] : getchar();
+}
+
+void ungetch(int c) {
+ if (bufp >= BUFSIZE) {
+ printf("ungetch: Too many characters.\n");
+ } else {
+ buf[bufp++] = c;
+ }
+}
+
+struct tnode *addtree(struct tnode *p, char *w) {
+ if (p != NULL) {
+ /* We're putting all new things to the right side to dumb our tree struct
+ * down to a list. It's technically one address larger than a linked list,
+ * but there's no harm in re-purposing a structure.
+ */
+ if (strcasecmp(p->word, w) == 0) {
+ p->count++;
+ } else {
+ p->right = addtree(p->right, w);
+ }
+ } else {
+ p = talloc();
+ p->word = mystrdup(w);
+ p->count = 1;
+ }
+ return p;
+}
+
+void treeprint(struct tnode *p) {
+ if (p != NULL) {
+ treeprint(p->left);
+ printf("%4d %-20s\n", p->count, p->word);
+ treeprint(p->right);
+ }
+}
+
+struct tnode *talloc(void) {
+ return (struct tnode *) malloc(sizeof(struct tnode));
+}
+
+char *mystrdup(char *s) {
+ char *p;
+ p = (char *) malloc(strlen(s)+1);
+ if (p != NULL) {
+ strcpy(p , s);
+ }
+ return p;
+}