Solve Exercise 6-4: Highest Word Frequency

This exercise was good practice to get back into the mindset of programming. It's not a pretty solution, but it works. I may end up coming back to this solution, because I feel like I've missed something that would make this exercise simpler.
author: zlg <zlg@zlg.space> 2015-11-07 02:41:42 -0800
committer: zlg <zlg@zlg.space> 2015-11-07 02:41:42 -0800
commit: 87761a45eb70cd22cf0b64fdcc0545006e92a8f9 (patch)
tree: a52e991021cdbee938d40e2446ee1177b6873b28 /ch6
parent: Solve Exercise 6-03: Word cross-referencing (diff)
download: knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.gz
knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.bz2
knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.xz
knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.zip
1 files changed, 177 insertions, 0 deletions
diff --git a/ch6/6-04_top-words.c b/ch6/6-04_top-words.c
new file mode 100644
index 0000000..796dac1
--- /dev/null
+++ b/ch6/6-04_top-words.c
@@ -0,0 +1,177 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/* The C Programming Language: 2nd Edition
+ *
+ * Exercise 6-4: Write a program that prints the distinct words in its input
+ * sorted into decreasing order of frequency of occurrence. Precede each word
+ * by its count.
+ *
+ * Notes: The approach I took builds a separate sorted tree instead of
+ * modifying the tree in-place, like a lot of sorting functions tend to do.
+ * This meant doubling the amount of RAM needed to solve the exercise. There
+ * is probably a more elegant way to do this -- even within Category-0
+ * standards -- but I couldn't seem to figure it out. I'm disappointed that
+ * freq_sort and addsorted couldn't seem to be combined.
+ */
+
+#define BUFSIZE 1000
+#define MAXWORD 100
+#define DEF_PRE_LEN 4
+
+struct tnode {
+	char *word;
+	int count;
+	struct tnode *left;
+	struct tnode *right;
+};
+
+char buf[BUFSIZE];
+int bufp = 0;
+int minlen;
+
+int getch(void);
+void ungetch(int);
+int getword(char *, int);
+struct tnode *addtree(struct tnode *, char *);
+struct tnode *freq_sort(struct tnode *, struct tnode *);
+struct tnode *addsorted(struct tnode *, struct tnode *);
+void treeprint(struct tnode *);
+struct tnode *talloc(void);
+char *mystrdup(char *);
+
+int main(int argc, char *argv[]) {
+	switch (argc) {
+		case 1:
+			minlen = DEF_PRE_LEN;
+			break;
+		case 2:
+			minlen = strtol(argv[1], NULL, 10);
+			break;
+		default:
+			printf("Too many arguments.\n");
+			return 1;
+	}
+	struct tnode *root;
+	struct tnode *sorted;
+	char word[MAXWORD];
+	root = NULL;
+	sorted = NULL;
+	while (getword(word, MAXWORD) != EOF) {
+		/* Only add the words we need to the tree */
+		if (isalpha(word[0]) && strlen(word) > minlen) {
+			root = addtree(root, word);
+		}
+	}
+	/* Sort them in order of occurrence */
+	if (root == NULL) {
+		printf("No words at that length or greater were found.\n");
+		return 1;
+	} else {
+		sorted = freq_sort(root, sorted);
+		treeprint(sorted);
+		return 0;
+	}
+}
+
+/* Iterate over the "tree" and create a new, sorted tree */
+struct tnode *freq_sort(struct tnode *p, struct tnode *s) {
+	for (; p != NULL; p = p->right) {
+		s = addsorted(p, s);
+	}
+	return s;
+}
+
+/* Add entries to the new tree, ordered by word frequency. I feel
+ * that this function and freq_sort() could somehow be consolidated
+ * and that this could be done better. Patches or PRs welcome.
+ */
+struct tnode *addsorted(struct tnode *p, struct tnode *s) {
+	if (s != NULL) {
+		if (p->count > s->count) {
+			s->left = addsorted(p, s->left);
+		} else {
+			s->right = addsorted(p, s->right);
+		}
+	} else {
+		s = talloc();
+		s->word = mystrdup(p->word);
+		s->count = p->count;
+	}
+	return s;
+}
+
+int getword(char *word, int lim) {
+	int c;
+	char *w = word;
+	while (isspace(c = getch())) {
+	}
+	if (c == EOF) {
+		return EOF;
+	}
+	if (c != EOF) {
+		*w++ = c;
+	}
+	for ( ; --lim > 0; w++) {
+		if (!isalpha(*w = getch())) {
+			ungetch(*w);
+			break;
+		}
+	}
+	*w = '\0';
+	return word[0];
+}
+
+int getch(void) {
+	return (bufp > 0) ? buf[--bufp] : getchar();
+}
+
+void ungetch(int c) {
+	if (bufp >= BUFSIZE) {
+		printf("ungetch: Too many characters.\n");
+	} else {
+		buf[bufp++] = c;
+	}
+}
+
+struct tnode *addtree(struct tnode *p, char *w) {
+	if (p != NULL) {
+		/* We're putting all new things to the right side to dumb our tree struct
+		 * down to a list. It's technically one address larger than a linked list,
+		 * but there's no harm in re-purposing a structure.
+		 */
+		if (strcasecmp(p->word, w) == 0) {
+			p->count++;
+		} else {
+			p->right = addtree(p->right, w);
+		}
+	} else {
+		p = talloc();
+		p->word = mystrdup(w);
+		p->count = 1;
+	}
+	return p;
+}
+
+void treeprint(struct tnode *p) {
+	if (p != NULL) {
+		treeprint(p->left);
+		printf("%4d  %-20s\n", p->count,  p->word);
+		treeprint(p->right);
+	}
+}
+
+struct tnode *talloc(void) {
+	return (struct tnode *) malloc(sizeof(struct tnode));
+}
+
+char *mystrdup(char *s) {
+	char *p;
+	p = (char *) malloc(strlen(s)+1);
+	if (p != NULL) {
+		strcpy(p , s);
+	}
+	return p;
+}
author	zlg <zlg@zlg.space>	2015-11-07 02:41:42 -0800
committer	zlg <zlg@zlg.space>	2015-11-07 02:41:42 -0800
commit	87761a45eb70cd22cf0b64fdcc0545006e92a8f9 (patch)
tree	a52e991021cdbee938d40e2446ee1177b6873b28 /ch6
parent	Solve Exercise 6-03: Word cross-referencing (diff)
download	knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.gz knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.bz2 knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.tar.xz knr-87761a45eb70cd22cf0b64fdcc0545006e92a8f9.zip