aboutsummaryrefslogtreecommitdiff
path: root/ch6/6-03_line_location.c
diff options
context:
space:
mode:
authorzlg <zlg@zlg.space>2015-08-04 01:20:56 -0700
committerzlg <zlg@zlg.space>2015-08-04 01:20:56 -0700
commitbeac4bc8129869d82a6bc49957464c88e50ccfb9 (patch)
tree457c3302d2ad7ffb6fd07648d29578885897912a /ch6/6-03_line_location.c
parentSolve Exercise 6-02: Common prefix printing (diff)
downloadknr-beac4bc8129869d82a6bc49957464c88e50ccfb9.tar.gz
knr-beac4bc8129869d82a6bc49957464c88e50ccfb9.tar.bz2
knr-beac4bc8129869d82a6bc49957464c88e50ccfb9.tar.xz
knr-beac4bc8129869d82a6bc49957464c88e50ccfb9.zip
Solve Exercise 6-03: Word cross-referencing
Diffstat (limited to 'ch6/6-03_line_location.c')
-rw-r--r--ch6/6-03_line_location.c190
1 files changed, 190 insertions, 0 deletions
diff --git a/ch6/6-03_line_location.c b/ch6/6-03_line_location.c
new file mode 100644
index 0000000..9a22c81
--- /dev/null
+++ b/ch6/6-03_line_location.c
@@ -0,0 +1,190 @@
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/* The C Programming Language: 2nd Edition
+ *
+ * Exercise 6-3: Write a cross-referencer that prints a list of all words in a
+ * document, and, for each word, a list of the line numbers on which it occurs.
+ * Remove noise words like "the", "and", and so on.
+ *
+ * Notes: The trick to this is to pare down a few functions from 6-2, then add
+ * a separate struct to be part of the tnode struct. I opted for a linked list
+ * of line numbers and some recursion. getline could have been used, and might
+ * have been slightly simpler, but this gets the job done. There's certainly
+ * some room for better word detection, and the tlist structure could probably
+ * be something that's not O(n), but this is Category-0 so who cares? :)
+ */
+
+#define BUFSIZE 1000
+#define MAXWORD 100
+#define DEF_PRE_LEN 4
+
+struct tlist {
+ struct tlist *next;
+ int num;
+};
+
+struct tnode {
+ char *word;
+ struct tlist *lines;
+ struct tnode *left;
+ struct tnode *right;
+};
+
+char buf[BUFSIZE];
+int bufp = 0;
+int minlen;
+int line_nr = 1;
+
+int getch(void);
+void ungetch(int);
+int getword(char *, int);
+struct tnode *addtree(struct tnode *, char *, int);
+void addlist(struct tlist *, int);
+void treeprint(struct tnode *);
+void listprint(struct tlist *);
+struct tnode *talloc(void);
+struct tlist *lalloc(void);
+char *mystrdup(char *);
+
+int main(int argc, char *argv[]) {
+ switch (argc) {
+ case 1:
+ minlen = DEF_PRE_LEN;
+ break;
+ case 2:
+ minlen = strtol(argv[1], NULL, 10);
+ break;
+ default:
+ printf("Too many arguments.\n");
+ return 1;
+ }
+ struct tnode *root;
+ char word[MAXWORD];
+ root = NULL;
+ while (getword(word, MAXWORD) != EOF) {
+ /* Only add the words we need to the tree */
+ if (isalpha(word[0]) && strlen(word) > minlen) {
+ root = addtree(root, word, line_nr);
+ }
+ }
+ if (root == NULL) {
+ printf("No words at that length or greater were found.\n");
+ return 1;
+ } else {
+ treeprint(root);
+ return 0;
+ }
+}
+
+int getword(char *word, int lim) {
+ int c;
+ char *w = word;
+ while (isspace(c = getch())) {
+ if (c == '\n') {
+ line_nr++;
+ }
+ }
+ if (c == EOF) {
+ return EOF;
+ }
+ if (c != EOF) {
+ *w++ = c;
+ }
+ for ( ; --lim > 0; w++) {
+ if (!isalpha(*w = getch())) {
+ ungetch(*w);
+ break;
+ }
+ }
+ *w = '\0';
+ return word[0];
+}
+
+int getch(void) {
+ return (bufp > 0) ? buf[--bufp] : getchar();
+}
+
+void ungetch(int c) {
+ if (bufp >= BUFSIZE) {
+ printf("ungetch: Too many characters.\n");
+ } else {
+ buf[bufp++] = c;
+ }
+}
+
+struct tnode *addtree(struct tnode *p, char *w, int pos) {
+ int cond;
+ if (p != NULL) {
+ if ((cond = strcmp(w, p->word)) == 0) {
+ addlist(p->lines, pos);
+ } else if (cond < 0) {
+ p->left = addtree(p->left, w, pos);
+ } else if (cond > 0) {
+ p->right = addtree(p->right, w, pos);
+ }
+ } else {
+ p = talloc();
+ p->lines = lalloc();
+ p->word = mystrdup(w);
+ /* Add to the list of lines */
+ (p->lines)->num = pos;
+ (p->lines)->next = NULL;
+ p->left = p->right = NULL;
+ }
+ return p;
+}
+
+void addlist(struct tlist *l, int pos) {
+ /* Follow the list until we reach the end */
+ while (l->next != NULL) {
+ l = l->next;
+ }
+ /* Don't add a line number if the numbers match! */
+ if (l->num == pos) {
+ return;
+ }
+ /* This won't happen unless l->next is NULL, meaning we've reached the end of the list. */
+ struct tlist *newline = lalloc();
+ newline->num = pos;
+ newline->next = NULL;
+ /* Let's add to it now */
+ l->next = newline;
+}
+
+void treeprint(struct tnode *p) {
+ if (p != NULL) {
+ treeprint(p->left);
+ printf("%-20s ", p->word);
+ listprint(p->lines);
+ treeprint(p->right);
+ }
+}
+
+void listprint(struct tlist *list) {
+ printf("%3d ", list->num);
+ if (list->next != NULL) {
+ listprint(list->next);
+ } else {
+ printf("\n");
+ }
+}
+
+struct tnode *talloc(void) {
+ return (struct tnode *) malloc(sizeof(struct tnode));
+}
+
+struct tlist *lalloc(void) {
+ return (struct tlist *) malloc(sizeof(struct tlist));
+}
+
+char *mystrdup(char *s) {
+ char *p;
+ p = (char *) malloc(strlen(s)+1);
+ if (p != NULL) {
+ strcpy(p , s);
+ }
+ return p;
+}