aboutsummaryrefslogtreecommitdiff
path: root/1-24_syntax-checker.c
blob: f56e4f7432358177d2d080e97c6bdb955543262b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include <stdio.h>

/* The C Programming Language: 2nd Edition
 * Exercise 1-24:
 * "Write a program to check a C program for rudimentary syntax errors like
 * unbalanced parentheses, brackets, and braces. Don't forget about quotes, both
 * single and double, escape sequences, and comments. (This program is hard if
 * you do it in full generality.)"
 *
 * Proksima from Freenode's ##c helped me understand full generality where
 * Ixquick, Wikipedia, and StackOverflow all failed: a program that has full
 * generality handles all use cases. In this case, my program should report no
 * errors from a well formed C source file, and return correct errors for every
 * non-valid C source file.
 *
 * I can tackle this one the same way I tackled the previous exercise: with a
 * FSM. The trick is in catching the mismatched levels.
 */

/* Create our nestable counts. These keep track of each type of syntax
 * character. */
#define PARENS 0
#define BRACKETS 1
#define BRACES 2

/* Create our states, which tell the parser what's going on. INSQ, INDQ, and
 * INCM are much larger so it's easier to tell what's what. If I were better
 * versed in binary operations, this could use less RAM. I intend to revisit
 * this later on. */
#define OUT 0
#define INPR 1
#define INBK 2
#define INBC 3
#define INSQ 100
#define INDQ 1000
#define INCM 10000

char c;
int counts[3];
int state, i, linenr;

int main() {
	for (i = 0; i < 3; ++i) {
		counts[i] = 0;
	}

	state = OUT;
	linenr = 1;
	// Begin streaming!
	while ((c = getchar()) != EOF) {
		if (c == '\n') {
			linenr += 1;
			if (state >= INSQ) {
				break;
			}
		}
		if (c == '(') {
			counts[PARENS] += 1;
			state += INPR;
		}
		if (c == ')') {
			counts[PARENS] -= 1;
			state -= INPR;
			if (counts[PARENS] < 0) {
				break;
			}
		}
		if (c == '[') {
			counts[BRACKETS] += 1;
			state += INBK;
		}
		if (c == ']') {
			counts[BRACKETS] -= 1;
			state -= INBK;
			if (counts[BRACKETS] < 0) {
				break;
			}
		}
		if (c == '{') {
			counts[BRACES] += 1;
			state += INBC;
		}
		if (c == '}') {
			counts[BRACES] -= 1;
			state -= INBC;
			if (counts[BRACES] < 0) {
				break;
			}
		}
		if (c == '"' && state > INDQ) {
			state -= INDQ;
			if (state < 0) {
				break;
			}
		}
		if (c == '"' && state < INSQ) {
			state += INDQ;
			if (state >= (INDQ * 2)) {
				break;
			}
		}
		if (c == '\'' && state > INSQ && state < INDQ) {
			state -= INSQ;
			if (state < 0) {
				break;
			}
		}
		if (c == '\'' && state < INSQ) {
			state += INSQ;
			if (state >= (INSQ * 2)) {
				break;
			}
		}
	}

	if (state != 0) {
		printf("SYNTAX ERROR: ");
		
		if (state >= INSQ && state < INDQ) {
			printf("Unclosed single quote on line %d!\n", linenr);
			return 1;
		}
		if (counts[PARENS] > 0) {
			printf("Unclosed parenthesis on line %d!\n", linenr);
			return 1;
		}
		if (counts[PARENS] < 0) {
			printf("Too many close parentheses on line %d!\n", linenr);
			return 1;
		}
		if (counts[BRACKETS] > 0) {
			printf("Unclosed brackets on line %d!\n", linenr);
			return 1;
		}
		if (counts[BRACKETS] < 0) {
			printf("Too many close brackets on line %d!\n", linenr);
			return 1;
		}
		if (counts[BRACES] > 0) {
			printf("Unclosed braces on line %d!\n", linenr);
			return 1;
		}
		if (counts[BRACES] < 0) {
			printf("Too many close braces on line %d!\n", linenr);
			return 1;
		}
	}

	printf("All clean.\n");
	return 0;
}