-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscanner.c
More file actions
336 lines (289 loc) · 8.42 KB
/
scanner.c
File metadata and controls
336 lines (289 loc) · 8.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
/* Scanner
* @copyright (c) 2008, Hedspi, Hanoi University of Technology
* @author Huu-Duc Nguyen
* @version 1.0
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "reader.h"
#include "charcode.h"
#include "token.h"
#include "error.h"
/*
 * Scanner state shared with another translation unit (presumably the
 * reader module behind reader.h -- confirm).  This file only reads these
 * variables; it advances the input exclusively through readChar().
 */
/* Line of the current character; recorded as a token's start position. */
extern int lineNo;
/* Column of the current character; recorded as a token's start position. */
extern int colNo;
/* Current look-ahead character; compared against EOF before every lookup. */
extern int currentChar;
/* Character-class table, indexed by currentChar, yielding CHAR_* codes. */
extern CharCode charCodes[];
/***************************************************************/
/*
 * Consume consecutive characters classified as CHAR_SPACE.
 * Stops at end of input or at the first character of any other class,
 * which is left in currentChar.
 */
void skipBlank() {
  for (;;) {
    if (currentChar == EOF)
      break;
    if (charCodes[currentChar] != CHAR_SPACE)
      break;
    readChar();
  }
}
void skipComment() {
int state = 0;
while ((currentChar != EOF) && (state < 2)) {
switch (charCodes[currentChar]) {
case CHAR_TIMES:
state = 1;
break;
case CHAR_RPAR:
if (state == 1) state = 2;
else state = 0;
break;
default:
state = 0;
}
readChar();
}
if (state != 2)
error(ERR_ENDOFCOMMENT, lineNo, colNo);
}
/*
 * Read an identifier or keyword starting at currentChar.
 *
 * The first character is stored unconditionally (getToken() dispatches
 * here only for CHAR_LETTER); following CHAR_LETTER / CHAR_DIGIT
 * characters are appended.  The whole run is always consumed, even when
 * it is too long to store.
 *
 * Returns a newly made token positioned at the first character:
 *   - the type given by checkKeyword() when that is not TK_NONE,
 *   - TK_IDENT otherwise,
 *   - TK_NONE, after reporting ERR_IDENTTOOLONG, when the lexeme has more
 *     than MAX_IDENT_LEN characters.  In that case token->string holds the
 *     first MAX_IDENT_LEN characters, NUL-terminated.
 */
Token* readIdentKeyword(void) {
  Token *token = makeToken(TK_NONE, lineNo, colNo);
  int count = 1;

  token->string[0] = (char)currentChar;
  readChar();

  while ((currentChar != EOF) &&
         ((charCodes[currentChar] == CHAR_LETTER) ||
          (charCodes[currentChar] == CHAR_DIGIT))) {
    /* count stops growing at MAX_IDENT_LEN + 1, which marks "too long". */
    if (count <= MAX_IDENT_LEN)
      token->string[count++] = (char)currentChar;
    readChar();
  }

  if (count > MAX_IDENT_LEN) {
    /*
     * Index MAX_IDENT_LEN was filled by the loop above; overwrite it with
     * the terminator so the token never carries an unterminated string.
     * Done before error() because this code cannot tell whether error()
     * returns.
     */
    token->string[MAX_IDENT_LEN] = '\0';
    error(ERR_IDENTTOOLONG, token->lineNo, token->colNo);
    return token;
  }

  token->string[count] = '\0';
  token->tokenType = checkKeyword(token->string);
  if (token->tokenType == TK_NONE)
    token->tokenType = TK_IDENT;
  return token;
}
Token* readNumber(void) {
Token *token = makeToken(TK_NUMBER, lineNo, colNo);
int count = 0;
while ((currentChar != EOF) && (charCodes[currentChar] == CHAR_DIGIT)) {
token->string[count++] = (char)currentChar;
readChar();
}
token->string[count] = '\0';
token->value = atoi(token->string);
return token;
}
/*
 * Read a character constant: one character followed by a CHAR_SINGLEQUOTE
 * character.  getToken() dispatches here when currentChar is the opening
 * quote.
 *
 * Returns a newly made token positioned at the opening quote:
 *   - TK_CHAR with the character in token->string on success; the closing
 *     quote is consumed,
 *   - TK_NONE, after reporting ERR_INVALIDCHARCONSTANT, when the input
 *     ends early or the closing quote is missing.  The offending character
 *     is not consumed.
 */
Token* readConstChar(void) {
  Token *token = makeToken(TK_CHAR, lineNo, colNo);

  /* Step past the opening quote. */
  readChar();
  if (currentChar == EOF)
    goto invalid;

  token->string[0] = currentChar;
  token->string[1] = '\0';

  readChar();
  /* The EOF test comes first so the table is never indexed with EOF. */
  if ((currentChar == EOF) || (charCodes[currentChar] != CHAR_SINGLEQUOTE))
    goto invalid;

  /* Consume the closing quote. */
  readChar();
  return token;

invalid:
  token->tokenType = TK_NONE;
  error(ERR_INVALIDCHARCONSTANT, token->lineNo, token->colNo);
  return token;
}
Token* getToken(void) {
Token *token;
int ln, cn;
if (currentChar == EOF)
return makeToken(TK_EOF, lineNo, colNo);
switch (charCodes[currentChar]) {
case CHAR_SPACE: skipBlank(); return getToken();
case CHAR_LETTER: return readIdentKeyword();
case CHAR_DIGIT: return readNumber();
case CHAR_PLUS:
token = makeToken(SB_PLUS, lineNo, colNo);
readChar();
return token;
case CHAR_MINUS:
token = makeToken(SB_MINUS, lineNo, colNo);
readChar();
return token;
case CHAR_TIMES:
token = makeToken(SB_TIMES, lineNo, colNo);
readChar();
return token;
case CHAR_SLASH:
token = makeToken(SB_SLASH, lineNo, colNo);
readChar();
return token;
case CHAR_LT:
ln = lineNo;
cn = colNo;
readChar();
if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
readChar();
return makeToken(SB_LE, ln, cn);
} else return makeToken(SB_LT, ln, cn);
case CHAR_GT:
ln = lineNo;
cn = colNo;
readChar();
if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
readChar();
return makeToken(SB_GE, ln, cn);
} else return makeToken(SB_GT, ln, cn);
case CHAR_EQ:
token = makeToken(SB_EQ, lineNo, colNo);
readChar();
return token;
case CHAR_EXCLAIMATION:
ln = lineNo;
cn = colNo;
readChar();
if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
readChar();
return makeToken(SB_NEQ, ln, cn);
} else {
token = makeToken(TK_NONE, ln, cn);
error(ERR_INVALIDSYMBOL, ln, cn);
return token;
}
case CHAR_COMMA:
token = makeToken(SB_COMMA, lineNo, colNo);
readChar();
return token;
case CHAR_PERIOD:
ln = lineNo;
cn = colNo;
readChar();
if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_RPAR)) {
readChar();
return makeToken(SB_RSEL, ln, cn);
} else return makeToken(SB_PERIOD, ln, cn);
case CHAR_SEMICOLON:
token = makeToken(SB_SEMICOLON, lineNo, colNo);
readChar();
return token;
case CHAR_COLON:
ln = lineNo;
cn = colNo;
readChar();
if ((currentChar != EOF) && (charCodes[currentChar] == CHAR_EQ)) {
readChar();
return makeToken(SB_ASSIGN, ln, cn);
} else return makeToken(SB_COLON, ln, cn);
case CHAR_SINGLEQUOTE: return readConstChar();
case CHAR_LPAR:
ln = lineNo;
cn = colNo;
readChar();
if (currentChar == EOF)
return makeToken(SB_LPAR, ln, cn);
switch (charCodes[currentChar]) {
case CHAR_PERIOD:
readChar();
return makeToken(SB_LSEL, ln, cn);
case CHAR_TIMES:
readChar();
skipComment();
return getToken();
default:
return makeToken(SB_LPAR, ln, cn);
}
case CHAR_RPAR:
token = makeToken(SB_RPAR, lineNo, colNo);
readChar();
return token;
default:
token = makeToken(TK_NONE, lineNo, colNo);
error(ERR_INVALIDSYMBOL, lineNo, colNo);
readChar();
return token;
}
}
/*
 * Return the next token whose type is not TK_NONE.
 * TK_NONE tokens produced by getToken() are released with free() and
 * scanning continues.  The caller receives the returned token as-is.
 */
Token* getValidToken(void) {
  Token *candidate;

  for (candidate = getToken();
       candidate->tokenType == TK_NONE;
       candidate = getToken()) {
    free(candidate);
  }
  return candidate;
}
/******************************************************************/
/*
 * Expands to a case that prints the token type's own name and a newline.
 * The stringized name concatenates with "\n" into a single format string.
 */
#define PRINT_PLAIN_TOKEN(tk) case tk: printf(#tk "\n"); break

/*
 * Print one token to stdout as "<line>-<col>:<TYPE>" followed by a newline.
 * TK_IDENT, TK_NUMBER and TK_CHAR also show token->string; every other
 * handled type prints its name only.  A type without a case prints just
 * the position prefix, with no newline.
 */
void printToken(Token *token) {
  printf("%d-%d:", token->lineNo, token->colNo);

  switch (token->tokenType) {
  /* Token types that carry a lexeme. */
  case TK_IDENT:
    printf("TK_IDENT(%s)\n", token->string);
    break;
  case TK_NUMBER:
    printf("TK_NUMBER(%s)\n", token->string);
    break;
  case TK_CHAR:
    printf("TK_CHAR(\'%s\')\n", token->string);
    break;

  /* Markers. */
  PRINT_PLAIN_TOKEN(TK_NONE);
  PRINT_PLAIN_TOKEN(TK_EOF);

  /* Keywords. */
  PRINT_PLAIN_TOKEN(KW_PROGRAM);
  PRINT_PLAIN_TOKEN(KW_CONST);
  PRINT_PLAIN_TOKEN(KW_TYPE);
  PRINT_PLAIN_TOKEN(KW_VAR);
  PRINT_PLAIN_TOKEN(KW_INTEGER);
  PRINT_PLAIN_TOKEN(KW_CHAR);
  PRINT_PLAIN_TOKEN(KW_ARRAY);
  PRINT_PLAIN_TOKEN(KW_OF);
  PRINT_PLAIN_TOKEN(KW_FUNCTION);
  PRINT_PLAIN_TOKEN(KW_PROCEDURE);
  PRINT_PLAIN_TOKEN(KW_BEGIN);
  PRINT_PLAIN_TOKEN(KW_END);
  PRINT_PLAIN_TOKEN(KW_CALL);
  PRINT_PLAIN_TOKEN(KW_IF);
  PRINT_PLAIN_TOKEN(KW_THEN);
  PRINT_PLAIN_TOKEN(KW_ELSE);
  PRINT_PLAIN_TOKEN(KW_WHILE);
  PRINT_PLAIN_TOKEN(KW_DO);
  PRINT_PLAIN_TOKEN(KW_FOR);
  PRINT_PLAIN_TOKEN(KW_TO);

  /* Symbols. */
  PRINT_PLAIN_TOKEN(SB_SEMICOLON);
  PRINT_PLAIN_TOKEN(SB_COLON);
  PRINT_PLAIN_TOKEN(SB_PERIOD);
  PRINT_PLAIN_TOKEN(SB_COMMA);
  PRINT_PLAIN_TOKEN(SB_ASSIGN);
  PRINT_PLAIN_TOKEN(SB_EQ);
  PRINT_PLAIN_TOKEN(SB_NEQ);
  PRINT_PLAIN_TOKEN(SB_LT);
  PRINT_PLAIN_TOKEN(SB_LE);
  PRINT_PLAIN_TOKEN(SB_GT);
  PRINT_PLAIN_TOKEN(SB_GE);
  PRINT_PLAIN_TOKEN(SB_PLUS);
  PRINT_PLAIN_TOKEN(SB_MINUS);
  PRINT_PLAIN_TOKEN(SB_TIMES);
  PRINT_PLAIN_TOKEN(SB_SLASH);
  PRINT_PLAIN_TOKEN(SB_LPAR);
  PRINT_PLAIN_TOKEN(SB_RPAR);
  PRINT_PLAIN_TOKEN(SB_LSEL);
  PRINT_PLAIN_TOKEN(SB_RSEL);
  }
}

#undef PRINT_PLAIN_TOKEN
/*
 * Tokenize the file named fileName and print every token before TK_EOF
 * with printToken().
 *
 * Returns IO_ERROR if openInputStream() reports IO_ERROR, IO_SUCCESS
 * otherwise.  Each token, including the final TK_EOF one, is released
 * with free(), and the input stream is closed before returning.
 */
int scan(char *fileName) {
  Token *current;

  if (openInputStream(fileName) == IO_ERROR)
    return IO_ERROR;

  for (current = getToken();
       current->tokenType != TK_EOF;
       current = getToken()) {
    printToken(current);
    free(current);
  }
  /* The loop leaves the TK_EOF token unreleased. */
  free(current);

  closeInputStream();
  return IO_SUCCESS;
}
/******************************************************************/
/*
 * Entry point: scan the file named by the first command-line argument.
 * Returns 0 on success.  Returns -1, after printing a message to stdout,
 * when no file is given or scan() reports IO_ERROR.
 */
int main(int argc, char *argv[]) {
  int status = 0;

  if (argc < 2) {
    printf("scanner: no input file.\n");
    status = -1;
  } else if (scan(argv[1]) == IO_ERROR) {
    printf("Can\'t read input file!\n");
    status = -1;
  }

  return status;
}