-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlexer.mll
More file actions
executable file
·149 lines (139 loc) · 2.97 KB
/
lexer.mll
File metadata and controls
executable file
·149 lines (139 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
(* Analyse lexicale *)
{
open Lexing
open Parser
open Ast
open Error
open Format
(* [current_pos lexbuf] is the pair made of the start position and the end
   position of the lexeme most recently matched on [lexbuf]. *)
let current_pos lexbuf =
  let start_pos = Lexing.lexeme_start_p lexbuf in
  let end_pos = Lexing.lexeme_end_p lexbuf in
  (start_pos, end_pos)
(* [id_or_keyword s] is the token registered for [s] in the keyword table
   below, or [IDENT s] when [s] is not in the table.  The table is built
   once, when this definition is evaluated. *)
let id_or_keyword =
  let keywords =
    [ "boolean", BOOLEAN;
      "class", CLASS;
      "else", ELSE;
      "extends", EXTENDS;
      "false", BOOL false;
      "for", FOR;
      "if", IF;
      "instanceof", INSTANCEOF;
      "int", INT;
      "new", NEW;
      "null", NULL;
      "public", PUBLIC;
      "return", RETURN;
      "static", STATIC;
      "this", THIS;
      "true", BOOL true;
      "void", VOID;
    ]
  in
  let table = Hashtbl.create 17 in
  List.iter (fun (word, tok) -> Hashtbl.add table word tok) keywords;
  fun name ->
    match Hashtbl.find table name with
    | tok -> tok
    | exception Not_found -> IDENT name
(* Scratch buffer shared by the lexer rules: [token] resets it when it meets
   an opening quote and [string] appends the decoded characters of the
   literal to it. *)
let str_buff : Buffer.t = Buffer.create 256
}
(* Named regular expressions used by the rules below. *)
let alpha = ['A'-'Z' 'a'-'z']
let digit = ['0'-'9']

(* Identifiers: a letter or underscore followed by letters, digits or
   underscores. *)
let ident = (alpha | '_') (alpha | digit | '_')*

(* One character of a string literal: either a printable ASCII character
   (from space to tilde) other than backslash, single quote and double
   quote, or a backslash followed by n, t, backslash or double quote. *)
let char =
    ([' ' - '~'] # ['\\' '\'' '\"'])
  | '\\' ['n' 't' '\\' '\"']
(* Main entry point of the lexer: skips blanks and comments, then returns the
   next parser token read from [lexbuf].
   Lexical errors (an integer constant rejected by Int32.of_string, an
   illegal character) are reported through [error] together with the
   position of the offending lexeme. *)
rule token = parse
  | '\n'
      { new_line lexbuf; token lexbuf }
  | [' ' '\t' '\r']+
      { token lexbuf }
  | "/*"
      { comment lexbuf; token lexbuf }
  (* A line comment stops just before the end of line: the newline is then
     counted exactly once by the first case, and a comment that ends at the
     end of input does not bump the line counter, so the position of EOF
     stays on the right line. *)
  | "//" [^'\n']*
      { token lexbuf }
  | ident
      { id_or_keyword (lexeme lexbuf) }
  | digit+ as s
      { (* Int32.of_string signals a malformed or out-of-range constant with
           Failure; only that exception is turned into a lexical error. *)
        try INTEGER (Int32.of_string s)
        with Failure _ ->
          error
            (Lexical_error (sprintf "invalid integer constant '%s'" s))
            (current_pos lexbuf) }
  | '\"'
      { (* The [string] rule scans the rest of the literal through further
           lexer calls, each of which moves the lexeme start.  Save it here
           and restore it afterwards so that the STRING token starts at its
           opening quote instead of its closing quote. *)
        let start_pos = lexbuf.Lexing.lex_start_p in
        Buffer.reset str_buff;
        let tok = string lexbuf in
        lexbuf.Lexing.lex_start_p <- start_pos;
        tok }
  | '('  { LP }
  | ')'  { RP }
  | '{'  { LB }
  | '}'  { RB }
  | '['  { LSB }
  | ']'  { RSB }
  | ','  { COMMA }
  | ';'  { SEMICOLON }
  | '.'  { DOT }
  | "-"  { MINUS }
  | "+"  { PLUS }
  | "*"  { TIMES }
  | "/"  { DIV }
  | "%"  { MOD }
  | "!"  { BANG }
  | "&&" { AND }
  | "||" { OR }
  | "="  { EQ }
  | ">"  { GT }
  | ">=" { GEQ }
  | "<"  { LT }
  | "<=" { LEQ }
  | "==" { EQEQ }
  | "!=" { NEQ }
  | "++" { PLUSPLUS }
  | "--" { MINUSMINUS }
  | eof
      { EOF }
  | _
      { error
          (Lexical_error ("illegal character: " ^ lexeme lexbuf))
          (current_pos lexbuf) }
(* Skips the body of a block comment whose opening delimiter has already been
   consumed by [token], keeping the line counter up to date.  Returns unit
   once the closing delimiter has been read; reaching the end of input first
   is reported through [error]. *)
and comment = parse
  | eof
      { error (Lexical_error "unterminated comment") (current_pos lexbuf) }
  | '\n'
      { Lexing.new_line lexbuf; comment lexbuf }
  | "*/"
      { () }
  | _
      { comment lexbuf }
(* Scans the remainder of a string literal, the opening quote having already
   been consumed by [token].  Each character matched by [char] is decoded and
   appended to [str_buff]; the STRING token carrying the buffer contents is
   returned when the closing quote is read.  End of input and any other
   character are reported through [error]. *)
and string = parse
  | '\"'
      { STRING (Buffer.contents str_buff) }
  | eof
      { error (Lexical_error "unterminated string") (current_pos lexbuf) }
  | char as lexeme_text
      { (* Maps the character that follows a backslash to the character it
           denotes. *)
        let unescape = function
          | 'n' -> '\n'
          | 't' -> '\t'
          | ('\\' | '\"') as same -> same
          | _ ->
              error
                (Lexical_error ("invalid escape sequence " ^ lexeme_text))
                (current_pos lexbuf)
        in
        let decoded =
          if String.length lexeme_text = 1 then lexeme_text.[0]
          else unescape lexeme_text.[1]
        in
        Buffer.add_char str_buff decoded;
        string lexbuf }
  | _ as bad
      { error
          (Lexical_error (sprintf "invalid character '%c'" bad))
          (current_pos lexbuf) }