xref: /haiku/src/bin/rc/lexer.l (revision e221c09e508ffc3c62738140c9b6fc4fa211662a)
1 /*
2  * Copyright (c) 2003 Matthijs Hollemans
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 %{
24 //------------------------------------------------------------------------------
25 
26 #include <stack>
27 #include <string.h>
28 
29 #include "rdef.h"
30 #include "compile.h"
31 #include "private.h"
32 #include "parser.hpp"
33 
// Aborts the compilation with RDEF_COMPILE_ERR and the given message.
// The expansion already ends in ';', which is why the rule actions below
// invoke it without a trailing semicolon.
#define LEXERROR(msg) abort_compile(RDEF_COMPILE_ERR, msg);

// Initial size (and increment) of lexbuf, in bytes.
#define LEX_BUF_SIZE  (64*1024)

// Temporary buffer that the lexer uses to assemble string and raw literals.
// It is allocated by init_lexer(), grown by addbuf() in LEX_BUF_SIZE steps
// when it is full, and released by clean_up_lexer().
static uint8* lexbuf;

static uint8* lexptr;   // current write position in lexbuf
static size_t lexsize;  // current size of the lex buffer
static size_t lexcnt;   // how full lexbuf currently is

static void resetbuf();     // resets lexptr and lexcnt
static void addbuf(uint8);  // appends byte to lexbuf
49 
// When we encounter an #include directive, we push the current
// buffer, filename, and line number on the include stack, so we
// can resume lexing that file when we're done with the include.
struct include_t {
	YY_BUFFER_STATE buffer;  // flex buffer of the including file
	char* filename;          // strdup()'ed copy of lexfile; freed by
	                         // close_include()
	int lineno;              // yylineno at the point of the #include
};

// One entry per file that is currently suspended by a nested #include.
static std::stack<include_t> include_stack;

static void open_include();   // suspends the current file, enters the include
static void close_include();  // leaves the include, resumes the parent file
63 
64 //------------------------------------------------------------------------------
65 %}
66 
/* No yywrap() callback is used; flex keeps yylineno up to date for us. */
%option noyywrap
%option yylineno

/* Named sub-patterns used by the rules below. LETTER is defined but not
   referenced by any rule in this file. */
LETTER      [a-zA-Z]
BIN         [01]
OCT         [0-7]
DEC         [0-9]
HEX         [0-9a-fA-F]
IDENT       [a-zA-Z_][a-zA-Z0-9_]*
WSPACE      [ \r\t\n\f]
EXP         [eE][+-]?{DEC}+

/* Exclusive start conditions: while one of them is active, only the rules
   qualified with that condition can match. */
%x COMMENT
%x STRDATA
%x RAWDATA
%x INCLUDE
83 
84 %%
85 
    /* Reserved words. They are listed before the {IDENT} rule so that an
       exact match yields the keyword token rather than an identifier. */
enum                     return ENUM;
resource                 return RESOURCE;
array                    return ARRAY;
message                  return MESSAGE;
archive                  return ARCHIVE;
type                     return RTYPE;
import                   return IMPORT;

    /* Boolean literals; the value is passed to the parser in yylval.b. */
false                    yylval.b = false; return BOOL;
true                     yylval.b = true;  return BOOL;
96 
97 0[xX]{HEX}{1,16}         { yylval.i = strtoull(yytext + 2, NULL, 16);
98                            return INTEGER; }
99 0{OCT}{1,24}             { yylval.i = strtoull(yytext, NULL, 8);
100                            return INTEGER; }
101 0[bB]{BIN}{1,64}         { yylval.i = strtoull(yytext + 2, NULL, 2);
102                            return INTEGER; }
103 {DEC}+                   { yylval.i = strtoull(yytext, NULL, 10);
104                            return INTEGER; }
105 '....'                   { yylval.i = (yytext[1] << 24)
106                                     | (yytext[2] << 16)
107                                     | (yytext[3] << 8)
108                                     |  yytext[4];
109                            return INTEGER; }
110 
111 {DEC}+{EXP}              yylval.f = strtod(yytext, NULL); return FLOAT;
112 {DEC}*\.{DEC}+{EXP}?     yylval.f = strtod(yytext, NULL); return FLOAT;
113 {DEC}+\.{DEC}*{EXP}?     yylval.f = strtod(yytext, NULL); return FLOAT;
114 
115 #{DEC}+                  { yylval.t = strtoul(yytext + 1, NULL, 10);
116                            return TYPECODE; }
117 #0[xX]{HEX}{1,8}         { yylval.t = strtoul(yytext + 3, NULL, 16);
118                            return TYPECODE; }
119 #'....'                  { yylval.t = (yytext[2] << 24)
120                                     | (yytext[3] << 16)
121                                     | (yytext[4] << 8)
122                                     |  yytext[5];
123                            return TYPECODE; }
124 
    /* Identifiers: the parser receives its own copy of the matched text in
       yylval.I. yyleng + 1 bytes are copied so that the terminating NUL of
       yytext is included. The storage comes from alloc_mem(). */
{IDENT}                  { yylval.I = (char*) alloc_mem(yyleng + 1);
                           memcpy(yylval.I, yytext, yyleng + 1);
                           return IDENT; }
128 
129 \"                       BEGIN(STRDATA); resetbuf();
130 <STRDATA>\"{WSPACE}+\"   /* concatenate two literals */
131 <STRDATA>\"              { BEGIN(INITIAL);
132                            addbuf('\0');
133                            yylval.d.type = get_type("string");
134                            yylval.d.size = lexcnt;
135                            yylval.d.ptr  = alloc_mem(lexcnt);
136                            memcpy(yylval.d.ptr, lexbuf, lexcnt);
137                            return STRING; }
138 <STRDATA>\n              LEXERROR("string not terminated")
139 <STRDATA>\\{OCT}{3}      addbuf(strtol(yytext + 1, NULL, 8));
140 <STRDATA>\\0[xX]{HEX}{2} addbuf(strtol(yytext + 3, NULL, 16));
141 <STRDATA>\\[xX]{HEX}{2}  addbuf(strtol(yytext + 2, NULL, 16));
142 <STRDATA>\\b             addbuf('\b');
143 <STRDATA>\\f             addbuf('\f');
144 <STRDATA>\\n             addbuf('\n');
145 <STRDATA>\\r             addbuf('\r');
146 <STRDATA>\\t             addbuf('\t');
147 <STRDATA>\\v             addbuf('\v');
148 <STRDATA>\\0             addbuf('\0');
149 <STRDATA>\\.             addbuf(yytext[1]);
150 <STRDATA>.               addbuf(yytext[0]);
151 
152 $\"                      BEGIN(RAWDATA); resetbuf();
153 <RAWDATA>\"{WSPACE}+$\"  /* concatenate two literals */
154 <RAWDATA>\"              { BEGIN(INITIAL);
155                            yylval.d.type = get_type("raw");
156                            yylval.d.size = lexcnt;
157                            yylval.d.ptr  = alloc_mem(lexcnt);
158                            memcpy(yylval.d.ptr, lexbuf, lexcnt);
159                            return RAW; }
160 <RAWDATA>\n              LEXERROR("raw data not terminated")
161 <RAWDATA>{HEX}{2}        addbuf(strtol(yytext, NULL, 16));
162 <RAWDATA>{HEX}           LEXERROR("number of characters must be even")
163 <RAWDATA>.               LEXERROR("invalid character in raw data")
164 
    /* Multi-line comments are skipped in the COMMENT start condition;
       reaching end of file inside one is reported as an error. */
"/*"                     BEGIN(COMMENT);  /* eat multi-line comment */
<COMMENT>[^*\n]*         /* eat anything that is not a '*' */
<COMMENT>"*"+[^*/\n]*    /* eat up '*'s not followed by '/'s */
<COMMENT>\n              /* newlines are consumed one at a time */
<COMMENT>"*"+"/"         BEGIN(INITIAL);
<COMMENT><<EOF>>         LEXERROR("forgot to close /*..*/ comment")

"//"[^\n]*               /* eat single-line comment */
{WSPACE}+                /* eat whitespace */
174 
    /* #include "file": the directive and the opening quote switch to the
       INCLUDE start condition; the file name up to and including the
       closing quote is then handed to open_include() through yytext. The
       name pattern does not accept blanks, tabs, newlines or quotes. */
\#include[ \t]+\"        BEGIN(INCLUDE);
<INCLUDE>[ \t]*          /* eat the whitespace */
<INCLUDE>[^ \t\n\"]+\"   open_include();
<INCLUDE>\n              LEXERROR("error in include statement")
<INCLUDE><<EOF>>         LEXERROR("error in include statement")
    /* End of input for start conditions without an EOF rule of their own:
       if we are inside an include, resume the including file; otherwise
       scanning is finished. */
<<EOF>>                  { if (include_stack.empty())
                               yyterminate();
                           else
                               close_include(); }

    /* Any other single character is returned to the parser as itself. */
.                        return yytext[0];
186 
187 %%
188 //------------------------------------------------------------------------------
189 
190 void
191 resetbuf()
192 {
193 	lexptr = lexbuf;
194 	lexcnt = 0;
195 }
196 
197 
198 void
199 addbuf(uint8 b)
200 {
201 	if (lexcnt == lexsize) {
202 		lexsize += LEX_BUF_SIZE;
203 		lexbuf = (uint8*) realloc(lexbuf, lexsize);
204 		if (lexbuf == NULL)
205 			abort_compile(B_NO_MEMORY, "out of memory");
206 
207 		lexptr = lexbuf + lexcnt;
208 	}
209 
210 	*lexptr++ = b;
211 	++lexcnt;
212 }
213 
214 
215 void
216 open_include()
217 {
218 	yytext[yyleng - 1] = '\0';  // remove trailing " quote
219 
220 	char tmpname[B_PATH_NAME_LENGTH];
221 	if (open_file_from_include_dir(yytext, tmpname)) {
222 		yyin = fopen(tmpname, "r");
223 		if (yyin != NULL) {
224 			include_t incl;
225 			incl.buffer   = YY_CURRENT_BUFFER;
226 			incl.lineno   = yylineno;
227 			incl.filename = strdup(lexfile);
228 			include_stack.push(incl);
229 
230 			strcpy(lexfile, tmpname);
231 			yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
232 			yylineno = 1;
233 
234 			BEGIN(INITIAL);
235 			return;
236 		}
237 	}
238 
239 	abort_compile(RDEF_COMPILE_ERR, "cannot open include %s", yytext);
240 }
241 
242 
243 void
244 close_include()
245 {
246 	fclose(yyin);
247 	yy_delete_buffer(YY_CURRENT_BUFFER);
248 
249 	include_t incl = include_stack.top();
250 	include_stack.pop();
251 
252 	yy_switch_to_buffer(incl.buffer);
253 	yylineno = incl.lineno;
254 	strcpy(lexfile, incl.filename);
255 
256 	free(incl.filename);
257 }
258 
259 
260 void
261 init_lexer()
262 {
263 	lexsize = LEX_BUF_SIZE;
264 	lexbuf = (uint8*) malloc(lexsize);
265 	if (lexbuf == NULL)
266 		abort_compile(B_NO_MEMORY, "out of memory");
267 
268 	yyrestart(yyin);  // necessary for multiple input files
269 	yylineno = 1;
270 }
271 
272 
273 void
274 clean_up_lexer()
275 {
276 	while (!include_stack.empty()) {
277 		close_include();
278 	}
279 
280 	if (stdin != yyin)
281 		fclose(yyin);
282 	yy_delete_buffer(YY_CURRENT_BUFFER);
283 
284 	free(lexbuf);
285 }
286 
287