1 /*
2 * Copyright 2014, Rene Gollent, rene@gollent.com.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7 #include "CLanguageFamilySyntaxHighlightInfo.h"
8
9 #include <AutoDeleter.h>
10
11 #include "CLanguageTokenizer.h"
12 #include "LineDataSource.h"
13 #include "TeamTypeInformation.h"
14 #include "TypeLookupConstraints.h"
15
16
17 using namespace CLanguage;
18
19
// Identifiers that are highlighted as language keywords.
//
// IsLanguageKeyword() performs a binary search over this table, so the
// entries MUST stay sorted in ascending byte order (note that the upper case
// "NULL" therefore sorts before all lower case entries, and '_' sorts before
// lower case letters). When adding a keyword, insert it at its sorted
// position.
static const char* const kLanguageKeywords[] = {
	"NULL",
	"asm",
	"auto",
	"bool",
	"break",
	"case",
	"catch",
	"char",
	"class",
	"const",
	"const_cast",
	"constexpr",
	"continue",
	"decltype",
	"default",
	"delete",
	"do",
	"double",
	"dynamic_cast",
	"else",
	"enum",
	"explicit",
	"extern",
	"false",
	"float",
	"for",
	"friend",
	"goto",
	"if",
	"inline",
	"int",
	"long",
	"mutable",
	"namespace",
	"new",
	"noexcept",
	"nullptr",
	"operator",
	"private",
	"protected",
	"public",
	"register",
	"reinterpret_cast",
	"return",
	"short",
	"signed",
	"sizeof",
	"static",
	"static_assert",
	"static_cast",
	"struct",
	"switch",
	"template",
	"this",
	"throw",
	"true",
	"try",
	"typedef",
	"typeid",
	"typename",
	"union",
	"unsigned",
	"using",
	"virtual",
	"void",
	"volatile",
	"while"
};
84
85
IsLanguageKeyword(const Token & token)86 static bool IsLanguageKeyword(const Token& token)
87 {
88 int lower = 0;
89 int upper = (sizeof(kLanguageKeywords)/sizeof(char*)) - 1;
90
91 while (lower < upper) {
92 int mid = (lower + upper + 1) / 2;
93
94 int cmp = token.string.Compare(kLanguageKeywords[mid]);
95 if (cmp == 0)
96 return true;
97 else if (cmp < 0)
98 upper = mid - 1;
99 else
100 lower = mid;
101 }
102
103 return token.string.Compare(kLanguageKeywords[lower]) == 0;
104 }
105
106
107 // #pragma mark - CLanguageFamilySyntaxHighlightInfo::SyntaxPair
108
109
110 struct CLanguageFamilySyntaxHighlightInfo::SyntaxPair {
111 int32 column;
112 syntax_highlight_type type;
113
SyntaxPairCLanguageFamilySyntaxHighlightInfo::SyntaxPair114 SyntaxPair(int32 column, syntax_highlight_type type)
115 :
116 column(column),
117 type(type)
118 {
119 }
120 };
121
122
123 // #pragma mark - CLanguageFamilySyntaxHighlightInfo::LineInfo
124
125
126 class CLanguageFamilySyntaxHighlightInfo::LineInfo {
127 public:
LineInfo(int32 line)128 LineInfo(int32 line)
129 :
130 fLine(line),
131 fPairs(5, true)
132 {
133 }
134
CountPairs() const135 inline int32 CountPairs() const
136 {
137 return fPairs.CountItems();
138 }
139
PairAt(int32 index) const140 SyntaxPair* PairAt(int32 index) const
141 {
142 return fPairs.ItemAt(index);
143 }
144
AddPair(int32 column,syntax_highlight_type type)145 bool AddPair(int32 column, syntax_highlight_type type)
146 {
147 SyntaxPair* pair = new(std::nothrow) SyntaxPair(column, type);
148 if (pair == NULL)
149 return false;
150
151 ObjectDeleter<SyntaxPair> pairDeleter(pair);
152 if (!fPairs.AddItem(pair))
153 return false;
154
155 pairDeleter.Detach();
156 return true;
157 }
158
159 private:
160 typedef BObjectList<SyntaxPair> SyntaxPairList;
161
162 private:
163 int32 fLine;
164 SyntaxPairList fPairs;
165 };
166
167
// #pragma mark - CLanguageFamilySyntaxHighlightInfo
169
170
CLanguageFamilySyntaxHighlightInfo(LineDataSource * source,Tokenizer * tokenizer,TeamTypeInformation * typeInfo)171 CLanguageFamilySyntaxHighlightInfo::CLanguageFamilySyntaxHighlightInfo(
172 LineDataSource* source, Tokenizer* tokenizer,
173 TeamTypeInformation* typeInfo)
174 :
175 SyntaxHighlightInfo(),
176 fHighlightSource(source),
177 fTokenizer(tokenizer),
178 fTypeInfo(typeInfo),
179 fLineInfos(10, true)
180 {
181 fHighlightSource->AcquireReference();
182 }
183
184
~CLanguageFamilySyntaxHighlightInfo()185 CLanguageFamilySyntaxHighlightInfo::~CLanguageFamilySyntaxHighlightInfo()
186 {
187 fHighlightSource->ReleaseReference();
188 delete fTokenizer;
189 }
190
191
192 int32
GetLineHighlightRanges(int32 line,int32 * _columns,syntax_highlight_type * _types,int32 maxCount)193 CLanguageFamilySyntaxHighlightInfo::GetLineHighlightRanges(int32 line,
194 int32* _columns, syntax_highlight_type* _types, int32 maxCount)
195 {
196 if (line >= fHighlightSource->CountLines())
197 return 0;
198
199 // lazily parse the source's highlight information the first time
200 // it's actually requested. Subsequently it's cached for quick retrieval.
201 if (fLineInfos.CountItems() == 0) {
202 if (_ParseLines() != B_OK)
203 return 0;
204 }
205
206 LineInfo* info = fLineInfos.ItemAt(line);
207 if (info == NULL)
208 return 0;
209
210 int32 count = 0;
211 for (; count < info->CountPairs(); count++) {
212 if (count == maxCount - 1)
213 break;
214
215 SyntaxPair* pair = info->PairAt(count);
216 if (pair == NULL)
217 break;
218
219 _columns[count] = pair->column;
220 _types[count] = pair->type;
221 }
222
223 return count;
224 }
225
226
227 status_t
_ParseLines()228 CLanguageFamilySyntaxHighlightInfo::_ParseLines()
229 {
230 syntax_highlight_type type = SYNTAX_HIGHLIGHT_NONE;
231
232 for (int32 i = 0; i < fHighlightSource->CountLines(); i++) {
233 const char* line = fHighlightSource->LineAt(i);
234 fTokenizer->SetTo(line);
235 LineInfo* info = NULL;
236
237 status_t error = _ParseLine(i, type, info);
238 if (error != B_OK)
239 return error;
240
241 ObjectDeleter<LineInfo> infoDeleter(info);
242 if (!fLineInfos.AddItem(info))
243 return B_NO_MEMORY;
244
245 infoDeleter.Detach();
246 }
247
248 return B_OK;
249 }
250
251
252 status_t
_ParseLine(int32 line,syntax_highlight_type & _lastType,LineInfo * & _info)253 CLanguageFamilySyntaxHighlightInfo::_ParseLine(int32 line,
254 syntax_highlight_type& _lastType, LineInfo*& _info)
255 {
256 bool inCommentBlock = (_lastType == SYNTAX_HIGHLIGHT_COMMENT);
257 bool inPreprocessor = false;
258
259 _info = new(std::nothrow) LineInfo(line);
260 if (_info == NULL)
261 return B_NO_MEMORY;
262 ObjectDeleter<LineInfo> infoDeleter(_info);
263 if (inCommentBlock) {
264 if (!_info->AddPair(0, SYNTAX_HIGHLIGHT_COMMENT))
265 return B_NO_MEMORY;
266 }
267
268 try {
269 for (;;) {
270 const Token& token = fTokenizer->NextToken();
271 if (token.type == TOKEN_END_OF_LINE)
272 break;
273
274 if (inCommentBlock) {
275 if (token.type == TOKEN_END_COMMENT_BLOCK)
276 inCommentBlock = false;
277 continue;
278 } else if (inPreprocessor) {
279 fTokenizer->NextToken();
280 inPreprocessor = false;
281 } else if (token.type == TOKEN_INLINE_COMMENT) {
282 if (!_info->AddPair(token.position, SYNTAX_HIGHLIGHT_COMMENT))
283 return B_NO_MEMORY;
284 break;
285 }
286
287 syntax_highlight_type current = _MapTokenToSyntaxType(token);
288 if (_lastType == current)
289 continue;
290
291 _lastType = current;
292 if (!_info->AddPair(token.position, current))
293 return B_NO_MEMORY;
294
295 if (token.type == TOKEN_BEGIN_COMMENT_BLOCK)
296 inCommentBlock = true;
297 else if (token.type == TOKEN_POUND)
298 inPreprocessor = true;
299 }
300 } catch (...) {
301 // if a parse exception was thrown, simply ignore it.
302 // in such a case, we can't guarantee correct highlight
303 // information anyhow, so simply return whatever we started
304 // with.
305 }
306
307 _lastType = inCommentBlock
308 ? SYNTAX_HIGHLIGHT_COMMENT : SYNTAX_HIGHLIGHT_NONE;
309 infoDeleter.Detach();
310 return B_OK;
311 }
312
313
314 syntax_highlight_type
_MapTokenToSyntaxType(const Token & token)315 CLanguageFamilySyntaxHighlightInfo::_MapTokenToSyntaxType(const Token& token)
316 {
317 static TypeLookupConstraints constraints;
318
319 switch (token.type) {
320 case TOKEN_IDENTIFIER:
321 if (IsLanguageKeyword(token))
322 return SYNTAX_HIGHLIGHT_KEYWORD;
323 else if (fTypeInfo->TypeExistsByName(token.string, constraints))
324 return SYNTAX_HIGHLIGHT_TYPE;
325 break;
326
327 case TOKEN_CONSTANT:
328 return SYNTAX_HIGHLIGHT_NUMERIC_LITERAL;
329
330 case TOKEN_END_OF_LINE:
331 break;
332
333 case TOKEN_PLUS:
334 case TOKEN_MINUS:
335 case TOKEN_STAR:
336 case TOKEN_SLASH:
337 case TOKEN_MODULO:
338 case TOKEN_OPENING_PAREN:
339 case TOKEN_CLOSING_PAREN:
340 case TOKEN_OPENING_SQUARE_BRACKET:
341 case TOKEN_CLOSING_SQUARE_BRACKET:
342 case TOKEN_OPENING_CURLY_BRACE:
343 case TOKEN_CLOSING_CURLY_BRACE:
344 case TOKEN_LOGICAL_AND:
345 case TOKEN_LOGICAL_OR:
346 case TOKEN_LOGICAL_NOT:
347 case TOKEN_BITWISE_AND:
348 case TOKEN_BITWISE_OR:
349 case TOKEN_BITWISE_NOT:
350 case TOKEN_BITWISE_XOR:
351 case TOKEN_EQ:
352 case TOKEN_NE:
353 case TOKEN_GT:
354 case TOKEN_GE:
355 case TOKEN_LT:
356 case TOKEN_LE:
357 case TOKEN_MEMBER_PTR:
358 case TOKEN_CONDITION:
359 case TOKEN_COLON:
360 case TOKEN_SEMICOLON:
361 case TOKEN_BACKSLASH:
362 return SYNTAX_HIGHLIGHT_OPERATOR;
363
364 case TOKEN_STRING_LITERAL:
365 return SYNTAX_HIGHLIGHT_STRING_LITERAL;
366
367 case TOKEN_POUND:
368 return SYNTAX_HIGHLIGHT_PREPROCESSOR_KEYWORD;
369
370 case TOKEN_BEGIN_COMMENT_BLOCK:
371 case TOKEN_END_COMMENT_BLOCK:
372 case TOKEN_INLINE_COMMENT:
373 return SYNTAX_HIGHLIGHT_COMMENT;
374 }
375
376 return SYNTAX_HIGHLIGHT_NONE;
377 }
378