xref: /haiku/src/kits/debugger/source_language/c_family/CLanguageFamilySyntaxHighlightInfo.cpp (revision fce4895d1884da5ae6fb299d23c735c598e690b1)
1 /*
2  * Copyright 2014, Rene Gollent, rene@gollent.com.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "CLanguageFamilySyntaxHighlightInfo.h"
8 
9 #include <AutoDeleter.h>
10 
11 #include "CLanguageTokenizer.h"
12 #include "LineDataSource.h"
13 #include "TeamTypeInformation.h"
14 #include "TypeLookupConstraints.h"
15 
16 
17 using namespace CLanguage;
18 
19 
// Words that are highlighted as language keywords.
//
// IsLanguageKeyword() performs a binary search over this table, so the
// entries MUST stay sorted in ascending byte order (note that this puts the
// upper case "NULL" before all lower case entries). When adding a keyword,
// insert it at its sorted position.
static const char* kLanguageKeywords[] = {
	"NULL",
	"asm",
	"auto",
	"bool",
	"break",
	"case",
	"catch",
	"char",
	"class",
	"const",
	"const_cast",
	"constexpr",
	"continue",
	"decltype",
	"default",
	"delete",
	"do",
	"double",
	"dynamic_cast",
	"else",
	"enum",
	"explicit",
	"extern",
	"false",
	"float",
	"for",
	"friend",
	"goto",
	"if",
	"inline",
	"int",
	"long",
	"mutable",
	"namespace",
	"new",
	"noexcept",
	"nullptr",
	"operator",
	"private",
	"protected",
	"public",
	"register",
	"reinterpret_cast",
	"return",
	"short",
	"signed",
	"sizeof",
	"static",
	"static_assert",
	"static_cast",
	"struct",
	"switch",
	"template",
	"this",
	"throw",
	"true",
	"try",
	"typedef",
	"typeid",
	"typename",
	"union",
	"unsigned",
	"using",
	"virtual",
	"void",
	"volatile",
	"while"
};
84 
85 
IsLanguageKeyword(const Token & token)86 static bool IsLanguageKeyword(const Token& token)
87 {
88 	int lower = 0;
89 	int upper = (sizeof(kLanguageKeywords)/sizeof(char*)) - 1;
90 
91 	while (lower < upper) {
92 		int mid = (lower + upper + 1) / 2;
93 
94 		int cmp = token.string.Compare(kLanguageKeywords[mid]);
95 		if (cmp == 0)
96 			return true;
97 		else if (cmp < 0)
98 			upper = mid - 1;
99 		else
100 			lower = mid;
101 	}
102 
103 	return token.string.Compare(kLanguageKeywords[lower]) == 0;
104 }
105 
106 
107 // #pragma mark - CLanguageFamilySyntaxHighlightInfo::SyntaxPair
108 
109 
110 struct CLanguageFamilySyntaxHighlightInfo::SyntaxPair {
111 	int32 column;
112 	syntax_highlight_type type;
113 
SyntaxPairCLanguageFamilySyntaxHighlightInfo::SyntaxPair114 	SyntaxPair(int32 column, syntax_highlight_type type)
115 		:
116 		column(column),
117 		type(type)
118 	{
119 	}
120 };
121 
122 
123 // #pragma mark - CLanguageFamilySyntaxHighlightInfo::LineInfo
124 
125 
126 class CLanguageFamilySyntaxHighlightInfo::LineInfo {
127 public:
LineInfo(int32 line)128 	LineInfo(int32 line)
129 		:
130 		fLine(line),
131 		fPairs(5, true)
132 	{
133 	}
134 
CountPairs() const135 	inline int32 CountPairs() const
136 	{
137 		return fPairs.CountItems();
138 	}
139 
PairAt(int32 index) const140 	SyntaxPair* PairAt(int32 index) const
141 	{
142 		return fPairs.ItemAt(index);
143 	}
144 
AddPair(int32 column,syntax_highlight_type type)145 	bool AddPair(int32 column, syntax_highlight_type type)
146 	{
147 		SyntaxPair* pair = new(std::nothrow) SyntaxPair(column, type);
148 		if (pair == NULL)
149 			return false;
150 
151 		ObjectDeleter<SyntaxPair> pairDeleter(pair);
152 		if (!fPairs.AddItem(pair))
153 			return false;
154 
155 		pairDeleter.Detach();
156 		return true;
157 	}
158 
159 private:
160 	typedef BObjectList<SyntaxPair> SyntaxPairList;
161 
162 private:
163 	int32 fLine;
164 	SyntaxPairList fPairs;
165 };
166 
167 
168 // #pragma mark - CLanguageFamilySyntaxHighlightInfo;
169 
170 
CLanguageFamilySyntaxHighlightInfo(LineDataSource * source,Tokenizer * tokenizer,TeamTypeInformation * typeInfo)171 CLanguageFamilySyntaxHighlightInfo::CLanguageFamilySyntaxHighlightInfo(
172 	LineDataSource* source, Tokenizer* tokenizer,
173 	TeamTypeInformation* typeInfo)
174 	:
175 	SyntaxHighlightInfo(),
176 	fHighlightSource(source),
177 	fTokenizer(tokenizer),
178 	fTypeInfo(typeInfo),
179 	fLineInfos(10, true)
180 {
181 	fHighlightSource->AcquireReference();
182 }
183 
184 
~CLanguageFamilySyntaxHighlightInfo()185 CLanguageFamilySyntaxHighlightInfo::~CLanguageFamilySyntaxHighlightInfo()
186 {
187 	fHighlightSource->ReleaseReference();
188 	delete fTokenizer;
189 }
190 
191 
192 int32
GetLineHighlightRanges(int32 line,int32 * _columns,syntax_highlight_type * _types,int32 maxCount)193 CLanguageFamilySyntaxHighlightInfo::GetLineHighlightRanges(int32 line,
194 	int32* _columns, syntax_highlight_type* _types, int32 maxCount)
195 {
196 	if (line >= fHighlightSource->CountLines())
197 		return 0;
198 
199 	// lazily parse the source's highlight information the first time
200 	// it's actually requested. Subsequently it's cached for quick retrieval.
201 	if (fLineInfos.CountItems() == 0) {
202 		if (_ParseLines() != B_OK)
203 			return 0;
204 	}
205 
206 	LineInfo* info = fLineInfos.ItemAt(line);
207 	if (info == NULL)
208 		return 0;
209 
210 	int32 count = 0;
211 	for (; count < info->CountPairs(); count++) {
212 		if (count == maxCount - 1)
213 			break;
214 
215 		SyntaxPair* pair = info->PairAt(count);
216 		if (pair == NULL)
217 			break;
218 
219 		_columns[count] = pair->column;
220 		_types[count] = pair->type;
221 	}
222 
223 	return count;
224 }
225 
226 
227 status_t
_ParseLines()228 CLanguageFamilySyntaxHighlightInfo::_ParseLines()
229 {
230 	syntax_highlight_type type = SYNTAX_HIGHLIGHT_NONE;
231 
232 	for (int32 i = 0; i < fHighlightSource->CountLines(); i++) {
233 		const char* line = fHighlightSource->LineAt(i);
234 		fTokenizer->SetTo(line);
235 		LineInfo* info = NULL;
236 
237 		status_t error = _ParseLine(i, type, info);
238 		if (error != B_OK)
239 			return error;
240 
241 		ObjectDeleter<LineInfo> infoDeleter(info);
242 		if (!fLineInfos.AddItem(info))
243 			return B_NO_MEMORY;
244 
245 		infoDeleter.Detach();
246 	}
247 
248 	return B_OK;
249 }
250 
251 
252 status_t
_ParseLine(int32 line,syntax_highlight_type & _lastType,LineInfo * & _info)253 CLanguageFamilySyntaxHighlightInfo::_ParseLine(int32 line,
254 	syntax_highlight_type& _lastType, LineInfo*& _info)
255 {
256 	bool inCommentBlock = (_lastType == SYNTAX_HIGHLIGHT_COMMENT);
257 	bool inPreprocessor = false;
258 
259 	_info = new(std::nothrow) LineInfo(line);
260 	if (_info == NULL)
261 		return B_NO_MEMORY;
262 	ObjectDeleter<LineInfo> infoDeleter(_info);
263 	if (inCommentBlock) {
264 		if (!_info->AddPair(0, SYNTAX_HIGHLIGHT_COMMENT))
265 			return B_NO_MEMORY;
266 	}
267 
268 	try {
269 		for (;;) {
270 			const Token& token = fTokenizer->NextToken();
271 			if (token.type == TOKEN_END_OF_LINE)
272 				break;
273 
274 			if (inCommentBlock) {
275 				if (token.type == TOKEN_END_COMMENT_BLOCK)
276 					inCommentBlock = false;
277 				continue;
278 			} else if (inPreprocessor) {
279 				fTokenizer->NextToken();
280 				inPreprocessor = false;
281 			} else if (token.type == TOKEN_INLINE_COMMENT) {
282 				if (!_info->AddPair(token.position, SYNTAX_HIGHLIGHT_COMMENT))
283 					return B_NO_MEMORY;
284 				break;
285 			}
286 
287 			syntax_highlight_type current = _MapTokenToSyntaxType(token);
288 			if (_lastType == current)
289 				continue;
290 
291 			_lastType = current;
292 			if (!_info->AddPair(token.position, current))
293 				return B_NO_MEMORY;
294 
295 			if (token.type == TOKEN_BEGIN_COMMENT_BLOCK)
296 				inCommentBlock = true;
297 			else if (token.type == TOKEN_POUND)
298 				inPreprocessor = true;
299 		}
300 	} catch (...) {
301 		// if a parse exception was thrown, simply ignore it.
302 		// in such a case, we can't guarantee correct highlight
303 		// information anyhow, so simply return whatever we started
304 		// with.
305 	}
306 
307 	_lastType = inCommentBlock
308 		? SYNTAX_HIGHLIGHT_COMMENT : SYNTAX_HIGHLIGHT_NONE;
309 	infoDeleter.Detach();
310 	return B_OK;
311 }
312 
313 
314 syntax_highlight_type
_MapTokenToSyntaxType(const Token & token)315 CLanguageFamilySyntaxHighlightInfo::_MapTokenToSyntaxType(const Token& token)
316 {
317 	static TypeLookupConstraints constraints;
318 
319 	switch (token.type) {
320 		case TOKEN_IDENTIFIER:
321 			if (IsLanguageKeyword(token))
322 				return SYNTAX_HIGHLIGHT_KEYWORD;
323 			else if (fTypeInfo->TypeExistsByName(token.string, constraints))
324 				return SYNTAX_HIGHLIGHT_TYPE;
325 			break;
326 
327 		case TOKEN_CONSTANT:
328 			return SYNTAX_HIGHLIGHT_NUMERIC_LITERAL;
329 
330 		case TOKEN_END_OF_LINE:
331 			break;
332 
333 		case TOKEN_PLUS:
334 		case TOKEN_MINUS:
335 		case TOKEN_STAR:
336 		case TOKEN_SLASH:
337 		case TOKEN_MODULO:
338 		case TOKEN_OPENING_PAREN:
339 		case TOKEN_CLOSING_PAREN:
340 		case TOKEN_OPENING_SQUARE_BRACKET:
341 		case TOKEN_CLOSING_SQUARE_BRACKET:
342 		case TOKEN_OPENING_CURLY_BRACE:
343 		case TOKEN_CLOSING_CURLY_BRACE:
344 		case TOKEN_LOGICAL_AND:
345 		case TOKEN_LOGICAL_OR:
346 		case TOKEN_LOGICAL_NOT:
347 		case TOKEN_BITWISE_AND:
348 		case TOKEN_BITWISE_OR:
349 		case TOKEN_BITWISE_NOT:
350 		case TOKEN_BITWISE_XOR:
351 		case TOKEN_EQ:
352 		case TOKEN_NE:
353 		case TOKEN_GT:
354 		case TOKEN_GE:
355 		case TOKEN_LT:
356 		case TOKEN_LE:
357 		case TOKEN_MEMBER_PTR:
358 		case TOKEN_CONDITION:
359 		case TOKEN_COLON:
360 		case TOKEN_SEMICOLON:
361 		case TOKEN_BACKSLASH:
362 			return SYNTAX_HIGHLIGHT_OPERATOR;
363 
364 		case TOKEN_STRING_LITERAL:
365 			return SYNTAX_HIGHLIGHT_STRING_LITERAL;
366 
367 		case TOKEN_POUND:
368 			return SYNTAX_HIGHLIGHT_PREPROCESSOR_KEYWORD;
369 
370 		case TOKEN_BEGIN_COMMENT_BLOCK:
371 		case TOKEN_END_COMMENT_BLOCK:
372 		case TOKEN_INLINE_COMMENT:
373 			return SYNTAX_HIGHLIGHT_COMMENT;
374 	}
375 
376 	return SYNTAX_HIGHLIGHT_NONE;
377 }
378