1 /* 2 * Copyright 2014, Rene Gollent, rene@gollent.com. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "CLanguageFamilySyntaxHighlightInfo.h" 8 9 #include <AutoDeleter.h> 10 11 #include "CLanguageTokenizer.h" 12 #include "LineDataSource.h" 13 #include "TeamTypeInformation.h" 14 #include "TypeLookupConstraints.h" 15 16 17 using namespace CLanguage; 18 19 20 static const char* kLanguageKeywords[] = { 21 "NULL", 22 "asm", 23 "auto", 24 "bool", 25 "break", 26 "case", 27 "catch", 28 "char", 29 "class", 30 "const", 31 "const_cast", 32 "constexpr", 33 "continue", 34 "default", 35 "delete", 36 "do", 37 "double", 38 "dynamic_cast", 39 "else", 40 "enum", 41 "explicit", 42 "extern", 43 "false", 44 "float", 45 "for", 46 "goto", 47 "if", 48 "inline", 49 "int", 50 "long", 51 "mutable", 52 "namespace", 53 "new", 54 "operator", 55 "private", 56 "protected", 57 "public", 58 "register", 59 "reinterpret_cast", 60 "return", 61 "short", 62 "signed", 63 "sizeof", 64 "static", 65 "static_cast", 66 "struct", 67 "switch", 68 "template", 69 "this", 70 "throw", 71 "true", 72 "try", 73 "typedef", 74 "typeid", 75 "typename", 76 "union", 77 "unsigned", 78 "using", 79 "virtual", 80 "void", 81 "volatile", 82 "while" 83 }; 84 85 86 static bool IsLanguageKeyword(const Token& token) 87 { 88 int lower = 0; 89 int upper = (sizeof(kLanguageKeywords)/sizeof(char*)) - 1; 90 91 while (lower < upper) { 92 int mid = (lower + upper + 1) / 2; 93 94 int cmp = token.string.Compare(kLanguageKeywords[mid]); 95 if (cmp == 0) 96 return true; 97 else if (cmp < 0) 98 upper = mid - 1; 99 else 100 lower = mid; 101 } 102 103 return token.string.Compare(kLanguageKeywords[lower]) == 0; 104 } 105 106 107 // #pragma mark - CLanguageFamilySyntaxHighlightInfo::SyntaxPair 108 109 110 struct CLanguageFamilySyntaxHighlightInfo::SyntaxPair { 111 int32 column; 112 syntax_highlight_type type; 113 114 SyntaxPair(int32 column, syntax_highlight_type type) 115 : 116 column(column), 117 type(type) 118 { 119 } 120 }; 121 122 123 // #pragma mark - CLanguageFamilySyntaxHighlightInfo::LineInfo 124 125 126 class CLanguageFamilySyntaxHighlightInfo::LineInfo { 127 public: 128 LineInfo(int32 line) 129 : 130 fLine(line), 131 fPairs(5, true) 132 { 133 } 134 135 inline int32 CountPairs() const 136 { 137 return fPairs.CountItems(); 138 } 139 140 SyntaxPair* PairAt(int32 index) const 141 { 142 return fPairs.ItemAt(index); 143 } 144 145 bool AddPair(int32 column, syntax_highlight_type type) 146 { 147 SyntaxPair* pair = new(std::nothrow) SyntaxPair(column, type); 148 if (pair == NULL) 149 return false; 150 151 ObjectDeleter<SyntaxPair> pairDeleter(pair); 152 if (!fPairs.AddItem(pair)) 153 return false; 154 155 pairDeleter.Detach(); 156 return true; 157 } 158 159 private: 160 typedef BObjectList<SyntaxPair> SyntaxPairList; 161 162 private: 163 int32 fLine; 164 SyntaxPairList fPairs; 165 }; 166 167 168 // #pragma mark - CLanguageFamilySyntaxHighlightInfo; 169 170 171 CLanguageFamilySyntaxHighlightInfo::CLanguageFamilySyntaxHighlightInfo( 172 LineDataSource* source, Tokenizer* tokenizer, 173 TeamTypeInformation* typeInfo) 174 : 175 SyntaxHighlightInfo(), 176 fHighlightSource(source), 177 fTokenizer(tokenizer), 178 fTypeInfo(typeInfo), 179 fLineInfos(10, true) 180 { 181 fHighlightSource->AcquireReference(); 182 } 183 184 185 CLanguageFamilySyntaxHighlightInfo::~CLanguageFamilySyntaxHighlightInfo() 186 { 187 fHighlightSource->ReleaseReference(); 188 delete fTokenizer; 189 } 190 191 192 int32 193 CLanguageFamilySyntaxHighlightInfo::GetLineHighlightRanges(int32 line, 194 int32* _columns, syntax_highlight_type* _types, int32 maxCount) 195 { 196 if (line >= fHighlightSource->CountLines()) 197 return 0; 198 199 // lazily parse the source's highlight information the first time 200 // it's actually requested. Subsequently it's cached for quick retrieval. 201 if (fLineInfos.CountItems() == 0) { 202 if (_ParseLines() != B_OK) 203 return 0; 204 } 205 206 LineInfo* info = fLineInfos.ItemAt(line); 207 if (info == NULL) 208 return 0; 209 210 int32 count = 0; 211 for (; count < info->CountPairs(); count++) { 212 if (count == maxCount - 1) 213 break; 214 215 SyntaxPair* pair = info->PairAt(count); 216 if (pair == NULL) 217 break; 218 219 _columns[count] = pair->column; 220 _types[count] = pair->type; 221 } 222 223 return count; 224 } 225 226 227 status_t 228 CLanguageFamilySyntaxHighlightInfo::_ParseLines() 229 { 230 syntax_highlight_type type = SYNTAX_HIGHLIGHT_NONE; 231 232 for (int32 i = 0; i < fHighlightSource->CountLines(); i++) { 233 const char* line = fHighlightSource->LineAt(i); 234 fTokenizer->SetTo(line); 235 LineInfo* info = NULL; 236 237 status_t error = _ParseLine(i, type, info); 238 if (error != B_OK) 239 return error; 240 241 ObjectDeleter<LineInfo> infoDeleter(info); 242 if (!fLineInfos.AddItem(info)) 243 return B_NO_MEMORY; 244 245 infoDeleter.Detach(); 246 } 247 248 return B_OK; 249 } 250 251 252 status_t 253 CLanguageFamilySyntaxHighlightInfo::_ParseLine(int32 line, 254 syntax_highlight_type& _lastType, LineInfo*& _info) 255 { 256 bool inCommentBlock = (_lastType == SYNTAX_HIGHLIGHT_COMMENT); 257 bool inPreprocessor = false; 258 259 _info = new(std::nothrow) LineInfo(line); 260 if (_info == NULL) 261 return B_NO_MEMORY; 262 ObjectDeleter<LineInfo> infoDeleter(_info); 263 if (inCommentBlock) { 264 if (!_info->AddPair(0, SYNTAX_HIGHLIGHT_COMMENT)) 265 return B_NO_MEMORY; 266 } 267 268 try { 269 for (;;) { 270 const Token& token = fTokenizer->NextToken(); 271 if (token.type == TOKEN_END_OF_LINE) 272 break; 273 274 if (inCommentBlock) { 275 if (token.type == TOKEN_END_COMMENT_BLOCK) 276 inCommentBlock = false; 277 continue; 278 } else if (inPreprocessor) { 279 fTokenizer->NextToken(); 280 inPreprocessor = false; 281 } else if (token.type == TOKEN_INLINE_COMMENT) { 282 if (!_info->AddPair(token.position, SYNTAX_HIGHLIGHT_COMMENT)) 283 return B_NO_MEMORY; 284 break; 285 } 286 287 syntax_highlight_type current = _MapTokenToSyntaxType(token); 288 if (_lastType == current) 289 continue; 290 291 _lastType = current; 292 if (!_info->AddPair(token.position, current)) 293 return B_NO_MEMORY; 294 295 if (token.type == TOKEN_BEGIN_COMMENT_BLOCK) 296 inCommentBlock = true; 297 else if (token.type == TOKEN_POUND) 298 inPreprocessor = true; 299 } 300 } catch (...) { 301 // if a parse exception was thrown, simply ignore it. 302 // in such a case, we can't guarantee correct highlight 303 // information anyhow, so simply return whatever we started 304 // with. 305 } 306 307 _lastType = inCommentBlock 308 ? SYNTAX_HIGHLIGHT_COMMENT : SYNTAX_HIGHLIGHT_NONE; 309 infoDeleter.Detach(); 310 return B_OK; 311 } 312 313 314 syntax_highlight_type 315 CLanguageFamilySyntaxHighlightInfo::_MapTokenToSyntaxType(const Token& token) 316 { 317 static TypeLookupConstraints constraints; 318 319 switch (token.type) { 320 case TOKEN_IDENTIFIER: 321 if (IsLanguageKeyword(token)) 322 return SYNTAX_HIGHLIGHT_KEYWORD; 323 else if (fTypeInfo->TypeExistsByName(token.string, constraints)) 324 return SYNTAX_HIGHLIGHT_TYPE; 325 break; 326 327 case TOKEN_CONSTANT: 328 return SYNTAX_HIGHLIGHT_NUMERIC_LITERAL; 329 330 case TOKEN_END_OF_LINE: 331 break; 332 333 case TOKEN_PLUS: 334 case TOKEN_MINUS: 335 case TOKEN_STAR: 336 case TOKEN_SLASH: 337 case TOKEN_MODULO: 338 case TOKEN_OPENING_PAREN: 339 case TOKEN_CLOSING_PAREN: 340 case TOKEN_OPENING_SQUARE_BRACKET: 341 case TOKEN_CLOSING_SQUARE_BRACKET: 342 case TOKEN_OPENING_CURLY_BRACE: 343 case TOKEN_CLOSING_CURLY_BRACE: 344 case TOKEN_LOGICAL_AND: 345 case TOKEN_LOGICAL_OR: 346 case TOKEN_LOGICAL_NOT: 347 case TOKEN_BITWISE_AND: 348 case TOKEN_BITWISE_OR: 349 case TOKEN_BITWISE_NOT: 350 case TOKEN_BITWISE_XOR: 351 case TOKEN_EQ: 352 case TOKEN_NE: 353 case TOKEN_GT: 354 case TOKEN_GE: 355 case TOKEN_LT: 356 case TOKEN_LE: 357 case TOKEN_MEMBER_PTR: 358 case TOKEN_CONDITION: 359 case TOKEN_COLON: 360 case TOKEN_SEMICOLON: 361 case TOKEN_BACKSLASH: 362 return SYNTAX_HIGHLIGHT_OPERATOR; 363 364 case TOKEN_STRING_LITERAL: 365 return SYNTAX_HIGHLIGHT_STRING_LITERAL; 366 367 case TOKEN_POUND: 368 return SYNTAX_HIGHLIGHT_PREPROCESSOR_KEYWORD; 369 370 case TOKEN_BEGIN_COMMENT_BLOCK: 371 case TOKEN_END_COMMENT_BLOCK: 372 case TOKEN_INLINE_COMMENT: 373 return SYNTAX_HIGHLIGHT_COMMENT; 374 } 375 376 return SYNTAX_HIGHLIGHT_NONE; 377 } 378