1 /* 2 * Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2013, Rene Gollent, rene@gollent.com. 4 * Distributed under the terms of the MIT License. 5 */ 6 7 8 #include <RegExp.h> 9 10 #include <new> 11 12 #include <regex.h> 13 14 #include <String.h> 15 16 #include <Referenceable.h> 17 18 19 // #pragma mark - RegExp::Data 20 21 22 struct RegExp::Data : public BReferenceable { 23 Data(const char* pattern, PatternType patternType, bool caseSensitive) 24 : 25 BReferenceable() 26 { 27 // convert the shell pattern to a regular expression 28 BString patternString; 29 if (patternType == PATTERN_TYPE_WILDCARD) { 30 while (*pattern != '\0') { 31 char c = *pattern++; 32 switch (c) { 33 case '?': 34 patternString += '.'; 35 continue; 36 case '*': 37 patternString += ".*"; 38 continue; 39 case '[': 40 { 41 // find the matching ']' first 42 const char* end = pattern; 43 while (*end != ']') { 44 if (*end++ == '\0') { 45 fError = REG_EBRACK; 46 return; 47 } 48 } 49 50 if (pattern == end) { 51 // Empty bracket expression. It will never match 52 // anything. Strictly speaking this is not 53 // considered an error, but we handle it like one. 54 fError = REG_EBRACK; 55 return; 56 } 57 58 patternString += '['; 59 60 // We need to avoid "[." ... ".]", "[=" ... "=]", and 61 // "[:" ... ":]" sequences, since those have special 62 // meaning in regular expressions. If we encounter 63 // a '[' followed by either of '.', '=', or ':', we 64 // replace the '[' by "[.[.]". 65 while (pattern < end) { 66 c = *pattern++; 67 if (c == '[' && pattern < end) { 68 switch (*pattern) { 69 case '.': 70 case '=': 71 case ':': 72 patternString += "[.[.]"; 73 continue; 74 } 75 } 76 patternString += c; 77 } 78 79 pattern++; 80 patternString += ']'; 81 break; 82 } 83 84 case '\\': 85 { 86 // Quotes the next character. Works the same way for 87 // regular expressions. 88 if (*pattern == '\0') { 89 fError = REG_EESCAPE; 90 return; 91 } 92 93 patternString += '\\'; 94 patternString += *pattern++; 95 break; 96 } 97 98 case '^': 99 case '.': 100 case '$': 101 case '(': 102 case ')': 103 case '|': 104 case '+': 105 case '{': 106 // need to be quoted 107 patternString += '\\'; 108 // fall through 109 default: 110 patternString += c; 111 break; 112 } 113 } 114 115 pattern = patternString.String(); 116 } 117 118 int flags = REG_EXTENDED; 119 if (!caseSensitive) 120 flags |= REG_ICASE; 121 122 fError = regcomp(&fCompiledExpression, pattern, flags); 123 } 124 125 ~Data() 126 { 127 if (fError == 0) 128 regfree(&fCompiledExpression); 129 } 130 131 bool IsValid() const 132 { 133 return fError == 0; 134 } 135 136 const regex_t* CompiledExpression() const 137 { 138 return &fCompiledExpression; 139 } 140 141 private: 142 int fError; 143 regex_t fCompiledExpression; 144 }; 145 146 147 // #pragma mark - RegExp::MatchResultData 148 149 150 struct RegExp::MatchResultData : public BReferenceable { 151 MatchResultData(const regex_t* compiledExpression, const char* string) 152 : 153 BReferenceable(), 154 fMatchCount(0), 155 fMatches(NULL) 156 { 157 // Do the matching: Since we need to provide a buffer for the matches 158 // for regexec() to fill in, but don't know the number of matches 159 // beforehand, we need to guess and retry with a larger buffer, if it 160 // wasn't large enough. 161 size_t maxMatchCount = 32; 162 for (;;) { 163 fMatches = new regmatch_t[maxMatchCount]; 164 if (regexec(compiledExpression, string, maxMatchCount, fMatches, 0) 165 != 0) { 166 delete[] fMatches; 167 fMatches = NULL; 168 fMatchCount = 0; 169 break; 170 } 171 172 if (fMatches[maxMatchCount - 1].rm_so == -1) { 173 // determine the match count 174 size_t lower = 0; 175 size_t upper = maxMatchCount; 176 while (lower < upper) { 177 size_t mid = (lower + upper) / 2; 178 if (fMatches[mid].rm_so == -1) 179 upper = mid; 180 else 181 lower = mid + 1; 182 } 183 fMatchCount = lower; 184 break; 185 } 186 187 // buffer too small -- try again with larger buffer 188 delete[] fMatches; 189 fMatches = NULL; 190 maxMatchCount *= 2; 191 } 192 } 193 194 ~MatchResultData() 195 { 196 delete[] fMatches; 197 } 198 199 size_t MatchCount() const 200 { 201 return fMatchCount; 202 } 203 204 const regmatch_t* Matches() const 205 { 206 return fMatches; 207 } 208 209 private: 210 size_t fMatchCount; 211 regmatch_t* fMatches; 212 }; 213 214 215 // #pragma mark - RegExp 216 217 218 RegExp::RegExp() 219 : 220 fData(NULL) 221 { 222 } 223 224 225 RegExp::RegExp(const char* pattern, PatternType patternType, 226 bool caseSensitive) 227 : 228 fData(NULL) 229 { 230 SetPattern(pattern, patternType, caseSensitive); 231 } 232 233 234 RegExp::RegExp(const RegExp& other) 235 : 236 fData(other.fData) 237 { 238 if (fData != NULL) 239 fData->AcquireReference(); 240 } 241 242 243 RegExp::~RegExp() 244 { 245 if (fData != NULL) 246 fData->ReleaseReference(); 247 } 248 249 250 bool 251 RegExp::SetPattern(const char* pattern, PatternType patternType, 252 bool caseSensitive) 253 { 254 if (fData != NULL) { 255 fData->ReleaseReference(); 256 fData = NULL; 257 } 258 259 Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive); 260 if (newData == NULL) 261 return false; 262 263 BReference<Data> dataReference(newData, true); 264 if (!newData->IsValid()) 265 return false; 266 267 fData = dataReference.Detach(); 268 return true; 269 } 270 271 272 RegExp::MatchResult 273 RegExp::Match(const char* string) const 274 { 275 if (!IsValid()) 276 return MatchResult(); 277 278 return MatchResult( 279 new(std::nothrow) MatchResultData(fData->CompiledExpression(), 280 string)); 281 } 282 283 284 RegExp& 285 RegExp::operator=(const RegExp& other) 286 { 287 if (fData != NULL) 288 fData->ReleaseReference(); 289 290 fData = other.fData; 291 292 if (fData != NULL) 293 fData->AcquireReference(); 294 295 return *this; 296 } 297 298 299 // #pragma mark - RegExp::MatchResult 300 301 302 RegExp::MatchResult::MatchResult() 303 : 304 fData(NULL) 305 { 306 } 307 308 309 RegExp::MatchResult::MatchResult(MatchResultData* data) 310 : 311 fData(data) 312 { 313 } 314 315 316 RegExp::MatchResult::MatchResult(const MatchResult& other) 317 : 318 fData(other.fData) 319 { 320 if (fData != NULL) 321 fData->AcquireReference(); 322 } 323 324 325 RegExp::MatchResult::~MatchResult() 326 { 327 if (fData != NULL) 328 fData->ReleaseReference(); 329 } 330 331 332 bool 333 RegExp::MatchResult::HasMatched() const 334 { 335 return fData != NULL && fData->MatchCount() > 0; 336 } 337 338 339 size_t 340 RegExp::MatchResult::StartOffset() const 341 { 342 return fData != NULL && fData->MatchCount() > 0 343 ? fData->Matches()[0].rm_so : 0; 344 } 345 346 347 size_t 348 RegExp::MatchResult::EndOffset() const 349 { 350 return fData != NULL && fData->MatchCount() > 0 351 ? fData->Matches()[0].rm_eo : 0; 352 } 353 354 355 size_t 356 RegExp::MatchResult::GroupCount() const 357 { 358 if (fData == NULL) 359 return 0; 360 361 size_t matchCount = fData->MatchCount(); 362 return matchCount > 0 ? matchCount - 1 : 0; 363 } 364 365 366 size_t 367 RegExp::MatchResult::GroupStartOffsetAt(size_t index) const 368 { 369 return fData != NULL && fData->MatchCount() > index + 1 370 ? fData->Matches()[index + 1].rm_so : 0; 371 } 372 373 374 size_t 375 RegExp::MatchResult::GroupEndOffsetAt(size_t index) const 376 { 377 return fData != NULL && fData->MatchCount() > index + 1 378 ? fData->Matches()[index + 1].rm_eo : 0; 379 } 380 381 382 RegExp::MatchResult& 383 RegExp::MatchResult::operator=(const MatchResult& other) 384 { 385 if (fData != NULL) 386 fData->ReleaseReference(); 387 388 fData = other.fData; 389 390 if (fData != NULL) 391 fData->AcquireReference(); 392 393 return *this; 394 } 395