1 /* 2 * Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2013, Rene Gollent, rene@gollent.com. 4 * Distributed under the terms of the MIT License. 5 */ 6 7 8 #include <RegExp.h> 9 10 #include <new> 11 12 #include <regex.h> 13 14 #include <String.h> 15 16 #include <Referenceable.h> 17 18 19 // #pragma mark - RegExp::Data 20 21 22 struct RegExp::Data : public BReferenceable { 23 Data(const char* pattern, PatternType patternType, bool caseSensitive) 24 : 25 BReferenceable() 26 { 27 // convert the shell pattern to a regular expression 28 BString patternString; 29 if (patternType == PATTERN_TYPE_WILDCARD) { 30 while (*pattern != '\0') { 31 char c = *pattern++; 32 switch (c) { 33 case '?': 34 patternString += '.'; 35 continue; 36 case '*': 37 patternString += ".*"; 38 continue; 39 case '[': 40 { 41 // find the matching ']' first 42 const char* end = pattern; 43 while (*end != ']') { 44 if (*end++ == '\0') { 45 fError = REG_EBRACK; 46 return; 47 } 48 } 49 50 if (pattern == end) { 51 // Empty bracket expression. It will never match 52 // anything. Strictly speaking this is not 53 // considered an error, but we handle it like one. 54 fError = REG_EBRACK; 55 return; 56 } 57 58 patternString += '['; 59 60 // We need to avoid "[." ... ".]", "[=" ... "=]", and 61 // "[:" ... ":]" sequences, since those have special 62 // meaning in regular expressions. If we encounter 63 // a '[' followed by either of '.', '=', or ':', we 64 // replace the '[' by "[.[.]". 65 while (pattern < end) { 66 c = *pattern++; 67 if (c == '[' && pattern < end) { 68 switch (*pattern) { 69 case '.': 70 case '=': 71 case ':': 72 patternString += "[.[.]"; 73 continue; 74 } 75 } 76 patternString += c; 77 } 78 79 pattern++; 80 patternString += ']'; 81 break; 82 } 83 84 case '\\': 85 { 86 // Quotes the next character. Works the same way for 87 // regular expressions. 88 if (*pattern == '\0') { 89 fError = REG_EESCAPE; 90 return; 91 } 92 93 patternString += '\\'; 94 patternString += *pattern++; 95 break; 96 } 97 98 case '^': 99 case '.': 100 case '$': 101 case '(': 102 case ')': 103 case '|': 104 case '+': 105 case '{': 106 // need to be quoted 107 patternString += '\\'; 108 // fall through 109 default: 110 patternString += c; 111 break; 112 } 113 } 114 115 pattern = patternString.String(); 116 } 117 118 int flags = REG_EXTENDED; 119 if (!caseSensitive) 120 flags |= REG_ICASE; 121 122 fError = regcomp(&fCompiledExpression, pattern, flags); 123 } 124 125 ~Data() 126 { 127 if (fError == 0) 128 regfree(&fCompiledExpression); 129 } 130 131 bool IsValid() const 132 { 133 return fError == 0; 134 } 135 136 const regex_t* CompiledExpression() const 137 { 138 return &fCompiledExpression; 139 } 140 141 private: 142 int fError; 143 regex_t fCompiledExpression; 144 }; 145 146 147 // #pragma mark - RegExp::MatchResultData 148 149 150 struct RegExp::MatchResultData : public BReferenceable { 151 MatchResultData(const regex_t* compiledExpression, const char* string) 152 : 153 BReferenceable(), 154 fMatchCount(0), 155 fMatches(NULL) 156 { 157 // fMatchCount is always set to the number of matching groups in the 158 // expression (or 0 if an error occured). Some of the "matches" in 159 // the array may still point to the (-1,-1) range if they don't 160 // actually match anything. 161 fMatchCount = compiledExpression->re_nsub + 1; 162 fMatches = new regmatch_t[fMatchCount]; 163 if (regexec(compiledExpression, string, fMatchCount, fMatches, 0) 164 != 0) { 165 delete[] fMatches; 166 fMatches = NULL; 167 fMatchCount = 0; 168 } 169 } 170 171 ~MatchResultData() 172 { 173 delete[] fMatches; 174 } 175 176 size_t MatchCount() const 177 { 178 return fMatchCount; 179 } 180 181 const regmatch_t* Matches() const 182 { 183 return fMatches; 184 } 185 186 private: 187 size_t fMatchCount; 188 regmatch_t* fMatches; 189 }; 190 191 192 // #pragma mark - RegExp 193 194 195 RegExp::RegExp() 196 : 197 fData(NULL) 198 { 199 } 200 201 202 RegExp::RegExp(const char* pattern, PatternType patternType, 203 bool caseSensitive) 204 : 205 fData(NULL) 206 { 207 SetPattern(pattern, patternType, caseSensitive); 208 } 209 210 211 RegExp::RegExp(const RegExp& other) 212 : 213 fData(other.fData) 214 { 215 if (fData != NULL) 216 fData->AcquireReference(); 217 } 218 219 220 RegExp::~RegExp() 221 { 222 if (fData != NULL) 223 fData->ReleaseReference(); 224 } 225 226 227 bool 228 RegExp::SetPattern(const char* pattern, PatternType patternType, 229 bool caseSensitive) 230 { 231 if (fData != NULL) { 232 fData->ReleaseReference(); 233 fData = NULL; 234 } 235 236 Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive); 237 if (newData == NULL) 238 return false; 239 240 BReference<Data> dataReference(newData, true); 241 if (!newData->IsValid()) 242 return false; 243 244 fData = dataReference.Detach(); 245 return true; 246 } 247 248 249 RegExp::MatchResult 250 RegExp::Match(const char* string) const 251 { 252 if (!IsValid()) 253 return MatchResult(); 254 255 return MatchResult( 256 new(std::nothrow) MatchResultData(fData->CompiledExpression(), 257 string)); 258 } 259 260 261 RegExp& 262 RegExp::operator=(const RegExp& other) 263 { 264 if (fData != NULL) 265 fData->ReleaseReference(); 266 267 fData = other.fData; 268 269 if (fData != NULL) 270 fData->AcquireReference(); 271 272 return *this; 273 } 274 275 276 // #pragma mark - RegExp::MatchResult 277 278 279 RegExp::MatchResult::MatchResult() 280 : 281 fData(NULL) 282 { 283 } 284 285 286 RegExp::MatchResult::MatchResult(MatchResultData* data) 287 : 288 fData(data) 289 { 290 } 291 292 293 RegExp::MatchResult::MatchResult(const MatchResult& other) 294 : 295 fData(other.fData) 296 { 297 if (fData != NULL) 298 fData->AcquireReference(); 299 } 300 301 302 RegExp::MatchResult::~MatchResult() 303 { 304 if (fData != NULL) 305 fData->ReleaseReference(); 306 } 307 308 309 bool 310 RegExp::MatchResult::HasMatched() const 311 { 312 return fData != NULL && fData->MatchCount() > 0; 313 } 314 315 316 size_t 317 RegExp::MatchResult::StartOffset() const 318 { 319 return fData != NULL && fData->MatchCount() > 0 320 ? fData->Matches()[0].rm_so : 0; 321 } 322 323 324 size_t 325 RegExp::MatchResult::EndOffset() const 326 { 327 return fData != NULL && fData->MatchCount() > 0 328 ? fData->Matches()[0].rm_eo : 0; 329 } 330 331 332 size_t 333 RegExp::MatchResult::GroupCount() const 334 { 335 if (fData == NULL) 336 return 0; 337 338 size_t matchCount = fData->MatchCount(); 339 return matchCount > 0 ? matchCount - 1 : 0; 340 } 341 342 343 size_t 344 RegExp::MatchResult::GroupStartOffsetAt(size_t index) const 345 { 346 return fData != NULL && fData->MatchCount() > index + 1 347 ? fData->Matches()[index + 1].rm_so : 0; 348 } 349 350 351 size_t 352 RegExp::MatchResult::GroupEndOffsetAt(size_t index) const 353 { 354 return fData != NULL && fData->MatchCount() > index + 1 355 ? fData->Matches()[index + 1].rm_eo : 0; 356 } 357 358 359 RegExp::MatchResult& 360 RegExp::MatchResult::operator=(const MatchResult& other) 361 { 362 if (fData != NULL) 363 fData->ReleaseReference(); 364 365 fData = other.fData; 366 367 if (fData != NULL) 368 fData->AcquireReference(); 369 370 return *this; 371 } 372