1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 #include "TrackerString.h" 36 37 #include <stdio.h> 38 #include <stdlib.h> 39 40 TrackerString::TrackerString() 41 { 42 } 43 44 45 TrackerString::TrackerString(const char *string) 46 : BString(string) 47 { 48 } 49 50 51 TrackerString::TrackerString(const TrackerString &string) 52 : BString(string) 53 { 54 } 55 56 57 TrackerString::TrackerString(const char *string, int32 maxLength) 58 : BString(string, maxLength) 59 { 60 } 61 62 63 TrackerString::~TrackerString() 64 { 65 } 66 67 68 bool 69 TrackerString::Matches(const char *string, bool caseSensitivity, 70 TrackerStringExpressionType expressionType) const 71 { 72 switch (expressionType) { 73 default: 74 case kNone: 75 return false; 76 77 case kStartsWith: 78 return StartsWith(string, caseSensitivity); 79 80 case kEndsWith: 81 return EndsWith(string, caseSensitivity); 82 83 case kContains: 84 return Contains(string, caseSensitivity); 85 86 case kGlobMatch: 87 return MatchesGlob(string, caseSensitivity); 88 89 case kRegexpMatch: 90 return MatchesRegExp(string, caseSensitivity); 91 } 92 } 93 94 95 bool 96 TrackerString::MatchesRegExp(const char *pattern, bool caseSensitivity) const 97 { 98 BString patternString(pattern); 99 BString textString(String()); 100 101 if (caseSensitivity == false) { 102 patternString.ToLower(); 103 textString.ToLower(); 104 } 105 106 RegExp expression(patternString); 107 108 if (expression.InitCheck() != B_OK) 109 return false; 110 111 return expression.Matches(textString); 112 } 113 114 115 bool 116 TrackerString::MatchesGlob(const char *string, bool caseSensitivity) const 117 { 118 return StringMatchesPattern(String(), string, caseSensitivity); 119 } 120 121 122 bool 123 TrackerString::EndsWith(const char *string, bool caseSensitivity) const 124 { 125 // If "string" is longer than "this", 126 // we should simply return false 127 int32 position = Length() - (int32)strlen(string); 128 if (position < 0) 129 return false; 130 131 if (caseSensitivity) 132 return FindLast(string) == position; 133 else 134 return IFindLast(string) == position; 135 } 136 137 138 bool 139 TrackerString::StartsWith(const char *string, bool caseSensitivity) const 140 { 141 if (caseSensitivity) 142 return FindFirst(string) == 0; 143 else 144 return IFindFirst(string) == 0; 145 } 146 147 148 bool 149 TrackerString::Contains(const char *string, bool caseSensitivity) const 150 { 151 if (caseSensitivity) 152 return FindFirst(string) > -1; 153 else 154 return IFindFirst(string) > -1; 155 } 156 157 158 // About the ?Find* functions: 159 // The leading star here has been compliance with BString, 160 // simplicity and performance. Therefore unncessary copying 161 // has been avoided, as unncessary function calls. 162 // The copying has been avoided by implementing the 163 // ?Find*(const char*) functions rather than 164 // the ?Find*(TrackerString &) functions. 165 // The function calls has been avoided by 166 // inserting a check on the first character 167 // before calling the str*cmp functions. 168 169 170 int32 171 TrackerString::FindFirst(const BString &string) const 172 { 173 return FindFirst(string.String(), 0); 174 } 175 176 177 int32 178 TrackerString::FindFirst(const char *string) const 179 { 180 return FindFirst(string, 0); 181 } 182 183 184 int32 185 TrackerString::FindFirst(const BString &string, int32 fromOffset) const 186 { 187 return FindFirst(string.String(), fromOffset); 188 } 189 190 191 int32 192 TrackerString::FindFirst(const char *string, int32 fromOffset) const 193 { 194 if (!string) 195 return -1; 196 197 int32 length = Length(); 198 uint32 stringLength = strlen(string); 199 200 // The following two checks are required to be compatible 201 // with BString: 202 if (length <= 0) 203 return -1; 204 205 if (stringLength == 0) 206 return fromOffset; 207 208 int32 stop = length - static_cast<int32>(stringLength); 209 int32 start = MAX(0, MIN(fromOffset, stop)); 210 int32 position = -1; 211 212 213 for (int32 i = start; i <= stop; i++) 214 if (string[0] == ByteAt(i)) 215 // This check is to avoid mute str*cmp() calls. Performance. 216 if (strncmp(string, String() + i, stringLength) == 0) { 217 position = i; 218 break; 219 } 220 221 return position; 222 } 223 224 225 int32 226 TrackerString::FindFirst(char ch) const 227 { 228 char string[2] = {ch, '\0'}; 229 return FindFirst(string, 0); 230 } 231 232 233 int32 234 TrackerString::FindFirst(char ch, int32 fromOffset) const 235 { 236 char string[2] = {ch, '\0'}; 237 return FindFirst(string, fromOffset); 238 } 239 240 241 int32 242 TrackerString::FindLast(const BString &string) const 243 { 244 return FindLast(string.String(), Length() - 1); 245 } 246 247 248 int32 249 TrackerString::FindLast(const char *string) const 250 { 251 return FindLast(string, Length() - 1); 252 } 253 254 255 int32 256 TrackerString::FindLast(const BString &string, int32 beforeOffset) const 257 { 258 return FindLast(string.String(), beforeOffset); 259 } 260 261 262 int32 263 TrackerString::FindLast(const char *string, int32 beforeOffset) const 264 { 265 if (!string) 266 return -1; 267 268 int32 length = Length(); 269 uint32 stringLength = strlen(string); 270 271 // The following two checks are required to be compatible 272 // with BString: 273 if (length <= 0) 274 return -1; 275 276 if (stringLength == 0) 277 return beforeOffset; 278 279 int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength)); 280 int32 stop = 0; 281 int32 position = -1; 282 283 for (int32 i = start; i >= stop; i--) 284 if (string[0] == ByteAt(i)) 285 // This check is to avoid mute str*cmp() calls. Performance. 286 if (strncmp(string, String() + i, stringLength) == 0) { 287 position = i; 288 break; 289 } 290 291 return position; 292 } 293 294 295 int32 296 TrackerString::FindLast(char ch) const 297 { 298 char string[2] = {ch, '\0'}; 299 return FindLast(string, Length() - 1); 300 } 301 302 303 int32 304 TrackerString::FindLast(char ch, int32 beforeOffset) const 305 { 306 char string[2] = {ch, '\0'}; 307 return FindLast(string, beforeOffset); 308 } 309 310 311 int32 312 TrackerString::IFindFirst(const BString &string) const 313 { 314 return IFindFirst(string.String(), 0); 315 } 316 317 318 int32 319 TrackerString::IFindFirst(const char *string) const 320 { 321 return IFindFirst(string, 0); 322 } 323 324 325 int32 326 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const 327 { 328 return IFindFirst(string.String(), fromOffset); 329 } 330 331 332 int32 333 TrackerString::IFindFirst(const char *string, int32 fromOffset) const 334 { 335 if (!string) 336 return -1; 337 338 int32 length = Length(); 339 uint32 stringLength = strlen(string); 340 341 // The following two checks are required to be compatible 342 // with BString: 343 if (length <= 0) 344 return -1; 345 346 if (stringLength == 0) 347 return fromOffset; 348 349 int32 stop = length - static_cast<int32>(stringLength); 350 int32 start = MAX(0, MIN(fromOffset, stop)); 351 int32 position = -1; 352 353 for (int32 i = start; i <= stop; i++) 354 if (tolower(string[0]) == tolower(ByteAt(i))) 355 // This check is to avoid mute str*cmp() calls. Performance. 356 if (strncasecmp(string, String() + i, stringLength) == 0) { 357 position = i; 358 break; 359 } 360 361 return position; 362 } 363 364 365 int32 366 TrackerString::IFindLast(const BString &string) const 367 { 368 return IFindLast(string.String(), Length() - 1); 369 } 370 371 372 int32 373 TrackerString::IFindLast(const char *string) const 374 { 375 return IFindLast(string, Length() - 1); 376 } 377 378 379 int32 380 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const 381 { 382 return IFindLast(string.String(), beforeOffset); 383 } 384 385 386 int32 387 TrackerString::IFindLast(const char *string, int32 beforeOffset) const 388 { 389 if (!string) 390 return -1; 391 392 int32 length = Length(); 393 uint32 stringLength = strlen(string); 394 395 // The following two checks are required to be compatible 396 // with BString: 397 if (length <= 0) 398 return -1; 399 400 if (stringLength == 0) 401 return beforeOffset; 402 403 int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength)); 404 int32 stop = 0; 405 int32 position = -1; 406 407 for (int32 i = start; i >= stop; i--) 408 if (tolower(string[0]) == tolower(ByteAt(i))) 409 // This check is to avoid mute str*cmp() calls. Performance. 410 if (strncasecmp(string, String() + i, stringLength) == 0) { 411 position = i; 412 break; 413 } 414 415 return position; 416 } 417 418 419 // MatchesBracketExpression() assumes 'pattern' to point to the 420 // character following the initial '[' in a bracket expression. 421 // The reason is that an encountered '[' will be taken literally. 422 // (Makes it possible to match a '[' with the expression '[[]'). 423 bool 424 TrackerString::MatchesBracketExpression(const char *string, const char *pattern, 425 bool caseSensitivity) const 426 { 427 bool GlyphMatch = IsStartOfGlyph(string[0]); 428 429 if (IsInsideGlyph(string[0])) 430 return false; 431 432 char testChar = ConditionalToLower(string[0], caseSensitivity); 433 bool match = false; 434 435 bool inverse = *pattern == '^' || *pattern == '!'; 436 // We allow both ^ and ! as a initial inverting character. 437 438 if (inverse) 439 pattern++; 440 441 while (!match && *pattern != ']' && *pattern != '\0') { 442 switch (*pattern) { 443 case '-': 444 { 445 char start = ConditionalToLower(*(pattern - 1), caseSensitivity), 446 stop = ConditionalToLower(*(pattern + 1), caseSensitivity); 447 448 if (IsGlyph(start) || IsGlyph(stop)) 449 return false; 450 // Not a valid range! 451 452 if (islower(start) && islower(stop) 453 || isupper(start) && isupper(stop) 454 || isdigit(start) && isdigit(stop)) 455 // Make sure 'start' and 'stop' are of the same type. 456 match = start <= testChar && testChar <= stop; 457 else 458 return false; 459 // If no valid range, we've got a syntax error. 460 } 461 break; 462 463 default: 464 if (GlyphMatch) 465 match = UTF8CharsAreEqual(string, pattern); 466 else 467 match = CharsAreEqual(testChar, *pattern, caseSensitivity); 468 break; 469 } 470 471 if (!match) { 472 pattern++; 473 if (IsInsideGlyph(pattern[0])) 474 pattern = MoveToEndOfGlyph(pattern); 475 } 476 } 477 // Consider an unmatched bracket a failure 478 // (i.e. when detecting a '\0' instead of a ']'.) 479 if (*pattern == '\0') 480 return false; 481 482 return (match ^ inverse) != 0; 483 } 484 485 486 bool 487 TrackerString::StringMatchesPattern(const char *string, const char *pattern, 488 bool caseSensitivity) const 489 { 490 // One could do this dynamically, counting the number of *'s, 491 // but then you have to free them at every exit of this 492 // function, which is awkward and ugly. 493 const int32 kWildCardMaximum = 100; 494 const char *pStorage[kWildCardMaximum]; 495 const char *sStorage[kWildCardMaximum]; 496 497 int32 patternLevel = 0; 498 499 if (string == NULL || pattern == NULL) 500 return false; 501 502 while (*pattern != '\0') { 503 504 switch (*pattern) { 505 506 case '?': 507 pattern++; 508 string++; 509 if (IsInsideGlyph(string[0])) 510 string = MoveToEndOfGlyph(string); 511 break; 512 513 case '*': 514 { 515 // Collapse any ** and *? constructions: 516 while (*pattern == '*' || *pattern == '?') { 517 pattern++; 518 if (*pattern == '?' && string != '\0') { 519 string++; 520 if (IsInsideGlyph(string[0])) 521 string = MoveToEndOfGlyph(string); 522 } 523 } 524 525 if (*pattern == '\0') 526 // An ending * matches all strings. 527 return true; 528 529 bool match = false; 530 const char *pBefore = pattern - 1; 531 532 if (*pattern == '[') { 533 pattern++; 534 535 while (!match && *string != '\0') 536 match = MatchesBracketExpression(string++, pattern, caseSensitivity); 537 538 // Skip the rest of the bracket: 539 while (*pattern != ']' && *pattern != '\0') 540 pattern++; 541 542 // Failure if no closing bracket; 543 if (*pattern == '\0') 544 return false; 545 546 } 547 else { 548 // No bracket, just one character: 549 while (!match && *string != '\0') { 550 if (IsGlyph(string[0])) 551 match = UTF8CharsAreEqual(string++, pattern); 552 else 553 match = CharsAreEqual(*string++, *pattern, caseSensitivity); 554 } 555 } 556 if (!match) 557 return false; 558 else { 559 pStorage[patternLevel] = pBefore; 560 if (IsInsideGlyph(string[0])) 561 string = MoveToEndOfGlyph(string); 562 sStorage[patternLevel++] = string; 563 if (patternLevel > kWildCardMaximum) 564 return false; 565 pattern++; 566 if (IsInsideGlyph(pattern[0])) 567 pattern = MoveToEndOfGlyph(pattern); 568 } 569 } 570 break; 571 572 case '[': 573 pattern++; 574 575 if (!MatchesBracketExpression(string, pattern, caseSensitivity)) 576 if (patternLevel > 0) { 577 pattern = pStorage[--patternLevel]; 578 string = sStorage[patternLevel]; 579 } else 580 return false; 581 else { 582 583 // Skip the rest of the bracket: 584 while (*pattern != ']' && *pattern != '\0') 585 pattern++; 586 587 // Failure if no closing bracket; 588 if (*pattern == '\0') 589 return false; 590 591 string++; 592 if (IsInsideGlyph(string[0])) 593 string = MoveToEndOfGlyph(string); 594 pattern++; 595 } 596 break; 597 598 default: 599 { 600 bool equal = false; 601 if (IsGlyph(string[0])) 602 equal = UTF8CharsAreEqual(string, pattern); 603 else 604 equal = CharsAreEqual(*string, *pattern, caseSensitivity); 605 606 if (equal) { 607 pattern++; 608 if (IsInsideGlyph(pattern[0])) 609 pattern = MoveToEndOfGlyph(pattern); 610 string++; 611 if (IsInsideGlyph(string[0])) 612 string = MoveToEndOfGlyph(string); 613 } else if (patternLevel > 0) { 614 pattern = pStorage[--patternLevel]; 615 string = sStorage[patternLevel]; 616 } else 617 return false; 618 } 619 break; 620 } 621 622 if (*pattern == '\0' && *string != '\0' && patternLevel > 0) { 623 pattern = pStorage[--patternLevel]; 624 string = sStorage[patternLevel]; 625 } 626 } 627 628 return *string == '\0' && *pattern == '\0'; 629 } 630 631 632 bool 633 TrackerString::UTF8CharsAreEqual(const char *string1, const char *string2) const 634 { 635 const char *s1 = string1; 636 const char *s2 = string2; 637 638 if (IsStartOfGlyph(*s1) && *s1 == *s2) { 639 s1++; 640 s2++; 641 642 while (IsInsideGlyph(*s1) && *s1 == *s2) { 643 s1++; 644 s2++; 645 } 646 647 return !IsInsideGlyph(*s1) && !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1); 648 649 } else 650 return false; 651 } 652 653 654 const char * 655 TrackerString::MoveToEndOfGlyph(const char *string) const 656 { 657 const char *ptr = string; 658 659 while (IsInsideGlyph(*ptr)) 660 ptr++; 661 662 return ptr; 663 } 664 665 666 bool 667 TrackerString::IsGlyph(char ch) const 668 { 669 return (ch & 0x80) == 0x80; 670 } 671 672 673 bool 674 TrackerString::IsInsideGlyph(char ch) const 675 { 676 return (ch & 0xC0) == 0x80; 677 } 678 679 680 bool 681 TrackerString::IsStartOfGlyph(char ch) const 682 { 683 return (ch & 0xC0) == 0xC0; 684 } 685