1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 #include "TrackerString.h" 36 37 #include <stdio.h> 38 #include <stdlib.h> 39 40 41 // #pragma mark - TrackerString 42 43 44 TrackerString::TrackerString() 45 { 46 } 47 48 49 TrackerString::TrackerString(const char* string) 50 : 51 BString(string) 52 { 53 } 54 55 56 TrackerString::TrackerString(const TrackerString &string) 57 : 58 BString(string) 59 { 60 } 61 62 63 TrackerString::TrackerString(const char* string, int32 maxLength) 64 : 65 BString(string, maxLength) 66 { 67 } 68 69 70 TrackerString::~TrackerString() 71 { 72 } 73 74 75 bool 76 TrackerString::Matches(const char* string, bool caseSensitivity, 77 TrackerStringExpressionType expressionType) const 78 { 79 switch (expressionType) { 80 default: 81 case kNone: 82 return false; 83 84 case kStartsWith: 85 return StartsWith(string, caseSensitivity); 86 87 case kEndsWith: 88 return EndsWith(string, caseSensitivity); 89 90 case kContains: 91 return Contains(string, caseSensitivity); 92 93 case kGlobMatch: 94 return MatchesGlob(string, caseSensitivity); 95 96 case kRegexpMatch: 97 return MatchesRegExp(string, caseSensitivity); 98 } 99 } 100 101 102 bool 103 TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const 104 { 105 BString patternString(pattern); 106 BString textString(String()); 107 108 if (caseSensitivity == false) { 109 patternString.ToLower(); 110 textString.ToLower(); 111 } 112 113 RegExp expression(patternString); 114 115 if (expression.InitCheck() != B_OK) 116 return false; 117 118 return expression.Matches(textString); 119 } 120 121 122 bool 123 TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const 124 { 125 return StringMatchesPattern(String(), string, caseSensitivity); 126 } 127 128 129 bool 130 TrackerString::EndsWith(const char* string, bool caseSensitivity) const 131 { 132 // If "string" is longer than "this", 133 // we should simply return false 134 int32 position = Length() - (int32)strlen(string); 135 if (position < 0) 136 return false; 137 138 if (caseSensitivity) 139 return FindLast(string) == position; 140 else 141 return IFindLast(string) == position; 142 } 143 144 145 bool 146 TrackerString::StartsWith(const char* string, bool caseSensitivity) const 147 { 148 if (caseSensitivity) 149 return FindFirst(string) == 0; 150 else 151 return IFindFirst(string) == 0; 152 } 153 154 155 bool 156 TrackerString::Contains(const char* string, bool caseSensitivity) const 157 { 158 if (caseSensitivity) 159 return FindFirst(string) > -1; 160 else 161 return IFindFirst(string) > -1; 162 } 163 164 165 // About the ?Find* functions: 166 // The leading star here has been compliance with BString, 167 // simplicity and performance. Therefore unncessary copying 168 // has been avoided, as unncessary function calls. 169 // The copying has been avoided by implementing the 170 // ?Find*(const char*) functions rather than 171 // the ?Find*(TrackerString &) functions. 172 // The function calls has been avoided by 173 // inserting a check on the first character 174 // before calling the str*cmp functions. 175 176 177 int32 178 TrackerString::FindFirst(const BString &string) const 179 { 180 return FindFirst(string.String(), 0); 181 } 182 183 184 int32 185 TrackerString::FindFirst(const char* string) const 186 { 187 return FindFirst(string, 0); 188 } 189 190 191 int32 192 TrackerString::FindFirst(const BString &string, int32 fromOffset) const 193 { 194 return FindFirst(string.String(), fromOffset); 195 } 196 197 198 int32 199 TrackerString::FindFirst(const char* string, int32 fromOffset) const 200 { 201 if (string == NULL) 202 return -1; 203 204 int32 length = Length(); 205 uint32 stringLength = strlen(string); 206 207 // The following two checks are required to be compatible 208 // with BString: 209 if (length <= 0) 210 return -1; 211 212 if (stringLength == 0) 213 return fromOffset; 214 215 int32 stop = length - static_cast<int32>(stringLength); 216 int32 start = MAX(0, MIN(fromOffset, stop)); 217 int32 position = -1; 218 219 for (int32 i = start; i <= stop; i++) { 220 if (string[0] == ByteAt(i)) { 221 // This check is to avoid mute str*cmp() calls. Performance. 222 if (strncmp(string, String() + i, stringLength) == 0) { 223 position = i; 224 break; 225 } 226 } 227 } 228 229 return position; 230 } 231 232 233 int32 234 TrackerString::FindFirst(char ch) const 235 { 236 char string[2] = {ch, '\0'}; 237 return FindFirst(string, 0); 238 } 239 240 241 int32 242 TrackerString::FindFirst(char ch, int32 fromOffset) const 243 { 244 char string[2] = {ch, '\0'}; 245 return FindFirst(string, fromOffset); 246 } 247 248 249 int32 250 TrackerString::FindLast(const BString &string) const 251 { 252 return FindLast(string.String(), Length() - 1); 253 } 254 255 256 int32 257 TrackerString::FindLast(const char* string) const 258 { 259 return FindLast(string, Length() - 1); 260 } 261 262 263 int32 264 TrackerString::FindLast(const BString &string, int32 beforeOffset) const 265 { 266 return FindLast(string.String(), beforeOffset); 267 } 268 269 270 int32 271 TrackerString::FindLast(const char* string, int32 beforeOffset) const 272 { 273 if (string == NULL) 274 return -1; 275 276 int32 length = Length(); 277 uint32 stringLength = strlen(string); 278 279 // The following two checks are required to be compatible 280 // with BString: 281 if (length <= 0) 282 return -1; 283 284 if (stringLength == 0) 285 return beforeOffset; 286 287 int32 start = MIN(beforeOffset, 288 length - static_cast<int32>(stringLength)); 289 int32 stop = 0; 290 int32 position = -1; 291 292 for (int32 i = start; i >= stop; i--) { 293 if (string[0] == ByteAt(i)) { 294 // This check is to avoid mute str*cmp() calls. Performance. 295 if (strncmp(string, String() + i, stringLength) == 0) { 296 position = i; 297 break; 298 } 299 } 300 } 301 302 return position; 303 } 304 305 306 int32 307 TrackerString::FindLast(char ch) const 308 { 309 char string[2] = {ch, '\0'}; 310 return FindLast(string, Length() - 1); 311 } 312 313 314 int32 315 TrackerString::FindLast(char ch, int32 beforeOffset) const 316 { 317 char string[2] = {ch, '\0'}; 318 return FindLast(string, beforeOffset); 319 } 320 321 322 int32 323 TrackerString::IFindFirst(const BString &string) const 324 { 325 return IFindFirst(string.String(), 0); 326 } 327 328 329 int32 330 TrackerString::IFindFirst(const char* string) const 331 { 332 return IFindFirst(string, 0); 333 } 334 335 336 int32 337 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const 338 { 339 return IFindFirst(string.String(), fromOffset); 340 } 341 342 343 int32 344 TrackerString::IFindFirst(const char* string, int32 fromOffset) const 345 { 346 if (string == NULL) 347 return -1; 348 349 int32 length = Length(); 350 uint32 stringLength = strlen(string); 351 352 // The following two checks are required to be compatible 353 // with BString: 354 if (length <= 0) 355 return -1; 356 357 if (stringLength == 0) 358 return fromOffset; 359 360 int32 stop = length - static_cast<int32>(stringLength); 361 int32 start = MAX(0, MIN(fromOffset, stop)); 362 int32 position = -1; 363 364 for (int32 i = start; i <= stop; i++) { 365 if (tolower(string[0]) == tolower(ByteAt(i))) { 366 // This check is to avoid mute str*cmp() calls. Performance. 367 if (strncasecmp(string, String() + i, stringLength) == 0) { 368 position = i; 369 break; 370 } 371 } 372 } 373 374 return position; 375 } 376 377 378 int32 379 TrackerString::IFindLast(const BString &string) const 380 { 381 return IFindLast(string.String(), Length() - 1); 382 } 383 384 385 int32 386 TrackerString::IFindLast(const char* string) const 387 { 388 return IFindLast(string, Length() - 1); 389 } 390 391 392 int32 393 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const 394 { 395 return IFindLast(string.String(), beforeOffset); 396 } 397 398 399 int32 400 TrackerString::IFindLast(const char* string, int32 beforeOffset) const 401 { 402 if (string == NULL) 403 return -1; 404 405 int32 length = Length(); 406 uint32 stringLength = strlen(string); 407 408 // The following two checks are required to be compatible 409 // with BString: 410 if (length <= 0) 411 return -1; 412 413 if (stringLength == 0) 414 return beforeOffset; 415 416 int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength)); 417 int32 stop = 0; 418 int32 position = -1; 419 420 for (int32 i = start; i >= stop; i--) { 421 if (tolower(string[0]) == tolower(ByteAt(i))) { 422 // This check is to avoid mute str*cmp() calls. Performance. 423 if (strncasecmp(string, String() + i, stringLength) == 0) { 424 position = i; 425 break; 426 } 427 } 428 } 429 430 return position; 431 } 432 433 434 // MatchesBracketExpression() assumes 'pattern' to point to the 435 // character following the initial '[' in a bracket expression. 436 // The reason is that an encountered '[' will be taken literally. 437 // (Makes it possible to match a '[' with the expression '[[]'). 438 bool 439 TrackerString::MatchesBracketExpression(const char* string, 440 const char* pattern, bool caseSensitivity) const 441 { 442 bool GlyphMatch = IsStartOfGlyph(string[0]); 443 444 if (IsInsideGlyph(string[0])) 445 return false; 446 447 char testChar = ConditionalToLower(string[0], caseSensitivity); 448 bool match = false; 449 450 bool inverse = *pattern == '^' || *pattern == '!'; 451 // We allow both ^ and ! as a initial inverting character. 452 453 if (inverse) 454 pattern++; 455 456 while (!match && *pattern != ']' && *pattern != '\0') { 457 switch (*pattern) { 458 case '-': 459 { 460 char start = ConditionalToLower(*(pattern - 1), 461 caseSensitivity); 462 char stop = ConditionalToLower(*(pattern + 1), 463 caseSensitivity); 464 465 if (IsGlyph(start) || IsGlyph(stop)) 466 return false; 467 // Not a valid range! 468 469 if ((islower(start) && islower(stop)) 470 || (isupper(start) && isupper(stop)) 471 || (isdigit(start) && isdigit(stop))) { 472 // Make sure 'start' and 'stop' are of the same type. 473 match = start <= testChar && testChar <= stop; 474 } else { 475 // If no valid range, we've got a syntax error. 476 return false; 477 } 478 479 break; 480 } 481 482 default: 483 if (GlyphMatch) 484 match = UTF8CharsAreEqual(string, pattern); 485 else 486 match = CharsAreEqual(testChar, *pattern, caseSensitivity); 487 break; 488 } 489 490 if (!match) { 491 pattern++; 492 if (IsInsideGlyph(pattern[0])) 493 pattern = MoveToEndOfGlyph(pattern); 494 } 495 } 496 497 // Consider an unmatched bracket a failure 498 // (i.e. when detecting a '\0' instead of a ']'.) 499 if (*pattern == '\0') 500 return false; 501 502 return (match ^ inverse) != 0; 503 } 504 505 506 bool 507 TrackerString::StringMatchesPattern(const char* string, const char* pattern, 508 bool caseSensitivity) const 509 { 510 // One could do this dynamically, counting the number of *'s, 511 // but then you have to free them at every exit of this 512 // function, which is awkward and ugly. 513 const int32 kWildCardMaximum = 100; 514 const char* pStorage[kWildCardMaximum]; 515 const char* sStorage[kWildCardMaximum]; 516 517 int32 patternLevel = 0; 518 519 if (string == NULL || pattern == NULL) 520 return false; 521 522 while (*pattern != '\0') { 523 switch (*pattern) { 524 case '?': 525 pattern++; 526 string++; 527 if (IsInsideGlyph(string[0])) 528 string = MoveToEndOfGlyph(string); 529 break; 530 531 case '*': 532 { 533 // Collapse any ** and *? constructions: 534 while (*pattern == '*' || *pattern == '?') { 535 pattern++; 536 if (*pattern == '?' && string != '\0') { 537 string++; 538 if (IsInsideGlyph(string[0])) 539 string = MoveToEndOfGlyph(string); 540 } 541 } 542 543 if (*pattern == '\0') { 544 // An ending * matches all strings. 545 return true; 546 } 547 548 bool match = false; 549 const char* pBefore = pattern - 1; 550 551 if (*pattern == '[') { 552 pattern++; 553 554 while (!match && *string != '\0') { 555 match = MatchesBracketExpression(string++, pattern, 556 caseSensitivity); 557 } 558 559 while (*pattern != ']' && *pattern != '\0') { 560 // Skip the rest of the bracket: 561 pattern++; 562 } 563 564 if (*pattern == '\0') { 565 // Failure if no closing bracket; 566 return false; 567 } 568 } else { 569 // No bracket, just one character: 570 while (!match && *string != '\0') { 571 if (IsGlyph(string[0])) 572 match = UTF8CharsAreEqual(string++, pattern); 573 else { 574 match = CharsAreEqual(*string++, *pattern, 575 caseSensitivity); 576 } 577 } 578 } 579 580 if (!match) 581 return false; 582 else { 583 pStorage[patternLevel] = pBefore; 584 if (IsInsideGlyph(string[0])) 585 string = MoveToEndOfGlyph(string); 586 587 sStorage[patternLevel++] = string; 588 if (patternLevel > kWildCardMaximum) 589 return false; 590 591 pattern++; 592 if (IsInsideGlyph(pattern[0])) 593 pattern = MoveToEndOfGlyph(pattern); 594 } 595 break; 596 } 597 598 case '[': 599 pattern++; 600 601 if (!MatchesBracketExpression(string, pattern, 602 caseSensitivity)) { 603 if (patternLevel > 0) { 604 pattern = pStorage[--patternLevel]; 605 string = sStorage[patternLevel]; 606 } else 607 return false; 608 } else { 609 // Skip the rest of the bracket: 610 while (*pattern != ']' && *pattern != '\0') 611 pattern++; 612 613 // Failure if no closing bracket; 614 if (*pattern == '\0') 615 return false; 616 617 string++; 618 if (IsInsideGlyph(string[0])) 619 string = MoveToEndOfGlyph(string); 620 pattern++; 621 } 622 break; 623 624 default: 625 { 626 bool equal = false; 627 if (IsGlyph(string[0])) 628 equal = UTF8CharsAreEqual(string, pattern); 629 else 630 equal = CharsAreEqual(*string, *pattern, caseSensitivity); 631 632 if (equal) { 633 pattern++; 634 if (IsInsideGlyph(pattern[0])) 635 pattern = MoveToEndOfGlyph(pattern); 636 string++; 637 if (IsInsideGlyph(string[0])) 638 string = MoveToEndOfGlyph(string); 639 } else if (patternLevel > 0) { 640 pattern = pStorage[--patternLevel]; 641 string = sStorage[patternLevel]; 642 } else 643 return false; 644 break; 645 } 646 } 647 648 if (*pattern == '\0' && *string != '\0' && patternLevel > 0) { 649 pattern = pStorage[--patternLevel]; 650 string = sStorage[patternLevel]; 651 } 652 } 653 654 return *string == '\0' && *pattern == '\0'; 655 } 656 657 658 bool 659 TrackerString::UTF8CharsAreEqual(const char* string1, 660 const char* string2) const 661 { 662 const char* s1 = string1; 663 const char* s2 = string2; 664 665 if (IsStartOfGlyph(*s1) && *s1 == *s2) { 666 s1++; 667 s2++; 668 669 while (IsInsideGlyph(*s1) && *s1 == *s2) { 670 s1++; 671 s2++; 672 } 673 674 return !IsInsideGlyph(*s1) 675 && !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1); 676 } else 677 return false; 678 } 679 680 681 const char* 682 TrackerString::MoveToEndOfGlyph(const char* string) const 683 { 684 const char* ptr = string; 685 686 while (IsInsideGlyph(*ptr)) 687 ptr++; 688 689 return ptr; 690 } 691 692 693 bool 694 TrackerString::IsGlyph(char ch) const 695 { 696 return (ch & 0x80) == 0x80; 697 } 698 699 700 bool 701 TrackerString::IsInsideGlyph(char ch) const 702 { 703 return (ch & 0xC0) == 0x80; 704 } 705 706 707 bool 708 TrackerString::IsStartOfGlyph(char ch) const 709 { 710 return (ch & 0xC0) == 0xC0; 711 } 712