1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 #include "TrackerString.h" 36 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <strings.h> 40 41 42 // #pragma mark - TrackerString 43 44 45 TrackerString::TrackerString() 46 { 47 } 48 49 50 TrackerString::TrackerString(const char* string) 51 : 52 BString(string) 53 { 54 } 55 56 57 TrackerString::TrackerString(const TrackerString &string) 58 : 59 BString(string) 60 { 61 } 62 63 64 TrackerString::TrackerString(const char* string, int32 maxLength) 65 : 66 BString(string, maxLength) 67 { 68 } 69 70 71 TrackerString::~TrackerString() 72 { 73 } 74 75 76 bool 77 TrackerString::Matches(const char* string, bool caseSensitivity, 78 TrackerStringExpressionType expressionType) const 79 { 80 switch (expressionType) { 81 default: 82 case kNone: 83 return false; 84 85 case kStartsWith: 86 return StartsWith(string, caseSensitivity); 87 88 case kEndsWith: 89 return EndsWith(string, caseSensitivity); 90 91 case kContains: 92 return Contains(string, caseSensitivity); 93 94 case kGlobMatch: 95 return MatchesGlob(string, caseSensitivity); 96 97 case kRegexpMatch: 98 return MatchesRegExp(string, caseSensitivity); 99 } 100 } 101 102 103 bool 104 TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const 105 { 106 BString patternString(pattern); 107 BString textString(String()); 108 109 if (caseSensitivity == false) { 110 patternString.ToLower(); 111 textString.ToLower(); 112 } 113 114 RegExp expression(patternString); 115 116 if (expression.InitCheck() != B_OK) 117 return false; 118 119 return expression.Matches(textString); 120 } 121 122 123 bool 124 TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const 125 { 126 return StringMatchesPattern(String(), string, caseSensitivity); 127 } 128 129 130 bool 131 TrackerString::EndsWith(const char* string, bool caseSensitivity) const 132 { 133 // If "string" is longer than "this", 134 // we should simply return false 135 int32 position = Length() - (int32)strlen(string); 136 if (position < 0) 137 return false; 138 139 if (caseSensitivity) 140 return FindLast(string) == position; 141 else 142 return IFindLast(string) == position; 143 } 144 145 146 bool 147 TrackerString::StartsWith(const char* string, bool caseSensitivity) const 148 { 149 if (caseSensitivity) 150 return FindFirst(string) == 0; 151 else 152 return IFindFirst(string) == 0; 153 } 154 155 156 bool 157 TrackerString::Contains(const char* string, bool caseSensitivity) const 158 { 159 if (caseSensitivity) 160 return FindFirst(string) > -1; 161 else 162 return IFindFirst(string) > -1; 163 } 164 165 166 // About the ?Find* functions: 167 // The leading star here has been compliance with BString, 168 // simplicity and performance. Therefore unncessary copying 169 // has been avoided, as unncessary function calls. 170 // The copying has been avoided by implementing the 171 // ?Find*(const char*) functions rather than 172 // the ?Find*(TrackerString &) functions. 173 // The function calls has been avoided by 174 // inserting a check on the first character 175 // before calling the str*cmp functions. 176 177 178 int32 179 TrackerString::FindFirst(const BString &string) const 180 { 181 return FindFirst(string.String(), 0); 182 } 183 184 185 int32 186 TrackerString::FindFirst(const char* string) const 187 { 188 return FindFirst(string, 0); 189 } 190 191 192 int32 193 TrackerString::FindFirst(const BString &string, int32 fromOffset) const 194 { 195 return FindFirst(string.String(), fromOffset); 196 } 197 198 199 int32 200 TrackerString::FindFirst(const char* string, int32 fromOffset) const 201 { 202 if (string == NULL) 203 return -1; 204 205 int32 length = Length(); 206 uint32 stringLength = strlen(string); 207 208 // The following two checks are required to be compatible 209 // with BString: 210 if (length <= 0) 211 return -1; 212 213 if (stringLength == 0) 214 return fromOffset; 215 216 int32 stop = length - static_cast<int32>(stringLength); 217 int32 start = MAX(0, MIN(fromOffset, stop)); 218 int32 position = -1; 219 220 for (int32 i = start; i <= stop; i++) { 221 if (string[0] == ByteAt(i)) { 222 // This check is to avoid mute str*cmp() calls. Performance. 223 if (strncmp(string, String() + i, stringLength) == 0) { 224 position = i; 225 break; 226 } 227 } 228 } 229 230 return position; 231 } 232 233 234 int32 235 TrackerString::FindFirst(char ch) const 236 { 237 char string[2] = {ch, '\0'}; 238 return FindFirst(string, 0); 239 } 240 241 242 int32 243 TrackerString::FindFirst(char ch, int32 fromOffset) const 244 { 245 char string[2] = {ch, '\0'}; 246 return FindFirst(string, fromOffset); 247 } 248 249 250 int32 251 TrackerString::FindLast(const BString &string) const 252 { 253 return FindLast(string.String(), Length() - 1); 254 } 255 256 257 int32 258 TrackerString::FindLast(const char* string) const 259 { 260 return FindLast(string, Length() - 1); 261 } 262 263 264 int32 265 TrackerString::FindLast(const BString &string, int32 beforeOffset) const 266 { 267 return FindLast(string.String(), beforeOffset); 268 } 269 270 271 int32 272 TrackerString::FindLast(const char* string, int32 beforeOffset) const 273 { 274 if (string == NULL) 275 return -1; 276 277 int32 length = Length(); 278 uint32 stringLength = strlen(string); 279 280 // The following two checks are required to be compatible 281 // with BString: 282 if (length <= 0) 283 return -1; 284 285 if (stringLength == 0) 286 return beforeOffset; 287 288 int32 start = MIN(beforeOffset, 289 length - static_cast<int32>(stringLength)); 290 int32 stop = 0; 291 int32 position = -1; 292 293 for (int32 i = start; i >= stop; i--) { 294 if (string[0] == ByteAt(i)) { 295 // This check is to avoid mute str*cmp() calls. Performance. 296 if (strncmp(string, String() + i, stringLength) == 0) { 297 position = i; 298 break; 299 } 300 } 301 } 302 303 return position; 304 } 305 306 307 int32 308 TrackerString::FindLast(char ch) const 309 { 310 char string[2] = {ch, '\0'}; 311 return FindLast(string, Length() - 1); 312 } 313 314 315 int32 316 TrackerString::FindLast(char ch, int32 beforeOffset) const 317 { 318 char string[2] = {ch, '\0'}; 319 return FindLast(string, beforeOffset); 320 } 321 322 323 int32 324 TrackerString::IFindFirst(const BString &string) const 325 { 326 return IFindFirst(string.String(), 0); 327 } 328 329 330 int32 331 TrackerString::IFindFirst(const char* string) const 332 { 333 return IFindFirst(string, 0); 334 } 335 336 337 int32 338 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const 339 { 340 return IFindFirst(string.String(), fromOffset); 341 } 342 343 344 int32 345 TrackerString::IFindFirst(const char* string, int32 fromOffset) const 346 { 347 if (string == NULL) 348 return -1; 349 350 int32 length = Length(); 351 uint32 stringLength = strlen(string); 352 353 // The following two checks are required to be compatible 354 // with BString: 355 if (length <= 0) 356 return -1; 357 358 if (stringLength == 0) 359 return fromOffset; 360 361 int32 stop = length - static_cast<int32>(stringLength); 362 int32 start = MAX(0, MIN(fromOffset, stop)); 363 int32 position = -1; 364 365 for (int32 i = start; i <= stop; i++) { 366 if (tolower(string[0]) == tolower(ByteAt(i))) { 367 // This check is to avoid mute str*cmp() calls. Performance. 368 if (strncasecmp(string, String() + i, stringLength) == 0) { 369 position = i; 370 break; 371 } 372 } 373 } 374 375 return position; 376 } 377 378 379 int32 380 TrackerString::IFindLast(const BString &string) const 381 { 382 return IFindLast(string.String(), Length() - 1); 383 } 384 385 386 int32 387 TrackerString::IFindLast(const char* string) const 388 { 389 return IFindLast(string, Length() - 1); 390 } 391 392 393 int32 394 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const 395 { 396 return IFindLast(string.String(), beforeOffset); 397 } 398 399 400 int32 401 TrackerString::IFindLast(const char* string, int32 beforeOffset) const 402 { 403 if (string == NULL) 404 return -1; 405 406 int32 length = Length(); 407 uint32 stringLength = strlen(string); 408 409 // The following two checks are required to be compatible 410 // with BString: 411 if (length <= 0) 412 return -1; 413 414 if (stringLength == 0) 415 return beforeOffset; 416 417 int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength)); 418 int32 stop = 0; 419 int32 position = -1; 420 421 for (int32 i = start; i >= stop; i--) { 422 if (tolower(string[0]) == tolower(ByteAt(i))) { 423 // This check is to avoid mute str*cmp() calls. Performance. 424 if (strncasecmp(string, String() + i, stringLength) == 0) { 425 position = i; 426 break; 427 } 428 } 429 } 430 431 return position; 432 } 433 434 435 // MatchesBracketExpression() assumes 'pattern' to point to the 436 // character following the initial '[' in a bracket expression. 437 // The reason is that an encountered '[' will be taken literally. 438 // (Makes it possible to match a '[' with the expression '[[]'). 439 bool 440 TrackerString::MatchesBracketExpression(const char* string, 441 const char* pattern, bool caseSensitivity) const 442 { 443 bool GlyphMatch = IsStartOfGlyph(string[0]); 444 445 if (IsInsideGlyph(string[0])) 446 return false; 447 448 char testChar = ConditionalToLower(string[0], caseSensitivity); 449 bool match = false; 450 451 bool inverse = *pattern == '^' || *pattern == '!'; 452 // We allow both ^ and ! as a initial inverting character. 453 454 if (inverse) 455 pattern++; 456 457 while (!match && *pattern != ']' && *pattern != '\0') { 458 switch (*pattern) { 459 case '-': 460 { 461 char start = ConditionalToLower(*(pattern - 1), 462 caseSensitivity); 463 char stop = ConditionalToLower(*(pattern + 1), 464 caseSensitivity); 465 466 if (IsGlyph(start) || IsGlyph(stop)) 467 return false; 468 // Not a valid range! 469 470 if ((islower(start) && islower(stop)) 471 || (isupper(start) && isupper(stop)) 472 || (isdigit(start) && isdigit(stop))) { 473 // Make sure 'start' and 'stop' are of the same type. 474 match = start <= testChar && testChar <= stop; 475 } else { 476 // If no valid range, we've got a syntax error. 477 return false; 478 } 479 480 break; 481 } 482 483 default: 484 if (GlyphMatch) 485 match = UTF8CharsAreEqual(string, pattern); 486 else 487 match = CharsAreEqual(testChar, *pattern, caseSensitivity); 488 break; 489 } 490 491 if (!match) { 492 pattern++; 493 if (IsInsideGlyph(pattern[0])) 494 pattern = MoveToEndOfGlyph(pattern); 495 } 496 } 497 498 // Consider an unmatched bracket a failure 499 // (i.e. when detecting a '\0' instead of a ']'.) 500 if (*pattern == '\0') 501 return false; 502 503 return (match ^ inverse) != 0; 504 } 505 506 507 bool 508 TrackerString::StringMatchesPattern(const char* string, const char* pattern, 509 bool caseSensitivity) const 510 { 511 // One could do this dynamically, counting the number of *'s, 512 // but then you have to free them at every exit of this 513 // function, which is awkward and ugly. 514 const int32 kWildCardMaximum = 100; 515 const char* pStorage[kWildCardMaximum]; 516 const char* sStorage[kWildCardMaximum]; 517 518 int32 patternLevel = 0; 519 520 if (string == NULL || pattern == NULL) 521 return false; 522 523 while (*pattern != '\0') { 524 switch (*pattern) { 525 case '?': 526 pattern++; 527 string++; 528 if (IsInsideGlyph(string[0])) 529 string = MoveToEndOfGlyph(string); 530 break; 531 532 case '*': 533 { 534 // Collapse any ** and *? constructions: 535 while (*pattern == '*' || *pattern == '?') { 536 pattern++; 537 if (*pattern == '?' && string != '\0') { 538 string++; 539 if (IsInsideGlyph(string[0])) 540 string = MoveToEndOfGlyph(string); 541 } 542 } 543 544 if (*pattern == '\0') { 545 // An ending * matches all strings. 546 return true; 547 } 548 549 bool match = false; 550 const char* pBefore = pattern - 1; 551 552 if (*pattern == '[') { 553 pattern++; 554 555 while (!match && *string != '\0') { 556 match = MatchesBracketExpression(string++, pattern, 557 caseSensitivity); 558 } 559 560 while (*pattern != ']' && *pattern != '\0') { 561 // Skip the rest of the bracket: 562 pattern++; 563 } 564 565 if (*pattern == '\0') { 566 // Failure if no closing bracket; 567 return false; 568 } 569 } else { 570 // No bracket, just one character: 571 while (!match && *string != '\0') { 572 if (IsGlyph(string[0])) 573 match = UTF8CharsAreEqual(string++, pattern); 574 else { 575 match = CharsAreEqual(*string++, *pattern, 576 caseSensitivity); 577 } 578 } 579 } 580 581 if (!match) 582 return false; 583 else { 584 pStorage[patternLevel] = pBefore; 585 if (IsInsideGlyph(string[0])) 586 string = MoveToEndOfGlyph(string); 587 588 sStorage[patternLevel++] = string; 589 if (patternLevel > kWildCardMaximum) 590 return false; 591 592 pattern++; 593 if (IsInsideGlyph(pattern[0])) 594 pattern = MoveToEndOfGlyph(pattern); 595 } 596 break; 597 } 598 599 case '[': 600 pattern++; 601 602 if (!MatchesBracketExpression(string, pattern, 603 caseSensitivity)) { 604 if (patternLevel > 0) { 605 pattern = pStorage[--patternLevel]; 606 string = sStorage[patternLevel]; 607 } else 608 return false; 609 } else { 610 // Skip the rest of the bracket: 611 while (*pattern != ']' && *pattern != '\0') 612 pattern++; 613 614 // Failure if no closing bracket; 615 if (*pattern == '\0') 616 return false; 617 618 string++; 619 if (IsInsideGlyph(string[0])) 620 string = MoveToEndOfGlyph(string); 621 pattern++; 622 } 623 break; 624 625 default: 626 { 627 bool equal = false; 628 if (IsGlyph(string[0])) 629 equal = UTF8CharsAreEqual(string, pattern); 630 else 631 equal = CharsAreEqual(*string, *pattern, caseSensitivity); 632 633 if (equal) { 634 pattern++; 635 if (IsInsideGlyph(pattern[0])) 636 pattern = MoveToEndOfGlyph(pattern); 637 string++; 638 if (IsInsideGlyph(string[0])) 639 string = MoveToEndOfGlyph(string); 640 } else if (patternLevel > 0) { 641 pattern = pStorage[--patternLevel]; 642 string = sStorage[patternLevel]; 643 } else 644 return false; 645 break; 646 } 647 } 648 649 if (*pattern == '\0' && *string != '\0' && patternLevel > 0) { 650 pattern = pStorage[--patternLevel]; 651 string = sStorage[patternLevel]; 652 } 653 } 654 655 return *string == '\0' && *pattern == '\0'; 656 } 657 658 659 bool 660 TrackerString::UTF8CharsAreEqual(const char* string1, 661 const char* string2) const 662 { 663 const char* s1 = string1; 664 const char* s2 = string2; 665 666 if (IsStartOfGlyph(*s1) && *s1 == *s2) { 667 s1++; 668 s2++; 669 670 while (IsInsideGlyph(*s1) && *s1 == *s2) { 671 s1++; 672 s2++; 673 } 674 675 return !IsInsideGlyph(*s1) 676 && !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1); 677 } else 678 return false; 679 } 680 681 682 const char* 683 TrackerString::MoveToEndOfGlyph(const char* string) const 684 { 685 const char* ptr = string; 686 687 while (IsInsideGlyph(*ptr)) 688 ptr++; 689 690 return ptr; 691 } 692 693 694 bool 695 TrackerString::IsGlyph(char ch) const 696 { 697 return (ch & 0x80) == 0x80; 698 } 699 700 701 bool 702 TrackerString::IsInsideGlyph(char ch) const 703 { 704 return (ch & 0xC0) == 0x80; 705 } 706 707 708 bool 709 TrackerString::IsStartOfGlyph(char ch) const 710 { 711 return (ch & 0xC0) == 0xC0; 712 } 713