1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 #include "TrackerString.h" 36 37 #include <stdio.h> 38 #include <stdlib.h> 39 40 TrackerString::TrackerString() 41 { 42 } 43 44 45 TrackerString::TrackerString(const char* string) 46 : BString(string) 47 { 48 } 49 50 51 TrackerString::TrackerString(const TrackerString &string) 52 : BString(string) 53 { 54 } 55 56 57 TrackerString::TrackerString(const char* string, int32 maxLength) 58 : BString(string, maxLength) 59 { 60 } 61 62 63 TrackerString::~TrackerString() 64 { 65 } 66 67 68 bool 69 TrackerString::Matches(const char* string, bool caseSensitivity, 70 TrackerStringExpressionType expressionType) const 71 { 72 switch (expressionType) { 73 default: 74 case kNone: 75 return false; 76 77 case kStartsWith: 78 return StartsWith(string, caseSensitivity); 79 80 case kEndsWith: 81 return EndsWith(string, caseSensitivity); 82 83 case kContains: 84 return Contains(string, caseSensitivity); 85 86 case kGlobMatch: 87 return MatchesGlob(string, caseSensitivity); 88 89 case kRegexpMatch: 90 return MatchesRegExp(string, caseSensitivity); 91 } 92 } 93 94 95 bool 96 TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const 97 { 98 BString patternString(pattern); 99 BString textString(String()); 100 101 if (caseSensitivity == false) { 102 patternString.ToLower(); 103 textString.ToLower(); 104 } 105 106 RegExp expression(patternString); 107 108 if (expression.InitCheck() != B_OK) 109 return false; 110 111 return expression.Matches(textString); 112 } 113 114 115 bool 116 TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const 117 { 118 return StringMatchesPattern(String(), string, caseSensitivity); 119 } 120 121 122 bool 123 TrackerString::EndsWith(const char* string, bool caseSensitivity) const 124 { 125 // If "string" is longer than "this", 126 // we should simply return false 127 int32 position = Length() - (int32)strlen(string); 128 if (position < 0) 129 return false; 130 131 if (caseSensitivity) 132 return FindLast(string) == position; 133 else 134 return IFindLast(string) == position; 135 } 136 137 138 bool 139 TrackerString::StartsWith(const char* string, bool caseSensitivity) const 140 { 141 if (caseSensitivity) 142 return FindFirst(string) == 0; 143 else 144 return IFindFirst(string) == 0; 145 } 146 147 148 bool 149 TrackerString::Contains(const char* string, bool caseSensitivity) const 150 { 151 if (caseSensitivity) 152 return FindFirst(string) > -1; 153 else 154 return IFindFirst(string) > -1; 155 } 156 157 158 // About the ?Find* functions: 159 // The leading star here has been compliance with BString, 160 // simplicity and performance. Therefore unncessary copying 161 // has been avoided, as unncessary function calls. 162 // The copying has been avoided by implementing the 163 // ?Find*(const char*) functions rather than 164 // the ?Find*(TrackerString &) functions. 165 // The function calls has been avoided by 166 // inserting a check on the first character 167 // before calling the str*cmp functions. 168 169 170 int32 171 TrackerString::FindFirst(const BString &string) const 172 { 173 return FindFirst(string.String(), 0); 174 } 175 176 177 int32 178 TrackerString::FindFirst(const char* string) const 179 { 180 return FindFirst(string, 0); 181 } 182 183 184 int32 185 TrackerString::FindFirst(const BString &string, int32 fromOffset) const 186 { 187 return FindFirst(string.String(), fromOffset); 188 } 189 190 191 int32 192 TrackerString::FindFirst(const char* string, int32 fromOffset) const 193 { 194 if (!string) 195 return -1; 196 197 int32 length = Length(); 198 uint32 stringLength = strlen(string); 199 200 // The following two checks are required to be compatible 201 // with BString: 202 if (length <= 0) 203 return -1; 204 205 if (stringLength == 0) 206 return fromOffset; 207 208 int32 stop = length - static_cast<int32>(stringLength); 209 int32 start = MAX(0, MIN(fromOffset, stop)); 210 int32 position = -1; 211 212 for (int32 i = start; i <= stop; i++) { 213 if (string[0] == ByteAt(i)) { 214 // This check is to avoid mute str*cmp() calls. Performance. 215 if (strncmp(string, String() + i, stringLength) == 0) { 216 position = i; 217 break; 218 } 219 } 220 } 221 222 return position; 223 } 224 225 226 int32 227 TrackerString::FindFirst(char ch) const 228 { 229 char string[2] = {ch, '\0'}; 230 return FindFirst(string, 0); 231 } 232 233 234 int32 235 TrackerString::FindFirst(char ch, int32 fromOffset) const 236 { 237 char string[2] = {ch, '\0'}; 238 return FindFirst(string, fromOffset); 239 } 240 241 242 int32 243 TrackerString::FindLast(const BString &string) const 244 { 245 return FindLast(string.String(), Length() - 1); 246 } 247 248 249 int32 250 TrackerString::FindLast(const char* string) const 251 { 252 return FindLast(string, Length() - 1); 253 } 254 255 256 int32 257 TrackerString::FindLast(const BString &string, int32 beforeOffset) const 258 { 259 return FindLast(string.String(), beforeOffset); 260 } 261 262 263 int32 264 TrackerString::FindLast(const char* string, int32 beforeOffset) const 265 { 266 if (!string) 267 return -1; 268 269 int32 length = Length(); 270 uint32 stringLength = strlen(string); 271 272 // The following two checks are required to be compatible 273 // with BString: 274 if (length <= 0) 275 return -1; 276 277 if (stringLength == 0) 278 return beforeOffset; 279 280 int32 start = MIN(beforeOffset, 281 length - static_cast<int32>(stringLength)); 282 int32 stop = 0; 283 int32 position = -1; 284 285 for (int32 i = start; i >= stop; i--) { 286 if (string[0] == ByteAt(i)) { 287 // This check is to avoid mute str*cmp() calls. Performance. 288 if (strncmp(string, String() + i, stringLength) == 0) { 289 position = i; 290 break; 291 } 292 } 293 } 294 295 return position; 296 } 297 298 299 int32 300 TrackerString::FindLast(char ch) const 301 { 302 char string[2] = {ch, '\0'}; 303 return FindLast(string, Length() - 1); 304 } 305 306 307 int32 308 TrackerString::FindLast(char ch, int32 beforeOffset) const 309 { 310 char string[2] = {ch, '\0'}; 311 return FindLast(string, beforeOffset); 312 } 313 314 315 int32 316 TrackerString::IFindFirst(const BString &string) const 317 { 318 return IFindFirst(string.String(), 0); 319 } 320 321 322 int32 323 TrackerString::IFindFirst(const char* string) const 324 { 325 return IFindFirst(string, 0); 326 } 327 328 329 int32 330 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const 331 { 332 return IFindFirst(string.String(), fromOffset); 333 } 334 335 336 int32 337 TrackerString::IFindFirst(const char* string, int32 fromOffset) const 338 { 339 if (!string) 340 return -1; 341 342 int32 length = Length(); 343 uint32 stringLength = strlen(string); 344 345 // The following two checks are required to be compatible 346 // with BString: 347 if (length <= 0) 348 return -1; 349 350 if (stringLength == 0) 351 return fromOffset; 352 353 int32 stop = length - static_cast<int32>(stringLength); 354 int32 start = MAX(0, MIN(fromOffset, stop)); 355 int32 position = -1; 356 357 for (int32 i = start; i <= stop; i++) { 358 if (tolower(string[0]) == tolower(ByteAt(i))) { 359 // This check is to avoid mute str*cmp() calls. Performance. 360 if (strncasecmp(string, String() + i, stringLength) == 0) { 361 position = i; 362 break; 363 } 364 } 365 } 366 367 return position; 368 } 369 370 371 int32 372 TrackerString::IFindLast(const BString &string) const 373 { 374 return IFindLast(string.String(), Length() - 1); 375 } 376 377 378 int32 379 TrackerString::IFindLast(const char* string) const 380 { 381 return IFindLast(string, Length() - 1); 382 } 383 384 385 int32 386 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const 387 { 388 return IFindLast(string.String(), beforeOffset); 389 } 390 391 392 int32 393 TrackerString::IFindLast(const char* string, int32 beforeOffset) const 394 { 395 if (!string) 396 return -1; 397 398 int32 length = Length(); 399 uint32 stringLength = strlen(string); 400 401 // The following two checks are required to be compatible 402 // with BString: 403 if (length <= 0) 404 return -1; 405 406 if (stringLength == 0) 407 return beforeOffset; 408 409 int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength)); 410 int32 stop = 0; 411 int32 position = -1; 412 413 for (int32 i = start; i >= stop; i--) { 414 if (tolower(string[0]) == tolower(ByteAt(i))) { 415 // This check is to avoid mute str*cmp() calls. Performance. 416 if (strncasecmp(string, String() + i, stringLength) == 0) { 417 position = i; 418 break; 419 } 420 } 421 } 422 423 return position; 424 } 425 426 427 // MatchesBracketExpression() assumes 'pattern' to point to the 428 // character following the initial '[' in a bracket expression. 429 // The reason is that an encountered '[' will be taken literally. 430 // (Makes it possible to match a '[' with the expression '[[]'). 431 bool 432 TrackerString::MatchesBracketExpression(const char* string, 433 const char* pattern, bool caseSensitivity) const 434 { 435 bool GlyphMatch = IsStartOfGlyph(string[0]); 436 437 if (IsInsideGlyph(string[0])) 438 return false; 439 440 char testChar = ConditionalToLower(string[0], caseSensitivity); 441 bool match = false; 442 443 bool inverse = *pattern == '^' || *pattern == '!'; 444 // We allow both ^ and ! as a initial inverting character. 445 446 if (inverse) 447 pattern++; 448 449 while (!match && *pattern != ']' && *pattern != '\0') { 450 switch (*pattern) { 451 case '-': 452 { 453 char start = ConditionalToLower(*(pattern - 1), 454 caseSensitivity); 455 char stop = ConditionalToLower(*(pattern + 1), 456 caseSensitivity); 457 458 if (IsGlyph(start) || IsGlyph(stop)) 459 return false; 460 // Not a valid range! 461 462 if ((islower(start) && islower(stop)) 463 || (isupper(start) && isupper(stop)) 464 || (isdigit(start) && isdigit(stop))) { 465 // Make sure 'start' and 'stop' are of the same type. 466 match = start <= testChar && testChar <= stop; 467 } else { 468 // If no valid range, we've got a syntax error. 469 return false; 470 } 471 472 break; 473 } 474 475 default: 476 if (GlyphMatch) 477 match = UTF8CharsAreEqual(string, pattern); 478 else 479 match = CharsAreEqual(testChar, *pattern, caseSensitivity); 480 break; 481 } 482 483 if (!match) { 484 pattern++; 485 if (IsInsideGlyph(pattern[0])) 486 pattern = MoveToEndOfGlyph(pattern); 487 } 488 } 489 490 // Consider an unmatched bracket a failure 491 // (i.e. when detecting a '\0' instead of a ']'.) 492 if (*pattern == '\0') 493 return false; 494 495 return (match ^ inverse) != 0; 496 } 497 498 499 bool 500 TrackerString::StringMatchesPattern(const char* string, const char* pattern, 501 bool caseSensitivity) const 502 { 503 // One could do this dynamically, counting the number of *'s, 504 // but then you have to free them at every exit of this 505 // function, which is awkward and ugly. 506 const int32 kWildCardMaximum = 100; 507 const char* pStorage[kWildCardMaximum]; 508 const char* sStorage[kWildCardMaximum]; 509 510 int32 patternLevel = 0; 511 512 if (string == NULL || pattern == NULL) 513 return false; 514 515 while (*pattern != '\0') { 516 switch (*pattern) { 517 case '?': 518 pattern++; 519 string++; 520 if (IsInsideGlyph(string[0])) 521 string = MoveToEndOfGlyph(string); 522 523 break; 524 525 case '*': 526 { 527 // Collapse any ** and *? constructions: 528 while (*pattern == '*' || *pattern == '?') { 529 pattern++; 530 if (*pattern == '?' && string != '\0') { 531 string++; 532 if (IsInsideGlyph(string[0])) 533 string = MoveToEndOfGlyph(string); 534 } 535 } 536 537 if (*pattern == '\0') { 538 // An ending * matches all strings. 539 return true; 540 } 541 542 bool match = false; 543 const char* pBefore = pattern - 1; 544 545 if (*pattern == '[') { 546 pattern++; 547 548 while (!match && *string != '\0') { 549 match = MatchesBracketExpression(string++, pattern, 550 caseSensitivity); 551 } 552 553 while (*pattern != ']' && *pattern != '\0') { 554 // Skip the rest of the bracket: 555 pattern++; 556 } 557 558 if (*pattern == '\0') { 559 // Failure if no closing bracket; 560 return false; 561 } 562 } else { 563 // No bracket, just one character: 564 while (!match && *string != '\0') { 565 if (IsGlyph(string[0])) 566 match = UTF8CharsAreEqual(string++, pattern); 567 else { 568 match = CharsAreEqual(*string++, *pattern, 569 caseSensitivity); 570 } 571 } 572 } 573 574 if (!match) 575 return false; 576 else { 577 pStorage[patternLevel] = pBefore; 578 if (IsInsideGlyph(string[0])) 579 string = MoveToEndOfGlyph(string); 580 581 sStorage[patternLevel++] = string; 582 if (patternLevel > kWildCardMaximum) 583 return false; 584 585 pattern++; 586 if (IsInsideGlyph(pattern[0])) 587 pattern = MoveToEndOfGlyph(pattern); 588 } 589 break; 590 } 591 592 case '[': 593 pattern++; 594 595 if (!MatchesBracketExpression(string, pattern, 596 caseSensitivity)) { 597 if (patternLevel > 0) { 598 pattern = pStorage[--patternLevel]; 599 string = sStorage[patternLevel]; 600 } else 601 return false; 602 } else { 603 // Skip the rest of the bracket: 604 while (*pattern != ']' && *pattern != '\0') 605 pattern++; 606 607 // Failure if no closing bracket; 608 if (*pattern == '\0') 609 return false; 610 611 string++; 612 if (IsInsideGlyph(string[0])) 613 string = MoveToEndOfGlyph(string); 614 pattern++; 615 } 616 break; 617 618 default: 619 { 620 bool equal = false; 621 if (IsGlyph(string[0])) 622 equal = UTF8CharsAreEqual(string, pattern); 623 else 624 equal = CharsAreEqual(*string, *pattern, caseSensitivity); 625 626 if (equal) { 627 pattern++; 628 if (IsInsideGlyph(pattern[0])) 629 pattern = MoveToEndOfGlyph(pattern); 630 string++; 631 if (IsInsideGlyph(string[0])) 632 string = MoveToEndOfGlyph(string); 633 } else if (patternLevel > 0) { 634 pattern = pStorage[--patternLevel]; 635 string = sStorage[patternLevel]; 636 } else 637 return false; 638 639 break; 640 } 641 } 642 643 if (*pattern == '\0' && *string != '\0' && patternLevel > 0) { 644 pattern = pStorage[--patternLevel]; 645 string = sStorage[patternLevel]; 646 } 647 } 648 649 return *string == '\0' && *pattern == '\0'; 650 } 651 652 653 bool 654 TrackerString::UTF8CharsAreEqual(const char* string1, 655 const char* string2) const 656 { 657 const char* s1 = string1; 658 const char* s2 = string2; 659 660 if (IsStartOfGlyph(*s1) && *s1 == *s2) { 661 s1++; 662 s2++; 663 664 while (IsInsideGlyph(*s1) && *s1 == *s2) { 665 s1++; 666 s2++; 667 } 668 669 return !IsInsideGlyph(*s1) 670 && !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1); 671 } else 672 return false; 673 } 674 675 676 const char* 677 TrackerString::MoveToEndOfGlyph(const char* string) const 678 { 679 const char* ptr = string; 680 681 while (IsInsideGlyph(*ptr)) 682 ptr++; 683 684 return ptr; 685 } 686 687 688 bool 689 TrackerString::IsGlyph(char ch) const 690 { 691 return (ch & 0x80) == 0x80; 692 } 693 694 695 bool 696 TrackerString::IsInsideGlyph(char ch) const 697 { 698 return (ch & 0xC0) == 0x80; 699 } 700 701 702 bool 703 TrackerString::IsStartOfGlyph(char ch) const 704 { 705 return (ch & 0xC0) == 0xC0; 706 } 707