1 /* 2 * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 /*! Query parsing and evaluation 7 8 The pattern matching is roughly based on code originally written 9 by J. Kercheval, and on code written by Kenneth Almquist, though 10 it shares no code. 11 */ 12 13 #include "Query.h" 14 #include "bfs.h" 15 #include "Debug.h" 16 #include "Volume.h" 17 #include "Inode.h" 18 #include "BPlusTree.h" 19 #include "Index.h" 20 21 22 // The parser has a very static design, but it will do what is required. 23 // 24 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator 25 // precedence, that is =,!=,>,<,>=,<= .. && .. ||. 26 // Apparently, the "!" (not) can only be used with brackets. 27 // 28 // If you think that there are too few NULL pointer checks in some places 29 // of the code, just read the beginning of the query constructor. 30 // The API is not fully available, just the Query and the Expression class 31 // are. 32 33 34 enum ops { 35 OP_NONE, 36 37 OP_AND, 38 OP_OR, 39 40 OP_EQUATION, 41 // is only used for invalid equations 42 43 OP_EQUAL, 44 OP_UNEQUAL, 45 OP_GREATER_THAN, 46 OP_LESS_THAN, 47 OP_GREATER_THAN_OR_EQUAL, 48 OP_LESS_THAN_OR_EQUAL, 49 }; 50 51 enum match { 52 NO_MATCH = 0, 53 MATCH_OK = 1, 54 55 MATCH_BAD_PATTERN = -2, 56 MATCH_INVALID_CHARACTER 57 }; 58 59 // return values from isValidPattern() 60 enum { 61 PATTERN_INVALID_ESCAPE = -3, 62 PATTERN_INVALID_RANGE, 63 PATTERN_INVALID_SET 64 }; 65 66 union value { 67 int64 Int64; 68 uint64 Uint64; 69 int32 Int32; 70 uint32 Uint32; 71 float Float; 72 double Double; 73 char String[INODE_FILE_NAME_LENGTH]; 74 }; 75 76 // B_MIME_STRING_TYPE is defined in storage/Mime.h, but we 77 // don't need the whole file here; the type can't change anyway 78 #ifndef _MIME_H 79 # define B_MIME_STRING_TYPE 'MIMS' 80 #endif 81 82 /*! Abstract base class for the operator/equation classes. 83 */ 84 class Term { 85 public: 86 Term(int8 op) : fOp(op), fParent(NULL) {} 87 virtual ~Term() {} 88 89 int8 Op() const { return fOp; } 90 91 void SetParent(Term* parent) { fParent = parent; } 92 Term* Parent() const { return fParent; } 93 94 virtual status_t Match(Inode* inode, const char* attribute = NULL, 95 int32 type = 0, const uint8* key = NULL, 96 size_t size = 0) = 0; 97 virtual void Complement() = 0; 98 99 virtual void CalculateScore(Index& index) = 0; 100 virtual int32 Score() const = 0; 101 102 virtual status_t InitCheck() = 0; 103 104 #ifdef DEBUG 105 virtual void PrintToStream() = 0; 106 #endif 107 108 protected: 109 int8 fOp; 110 Term* fParent; 111 }; 112 113 114 /*! An Equation object represents an "attribute-equation operator-value" pair. 115 116 Although an Equation object is quite independent from the volume on which 117 the query is run, there are some dependencies that are produced while 118 querying: 119 The type/size of the value, the score, and if it has an index or not. 120 So you could run more than one query on the same volume, but it might return 121 wrong values when it runs concurrently on another volume. 122 That's not an issue right now, because we run single-threaded and don't use 123 queries more than once. 124 */ 125 class Equation : public Term { 126 public: 127 Equation(char** expression); 128 virtual ~Equation(); 129 130 virtual status_t InitCheck(); 131 132 status_t ParseQuotedString(char** _start, char** _end); 133 char* CopyString(char* start, char* end); 134 135 virtual status_t Match(Inode* inode, const char* attribute = NULL, 136 int32 type = 0, const uint8* key = NULL, 137 size_t size = 0); 138 virtual void Complement(); 139 140 status_t PrepareQuery(Volume* volume, Index& index, 141 TreeIterator** iterator, bool queryNonIndexed); 142 status_t GetNextMatching(Volume* volume, TreeIterator* iterator, 143 struct dirent* dirent, size_t bufferSize); 144 145 virtual void CalculateScore(Index &index); 146 virtual int32 Score() const { return fScore; } 147 148 #ifdef DEBUG 149 virtual void PrintToStream(); 150 #endif 151 152 private: 153 Equation(const Equation& other); 154 Equation& operator=(const Equation& other); 155 // no implementation 156 157 status_t ConvertValue(type_code type); 158 bool CompareTo(const uint8* value, uint16 size); 159 uint8* Value() const { return (uint8*)&fValue; } 160 status_t MatchEmptyString(); 161 162 char* fAttribute; 163 char* fString; 164 union value fValue; 165 type_code fType; 166 size_t fSize; 167 bool fIsPattern; 168 bool fIsSpecialTime; 169 170 int32 fScore; 171 bool fHasIndex; 172 }; 173 174 175 /*! The Operator class does not represent a generic operator, but only those 176 that combine two equations, namely "or", and "and". 177 */ 178 class Operator : public Term { 179 public: 180 Operator(Term* left, int8 op, Term* right); 181 virtual ~Operator(); 182 183 Term* Left() const { return fLeft; } 184 Term* Right() const { return fRight; } 185 186 virtual status_t Match(Inode* inode, const char* attribute = NULL, 187 int32 type = 0, const uint8* key = NULL, 188 size_t size = 0); 189 virtual void Complement(); 190 191 virtual void CalculateScore(Index& index); 192 virtual int32 Score() const; 193 194 virtual status_t InitCheck(); 195 196 #ifdef DEBUG 197 virtual void PrintToStream(); 198 #endif 199 200 private: 201 Operator(const Operator& other); 202 Operator& operator=(const Operator& other); 203 // no implementation 204 205 Term* fLeft; 206 Term* fRight; 207 }; 208 209 210 // #pragma mark - 211 212 213 void 214 skipWhitespace(char** expr, int32 skip = 0) 215 { 216 char* string = (*expr) + skip; 217 while (*string == ' ' || *string == '\t') string++; 218 *expr = string; 219 } 220 221 222 void 223 skipWhitespaceReverse(char** expr, char* stop) 224 { 225 char* string = *expr; 226 while (string > stop && (*string == ' ' || *string == '\t')) 227 string--; 228 *expr = string; 229 } 230 231 232 // #pragma mark - 233 234 235 uint32 236 utf8ToUnicode(char** string) 237 { 238 uint8* bytes = (uint8*)*string; 239 int32 length; 240 uint8 mask = 0x1f; 241 242 switch (bytes[0] & 0xf0) { 243 case 0xc0: 244 case 0xd0: 245 length = 2; 246 break; 247 case 0xe0: 248 length = 3; 249 break; 250 case 0xf0: 251 mask = 0x0f; 252 length = 4; 253 break; 254 default: 255 // valid 1-byte character 256 // and invalid characters 257 (*string)++; 258 return bytes[0]; 259 } 260 uint32 c = bytes[0] & mask; 261 int32 i = 1; 262 for (; i < length && (bytes[i] & 0x80) > 0; i++) 263 c = (c << 6) | (bytes[i] & 0x3f); 264 265 if (i < length) { 266 // invalid character 267 (*string)++; 268 return (uint32)bytes[0]; 269 } 270 *string += length; 271 return c; 272 } 273 274 275 int32 276 getFirstPatternSymbol(char* string) 277 { 278 char c; 279 280 for (int32 index = 0; (c = *string++); index++) { 281 if (c == '*' || c == '?' || c == '[') 282 return index; 283 } 284 return -1; 285 } 286 287 288 bool 289 isPattern(char* string) 290 { 291 return getFirstPatternSymbol(string) >= 0 ? true : false; 292 } 293 294 295 status_t 296 isValidPattern(char* pattern) 297 { 298 while (*pattern) { 299 switch (*pattern++) { 300 case '\\': 301 // the escape character must not be at the end of the pattern 302 if (!*pattern++) 303 return PATTERN_INVALID_ESCAPE; 304 break; 305 306 case '[': 307 if (pattern[0] == ']' || !pattern[0]) 308 return PATTERN_INVALID_SET; 309 310 while (*pattern != ']') { 311 if (*pattern == '\\' && !*++pattern) 312 return PATTERN_INVALID_ESCAPE; 313 314 if (!*pattern) 315 return PATTERN_INVALID_SET; 316 317 if (pattern[0] == '-' && pattern[1] == '-') 318 return PATTERN_INVALID_RANGE; 319 320 pattern++; 321 } 322 break; 323 } 324 } 325 return B_OK; 326 } 327 328 329 /*! Matches the string against the given wildcard pattern. 330 Returns either MATCH_OK, or NO_MATCH when everything went fine, or 331 values < 0 (see enum at the top of Query.cpp) if an error occurs. 332 */ 333 status_t 334 matchString(char* pattern, char* string) 335 { 336 while (*pattern) { 337 // end of string == valid end of pattern? 338 if (!string[0]) { 339 while (pattern[0] == '*') 340 pattern++; 341 return !pattern[0] ? MATCH_OK : NO_MATCH; 342 } 343 344 switch (*pattern++) { 345 case '?': 346 { 347 // match exactly one UTF-8 character; we are 348 // not interested in the result 349 utf8ToUnicode(&string); 350 break; 351 } 352 353 case '*': 354 { 355 // compact pattern 356 while (true) { 357 if (pattern[0] == '?') { 358 if (!*++string) 359 return NO_MATCH; 360 } else if (pattern[0] != '*') 361 break; 362 363 pattern++; 364 } 365 366 // if the pattern is done, we have matched the string 367 if (!pattern[0]) 368 return MATCH_OK; 369 370 while(true) { 371 // we have removed all occurences of '*' and '?' 372 if (pattern[0] == string[0] 373 || pattern[0] == '[' 374 || pattern[0] == '\\') { 375 status_t status = matchString(pattern, string); 376 if (status < B_OK || status == MATCH_OK) 377 return status; 378 } 379 380 // we could be nice here and just jump to the next 381 // UTF-8 character - but we wouldn't gain that much 382 // and it'd be slower (since we're checking for 383 // equality before entering the recursion) 384 if (!*++string) 385 return NO_MATCH; 386 } 387 break; 388 } 389 390 case '[': 391 { 392 bool invert = false; 393 if (pattern[0] == '^' || pattern[0] == '!') { 394 invert = true; 395 pattern++; 396 } 397 398 if (!pattern[0] || pattern[0] == ']') 399 return MATCH_BAD_PATTERN; 400 401 uint32 c = utf8ToUnicode(&string); 402 bool matched = false; 403 404 while (pattern[0] != ']') { 405 if (!pattern[0]) 406 return MATCH_BAD_PATTERN; 407 408 if (pattern[0] == '\\') 409 pattern++; 410 411 uint32 first = utf8ToUnicode(&pattern); 412 413 // Does this character match, or is this a range? 414 if (first == c) { 415 matched = true; 416 break; 417 } else if (pattern[0] == '-' && pattern[1] != ']' 418 && pattern[1]) { 419 pattern++; 420 421 if (pattern[0] == '\\') { 422 pattern++; 423 if (!pattern[0]) 424 return MATCH_BAD_PATTERN; 425 } 426 uint32 last = utf8ToUnicode(&pattern); 427 428 if (c >= first && c <= last) { 429 matched = true; 430 break; 431 } 432 } 433 } 434 435 if (invert) 436 matched = !matched; 437 438 if (matched) { 439 while (pattern[0] != ']') { 440 if (!pattern[0]) 441 return MATCH_BAD_PATTERN; 442 pattern++; 443 } 444 pattern++; 445 break; 446 } 447 return NO_MATCH; 448 } 449 450 case '\\': 451 if (!pattern[0]) 452 return MATCH_BAD_PATTERN; 453 // supposed to fall through 454 default: 455 if (pattern[-1] != string[0]) 456 return NO_MATCH; 457 string++; 458 break; 459 } 460 } 461 462 if (string[0]) 463 return NO_MATCH; 464 465 return MATCH_OK; 466 } 467 468 469 // #pragma mark - 470 471 472 Equation::Equation(char** expr) 473 : Term(OP_EQUATION), 474 fAttribute(NULL), 475 fString(NULL), 476 fType(0), 477 fIsPattern(false) 478 { 479 char* string = *expr; 480 char* start = string; 481 char* end = NULL; 482 483 // Since the equation is the integral part of any query, we're just parsing 484 // the whole thing here. 485 // The whitespace at the start is already removed in 486 // Expression::ParseEquation() 487 488 if (*start == '"' || *start == '\'') { 489 // string is quoted (start has to be on the beginning of a string) 490 if (ParseQuotedString(&start, &end) < B_OK) 491 return; 492 493 // set string to a valid start of the equation symbol 494 string = end + 2; 495 skipWhitespace(&string); 496 if (*string != '=' && *string != '<' && *string != '>' 497 && *string != '!') { 498 *expr = string; 499 return; 500 } 501 } else { 502 // search the (in)equation for the actual equation symbol (and for other operators 503 // in case the equation is malformed) 504 while (*string && *string != '=' && *string != '<' && *string != '>' 505 && *string != '!' && *string != '&' && *string != '|') { 506 string++; 507 } 508 509 // get the attribute string (and trim whitespace), in case 510 // the string was not quoted 511 end = string - 1; 512 skipWhitespaceReverse(&end, start); 513 } 514 515 // attribute string is empty (which is not allowed) 516 if (start > end) 517 return; 518 519 // At this point, "start" points to the beginning of the string, "end" 520 // points to the last character of the string, and "string" points to the 521 // first character of the equation symbol 522 523 // test for the right symbol (as this doesn't need any memory) 524 switch (*string) { 525 case '=': 526 fOp = OP_EQUAL; 527 break; 528 case '>': 529 fOp = *(string + 1) == '=' 530 ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN; 531 break; 532 case '<': 533 fOp = *(string + 1) == '=' 534 ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN; 535 break; 536 case '!': 537 if (*(string + 1) != '=') 538 return; 539 fOp = OP_UNEQUAL; 540 break; 541 542 // any invalid characters will be rejected 543 default: 544 *expr = string; 545 return; 546 } 547 548 // lets change "start" to point to the first character after the symbol 549 if (*(string + 1) == '=') 550 string++; 551 string++; 552 skipWhitespace(&string); 553 554 // allocate & copy the attribute string 555 556 fAttribute = CopyString(start, end); 557 if (fAttribute == NULL) 558 return; 559 560 start = string; 561 if (*start == '"' || *start == '\'') { 562 // string is quoted (start has to be on the beginning of a string) 563 if (ParseQuotedString(&start, &end) < B_OK) 564 return; 565 566 string = end + 2; 567 skipWhitespace(&string); 568 } else { 569 while (*string && *string != '&' && *string != '|' && *string != ')') 570 string++; 571 572 end = string - 1; 573 skipWhitespaceReverse(&end, start); 574 } 575 576 // at this point, "start" will point to the first character of the value, 577 // "end" will point to its last character, and "start" to the first non- 578 // whitespace character after the value string 579 580 fString = CopyString(start, end); 581 if (fString == NULL) 582 return; 583 584 // patterns are only allowed for these operations (and strings) 585 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) { 586 fIsPattern = isPattern(fString); 587 if (fIsPattern && isValidPattern(fString) < B_OK) { 588 // we only want to have valid patterns; setting fString 589 // to NULL will cause InitCheck() to fail 590 free(fString); 591 fString = NULL; 592 } 593 } 594 595 // The special time flag is set if the time values are shifted 596 // 64-bit values to reduce the number of duplicates. 597 // We have to be able to compare them against unshifted values 598 // later. The only index which needs this is the last_modified 599 // index, but we may want to open that feature for other indices, 600 // too one day. 601 fIsSpecialTime = !strcmp(fAttribute, "last_modified"); 602 603 *expr = string; 604 } 605 606 607 Equation::~Equation() 608 { 609 free(fAttribute); 610 free(fString); 611 } 612 613 614 status_t 615 Equation::InitCheck() 616 { 617 if (fAttribute == NULL || fString == NULL || fOp == OP_NONE) 618 return B_BAD_VALUE; 619 620 return B_OK; 621 } 622 623 624 status_t 625 Equation::ParseQuotedString(char** _start, char** _end) 626 { 627 char* start = *_start; 628 char quote = *start++; 629 char* end = start; 630 631 for (; *end && *end != quote; end++) { 632 if (*end == '\\') 633 end++; 634 } 635 if (*end == '\0') 636 return B_BAD_VALUE; 637 638 *_start = start; 639 *_end = end - 1; 640 641 return B_OK; 642 } 643 644 645 char* 646 Equation::CopyString(char* start, char* end) 647 { 648 // end points to the last character of the string - and the length 649 // also has to include the null-termination 650 int32 length = end + 2 - start; 651 // just to make sure; since that's the max. attribute name length and 652 // the max. string in an index, it make sense to have it that way 653 if (length > INODE_FILE_NAME_LENGTH || length <= 0) 654 return NULL; 655 656 char* copy = (char*)malloc(length); 657 if (copy == NULL) 658 return NULL; 659 660 memcpy(copy, start, length - 1); 661 copy[length - 1] = '\0'; 662 663 return copy; 664 } 665 666 667 status_t 668 Equation::ConvertValue(type_code type) 669 { 670 // Has the type already been converted? 671 if (type == fType) 672 return B_OK; 673 674 char* string = fString; 675 676 switch (type) { 677 case B_MIME_STRING_TYPE: 678 type = B_STRING_TYPE; 679 // supposed to fall through 680 case B_STRING_TYPE: 681 strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH); 682 fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0'; 683 fSize = strlen(fValue.String); 684 break; 685 case B_INT32_TYPE: 686 fValue.Int32 = strtol(string, &string, 0); 687 fSize = sizeof(int32); 688 break; 689 case B_UINT32_TYPE: 690 fValue.Int32 = strtoul(string, &string, 0); 691 fSize = sizeof(uint32); 692 break; 693 case B_INT64_TYPE: 694 fValue.Int64 = strtoll(string, &string, 0); 695 fSize = sizeof(int64); 696 break; 697 case B_UINT64_TYPE: 698 fValue.Uint64 = strtoull(string, &string, 0); 699 fSize = sizeof(uint64); 700 break; 701 case B_FLOAT_TYPE: 702 fValue.Float = strtod(string, &string); 703 fSize = sizeof(float); 704 break; 705 case B_DOUBLE_TYPE: 706 fValue.Double = strtod(string, &string); 707 fSize = sizeof(double); 708 break; 709 default: 710 FATAL(("query value conversion to 0x%x requested!\n", (int)type)); 711 // should we fail here or just do a safety int32 conversion? 712 return B_ERROR; 713 } 714 715 fType = type; 716 717 // patterns are only allowed for string types 718 if (fType != B_STRING_TYPE && fIsPattern) 719 fIsPattern = false; 720 721 return B_OK; 722 } 723 724 725 /*! Returns true when the key matches the equation. You have to 726 call ConvertValue() before this one. 727 */ 728 bool 729 Equation::CompareTo(const uint8* value, uint16 size) 730 { 731 int32 compare; 732 733 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or 734 // OP_UNEQUAL 735 if (fIsPattern) { 736 // we have already validated the pattern, so we don't check for failing 737 // here - if something is broken, and matchString() returns an error, 738 // we just don't match 739 compare = matchString(fValue.String, (char*)value) == MATCH_OK ? 0 : 1; 740 } else if (fIsSpecialTime) { 741 // the index is a shifted int64 index, but we have to match 742 // against an unshifted value (i.e. the last_modified index) 743 int64 timeValue = *(int64*)value >> INODE_TIME_SHIFT; 744 compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, 745 sizeof(int64)); 746 } else 747 compare = compareKeys(fType, value, size, Value(), fSize); 748 749 switch (fOp) { 750 case OP_EQUAL: 751 return compare == 0; 752 case OP_UNEQUAL: 753 return compare != 0; 754 case OP_LESS_THAN: 755 return compare < 0; 756 case OP_LESS_THAN_OR_EQUAL: 757 return compare <= 0; 758 case OP_GREATER_THAN: 759 return compare > 0; 760 case OP_GREATER_THAN_OR_EQUAL: 761 return compare >= 0; 762 } 763 FATAL(("Unknown/Unsupported operation: %d\n", fOp)); 764 return false; 765 } 766 767 768 void 769 Equation::Complement() 770 { 771 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) { 772 FATAL(("op out of range!")); 773 return; 774 }); 775 776 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL, 777 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN}; 778 fOp = complementOp[fOp - OP_EQUAL]; 779 } 780 781 782 status_t 783 Equation::MatchEmptyString() 784 { 785 // There is no matching attribute, we will just bail out if we 786 // already know that our value is not of a string type. 787 // If not, it will be converted to a string - and then be compared with "". 788 // That's why we have to call ConvertValue() here - but it will be 789 // a cheap call for the next time 790 // TODO: Should we do this only for OP_UNEQUAL? 791 if (fType != 0 && fType != B_STRING_TYPE) 792 return NO_MATCH; 793 794 status_t status = ConvertValue(B_STRING_TYPE); 795 if (status == B_OK) 796 status = CompareTo((const uint8*)"", fSize) ? MATCH_OK : NO_MATCH; 797 798 return status; 799 } 800 801 802 /*! Matches the inode's attribute value with the equation. 803 Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went 804 wrong. 805 */ 806 status_t 807 Equation::Match(Inode* inode, const char* attributeName, int32 type, 808 const uint8* key, size_t size) 809 { 810 // get a pointer to the attribute in question 811 NodeGetter nodeGetter(inode->GetVolume()); 812 union value value; 813 uint8* buffer = (uint8*)&value; 814 bool locked = false; 815 816 // first, check if we are matching for a live query and use that value 817 if (attributeName != NULL && !strcmp(fAttribute, attributeName)) { 818 if (key == NULL) { 819 if (type == B_STRING_TYPE) 820 return MatchEmptyString(); 821 822 return NO_MATCH; 823 } 824 buffer = const_cast<uint8*>(key); 825 } else if (!strcmp(fAttribute, "name")) { 826 // we need to lock before accessing Inode::Name() 827 nodeGetter.SetToNode(inode); 828 829 recursive_lock_lock(&inode->SmallDataLock()); 830 locked = true; 831 832 // if not, check for "fake" attributes ("name", "size", "last_modified") 833 buffer = (uint8*)inode->Name(nodeGetter.Node()); 834 if (buffer == NULL) { 835 recursive_lock_unlock(&inode->SmallDataLock()); 836 return B_ERROR; 837 } 838 839 type = B_STRING_TYPE; 840 size = strlen((const char*)buffer); 841 } else if (!strcmp(fAttribute, "size")) { 842 #ifdef BFS_NATIVE_ENDIAN 843 buffer = (uint8*)&inode->Node().data.size; 844 #else 845 value.Int64 = inode->Size(); 846 #endif 847 type = B_INT64_TYPE; 848 } else if (!strcmp(fAttribute, "last_modified")) { 849 #ifdef BFS_NATIVE_ENDIAN 850 buffer = (uint8*)&inode->Node().last_modified_time; 851 #else 852 value.Int64 = inode->Node().LastModifiedTime(); 853 #endif 854 type = B_INT64_TYPE; 855 } else { 856 // then for attributes in the small_data section, and finally for the 857 // real attributes 858 nodeGetter.SetToNode(inode); 859 Inode* attribute; 860 861 recursive_lock_lock(&inode->SmallDataLock()); 862 small_data* smallData = inode->FindSmallData(nodeGetter.Node(), 863 fAttribute); 864 if (smallData != NULL) { 865 buffer = smallData->Data(); 866 type = smallData->type; 867 size = smallData->data_size; 868 locked = true; 869 } else { 870 // needed to unlock the small_data section as fast as possible 871 recursive_lock_unlock(&inode->SmallDataLock()); 872 nodeGetter.Unset(); 873 874 if (inode->GetAttribute(fAttribute, &attribute) == B_OK) { 875 buffer = (uint8*)&value; 876 type = attribute->Type(); 877 size = attribute->Size(); 878 879 if (size > INODE_FILE_NAME_LENGTH) 880 size = INODE_FILE_NAME_LENGTH; 881 882 if (attribute->ReadAt(0, buffer, &size) < B_OK) { 883 inode->ReleaseAttribute(attribute); 884 return B_IO_ERROR; 885 } 886 inode->ReleaseAttribute(attribute); 887 } else 888 return MatchEmptyString(); 889 } 890 } 891 // prepare own value for use, if it is possible to convert it 892 status_t status = ConvertValue(type); 893 if (status == B_OK) 894 status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH; 895 896 if (locked) 897 recursive_lock_unlock(&inode->SmallDataLock()); 898 899 RETURN_ERROR(status); 900 } 901 902 903 void 904 Equation::CalculateScore(Index &index) 905 { 906 // As always, these values could be tuned and refined. 907 // And the code could also need some real world testing :-) 908 909 // do we have to operate on a "foreign" index? 910 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) { 911 fScore = 0; 912 return; 913 } 914 915 // if we have a pattern, how much does it help our search? 916 if (fIsPattern) 917 fScore = getFirstPatternSymbol(fString) << 3; 918 else { 919 // Score by operator 920 if (fOp == OP_EQUAL) 921 // higher than pattern="255 chars+*" 922 fScore = 2048; 923 else 924 // the pattern search is regarded cheaper when you have at 925 // least one character to set your index to 926 fScore = 5; 927 } 928 929 // take index size into account (1024 is the current node size 930 // in our B+trees) 931 // 2048 * 2048 == 4194304 is the maximum score (for an empty 932 // tree, since the header + 1 node are already 2048 bytes) 933 fScore = fScore * ((2048 * 1024LL) / index.Node()->Size()); 934 } 935 936 937 status_t 938 Equation::PrepareQuery(Volume* /*volume*/, Index& index, 939 TreeIterator** iterator, bool queryNonIndexed) 940 { 941 status_t status = index.SetTo(fAttribute); 942 943 // if we should query attributes without an index, we can just proceed here 944 if (status != B_OK && !queryNonIndexed) 945 return B_ENTRY_NOT_FOUND; 946 947 type_code type; 948 949 // Special case for OP_UNEQUAL - it will always operate through the whole 950 // index but we need the call to the original index to get the correct type 951 if (status != B_OK || fOp == OP_UNEQUAL) { 952 // Try to get an index that holds all files (name) 953 // Also sets the default type for all attributes without index 954 // to string. 955 type = status < B_OK ? B_STRING_TYPE : index.Type(); 956 957 if (index.SetTo("name") != B_OK) 958 return B_ENTRY_NOT_FOUND; 959 960 fHasIndex = false; 961 } else { 962 fHasIndex = true; 963 type = index.Type(); 964 } 965 966 if (ConvertValue(type) < B_OK) 967 return B_BAD_VALUE; 968 969 BPlusTree* tree = index.Node()->Tree(); 970 if (tree == NULL) 971 return B_ERROR; 972 973 *iterator = new TreeIterator(tree); 974 if (*iterator == NULL) 975 return B_NO_MEMORY; 976 977 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN 978 || fOp == OP_GREATER_THAN_OR_EQUAL || fIsPattern) 979 && fHasIndex) { 980 // set iterator to the exact position 981 982 int32 keySize = index.KeySize(); 983 984 // At this point, fIsPattern is only true if it's a string type, and fOp 985 // is either OP_EQUAL or OP_UNEQUAL 986 if (fIsPattern) { 987 // let's see if we can use the beginning of the key for positioning 988 // the iterator and adjust the key size; if not, just leave the 989 // iterator at the start and return success 990 keySize = getFirstPatternSymbol(fString); 991 if (keySize <= 0) 992 return B_OK; 993 } 994 995 if (keySize == 0) { 996 // B_STRING_TYPE doesn't have a fixed length, so it was set 997 // to 0 before - we compute the correct value here 998 if (fType == B_STRING_TYPE) { 999 keySize = strlen(fValue.String); 1000 1001 // The empty string is a special case - we normally don't check 1002 // for the trailing null byte, in the case for the empty string 1003 // we do it explicitly, because there can't be keys in the 1004 // B+tree with a length of zero 1005 if (keySize == 0) 1006 keySize = 1; 1007 } else 1008 RETURN_ERROR(B_ENTRY_NOT_FOUND); 1009 } 1010 1011 if (fIsSpecialTime) { 1012 // we have to find the first matching shifted value 1013 off_t value = fValue.Int64 << INODE_TIME_SHIFT; 1014 status = (*iterator)->Find((uint8*)&value, keySize); 1015 if (status == B_ENTRY_NOT_FOUND) 1016 return B_OK; 1017 } else { 1018 status = (*iterator)->Find(Value(), keySize); 1019 if (fOp == OP_EQUAL && !fIsPattern) 1020 return status; 1021 else if (status == B_ENTRY_NOT_FOUND 1022 && (fIsPattern || fOp == OP_GREATER_THAN 1023 || fOp == OP_GREATER_THAN_OR_EQUAL)) 1024 return B_OK; 1025 } 1026 1027 RETURN_ERROR(status); 1028 } 1029 1030 return B_OK; 1031 } 1032 1033 1034 status_t 1035 Equation::GetNextMatching(Volume* volume, TreeIterator* iterator, 1036 struct dirent* dirent, size_t bufferSize) 1037 { 1038 while (true) { 1039 union value indexValue; 1040 uint16 keyLength; 1041 uint16 duplicate; 1042 off_t offset; 1043 1044 status_t status = iterator->GetNextEntry(&indexValue, &keyLength, 1045 (uint16)sizeof(indexValue), &offset, &duplicate); 1046 if (status != B_OK) 1047 return status; 1048 1049 // only compare against the index entry when this is the correct 1050 // index for the equation 1051 if (fHasIndex && duplicate < 2 1052 && !CompareTo((uint8*)&indexValue, keyLength)) { 1053 // They aren't equal? Let the operation decide what to do. Since 1054 // we always start at the beginning of the index (or the correct 1055 // position), only some needs to be stopped if the entry doesn't 1056 // fit. 1057 if (fOp == OP_LESS_THAN 1058 || fOp == OP_LESS_THAN_OR_EQUAL 1059 || (fOp == OP_EQUAL && !fIsPattern)) 1060 return B_ENTRY_NOT_FOUND; 1061 1062 if (duplicate > 0) 1063 iterator->SkipDuplicates(); 1064 continue; 1065 } 1066 1067 Vnode vnode(volume, offset); 1068 Inode* inode; 1069 if ((status = vnode.Get(&inode)) != B_OK) { 1070 REPORT_ERROR(status); 1071 FATAL(("could not get inode %" B_PRIdOFF " in index \"%s\"!\n", 1072 offset, fAttribute)); 1073 // try with next 1074 continue; 1075 } 1076 1077 // TODO: check user permissions here - but which one?! 1078 // we could filter out all those where we don't have 1079 // read access... (we should check for every parent 1080 // directory if the X_OK is allowed) 1081 // Although it's quite expensive to open all parents, 1082 // it's likely that the application that runs the 1083 // query will do something similar (and we don't have 1084 // to do it for root, either). 1085 1086 // go up in the tree until a &&-operator is found, and check if the 1087 // inode matches with the rest of the expression - we don't have to 1088 // check ||-operators for that 1089 Term* term = this; 1090 status = MATCH_OK; 1091 1092 if (!fHasIndex) 1093 status = Match(inode); 1094 1095 while (term != NULL && status == MATCH_OK) { 1096 Operator* parent = (Operator*)term->Parent(); 1097 if (parent == NULL) 1098 break; 1099 1100 if (parent->Op() == OP_AND) { 1101 // choose the other child of the parent 1102 Term* other = parent->Right(); 1103 if (other == term) 1104 other = parent->Left(); 1105 1106 if (other == NULL) { 1107 FATAL(("&&-operator has only one child... (parent = %p)\n", 1108 parent)); 1109 break; 1110 } 1111 status = other->Match(inode); 1112 if (status < 0) { 1113 REPORT_ERROR(status); 1114 status = NO_MATCH; 1115 } 1116 } 1117 term = (Term*)parent; 1118 } 1119 1120 if (status == MATCH_OK) { 1121 dirent->d_dev = volume->ID(); 1122 dirent->d_ino = offset; 1123 dirent->d_pdev = volume->ID(); 1124 dirent->d_pino = volume->ToVnode(inode->Parent()); 1125 1126 if (inode->GetName(dirent->d_name) < B_OK) { 1127 FATAL(("inode %" B_PRIdOFF " in query has no name!\n", 1128 inode->BlockNumber())); 1129 } 1130 1131 dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name); 1132 } 1133 1134 if (status == MATCH_OK) 1135 return B_OK; 1136 } 1137 RETURN_ERROR(B_ERROR); 1138 } 1139 1140 1141 // #pragma mark - 1142 1143 1144 Operator::Operator(Term* left, int8 op, Term* right) 1145 : Term(op), 1146 fLeft(left), 1147 fRight(right) 1148 { 1149 if (left) 1150 left->SetParent(this); 1151 if (right) 1152 right->SetParent(this); 1153 } 1154 1155 1156 Operator::~Operator() 1157 { 1158 delete fLeft; 1159 delete fRight; 1160 } 1161 1162 1163 status_t 1164 Operator::Match(Inode* inode, const char* attribute, int32 type, 1165 const uint8* key, size_t size) 1166 { 1167 if (fOp == OP_AND) { 1168 status_t status = fLeft->Match(inode, attribute, type, key, size); 1169 if (status != MATCH_OK) 1170 return status; 1171 1172 return fRight->Match(inode, attribute, type, key, size); 1173 } else { 1174 // choose the term with the better score for OP_OR 1175 Term* first; 1176 Term* second; 1177 if (fRight->Score() > fLeft->Score()) { 1178 first = fLeft; 1179 second = fRight; 1180 } else { 1181 first = fRight; 1182 second = fLeft; 1183 } 1184 1185 status_t status = first->Match(inode, attribute, type, key, size); 1186 if (status != NO_MATCH) 1187 return status; 1188 1189 return second->Match(inode, attribute, type, key, size); 1190 } 1191 } 1192 1193 1194 void 1195 Operator::Complement() 1196 { 1197 if (fOp == OP_AND) 1198 fOp = OP_OR; 1199 else 1200 fOp = OP_AND; 1201 1202 fLeft->Complement(); 1203 fRight->Complement(); 1204 } 1205 1206 1207 void 1208 Operator::CalculateScore(Index &index) 1209 { 1210 fLeft->CalculateScore(index); 1211 fRight->CalculateScore(index); 1212 } 1213 1214 1215 int32 1216 Operator::Score() const 1217 { 1218 if (fOp == OP_AND) { 1219 // return the one with the better score 1220 if (fRight->Score() > fLeft->Score()) 1221 return fRight->Score(); 1222 1223 return fLeft->Score(); 1224 } 1225 1226 // for OP_OR, be honest, and return the one with the worse score 1227 if (fRight->Score() < fLeft->Score()) 1228 return fRight->Score(); 1229 1230 return fLeft->Score(); 1231 } 1232 1233 1234 status_t 1235 Operator::InitCheck() 1236 { 1237 if ((fOp != OP_AND && fOp != OP_OR) 1238 || fLeft == NULL || fLeft->InitCheck() < B_OK 1239 || fRight == NULL || fRight->InitCheck() < B_OK) 1240 return B_ERROR; 1241 1242 return B_OK; 1243 } 1244 1245 1246 #if 0 1247 Term* 1248 Operator::Copy() const 1249 { 1250 if (fEquation != NULL) { 1251 Equation* equation = new Equation(*fEquation); 1252 if (equation == NULL) 1253 return NULL; 1254 1255 Term* term = new Term(equation); 1256 if (term == NULL) 1257 delete equation; 1258 1259 return term; 1260 } 1261 1262 Term* left = NULL; 1263 Term* right = NULL; 1264 1265 if (fLeft != NULL && (left = fLeft->Copy()) == NULL) 1266 return NULL; 1267 if (fRight != NULL && (right = fRight->Copy()) == NULL) { 1268 delete left; 1269 return NULL; 1270 } 1271 1272 Term* term = new Term(left, fOp, right); 1273 if (term == NULL) { 1274 delete left; 1275 delete right; 1276 return NULL; 1277 } 1278 return term; 1279 } 1280 #endif 1281 1282 1283 // #pragma mark - 1284 1285 #ifdef DEBUG 1286 1287 void 1288 Operator::PrintToStream() 1289 { 1290 D(__out("( ")); 1291 if (fLeft != NULL) 1292 fLeft->PrintToStream(); 1293 1294 const char* op; 1295 switch (fOp) { 1296 case OP_OR: op = "OR"; break; 1297 case OP_AND: op = "AND"; break; 1298 default: op = "?"; break; 1299 } 1300 D(__out(" %s ", op)); 1301 1302 if (fRight != NULL) 1303 fRight->PrintToStream(); 1304 1305 D(__out(" )")); 1306 } 1307 1308 1309 void 1310 Equation::PrintToStream() 1311 { 1312 const char* symbol = "???"; 1313 switch (fOp) { 1314 case OP_EQUAL: symbol = "=="; break; 1315 case OP_UNEQUAL: symbol = "!="; break; 1316 case OP_GREATER_THAN: symbol = ">"; break; 1317 case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break; 1318 case OP_LESS_THAN: symbol = "<"; break; 1319 case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break; 1320 } 1321 D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString)); 1322 } 1323 1324 #endif // DEBUG 1325 1326 // #pragma mark - 1327 1328 1329 Expression::Expression(char* expr) 1330 { 1331 if (expr == NULL) 1332 return; 1333 1334 fTerm = ParseOr(&expr); 1335 if (fTerm != NULL && fTerm->InitCheck() < B_OK) { 1336 FATAL(("Corrupt tree in expression!\n")); 1337 delete fTerm; 1338 fTerm = NULL; 1339 } 1340 D(if (fTerm != NULL) { 1341 fTerm->PrintToStream(); 1342 D(__out("\n")); 1343 if (*expr != '\0') 1344 PRINT(("Unexpected end of string: \"%s\"!\n", expr)); 1345 }); 1346 fPosition = expr; 1347 } 1348 1349 1350 Expression::~Expression() 1351 { 1352 delete fTerm; 1353 } 1354 1355 1356 Term* 1357 Expression::ParseEquation(char** expr) 1358 { 1359 skipWhitespace(expr); 1360 1361 bool _not = false; 1362 if (**expr == '!') { 1363 skipWhitespace(expr, 1); 1364 if (**expr != '(') 1365 return NULL; 1366 1367 _not = true; 1368 } 1369 1370 if (**expr == ')') { 1371 // shouldn't be handled here 1372 return NULL; 1373 } else if (**expr == '(') { 1374 skipWhitespace(expr, 1); 1375 1376 Term* term = ParseOr(expr); 1377 1378 skipWhitespace(expr); 1379 1380 if (**expr != ')') { 1381 delete term; 1382 return NULL; 1383 } 1384 1385 // If the term is negated, we just complement the tree, to get 1386 // rid of the not, a.k.a. DeMorgan's Law. 1387 if (_not) 1388 term->Complement(); 1389 1390 skipWhitespace(expr, 1); 1391 1392 return term; 1393 } 1394 1395 Equation* equation = new Equation(expr); 1396 if (equation == NULL || equation->InitCheck() < B_OK) { 1397 delete equation; 1398 return NULL; 1399 } 1400 return equation; 1401 } 1402 1403 1404 Term* 1405 Expression::ParseAnd(char** expr) 1406 { 1407 Term* left = ParseEquation(expr); 1408 if (left == NULL) 1409 return NULL; 1410 1411 while (IsOperator(expr, '&')) { 1412 Term* right = ParseAnd(expr); 1413 Term* newParent = NULL; 1414 1415 if (right == NULL 1416 || (newParent = new Operator(left, OP_AND, right)) == NULL) { 1417 delete left; 1418 delete right; 1419 1420 return NULL; 1421 } 1422 left = newParent; 1423 } 1424 1425 return left; 1426 } 1427 1428 1429 Term* 1430 Expression::ParseOr(char** expr) 1431 { 1432 Term* left = ParseAnd(expr); 1433 if (left == NULL) 1434 return NULL; 1435 1436 while (IsOperator(expr, '|')) { 1437 Term* right = ParseAnd(expr); 1438 Term* newParent = NULL; 1439 1440 if (right == NULL 1441 || (newParent = new Operator(left, OP_OR, right)) == NULL) { 1442 delete left; 1443 delete right; 1444 1445 return NULL; 1446 } 1447 left = newParent; 1448 } 1449 1450 return left; 1451 } 1452 1453 1454 bool 1455 Expression::IsOperator(char** expr, char op) 1456 { 1457 char* string = *expr; 1458 1459 if (*string == op && *(string + 1) == op) { 1460 *expr += 2; 1461 return true; 1462 } 1463 return false; 1464 } 1465 1466 1467 status_t 1468 Expression::InitCheck() 1469 { 1470 if (fTerm == NULL) 1471 return B_BAD_VALUE; 1472 1473 return B_OK; 1474 } 1475 1476 1477 // #pragma mark - 1478 1479 1480 Query::Query(Volume* volume, Expression* expression, uint32 flags) 1481 : 1482 fVolume(volume), 1483 fExpression(expression), 1484 fCurrent(NULL), 1485 fIterator(NULL), 1486 fIndex(volume), 1487 fFlags(flags), 1488 fPort(-1) 1489 { 1490 // If the expression has a valid root pointer, the whole tree has 1491 // already passed the sanity check, so that we don't have to check 1492 // every pointer 1493 if (volume == NULL || expression == NULL || expression->Root() == NULL) 1494 return; 1495 1496 // create index on the stack and delete it afterwards 1497 fExpression->Root()->CalculateScore(fIndex); 1498 fIndex.Unset(); 1499 1500 Rewind(); 1501 1502 if ((fFlags & B_LIVE_QUERY) != 0) 1503 volume->AddQuery(this); 1504 } 1505 1506 1507 Query::~Query() 1508 { 1509 if ((fFlags & B_LIVE_QUERY) != 0) 1510 fVolume->RemoveQuery(this); 1511 } 1512 1513 1514 status_t 1515 Query::Rewind() 1516 { 1517 // free previous stuff 1518 1519 fStack.MakeEmpty(); 1520 1521 delete fIterator; 1522 fIterator = NULL; 1523 fCurrent = NULL; 1524 1525 // put the whole expression on the stack 1526 1527 Stack<Term*> stack; 1528 stack.Push(fExpression->Root()); 1529 1530 Term* term; 1531 while (stack.Pop(&term)) { 1532 if (term->Op() < OP_EQUATION) { 1533 Operator* op = (Operator*)term; 1534 1535 if (op->Op() == OP_OR) { 1536 stack.Push(op->Left()); 1537 stack.Push(op->Right()); 1538 } else { 1539 // For OP_AND, we can use the scoring system to decide which 1540 // path to add 1541 if (op->Right()->Score() > op->Left()->Score()) 1542 stack.Push(op->Right()); 1543 else 1544 stack.Push(op->Left()); 1545 } 1546 } else if (term->Op() == OP_EQUATION 1547 || fStack.Push((Equation*)term) != B_OK) 1548 FATAL(("Unknown term on stack or stack error")); 1549 } 1550 1551 return B_OK; 1552 } 1553 1554 1555 status_t 1556 Query::GetNextEntry(struct dirent* dirent, size_t size) 1557 { 1558 // If we don't have an equation to use yet/anymore, get a new one 1559 // from the stack 1560 while (true) { 1561 if (fIterator == NULL) { 1562 if (!fStack.Pop(&fCurrent) 1563 || fCurrent == NULL) 1564 return B_ENTRY_NOT_FOUND; 1565 1566 status_t status = fCurrent->PrepareQuery(fVolume, fIndex, 1567 &fIterator, fFlags & B_QUERY_NON_INDEXED); 1568 if (status == B_ENTRY_NOT_FOUND) { 1569 // try next equation 1570 continue; 1571 } 1572 1573 if (status != B_OK) 1574 return status; 1575 } 1576 if (fCurrent == NULL) 1577 RETURN_ERROR(B_ERROR); 1578 1579 status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, 1580 size); 1581 if (status != B_OK) { 1582 delete fIterator; 1583 fIterator = NULL; 1584 fCurrent = NULL; 1585 } else { 1586 // only return if we have another entry 1587 return B_OK; 1588 } 1589 } 1590 } 1591 1592 1593 void 1594 Query::SetLiveMode(port_id port, int32 token) 1595 { 1596 fPort = port; 1597 fToken = token; 1598 1599 if ((fFlags & B_LIVE_QUERY) == 0) { 1600 // you can decide at any point to set the live query mode, 1601 // only live queries have to be updated by attribute changes 1602 fFlags |= B_LIVE_QUERY; 1603 fVolume->AddQuery(this); 1604 } 1605 } 1606 1607 1608 void 1609 Query::LiveUpdate(Inode* inode, const char* attribute, int32 type, 1610 const uint8* oldKey, size_t oldLength, const uint8* newKey, 1611 size_t newLength) 1612 { 1613 if (fPort < 0 || fExpression == NULL || attribute == NULL) 1614 return; 1615 1616 // TODO: check if the attribute is part of the query at all... 1617 1618 status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, 1619 oldKey, oldLength); 1620 status_t newStatus = fExpression->Root()->Match(inode, attribute, type, 1621 newKey, newLength); 1622 1623 bool entryCreated; 1624 1625 if (oldStatus != MATCH_OK) { 1626 if (newStatus != MATCH_OK) { 1627 // nothing has changed 1628 return; 1629 } 1630 entryCreated = true; 1631 } else if (newStatus != MATCH_OK) { 1632 // entry got removed 1633 entryCreated = false; 1634 } else { 1635 // The entry stays in the query 1636 return; 1637 } 1638 1639 // we may need to get the name of the inode 1640 1641 char nameBuffer[B_FILE_NAME_LENGTH]; 1642 const char* name; 1643 1644 if (strcmp(attribute, "name")) { 1645 if (inode->GetName(nameBuffer) != B_OK) 1646 nameBuffer[0] = '\0'; 1647 name = nameBuffer; 1648 } else { 1649 // a shortcut to prevent having to scan the attribute section 1650 name = (const char*)newKey; 1651 } 1652 1653 // notify query listeners 1654 1655 if (entryCreated) { 1656 notify_query_entry_created(fPort, fToken, fVolume->ID(), 1657 fVolume->ToVnode(inode->Parent()), name, inode->ID()); 1658 } else { 1659 notify_query_entry_removed(fPort, fToken, fVolume->ID(), 1660 fVolume->ToVnode(inode->Parent()), name, inode->ID()); 1661 } 1662 } 1663 1664 1665 void 1666 Query::LiveUpdateRenameMove(Inode* inode, ino_t oldDirectoryID, 1667 const char* oldName, size_t oldLength, ino_t newDirectoryID, 1668 const char* newName, size_t newLength) 1669 { 1670 if (fPort < 0 || fExpression == NULL) 1671 return; 1672 1673 // TODO: check if the attribute is part of the query at all... 1674 1675 status_t oldStatus = fExpression->Root()->Match(inode, "name", 1676 B_STRING_TYPE, (const uint8*)oldName, oldLength); 1677 status_t newStatus = fExpression->Root()->Match(inode, "name", 1678 B_STRING_TYPE, (const uint8*)newName, newLength); 1679 1680 if (oldStatus != MATCH_OK || oldStatus != newStatus) 1681 return; 1682 1683 // The entry stays in the query, notify query listeners about the rename 1684 // or move 1685 1686 notify_query_entry_removed(fPort, fToken, fVolume->ID(), 1687 oldDirectoryID, oldName, inode->ID()); 1688 1689 notify_query_entry_created(fPort, fToken, fVolume->ID(), 1690 newDirectoryID, newName, inode->ID()); 1691 } 1692