1 /* 2 * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 /*! Query parsing and evaluation 7 8 The pattern matching is roughly based on code originally written 9 by J. Kercheval, and on code written by Kenneth Almquist, though 10 it shares no code. 11 */ 12 13 #include "Query.h" 14 #include "bfs.h" 15 #include "Debug.h" 16 #include "Volume.h" 17 #include "Inode.h" 18 #include "BPlusTree.h" 19 #include "Index.h" 20 21 22 // The parser has a very static design, but it will do what is required. 23 // 24 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator 25 // precedence, that is =,!=,>,<,>=,<= .. && .. ||. 26 // Apparently, the "!" (not) can only be used with brackets. 27 // 28 // If you think that there are too few NULL pointer checks in some places 29 // of the code, just read the beginning of the query constructor. 30 // The API is not fully available, just the Query and the Expression class 31 // are. 32 33 34 enum ops { 35 OP_NONE, 36 37 OP_AND, 38 OP_OR, 39 40 OP_EQUATION, 41 42 OP_EQUAL, 43 OP_UNEQUAL, 44 OP_GREATER_THAN, 45 OP_LESS_THAN, 46 OP_GREATER_THAN_OR_EQUAL, 47 OP_LESS_THAN_OR_EQUAL, 48 }; 49 50 enum match { 51 NO_MATCH = 0, 52 MATCH_OK = 1, 53 54 MATCH_BAD_PATTERN = -2, 55 MATCH_INVALID_CHARACTER 56 }; 57 58 // return values from isValidPattern() 59 enum { 60 PATTERN_INVALID_ESCAPE = -3, 61 PATTERN_INVALID_RANGE, 62 PATTERN_INVALID_SET 63 }; 64 65 union value { 66 int64 Int64; 67 uint64 Uint64; 68 int32 Int32; 69 uint32 Uint32; 70 float Float; 71 double Double; 72 char String[INODE_FILE_NAME_LENGTH]; 73 }; 74 75 // B_MIME_STRING_TYPE is defined in storage/Mime.h, but we 76 // don't need the whole file here; the type can't change anyway 77 #ifndef _MIME_H 78 # define B_MIME_STRING_TYPE 'MIMS' 79 #endif 80 81 class Term { 82 public: 83 Term(int8 op) : fOp(op), fParent(NULL) {} 84 virtual ~Term() {} 85 86 int8 Op() const { return fOp; } 87 88 void SetParent(Term *parent) { fParent = parent; } 89 Term *Parent() const { return fParent; } 90 91 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0, 92 const uint8 *key = NULL,size_t size = 0) = 0; 93 virtual void Complement() = 0; 94 95 virtual void CalculateScore(Index &index) = 0; 96 virtual int32 Score() const = 0; 97 98 virtual status_t InitCheck() = 0; 99 100 #ifdef DEBUG 101 virtual void PrintToStream() = 0; 102 #endif 103 104 protected: 105 int8 fOp; 106 Term *fParent; 107 }; 108 109 // Although an Equation object is quite independent from the volume on which 110 // the query is run, there are some dependencies that are produced while 111 // querying: 112 // The type/size of the value, the score, and if it has an index or not. 113 // So you could run more than one query on the same volume, but it might return 114 // wrong values when it runs concurrently on another volume. 115 // That's not an issue right now, because we run single-threaded and don't use 116 // queries more than once. 117 118 class Equation : public Term { 119 public: 120 Equation(char **expr); 121 virtual ~Equation(); 122 123 virtual status_t InitCheck(); 124 125 status_t ParseQuotedString(char **_start, char **_end); 126 char *CopyString(char *start, char *end); 127 128 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 129 const uint8 *key = NULL, size_t size = 0); 130 virtual void Complement(); 131 132 status_t PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator, 133 bool queryNonIndexed); 134 status_t GetNextMatching(Volume *volume, TreeIterator *iterator, 135 struct dirent *dirent, size_t bufferSize); 136 137 virtual void CalculateScore(Index &index); 138 virtual int32 Score() const { return fScore; } 139 140 #ifdef DEBUG 141 virtual void PrintToStream(); 142 #endif 143 144 private: 145 Equation(const Equation &); 146 Equation &operator=(const Equation &); 147 // no implementation 148 149 status_t ConvertValue(type_code type); 150 bool CompareTo(const uint8 *value, uint16 size); 151 uint8 *Value() const { return (uint8 *)&fValue; } 152 status_t MatchEmptyString(); 153 154 char *fAttribute; 155 char *fString; 156 union value fValue; 157 type_code fType; 158 size_t fSize; 159 bool fIsPattern; 160 bool fIsSpecialTime; 161 162 int32 fScore; 163 bool fHasIndex; 164 }; 165 166 class Operator : public Term { 167 public: 168 Operator(Term *,int8,Term *); 169 virtual ~Operator(); 170 171 Term *Left() const { return fLeft; } 172 Term *Right() const { return fRight; } 173 174 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 175 const uint8 *key = NULL, size_t size = 0); 176 virtual void Complement(); 177 178 virtual void CalculateScore(Index &index); 179 virtual int32 Score() const; 180 181 virtual status_t InitCheck(); 182 183 //Term *Copy() const; 184 #ifdef DEBUG 185 virtual void PrintToStream(); 186 #endif 187 188 private: 189 Operator(const Operator &); 190 Operator &operator=(const Operator &); 191 // no implementation 192 193 Term *fLeft,*fRight; 194 }; 195 196 197 //--------------------------------- 198 199 200 void 201 skipWhitespace(char **expr, int32 skip = 0) 202 { 203 char *string = (*expr) + skip; 204 while (*string == ' ' || *string == '\t') string++; 205 *expr = string; 206 } 207 208 209 void 210 skipWhitespaceReverse(char **expr,char *stop) 211 { 212 char *string = *expr; 213 while (string > stop && (*string == ' ' || *string == '\t')) string--; 214 *expr = string; 215 } 216 217 218 // #pragma mark - 219 220 221 uint32 222 utf8ToUnicode(char **string) 223 { 224 uint8 *bytes = (uint8 *)*string; 225 int32 length; 226 uint8 mask = 0x1f; 227 228 switch (bytes[0] & 0xf0) { 229 case 0xc0: 230 case 0xd0: length = 2; break; 231 case 0xe0: length = 3; break; 232 case 0xf0: 233 mask = 0x0f; 234 length = 4; 235 break; 236 default: 237 // valid 1-byte character 238 // and invalid characters 239 (*string)++; 240 return bytes[0]; 241 } 242 uint32 c = bytes[0] & mask; 243 int32 i = 1; 244 for (;i < length && (bytes[i] & 0x80) > 0;i++) 245 c = (c << 6) | (bytes[i] & 0x3f); 246 247 if (i < length) { 248 // invalid character 249 (*string)++; 250 return (uint32)bytes[0]; 251 } 252 *string += length; 253 return c; 254 } 255 256 257 int32 258 getFirstPatternSymbol(char *string) 259 { 260 char c; 261 262 for (int32 index = 0;(c = *string++);index++) { 263 if (c == '*' || c == '?' || c == '[') 264 return index; 265 } 266 return -1; 267 } 268 269 270 bool 271 isPattern(char *string) 272 { 273 return getFirstPatternSymbol(string) >= 0 ? true : false; 274 } 275 276 277 status_t 278 isValidPattern(char *pattern) 279 { 280 while (*pattern) { 281 switch (*pattern++) { 282 case '\\': 283 // the escape character must not be at the end of the pattern 284 if (!*pattern++) 285 return PATTERN_INVALID_ESCAPE; 286 break; 287 288 case '[': 289 if (pattern[0] == ']' || !pattern[0]) 290 return PATTERN_INVALID_SET; 291 292 while (*pattern != ']') { 293 if (*pattern == '\\' && !*++pattern) 294 return PATTERN_INVALID_ESCAPE; 295 296 if (!*pattern) 297 return PATTERN_INVALID_SET; 298 299 if (pattern[0] == '-' && pattern[1] == '-') 300 return PATTERN_INVALID_RANGE; 301 302 pattern++; 303 } 304 break; 305 } 306 } 307 return B_OK; 308 } 309 310 311 /** Matches the string against the given wildcard pattern. 312 * Returns either MATCH_OK, or NO_MATCH when everything went fine, 313 * or values < 0 (see enum at the top of Query.cpp) if an error 314 * occurs 315 */ 316 317 status_t 318 matchString(char *pattern, char *string) 319 { 320 while (*pattern) { 321 // end of string == valid end of pattern? 322 if (!string[0]) { 323 while (pattern[0] == '*') 324 pattern++; 325 return !pattern[0] ? MATCH_OK : NO_MATCH; 326 } 327 328 switch (*pattern++) { 329 case '?': 330 { 331 // match exactly one UTF-8 character; we are 332 // not interested in the result 333 utf8ToUnicode(&string); 334 break; 335 } 336 337 case '*': 338 { 339 // compact pattern 340 while (true) { 341 if (pattern[0] == '?') { 342 if (!*++string) 343 return NO_MATCH; 344 } else if (pattern[0] != '*') 345 break; 346 347 pattern++; 348 } 349 350 // if the pattern is done, we have matched the string 351 if (!pattern[0]) 352 return MATCH_OK; 353 354 while(true) { 355 // we have removed all occurences of '*' and '?' 356 if (pattern[0] == string[0] 357 || pattern[0] == '[' 358 || pattern[0] == '\\') { 359 status_t status = matchString(pattern,string); 360 if (status < B_OK || status == MATCH_OK) 361 return status; 362 } 363 364 // we could be nice here and just jump to the next 365 // UTF-8 character - but we wouldn't gain that much 366 // and it'd be slower (since we're checking for 367 // equality before entering the recursion) 368 if (!*++string) 369 return NO_MATCH; 370 } 371 break; 372 } 373 374 case '[': 375 { 376 bool invert = false; 377 if (pattern[0] == '^' || pattern[0] == '!') { 378 invert = true; 379 pattern++; 380 } 381 382 if (!pattern[0] || pattern[0] == ']') 383 return MATCH_BAD_PATTERN; 384 385 uint32 c = utf8ToUnicode(&string); 386 bool matched = false; 387 388 while (pattern[0] != ']') { 389 if (!pattern[0]) 390 return MATCH_BAD_PATTERN; 391 392 if (pattern[0] == '\\') 393 pattern++; 394 395 uint32 first = utf8ToUnicode(&pattern); 396 397 // Does this character match, or is this a range? 398 if (first == c) { 399 matched = true; 400 break; 401 } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) { 402 pattern++; 403 404 if (pattern[0] == '\\') { 405 pattern++; 406 if (!pattern[0]) 407 return MATCH_BAD_PATTERN; 408 } 409 uint32 last = utf8ToUnicode(&pattern); 410 411 if (c >= first && c <= last) { 412 matched = true; 413 break; 414 } 415 } 416 } 417 418 if (invert) 419 matched = !matched; 420 421 if (matched) { 422 while (pattern[0] != ']') { 423 if (!pattern[0]) 424 return MATCH_BAD_PATTERN; 425 pattern++; 426 } 427 pattern++; 428 break; 429 } 430 return NO_MATCH; 431 } 432 433 case '\\': 434 if (!pattern[0]) 435 return MATCH_BAD_PATTERN; 436 // supposed to fall through 437 default: 438 if (pattern[-1] != string[0]) 439 return NO_MATCH; 440 string++; 441 break; 442 } 443 } 444 445 if (string[0]) 446 return NO_MATCH; 447 448 return MATCH_OK; 449 } 450 451 452 // #pragma mark - 453 454 455 Equation::Equation(char **expr) 456 : Term(OP_EQUATION), 457 fAttribute(NULL), 458 fString(NULL), 459 fType(0), 460 fIsPattern(false) 461 { 462 char *string = *expr; 463 char *start = string; 464 char *end = NULL; 465 466 // Since the equation is the integral part of any query, we're just parsing 467 // the whole thing here. 468 // The whitespace at the start is already removed in Expression::ParseEquation() 469 470 if (*start == '"' || *start == '\'') { 471 // string is quoted (start has to be on the beginning of a string) 472 if (ParseQuotedString(&start, &end) < B_OK) 473 return; 474 475 // set string to a valid start of the equation symbol 476 string = end + 2; 477 skipWhitespace(&string); 478 if (*string != '=' && *string != '<' && *string != '>' && *string != '!') { 479 *expr = string; 480 return; 481 } 482 } else { 483 // search the (in)equation for the actual equation symbol (and for other operators 484 // in case the equation is malformed) 485 while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!' 486 && *string != '&' && *string != '|') 487 string++; 488 489 // get the attribute string (and trim whitespace), in case 490 // the string was not quoted 491 end = string - 1; 492 skipWhitespaceReverse(&end, start); 493 } 494 495 // attribute string is empty (which is not allowed) 496 if (start > end) 497 return; 498 499 // at this point, "start" points to the beginning of the string, "end" points 500 // to the last character of the string, and "string" points to the first 501 // character of the equation symbol 502 503 // test for the right symbol (as this doesn't need any memory) 504 switch (*string) { 505 case '=': 506 fOp = OP_EQUAL; 507 break; 508 case '>': 509 fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN; 510 break; 511 case '<': 512 fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN; 513 break; 514 case '!': 515 if (*(string + 1) != '=') 516 return; 517 fOp = OP_UNEQUAL; 518 break; 519 520 // any invalid characters will be rejected 521 default: 522 *expr = string; 523 return; 524 } 525 // lets change "start" to point to the first character after the symbol 526 if (*(string + 1) == '=') 527 string++; 528 string++; 529 skipWhitespace(&string); 530 531 // allocate & copy the attribute string 532 533 fAttribute = CopyString(start, end); 534 if (fAttribute == NULL) 535 return; 536 537 start = string; 538 if (*start == '"' || *start == '\'') { 539 // string is quoted (start has to be on the beginning of a string) 540 if (ParseQuotedString(&start, &end) < B_OK) 541 return; 542 543 string = end + 2; 544 skipWhitespace(&string); 545 } else { 546 while (*string && *string != '&' && *string != '|' && *string != ')') 547 string++; 548 549 end = string - 1; 550 skipWhitespaceReverse(&end, start); 551 } 552 553 // at this point, "start" will point to the first character of the value, 554 // "end" will point to its last character, and "start" to the first non- 555 // whitespace character after the value string 556 557 fString = CopyString(start, end); 558 if (fString == NULL) 559 return; 560 561 // patterns are only allowed for these operations (and strings) 562 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) { 563 fIsPattern = isPattern(fString); 564 if (fIsPattern && isValidPattern(fString) < B_OK) { 565 // we only want to have valid patterns; setting fString 566 // to NULL will cause InitCheck() to fail 567 free(fString); 568 fString = NULL; 569 } 570 } 571 572 // The special time flag is set if the time values are shifted 573 // 64-bit values to reduce the number of duplicates. 574 // We have to be able to compare them against unshifted values 575 // later. The only index which needs this is the last_modified 576 // index, but we may want to open that feature for other indices, 577 // too one day. 578 fIsSpecialTime = !strcmp(fAttribute, "last_modified"); 579 580 *expr = string; 581 } 582 583 584 Equation::~Equation() 585 { 586 if (fAttribute != NULL) 587 free(fAttribute); 588 if (fString != NULL) 589 free(fString); 590 } 591 592 593 status_t 594 Equation::InitCheck() 595 { 596 if (fAttribute == NULL 597 || fString == NULL 598 || fOp == OP_NONE) 599 return B_BAD_VALUE; 600 601 return B_OK; 602 } 603 604 605 status_t 606 Equation::ParseQuotedString(char **_start, char **_end) 607 { 608 char *start = *_start; 609 char quote = *start++; 610 char *end = start; 611 612 for (;*end && *end != quote;end++) { 613 if (*end == '\\') 614 end++; 615 } 616 if (*end == '\0') 617 return B_BAD_VALUE; 618 619 *_start = start; 620 *_end = end - 1; 621 622 return B_OK; 623 } 624 625 626 char * 627 Equation::CopyString(char *start, char *end) 628 { 629 // end points to the last character of the string - and the length 630 // also has to include the null-termination 631 int32 length = end + 2 - start; 632 // just to make sure; since that's the max. attribute name length and 633 // the max. string in an index, it make sense to have it that way 634 if (length > INODE_FILE_NAME_LENGTH || length <= 0) 635 return NULL; 636 637 char *copy = (char *)malloc(length); 638 if (copy == NULL) 639 return NULL; 640 641 memcpy(copy,start,length - 1); 642 copy[length - 1] = '\0'; 643 644 return copy; 645 } 646 647 648 status_t 649 Equation::ConvertValue(type_code type) 650 { 651 // Has the type already been converted? 652 if (type == fType) 653 return B_OK; 654 655 char *string = fString; 656 657 switch (type) { 658 case B_MIME_STRING_TYPE: 659 type = B_STRING_TYPE; 660 // supposed to fall through 661 case B_STRING_TYPE: 662 strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH); 663 fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0'; 664 fSize = strlen(fValue.String); 665 break; 666 case B_INT32_TYPE: 667 fValue.Int32 = strtol(string, &string, 0); 668 fSize = sizeof(int32); 669 break; 670 case B_UINT32_TYPE: 671 fValue.Int32 = strtoul(string, &string, 0); 672 fSize = sizeof(uint32); 673 break; 674 case B_INT64_TYPE: 675 fValue.Int64 = strtoll(string, &string, 0); 676 fSize = sizeof(int64); 677 break; 678 case B_UINT64_TYPE: 679 fValue.Uint64 = strtoull(string, &string, 0); 680 fSize = sizeof(uint64); 681 break; 682 case B_FLOAT_TYPE: 683 fValue.Float = strtod(string, &string); 684 fSize = sizeof(float); 685 break; 686 case B_DOUBLE_TYPE: 687 fValue.Double = strtod(string, &string); 688 fSize = sizeof(double); 689 break; 690 default: 691 FATAL(("query value conversion to 0x%x requested!\n", (int)type)); 692 // should we fail here or just do a safety int32 conversion? 693 return B_ERROR; 694 } 695 696 fType = type; 697 698 // patterns are only allowed for string types 699 if (fType != B_STRING_TYPE && fIsPattern) 700 fIsPattern = false; 701 702 return B_OK; 703 } 704 705 706 /** Returns true when the key matches the equation. You have to 707 * call ConvertValue() before this one. 708 */ 709 710 bool 711 Equation::CompareTo(const uint8 *value, uint16 size) 712 { 713 int32 compare; 714 715 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL 716 if (fIsPattern) { 717 // we have already validated the pattern, so we don't check for failing 718 // here - if something is broken, and matchString() returns an error, 719 // we just don't match 720 compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1; 721 } else if (fIsSpecialTime) { 722 // the index is a shifted int64 index, but we have to match 723 // against an unshifted value (i.e. the last_modified index) 724 int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT; 725 compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64)); 726 } else 727 compare = compareKeys(fType, value, size, Value(), fSize); 728 729 switch (fOp) { 730 case OP_EQUAL: 731 return compare == 0; 732 case OP_UNEQUAL: 733 return compare != 0; 734 case OP_LESS_THAN: 735 return compare < 0; 736 case OP_LESS_THAN_OR_EQUAL: 737 return compare <= 0; 738 case OP_GREATER_THAN: 739 return compare > 0; 740 case OP_GREATER_THAN_OR_EQUAL: 741 return compare >= 0; 742 } 743 FATAL(("Unknown/Unsupported operation: %d\n", fOp)); 744 return false; 745 } 746 747 748 void 749 Equation::Complement() 750 { 751 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) { 752 FATAL(("op out of range!")); 753 return; 754 }); 755 756 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL, 757 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN}; 758 fOp = complementOp[fOp - OP_EQUAL]; 759 } 760 761 762 status_t 763 Equation::MatchEmptyString() 764 { 765 // there is no matching attribute, we will just bail out if we 766 // already know that our value is not of a string type. 767 // If not, it will be converted to a string - and then be compared with "". 768 // That's why we have to call ConvertValue() here - but it will be 769 // a cheap call for the next time 770 // Should we do this only for OP_UNEQUAL? 771 if (fType != 0 && fType != B_STRING_TYPE) 772 return NO_MATCH; 773 774 status_t status = ConvertValue(B_STRING_TYPE); 775 if (status == B_OK) 776 status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH; 777 778 return status; 779 } 780 781 782 /** Matches the inode's attribute value with the equation. 783 * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong 784 */ 785 786 status_t 787 Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size) 788 { 789 // get a pointer to the attribute in question 790 NodeGetter nodeGetter(inode->GetVolume()); 791 union value value; 792 uint8 *buffer = (uint8 *)&value; 793 bool locked = false; 794 795 // first, check if we are matching for a live query and use that value 796 if (attributeName != NULL && !strcmp(fAttribute, attributeName)) { 797 if (key == NULL) { 798 if (type == B_STRING_TYPE) 799 return MatchEmptyString(); 800 801 return NO_MATCH; 802 } 803 buffer = const_cast<uint8 *>(key); 804 } else if (!strcmp(fAttribute, "name")) { 805 // we need to lock before accessing Inode::Name() 806 nodeGetter.SetToNode(inode); 807 808 inode->SmallDataLock().Lock(); 809 locked = true; 810 811 // if not, check for "fake" attributes, "name", "size", "last_modified", 812 buffer = (uint8 *)inode->Name(nodeGetter.Node()); 813 if (buffer == NULL) { 814 inode->SmallDataLock().Unlock(); 815 return B_ERROR; 816 } 817 818 type = B_STRING_TYPE; 819 size = strlen((const char *)buffer); 820 } else if (!strcmp(fAttribute, "size")) { 821 #ifdef BFS_NATIVE_ENDIAN 822 buffer = (uint8 *)&inode->Node().data.size; 823 #else 824 value.Int64 = inode->Size(); 825 #endif 826 type = B_INT64_TYPE; 827 } else if (!strcmp(fAttribute, "last_modified")) { 828 #ifdef BFS_NATIVE_ENDIAN 829 buffer = (uint8 *)&inode->Node().last_modified_time; 830 #else 831 value.Int64 = inode->Node().LastModifiedTime(); 832 #endif 833 type = B_INT64_TYPE; 834 } else { 835 // then for attributes in the small_data section, and finally for the 836 // real attributes 837 nodeGetter.SetToNode(inode); 838 Inode *attribute; 839 840 inode->SmallDataLock().Lock(); 841 small_data *smallData = inode->FindSmallData(nodeGetter.Node(), fAttribute); 842 if (smallData != NULL) { 843 buffer = smallData->Data(); 844 type = smallData->type; 845 size = smallData->data_size; 846 locked = true; 847 } else { 848 // needed to unlock the small_data section as fast as possible 849 inode->SmallDataLock().Unlock(); 850 nodeGetter.Unset(); 851 852 if (inode->GetAttribute(fAttribute, &attribute) == B_OK) { 853 buffer = (uint8 *)&value; 854 type = attribute->Type(); 855 size = attribute->Size(); 856 857 if (size > INODE_FILE_NAME_LENGTH) 858 size = INODE_FILE_NAME_LENGTH; 859 860 if (attribute->ReadAt(0, buffer, &size) < B_OK) { 861 inode->ReleaseAttribute(attribute); 862 return B_IO_ERROR; 863 } 864 inode->ReleaseAttribute(attribute); 865 } else 866 return MatchEmptyString(); 867 } 868 } 869 // prepare own value for use, if it is possible to convert it 870 status_t status = ConvertValue(type); 871 if (status == B_OK) 872 status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH; 873 874 if (locked) 875 inode->SmallDataLock().Unlock(); 876 877 RETURN_ERROR(status); 878 } 879 880 881 void 882 Equation::CalculateScore(Index &index) 883 { 884 // As always, these values could be tuned and refined. 885 // And the code could also need some real world testing :-) 886 887 // do we have to operate on a "foreign" index? 888 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) { 889 fScore = 0; 890 return; 891 } 892 893 // if we have a pattern, how much does it help our search? 894 if (fIsPattern) 895 fScore = getFirstPatternSymbol(fString) << 3; 896 else { 897 // Score by operator 898 if (fOp == OP_EQUAL) 899 // higher than pattern="255 chars+*" 900 fScore = 2048; 901 else 902 // the pattern search is regarded cheaper when you have at 903 // least one character to set your index to 904 fScore = 5; 905 } 906 907 // take index size into account (1024 is the current node size 908 // in our B+trees) 909 // 2048 * 2048 == 4194304 is the maximum score (for an empty 910 // tree, since the header + 1 node are already 2048 bytes) 911 fScore = fScore * ((2048 * 1024LL) / index.Node()->Size()); 912 } 913 914 915 status_t 916 Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator, bool queryNonIndexed) 917 { 918 status_t status = index.SetTo(fAttribute); 919 920 // if we should query attributes without an index, we can just proceed here 921 if (status < B_OK && !queryNonIndexed) 922 return B_ENTRY_NOT_FOUND; 923 924 type_code type; 925 926 // special case for OP_UNEQUAL - it will always operate through the whole index 927 // but we need the call to the original index to get the correct type 928 if (status < B_OK || fOp == OP_UNEQUAL) { 929 // Try to get an index that holds all files (name) 930 // Also sets the default type for all attributes without index 931 // to string. 932 type = status < B_OK ? B_STRING_TYPE : index.Type(); 933 934 if (index.SetTo("name") < B_OK) 935 return B_ENTRY_NOT_FOUND; 936 937 fHasIndex = false; 938 } else { 939 fHasIndex = true; 940 type = index.Type(); 941 } 942 943 if (ConvertValue(type) < B_OK) 944 return B_BAD_VALUE; 945 946 BPlusTree *tree; 947 if (index.Node()->GetTree(&tree) < B_OK) 948 return B_ERROR; 949 950 *iterator = new TreeIterator(tree); 951 if (*iterator == NULL) 952 return B_NO_MEMORY; 953 954 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL 955 || fIsPattern) 956 && fHasIndex) { 957 // set iterator to the exact position 958 959 int32 keySize = index.KeySize(); 960 961 // at this point, fIsPattern is only true if it's a string type, and fOp 962 // is either OP_EQUAL or OP_UNEQUAL 963 if (fIsPattern) { 964 // let's see if we can use the beginning of the key for positioning 965 // the iterator and adjust the key size; if not, just leave the 966 // iterator at the start and return success 967 keySize = getFirstPatternSymbol(fString); 968 if (keySize <= 0) 969 return B_OK; 970 } 971 972 if (keySize == 0) { 973 // B_STRING_TYPE doesn't have a fixed length, so it was set 974 // to 0 before - we compute the correct value here 975 if (fType == B_STRING_TYPE) { 976 keySize = strlen(fValue.String); 977 978 // The empty string is a special case - we normally don't check 979 // for the trailing null byte, in the case for the empty string 980 // we do it explicitly, because there can't be keys in the B+tree 981 // with a length of zero 982 if (keySize == 0) 983 keySize = 1; 984 } else 985 RETURN_ERROR(B_ENTRY_NOT_FOUND); 986 } 987 988 if (fIsSpecialTime) { 989 // we have to find the first matching shifted value 990 off_t value = fValue.Int64 << INODE_TIME_SHIFT; 991 status = (*iterator)->Find((uint8 *)&value, keySize); 992 if (status == B_ENTRY_NOT_FOUND) 993 return B_OK; 994 } else { 995 status = (*iterator)->Find(Value(), keySize); 996 if (fOp == OP_EQUAL && !fIsPattern) 997 return status; 998 else if (status == B_ENTRY_NOT_FOUND 999 && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL)) 1000 return B_OK; 1001 } 1002 1003 RETURN_ERROR(status); 1004 } 1005 1006 return B_OK; 1007 } 1008 1009 1010 status_t 1011 Equation::GetNextMatching(Volume *volume, TreeIterator *iterator, 1012 struct dirent *dirent, size_t bufferSize) 1013 { 1014 while (true) { 1015 union value indexValue; 1016 uint16 keyLength; 1017 uint16 duplicate; 1018 off_t offset; 1019 1020 status_t status = iterator->GetNextEntry(&indexValue, &keyLength, 1021 (uint16)sizeof(indexValue), &offset, &duplicate); 1022 if (status < B_OK) 1023 return status; 1024 1025 // only compare against the index entry when this is the correct 1026 // index for the equation 1027 if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) { 1028 // They aren't equal? let the operation decide what to do 1029 // Since we always start at the beginning of the index (or the correct 1030 // position), only some needs to be stopped if the entry doesn't fit. 1031 if (fOp == OP_LESS_THAN 1032 || fOp == OP_LESS_THAN_OR_EQUAL 1033 || (fOp == OP_EQUAL && !fIsPattern)) 1034 return B_ENTRY_NOT_FOUND; 1035 1036 if (duplicate > 0) 1037 iterator->SkipDuplicates(); 1038 continue; 1039 } 1040 1041 Vnode vnode(volume, offset); 1042 Inode *inode; 1043 if ((status = vnode.Get(&inode)) != B_OK) { 1044 REPORT_ERROR(status); 1045 FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute)); 1046 // try with next 1047 continue; 1048 } 1049 1050 // ToDo: check user permissions here - but which one?! 1051 // we could filter out all those where we don't have 1052 // read access... (we should check for every parent 1053 // directory if the X_OK is allowed) 1054 // Although it's quite expensive to open all parents, 1055 // it's likely that the application that runs the 1056 // query will do something similar (and we don't have 1057 // to do it for root, either). 1058 1059 // go up in the tree until a &&-operator is found, and check if the 1060 // inode matches with the rest of the expression - we don't have to 1061 // check ||-operators for that 1062 Term *term = this; 1063 status = MATCH_OK; 1064 1065 if (!fHasIndex) 1066 status = Match(inode); 1067 1068 while (term != NULL && status == MATCH_OK) { 1069 Operator *parent = (Operator *)term->Parent(); 1070 if (parent == NULL) 1071 break; 1072 1073 if (parent->Op() == OP_AND) { 1074 // choose the other child of the parent 1075 Term *other = parent->Right(); 1076 if (other == term) 1077 other = parent->Left(); 1078 1079 if (other == NULL) { 1080 FATAL(("&&-operator has only one child... (parent = %p)\n", parent)); 1081 break; 1082 } 1083 status = other->Match(inode); 1084 if (status < 0) { 1085 REPORT_ERROR(status); 1086 status = NO_MATCH; 1087 } 1088 } 1089 term = (Term *)parent; 1090 } 1091 1092 if (status == MATCH_OK) { 1093 dirent->d_dev = volume->ID(); 1094 dirent->d_ino = offset; 1095 dirent->d_pdev = volume->ID(); 1096 dirent->d_pino = volume->ToVnode(inode->Parent()); 1097 1098 if (inode->GetName(dirent->d_name) < B_OK) 1099 FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber())); 1100 1101 dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name); 1102 } 1103 1104 if (status == MATCH_OK) 1105 return B_OK; 1106 } 1107 RETURN_ERROR(B_ERROR); 1108 } 1109 1110 1111 // #pragma mark - 1112 1113 1114 Operator::Operator(Term *left, int8 op, Term *right) 1115 : Term(op), 1116 fLeft(left), 1117 fRight(right) 1118 { 1119 if (left) 1120 left->SetParent(this); 1121 if (right) 1122 right->SetParent(this); 1123 } 1124 1125 1126 Operator::~Operator() 1127 { 1128 delete fLeft; 1129 delete fRight; 1130 } 1131 1132 1133 status_t 1134 Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size) 1135 { 1136 if (fOp == OP_AND) { 1137 status_t status = fLeft->Match(inode, attribute, type, key, size); 1138 if (status != MATCH_OK) 1139 return status; 1140 1141 return fRight->Match(inode, attribute, type, key, size); 1142 } else { 1143 // choose the term with the better score for OP_OR 1144 if (fRight->Score() > fLeft->Score()) { 1145 status_t status = fRight->Match(inode, attribute, type, key, size); 1146 if (status != NO_MATCH) 1147 return status; 1148 } 1149 return fLeft->Match(inode, attribute, type, key, size); 1150 } 1151 } 1152 1153 1154 void 1155 Operator::Complement() 1156 { 1157 if (fOp == OP_AND) 1158 fOp = OP_OR; 1159 else 1160 fOp = OP_AND; 1161 1162 fLeft->Complement(); 1163 fRight->Complement(); 1164 } 1165 1166 1167 void 1168 Operator::CalculateScore(Index &index) 1169 { 1170 fLeft->CalculateScore(index); 1171 fRight->CalculateScore(index); 1172 } 1173 1174 1175 int32 1176 Operator::Score() const 1177 { 1178 if (fOp == OP_AND) { 1179 // return the one with the better score 1180 if (fRight->Score() > fLeft->Score()) 1181 return fRight->Score(); 1182 1183 return fLeft->Score(); 1184 } 1185 1186 // for OP_OR, be honest, and return the one with the worse score 1187 if (fRight->Score() < fLeft->Score()) 1188 return fRight->Score(); 1189 1190 return fLeft->Score(); 1191 } 1192 1193 1194 status_t 1195 Operator::InitCheck() 1196 { 1197 if ((fOp != OP_AND && fOp != OP_OR) 1198 || fLeft == NULL || fLeft->InitCheck() < B_OK 1199 || fRight == NULL || fRight->InitCheck() < B_OK) 1200 return B_ERROR; 1201 1202 return B_OK; 1203 } 1204 1205 1206 #if 0 1207 Term * 1208 Operator::Copy() const 1209 { 1210 if (fEquation != NULL) { 1211 Equation *equation = new Equation(*fEquation); 1212 if (equation == NULL) 1213 return NULL; 1214 1215 Term *term = new Term(equation); 1216 if (term == NULL) 1217 delete equation; 1218 1219 return term; 1220 } 1221 1222 Term *left = NULL, *right = NULL; 1223 1224 if (fLeft != NULL && (left = fLeft->Copy()) == NULL) 1225 return NULL; 1226 if (fRight != NULL && (right = fRight->Copy()) == NULL) { 1227 delete left; 1228 return NULL; 1229 } 1230 1231 Term *term = new Term(left,fOp,right); 1232 if (term == NULL) { 1233 delete left; 1234 delete right; 1235 return NULL; 1236 } 1237 return term; 1238 } 1239 #endif 1240 1241 1242 // #pragma mark - 1243 1244 #ifdef DEBUG 1245 void 1246 Operator::PrintToStream() 1247 { 1248 D(__out("( ")); 1249 if (fLeft != NULL) 1250 fLeft->PrintToStream(); 1251 1252 char *op; 1253 switch (fOp) { 1254 case OP_OR: op = "OR"; break; 1255 case OP_AND: op = "AND"; break; 1256 default: op = "?"; break; 1257 } 1258 D(__out(" %s ",op)); 1259 1260 if (fRight != NULL) 1261 fRight->PrintToStream(); 1262 1263 D(__out(" )")); 1264 } 1265 1266 1267 void 1268 Equation::PrintToStream() 1269 { 1270 char *symbol = "???"; 1271 switch (fOp) { 1272 case OP_EQUAL: symbol = "=="; break; 1273 case OP_UNEQUAL: symbol = "!="; break; 1274 case OP_GREATER_THAN: symbol = ">"; break; 1275 case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break; 1276 case OP_LESS_THAN: symbol = "<"; break; 1277 case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break; 1278 } 1279 D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString)); 1280 } 1281 1282 #endif /* DEBUG */ 1283 1284 // #pragma mark - 1285 1286 1287 Expression::Expression(char *expr) 1288 { 1289 if (expr == NULL) 1290 return; 1291 1292 fTerm = ParseOr(&expr); 1293 if (fTerm != NULL && fTerm->InitCheck() < B_OK) { 1294 FATAL(("Corrupt tree in expression!\n")); 1295 delete fTerm; 1296 fTerm = NULL; 1297 } 1298 D(if (fTerm != NULL) { 1299 fTerm->PrintToStream(); 1300 D(__out("\n")); 1301 if (*expr != '\0') 1302 PRINT(("Unexpected end of string: \"%s\"!\n", expr)); 1303 }); 1304 fPosition = expr; 1305 } 1306 1307 1308 Expression::~Expression() 1309 { 1310 delete fTerm; 1311 } 1312 1313 1314 Term * 1315 Expression::ParseEquation(char **expr) 1316 { 1317 skipWhitespace(expr); 1318 1319 bool _not = false; 1320 if (**expr == '!') { 1321 skipWhitespace(expr, 1); 1322 if (**expr != '(') 1323 return NULL; 1324 1325 _not = true; 1326 } 1327 1328 if (**expr == ')') { 1329 // shouldn't be handled here 1330 return NULL; 1331 } else if (**expr == '(') { 1332 skipWhitespace(expr, 1); 1333 1334 Term *term = ParseOr(expr); 1335 1336 skipWhitespace(expr); 1337 1338 if (**expr != ')') { 1339 delete term; 1340 return NULL; 1341 } 1342 1343 // If the term is negated, we just complement the tree, to get 1344 // rid of the not, a.k.a. DeMorgan's Law. 1345 if (_not) 1346 term->Complement(); 1347 1348 skipWhitespace(expr, 1); 1349 1350 return term; 1351 } 1352 1353 Equation *equation = new Equation(expr); 1354 if (equation == NULL || equation->InitCheck() < B_OK) { 1355 delete equation; 1356 return NULL; 1357 } 1358 return equation; 1359 } 1360 1361 1362 Term * 1363 Expression::ParseAnd(char **expr) 1364 { 1365 Term *left = ParseEquation(expr); 1366 if (left == NULL) 1367 return NULL; 1368 1369 while (IsOperator(expr,'&')) { 1370 Term *right = ParseAnd(expr); 1371 Term *newParent = NULL; 1372 1373 if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) { 1374 delete left; 1375 delete right; 1376 1377 return NULL; 1378 } 1379 left = newParent; 1380 } 1381 1382 return left; 1383 } 1384 1385 1386 Term * 1387 Expression::ParseOr(char **expr) 1388 { 1389 Term *left = ParseAnd(expr); 1390 if (left == NULL) 1391 return NULL; 1392 1393 while (IsOperator(expr,'|')) { 1394 Term *right = ParseAnd(expr); 1395 Term *newParent = NULL; 1396 1397 if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) { 1398 delete left; 1399 delete right; 1400 1401 return NULL; 1402 } 1403 left = newParent; 1404 } 1405 1406 return left; 1407 } 1408 1409 1410 bool 1411 Expression::IsOperator(char **expr, char op) 1412 { 1413 char *string = *expr; 1414 1415 if (*string == op && *(string + 1) == op) { 1416 *expr += 2; 1417 return true; 1418 } 1419 return false; 1420 } 1421 1422 1423 status_t 1424 Expression::InitCheck() 1425 { 1426 if (fTerm == NULL) 1427 return B_BAD_VALUE; 1428 1429 return B_OK; 1430 } 1431 1432 1433 // #pragma mark - 1434 1435 1436 Query::Query(Volume *volume, Expression *expression, uint32 flags) 1437 : 1438 fVolume(volume), 1439 fExpression(expression), 1440 fCurrent(NULL), 1441 fIterator(NULL), 1442 fIndex(volume), 1443 fFlags(flags), 1444 fPort(-1) 1445 { 1446 // if the expression has a valid root pointer, the whole tree has 1447 // already passed the sanity check, so that we don't have to check 1448 // every pointer 1449 if (volume == NULL || expression == NULL || expression->Root() == NULL) 1450 return; 1451 1452 // create index on the stack and delete it afterwards 1453 fExpression->Root()->CalculateScore(fIndex); 1454 fIndex.Unset(); 1455 1456 Rewind(); 1457 1458 if (fFlags & B_LIVE_QUERY) 1459 volume->AddQuery(this); 1460 } 1461 1462 1463 Query::~Query() 1464 { 1465 if (fFlags & B_LIVE_QUERY) 1466 fVolume->RemoveQuery(this); 1467 } 1468 1469 1470 status_t 1471 Query::Rewind() 1472 { 1473 // free previous stuff 1474 1475 fStack.MakeEmpty(); 1476 1477 delete fIterator; 1478 fIterator = NULL; 1479 fCurrent = NULL; 1480 1481 // put the whole expression on the stack 1482 1483 Stack<Term *> stack; 1484 stack.Push(fExpression->Root()); 1485 1486 Term *term; 1487 while (stack.Pop(&term)) { 1488 if (term->Op() < OP_EQUATION) { 1489 Operator *op = (Operator *)term; 1490 1491 if (op->Op() == OP_OR) { 1492 stack.Push(op->Left()); 1493 stack.Push(op->Right()); 1494 } else { 1495 // For OP_AND, we can use the scoring system to decide which path to add 1496 if (op->Right()->Score() > op->Left()->Score()) 1497 stack.Push(op->Right()); 1498 else 1499 stack.Push(op->Left()); 1500 } 1501 } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK) 1502 FATAL(("Unknown term on stack or stack error")); 1503 } 1504 1505 return B_OK; 1506 } 1507 1508 1509 status_t 1510 Query::GetNextEntry(struct dirent *dirent, size_t size) 1511 { 1512 // If we don't have an equation to use yet/anymore, get a new one 1513 // from the stack 1514 while (true) { 1515 if (fIterator == NULL) { 1516 if (!fStack.Pop(&fCurrent) 1517 || fCurrent == NULL 1518 || fCurrent->PrepareQuery(fVolume, fIndex, &fIterator, 1519 fFlags & B_QUERY_NON_INDEXED) < B_OK) 1520 return B_ENTRY_NOT_FOUND; 1521 } 1522 if (fCurrent == NULL) 1523 RETURN_ERROR(B_ERROR); 1524 1525 status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size); 1526 if (status < B_OK) { 1527 delete fIterator; 1528 fIterator = NULL; 1529 fCurrent = NULL; 1530 } else { 1531 // only return if we have another entry 1532 return B_OK; 1533 } 1534 } 1535 } 1536 1537 1538 void 1539 Query::SetLiveMode(port_id port, int32 token) 1540 { 1541 fPort = port; 1542 fToken = token; 1543 1544 if ((fFlags & B_LIVE_QUERY) == 0) { 1545 // you can decide at any point to set the live query mode, 1546 // only live queries have to be updated by attribute changes 1547 fFlags |= B_LIVE_QUERY; 1548 fVolume->AddQuery(this); 1549 } 1550 } 1551 1552 1553 void 1554 Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 1555 size_t oldLength, const uint8 *newKey, size_t newLength) 1556 { 1557 if (fPort < 0 || fExpression == NULL || attribute == NULL) 1558 return; 1559 1560 // ToDo: check if the attribute is part of the query at all... 1561 1562 status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength); 1563 status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength); 1564 1565 const char *name = NULL; 1566 bool entryCreated; 1567 1568 if (oldStatus != MATCH_OK) { 1569 if (newStatus != MATCH_OK) { 1570 // nothing has changed 1571 return; 1572 } 1573 entryCreated = true; 1574 } else if (newStatus != MATCH_OK) { 1575 // entry got removed 1576 entryCreated = false; 1577 } else { 1578 // the entry stays in the query - only notify in case the name of the inode was changed 1579 if (oldKey == NULL || strcmp(attribute, "name")) 1580 return; 1581 1582 notify_query_entry_removed(fPort, fToken, fVolume->ID(), 1583 fVolume->ToVnode(inode->Parent()), (const char *)oldKey, inode->ID()); 1584 name = (const char *)newKey; 1585 entryCreated = true; 1586 } 1587 1588 // we may need to get the name of the inode 1589 1590 char nameBuffer[B_FILE_NAME_LENGTH]; 1591 1592 if (name == NULL) { 1593 if (strcmp(attribute, "name")) { 1594 if (inode->GetName(nameBuffer) != B_OK) 1595 nameBuffer[0] = '\0'; 1596 name = nameBuffer; 1597 } else { 1598 // a shortcut to prevent having to scan the attribute section 1599 name = (const char *)newKey; 1600 } 1601 } 1602 1603 // notify query listeners 1604 1605 if (entryCreated) { 1606 notify_query_entry_created(fPort, fToken, fVolume->ID(), 1607 fVolume->ToVnode(inode->Parent()), name, inode->ID()); 1608 } else { 1609 notify_query_entry_removed(fPort, fToken, fVolume->ID(), 1610 fVolume->ToVnode(inode->Parent()), name, inode->ID()); 1611 } 1612 } 1613 1614