1 /* Query - query parsing and evaluation 2 ** 3 ** Initial version by Axel Dörfler, axeld@pinc-software.de 4 ** The pattern matching is roughly based on code originally written 5 ** by J. Kercheval, and on code written by Kenneth Almquist, though 6 ** it shares no code. 7 ** 8 ** This file may be used under the terms of the OpenBeOS License. 9 */ 10 11 12 #include "Query.h" 13 #include "bfs.h" 14 #include "Debug.h" 15 #include "Stack.h" 16 #include "Volume.h" 17 #include "Inode.h" 18 #include "BPlusTree.h" 19 #include "Index.h" 20 21 #include <kernel_cpp.h> 22 #include <SupportDefs.h> 23 #include <TypeConstants.h> 24 #include <AppDefs.h> 25 #include <fs_query.h> 26 27 #include <malloc.h> 28 #include <stdio.h> 29 #include <string.h> 30 31 32 // The parser has a very static design, but it will do what is required. 33 // 34 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator 35 // precedence, that is =,!=,>,<,>=,<= .. && .. ||. 36 // Apparently, the "!" (not) can only be used with brackets. 37 // 38 // If you think that there are too few NULL pointer checks in some places 39 // of the code, just read the beginning of the query constructor. 40 // The API is not fully available, just the Query and the Expression class 41 // are. 42 43 44 enum ops { 45 OP_NONE, 46 47 OP_AND, 48 OP_OR, 49 50 OP_EQUATION, 51 52 OP_EQUAL, 53 OP_UNEQUAL, 54 OP_GREATER_THAN, 55 OP_LESS_THAN, 56 OP_GREATER_THAN_OR_EQUAL, 57 OP_LESS_THAN_OR_EQUAL, 58 }; 59 60 enum match { 61 NO_MATCH = 0, 62 MATCH_OK = 1, 63 64 MATCH_BAD_PATTERN = -2, 65 MATCH_INVALID_CHARACTER 66 }; 67 68 // return values from isValidPattern() 69 enum { 70 PATTERN_INVALID_ESCAPE = -3, 71 PATTERN_INVALID_RANGE, 72 PATTERN_INVALID_SET 73 }; 74 75 union value { 76 int64 Int64; 77 uint64 Uint64; 78 int32 Int32; 79 uint32 Uint32; 80 float Float; 81 double Double; 82 char String[INODE_FILE_NAME_LENGTH]; 83 }; 84 85 // B_MIME_STRING_TYPE is defined in storage/Mime.h, but we 86 // don't need the whole file here; the type can't change anyway 87 #ifndef _MIME_H 88 # define B_MIME_STRING_TYPE 'MIMS' 89 #endif 90 91 class Term { 92 public: 93 Term(int8 op) : fOp(op), fParent(NULL) {} 94 virtual ~Term() {} 95 96 int8 Op() const { return fOp; } 97 98 void SetParent(Term *parent) { fParent = parent; } 99 Term *Parent() const { return fParent; } 100 101 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0, 102 const uint8 *key = NULL,size_t size = 0) = 0; 103 virtual void Complement() = 0; 104 105 virtual void CalculateScore(Index &index) = 0; 106 virtual int32 Score() const = 0; 107 108 virtual status_t InitCheck() = 0; 109 110 #ifdef DEBUG 111 virtual void PrintToStream() = 0; 112 #endif 113 114 protected: 115 int8 fOp; 116 Term *fParent; 117 }; 118 119 // Although an Equation object is quite independent from the volume on which 120 // the query is run, there are some dependencies that are produced while 121 // querying: 122 // The type/size of the value, the score, and if it has an index or not. 123 // So you could run more than one query on the same volume, but it might return 124 // wrong values when it runs concurrently on another volume. 125 // That's not an issue right now, because we run single-threaded and don't use 126 // queries more than once. 127 128 class Equation : public Term { 129 public: 130 Equation(char **expr); 131 virtual ~Equation(); 132 133 virtual status_t InitCheck(); 134 135 status_t ParseQuotedString(char **_start, char **_end); 136 char *CopyString(char *start, char *end); 137 138 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 139 const uint8 *key = NULL, size_t size = 0); 140 virtual void Complement(); 141 142 status_t PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator, 143 bool queryNonIndexed); 144 status_t GetNextMatching(Volume *volume, TreeIterator *iterator, 145 struct dirent *dirent, size_t bufferSize); 146 147 virtual void CalculateScore(Index &index); 148 virtual int32 Score() const { return fScore; } 149 150 #ifdef DEBUG 151 virtual void PrintToStream(); 152 #endif 153 154 private: 155 Equation(const Equation &); 156 Equation &operator=(const Equation &); 157 // no implementation 158 159 status_t ConvertValue(type_code type); 160 bool CompareTo(const uint8 *value, uint16 size); 161 uint8 *Value() const { return (uint8 *)&fValue; } 162 status_t MatchEmptyString(); 163 164 char *fAttribute; 165 char *fString; 166 union value fValue; 167 type_code fType; 168 size_t fSize; 169 bool fIsPattern; 170 bool fIsSpecialTime; 171 172 int32 fScore; 173 bool fHasIndex; 174 }; 175 176 class Operator : public Term { 177 public: 178 Operator(Term *,int8,Term *); 179 virtual ~Operator(); 180 181 Term *Left() const { return fLeft; } 182 Term *Right() const { return fRight; } 183 184 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 185 const uint8 *key = NULL, size_t size = 0); 186 virtual void Complement(); 187 188 virtual void CalculateScore(Index &index); 189 virtual int32 Score() const; 190 191 virtual status_t InitCheck(); 192 193 //Term *Copy() const; 194 #ifdef DEBUG 195 virtual void PrintToStream(); 196 #endif 197 198 private: 199 Operator(const Operator &); 200 Operator &operator=(const Operator &); 201 // no implementation 202 203 Term *fLeft,*fRight; 204 }; 205 206 207 //--------------------------------- 208 209 210 void 211 skipWhitespace(char **expr, int32 skip = 0) 212 { 213 char *string = (*expr) + skip; 214 while (*string == ' ' || *string == '\t') string++; 215 *expr = string; 216 } 217 218 219 void 220 skipWhitespaceReverse(char **expr,char *stop) 221 { 222 char *string = *expr; 223 while (string > stop && (*string == ' ' || *string == '\t')) string--; 224 *expr = string; 225 } 226 227 228 // #pragma mark - 229 230 231 uint32 232 utf8ToUnicode(char **string) 233 { 234 uint8 *bytes = (uint8 *)*string; 235 int32 length; 236 uint8 mask = 0x1f; 237 238 switch (bytes[0] & 0xf0) { 239 case 0xc0: 240 case 0xd0: length = 2; break; 241 case 0xe0: length = 3; break; 242 case 0xf0: 243 mask = 0x0f; 244 length = 4; 245 break; 246 default: 247 // valid 1-byte character 248 // and invalid characters 249 (*string)++; 250 return bytes[0]; 251 } 252 uint32 c = bytes[0] & mask; 253 int32 i = 1; 254 for (;i < length && (bytes[i] & 0x80) > 0;i++) 255 c = (c << 6) | (bytes[i] & 0x3f); 256 257 if (i < length) { 258 // invalid character 259 (*string)++; 260 return (uint32)bytes[0]; 261 } 262 *string += length; 263 return c; 264 } 265 266 267 int32 268 getFirstPatternSymbol(char *string) 269 { 270 char c; 271 272 for (int32 index = 0;(c = *string++);index++) { 273 if (c == '*' || c == '?' || c == '[') 274 return index; 275 } 276 return -1; 277 } 278 279 280 bool 281 isPattern(char *string) 282 { 283 return getFirstPatternSymbol(string) >= 0 ? true : false; 284 } 285 286 287 status_t 288 isValidPattern(char *pattern) 289 { 290 while (*pattern) { 291 switch (*pattern++) { 292 case '\\': 293 // the escape character must not be at the end of the pattern 294 if (!*pattern++) 295 return PATTERN_INVALID_ESCAPE; 296 break; 297 298 case '[': 299 if (pattern[0] == ']' || !pattern[0]) 300 return PATTERN_INVALID_SET; 301 302 while (*pattern != ']') { 303 if (*pattern == '\\' && !*++pattern) 304 return PATTERN_INVALID_ESCAPE; 305 306 if (!*pattern) 307 return PATTERN_INVALID_SET; 308 309 if (pattern[0] == '-' && pattern[1] == '-') 310 return PATTERN_INVALID_RANGE; 311 312 pattern++; 313 } 314 break; 315 } 316 } 317 return B_OK; 318 } 319 320 321 /** Matches the string against the given wildcard pattern. 322 * Returns either MATCH_OK, or NO_MATCH when everything went fine, 323 * or values < 0 (see enum at the top of Query.cpp) if an error 324 * occurs 325 */ 326 327 status_t 328 matchString(char *pattern, char *string) 329 { 330 while (*pattern) { 331 // end of string == valid end of pattern? 332 if (!string[0]) { 333 while (pattern[0] == '*') 334 pattern++; 335 return !pattern[0] ? MATCH_OK : NO_MATCH; 336 } 337 338 switch (*pattern++) { 339 case '?': 340 { 341 // match exactly one UTF-8 character; we are 342 // not interested in the result 343 utf8ToUnicode(&string); 344 break; 345 } 346 347 case '*': 348 { 349 // compact pattern 350 while (true) { 351 if (pattern[0] == '?') { 352 if (!*++string) 353 return NO_MATCH; 354 } else if (pattern[0] != '*') 355 break; 356 357 pattern++; 358 } 359 360 // if the pattern is done, we have matched the string 361 if (!pattern[0]) 362 return MATCH_OK; 363 364 while(true) { 365 // we have removed all occurences of '*' and '?' 366 if (pattern[0] == string[0] 367 || pattern[0] == '[' 368 || pattern[0] == '\\') { 369 status_t status = matchString(pattern,string); 370 if (status < B_OK || status == MATCH_OK) 371 return status; 372 } 373 374 // we could be nice here and just jump to the next 375 // UTF-8 character - but we wouldn't gain that much 376 // and it'd be slower (since we're checking for 377 // equality before entering the recursion) 378 if (!*++string) 379 return NO_MATCH; 380 } 381 break; 382 } 383 384 case '[': 385 { 386 bool invert = false; 387 if (pattern[0] == '^' || pattern[0] == '!') { 388 invert = true; 389 pattern++; 390 } 391 392 if (!pattern[0] || pattern[0] == ']') 393 return MATCH_BAD_PATTERN; 394 395 uint32 c = utf8ToUnicode(&string); 396 bool matched = false; 397 398 while (pattern[0] != ']') { 399 if (!pattern[0]) 400 return MATCH_BAD_PATTERN; 401 402 if (pattern[0] == '\\') 403 pattern++; 404 405 uint32 first = utf8ToUnicode(&pattern); 406 407 // Does this character match, or is this a range? 408 if (first == c) { 409 matched = true; 410 break; 411 } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) { 412 pattern++; 413 414 if (pattern[0] == '\\') { 415 pattern++; 416 if (!pattern[0]) 417 return MATCH_BAD_PATTERN; 418 } 419 uint32 last = utf8ToUnicode(&pattern); 420 421 if (c >= first && c <= last) { 422 matched = true; 423 break; 424 } 425 } 426 } 427 428 if (invert) 429 matched = !matched; 430 431 if (matched) { 432 while (pattern[0] != ']') { 433 if (!pattern[0]) 434 return MATCH_BAD_PATTERN; 435 pattern++; 436 } 437 pattern++; 438 break; 439 } 440 return NO_MATCH; 441 } 442 443 case '\\': 444 if (!pattern[0]) 445 return MATCH_BAD_PATTERN; 446 // supposed to fall through 447 default: 448 if (pattern[-1] != string[0]) 449 return NO_MATCH; 450 string++; 451 break; 452 } 453 } 454 455 if (string[0]) 456 return NO_MATCH; 457 458 return MATCH_OK; 459 } 460 461 462 // #pragma mark - 463 464 465 Equation::Equation(char **expr) 466 : Term(OP_EQUATION), 467 fAttribute(NULL), 468 fString(NULL), 469 fType(0), 470 fIsPattern(false) 471 { 472 char *string = *expr; 473 char *start = string; 474 char *end = NULL; 475 476 // Since the equation is the integral part of any query, we're just parsing 477 // the whole thing here. 478 // The whitespace at the start is already removed in Expression::ParseEquation() 479 480 if (*start == '"' || *start == '\'') { 481 // string is quoted (start has to be on the beginning of a string) 482 if (ParseQuotedString(&start, &end) < B_OK) 483 return; 484 485 // set string to a valid start of the equation symbol 486 string = end + 2; 487 skipWhitespace(&string); 488 if (*string != '=' && *string != '<' && *string != '>' && *string != '!') { 489 *expr = string; 490 return; 491 } 492 } else { 493 // search the (in)equation for the actual equation symbol (and for other operators 494 // in case the equation is malformed) 495 while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!' 496 && *string != '&' && *string != '|') 497 string++; 498 499 // get the attribute string (and trim whitespace), in case 500 // the string was not quoted 501 end = string - 1; 502 skipWhitespaceReverse(&end, start); 503 } 504 505 // attribute string is empty (which is not allowed) 506 if (start > end) 507 return; 508 509 // at this point, "start" points to the beginning of the string, "end" points 510 // to the last character of the string, and "string" points to the first 511 // character of the equation symbol 512 513 // test for the right symbol (as this doesn't need any memory) 514 switch (*string) { 515 case '=': 516 fOp = OP_EQUAL; 517 break; 518 case '>': 519 fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN; 520 break; 521 case '<': 522 fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN; 523 break; 524 case '!': 525 if (*(string + 1) != '=') 526 return; 527 fOp = OP_UNEQUAL; 528 break; 529 530 // any invalid characters will be rejected 531 default: 532 *expr = string; 533 return; 534 } 535 // lets change "start" to point to the first character after the symbol 536 if (*(string + 1) == '=') 537 string++; 538 string++; 539 skipWhitespace(&string); 540 541 // allocate & copy the attribute string 542 543 fAttribute = CopyString(start, end); 544 if (fAttribute == NULL) 545 return; 546 547 start = string; 548 if (*start == '"' || *start == '\'') { 549 // string is quoted (start has to be on the beginning of a string) 550 if (ParseQuotedString(&start, &end) < B_OK) 551 return; 552 553 string = end + 2; 554 skipWhitespace(&string); 555 } else { 556 while (*string && *string != '&' && *string != '|' && *string != ')') 557 string++; 558 559 end = string - 1; 560 skipWhitespaceReverse(&end, start); 561 } 562 563 // at this point, "start" will point to the first character of the value, 564 // "end" will point to its last character, and "start" to the first non- 565 // whitespace character after the value string 566 567 fString = CopyString(start, end); 568 if (fString == NULL) 569 return; 570 571 // patterns are only allowed for these operations (and strings) 572 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) { 573 fIsPattern = isPattern(fString); 574 if (fIsPattern && isValidPattern(fString) < B_OK) { 575 // we only want to have valid patterns; setting fString 576 // to NULL will cause InitCheck() to fail 577 free(fString); 578 fString = NULL; 579 } 580 } 581 582 // The special time flag is set if the time values are shifted 583 // 64-bit values to reduce the number of duplicates. 584 // We have to be able to compare them against unshifted values 585 // later. The only index which needs this is the last_modified 586 // index, but we may want to open that feature for other indices, 587 // too one day. 588 fIsSpecialTime = !strcmp(fAttribute, "last_modified"); 589 590 *expr = string; 591 } 592 593 594 Equation::~Equation() 595 { 596 if (fAttribute != NULL) 597 free(fAttribute); 598 if (fString != NULL) 599 free(fString); 600 } 601 602 603 status_t 604 Equation::InitCheck() 605 { 606 if (fAttribute == NULL 607 || fString == NULL 608 || fOp == OP_NONE) 609 return B_BAD_VALUE; 610 611 return B_OK; 612 } 613 614 615 status_t 616 Equation::ParseQuotedString(char **_start, char **_end) 617 { 618 char *start = *_start; 619 char quote = *start++; 620 char *end = start; 621 622 for (;*end && *end != quote;end++) { 623 if (*end == '\\') 624 end++; 625 } 626 if (*end == '\0') 627 return B_BAD_VALUE; 628 629 *_start = start; 630 *_end = end - 1; 631 632 return B_OK; 633 } 634 635 636 char * 637 Equation::CopyString(char *start, char *end) 638 { 639 // end points to the last character of the string - and the length 640 // also has to include the null-termination 641 int32 length = end + 2 - start; 642 // just to make sure; since that's the max. attribute name length and 643 // the max. string in an index, it make sense to have it that way 644 if (length > INODE_FILE_NAME_LENGTH || length <= 0) 645 return NULL; 646 647 char *copy = (char *)malloc(length); 648 if (copy == NULL) 649 return NULL; 650 651 memcpy(copy,start,length - 1); 652 copy[length - 1] = '\0'; 653 654 return copy; 655 } 656 657 658 status_t 659 Equation::ConvertValue(type_code type) 660 { 661 // Has the type already been converted? 662 if (type == fType) 663 return B_OK; 664 665 char *string = fString; 666 667 switch (type) { 668 case B_MIME_STRING_TYPE: 669 type = B_STRING_TYPE; 670 // supposed to fall through 671 case B_STRING_TYPE: 672 strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH); 673 fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0'; 674 fSize = strlen(fValue.String); 675 break; 676 case B_INT32_TYPE: 677 fValue.Int32 = strtol(string, &string, 0); 678 fSize = sizeof(int32); 679 break; 680 case B_UINT32_TYPE: 681 fValue.Int32 = strtoul(string, &string, 0); 682 fSize = sizeof(uint32); 683 break; 684 case B_INT64_TYPE: 685 fValue.Int64 = strtoll(string, &string, 0); 686 fSize = sizeof(int64); 687 break; 688 case B_UINT64_TYPE: 689 fValue.Uint64 = strtoull(string, &string, 0); 690 fSize = sizeof(uint64); 691 break; 692 case B_FLOAT_TYPE: 693 fValue.Float = strtod(string, &string); 694 fSize = sizeof(float); 695 break; 696 case B_DOUBLE_TYPE: 697 fValue.Double = strtod(string, &string); 698 fSize = sizeof(double); 699 break; 700 default: 701 FATAL(("query value conversion to 0x%lx requested!\n", type)); 702 // should we fail here or just do a safety int32 conversion? 703 return B_ERROR; 704 } 705 706 fType = type; 707 708 // patterns are only allowed for string types 709 if (fType != B_STRING_TYPE && fIsPattern) 710 fIsPattern = false; 711 712 return B_OK; 713 } 714 715 716 /** Returns true when the key matches the equation. You have to 717 * call ConvertValue() before this one. 718 */ 719 720 bool 721 Equation::CompareTo(const uint8 *value, uint16 size) 722 { 723 int32 compare; 724 725 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL 726 if (fIsPattern) { 727 // we have already validated the pattern, so we don't check for failing 728 // here - if something is broken, and matchString() returns an error, 729 // we just don't match 730 compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1; 731 } else if (fIsSpecialTime) { 732 // the index is a shifted int64 index, but we have to match 733 // against an unshifted value (i.e. the last_modified index) 734 int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT; 735 compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64)); 736 } else 737 compare = compareKeys(fType, value, size, Value(), fSize); 738 739 switch (fOp) { 740 case OP_EQUAL: 741 return compare == 0; 742 case OP_UNEQUAL: 743 return compare != 0; 744 case OP_LESS_THAN: 745 return compare < 0; 746 case OP_LESS_THAN_OR_EQUAL: 747 return compare <= 0; 748 case OP_GREATER_THAN: 749 return compare > 0; 750 case OP_GREATER_THAN_OR_EQUAL: 751 return compare >= 0; 752 } 753 FATAL(("Unknown/Unsupported operation: %d\n", fOp)); 754 return false; 755 } 756 757 758 void 759 Equation::Complement() 760 { 761 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) { 762 FATAL(("op out of range!")); 763 return; 764 }); 765 766 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL, 767 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN}; 768 fOp = complementOp[fOp - OP_EQUAL]; 769 } 770 771 772 status_t 773 Equation::MatchEmptyString() 774 { 775 // there is no matching attribute, we will just bail out if we 776 // already know that our value is not of a string type. 777 // If not, it will be converted to a string - and then be compared with "". 778 // That's why we have to call ConvertValue() here - but it will be 779 // a cheap call for the next time 780 // Should we do this only for OP_UNEQUAL? 781 if (fType != 0 && fType != B_STRING_TYPE) 782 return NO_MATCH; 783 784 status_t status = ConvertValue(B_STRING_TYPE); 785 if (status == B_OK) 786 status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH; 787 788 return status; 789 } 790 791 792 /** Matches the inode's attribute value with the equation. 793 * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong 794 */ 795 796 status_t 797 Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size) 798 { 799 // get a pointer to the attribute in question 800 union value value; 801 uint8 *buffer; 802 bool locked = false; 803 804 // first, check if we are matching for a live query and use that value 805 if (attributeName != NULL && !strcmp(fAttribute, attributeName)) { 806 if (key == NULL) { 807 if (type == B_STRING_TYPE) 808 return MatchEmptyString(); 809 810 return NO_MATCH; 811 } 812 buffer = const_cast<uint8 *>(key); 813 } else if (!strcmp(fAttribute, "name")) { 814 // we need to lock before accessing Inode::Name() 815 inode->SmallDataLock().Lock(); 816 locked = true; 817 818 // if not, check for "fake" attributes, "name", "size", "last_modified", 819 buffer = (uint8 *)inode->Name(); 820 if (buffer == NULL) { 821 inode->SmallDataLock().Unlock(); 822 return B_ERROR; 823 } 824 825 type = B_STRING_TYPE; 826 size = strlen((const char *)buffer); 827 } else if (!strcmp(fAttribute,"size")) { 828 buffer = (uint8 *)&inode->Node()->data.size; 829 type = B_INT64_TYPE; 830 } else if (!strcmp(fAttribute,"last_modified")) { 831 buffer = (uint8 *)&inode->Node()->last_modified_time; 832 type = B_INT64_TYPE; 833 } else { 834 // then for attributes in the small_data section, and finally for the 835 // real attributes 836 Inode *attribute; 837 838 inode->SmallDataLock().Lock(); 839 small_data *smallData = inode->FindSmallData(fAttribute); 840 if (smallData != NULL) { 841 buffer = smallData->Data(); 842 type = smallData->type; 843 size = smallData->data_size; 844 locked = true; 845 } else { 846 // needed to unlock the small_data section as fast as possible 847 inode->SmallDataLock().Unlock(); 848 849 if (inode->GetAttribute(fAttribute, &attribute) == B_OK) { 850 buffer = (uint8 *)&value; 851 type = attribute->Node()->type; 852 size = attribute->Size(); 853 854 if (size > INODE_FILE_NAME_LENGTH) 855 size = INODE_FILE_NAME_LENGTH; 856 857 if (attribute->ReadAt(0, buffer, &size) < B_OK) { 858 inode->ReleaseAttribute(attribute); 859 return B_IO_ERROR; 860 } 861 inode->ReleaseAttribute(attribute); 862 } else 863 return MatchEmptyString(); 864 } 865 } 866 // prepare own value for use, if it is possible to convert it 867 status_t status = ConvertValue(type); 868 if (status == B_OK) 869 status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH; 870 871 if (locked) 872 inode->SmallDataLock().Unlock(); 873 874 RETURN_ERROR(status); 875 } 876 877 878 void 879 Equation::CalculateScore(Index &index) 880 { 881 // As always, these values could be tuned and refined. 882 // And the code could also need some real world testing :-) 883 884 // do we have to operate on a "foreign" index? 885 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) { 886 fScore = 0; 887 return; 888 } 889 890 // if we have a pattern, how much does it help our search? 891 if (fIsPattern) 892 fScore = getFirstPatternSymbol(fString) << 3; 893 else { 894 // Score by operator 895 if (fOp == OP_EQUAL) 896 // higher than pattern="255 chars+*" 897 fScore = 2048; 898 else 899 // the pattern search is regarded cheaper when you have at 900 // least one character to set your index to 901 fScore = 5; 902 } 903 904 // take index size into account (1024 is the current node size 905 // in our B+trees) 906 // 2048 * 2048 == 4194304 is the maximum score (for an empty 907 // tree, since the header + 1 node are already 2048 bytes) 908 fScore = fScore * ((2048 * 1024LL) / index.Node()->Size()); 909 } 910 911 912 status_t 913 Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator, bool queryNonIndexed) 914 { 915 status_t status = index.SetTo(fAttribute); 916 917 // if we should query attributes without an index, we can just proceed here 918 if (status < B_OK && !queryNonIndexed) 919 return B_ENTRY_NOT_FOUND; 920 921 type_code type; 922 923 // special case for OP_UNEQUAL - it will always operate through the whole index 924 // but we need the call to the original index to get the correct type 925 if (status < B_OK || fOp == OP_UNEQUAL) { 926 // Try to get an index that holds all files (name) 927 // Also sets the default type for all attributes without index 928 // to string. 929 type = status < B_OK ? B_STRING_TYPE : index.Type(); 930 931 if (index.SetTo("name") < B_OK) 932 return B_ENTRY_NOT_FOUND; 933 934 fHasIndex = false; 935 } else { 936 fHasIndex = true; 937 type = index.Type(); 938 } 939 940 if (ConvertValue(type) < B_OK) 941 return B_BAD_VALUE; 942 943 BPlusTree *tree; 944 if (index.Node()->GetTree(&tree) < B_OK) 945 return B_ERROR; 946 947 *iterator = new TreeIterator(tree); 948 if (*iterator == NULL) 949 return B_NO_MEMORY; 950 951 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL 952 || fIsPattern) 953 && fHasIndex) { 954 // set iterator to the exact position 955 956 int32 keySize = index.KeySize(); 957 958 // at this point, fIsPattern is only true if it's a string type, and fOp 959 // is either OP_EQUAL or OP_UNEQUAL 960 if (fIsPattern) { 961 // let's see if we can use the beginning of the key for positioning 962 // the iterator and adjust the key size; if not, just leave the 963 // iterator at the start and return success 964 keySize = getFirstPatternSymbol(fString); 965 if (keySize <= 0) 966 return B_OK; 967 } 968 969 if (keySize == 0) { 970 // B_STRING_TYPE doesn't have a fixed length, so it was set 971 // to 0 before - we compute the correct value here 972 if (fType == B_STRING_TYPE) { 973 keySize = strlen(fValue.String); 974 975 // The empty string is a special case - we normally don't check 976 // for the trailing null byte, in the case for the empty string 977 // we do it explicitly, because there can't be keys in the B+tree 978 // with a length of zero 979 if (keySize == 0) 980 keySize = 1; 981 } else 982 RETURN_ERROR(B_ENTRY_NOT_FOUND); 983 } 984 985 if (fIsSpecialTime) { 986 // we have to find the first matching shifted value 987 off_t value = fValue.Int64 << INODE_TIME_SHIFT; 988 status = (*iterator)->Find((uint8 *)&value, keySize); 989 if (status == B_ENTRY_NOT_FOUND) 990 return B_OK; 991 } else { 992 status = (*iterator)->Find(Value(), keySize); 993 if (fOp == OP_EQUAL && !fIsPattern) 994 return status; 995 else if (status == B_ENTRY_NOT_FOUND 996 && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL)) 997 return B_OK; 998 } 999 1000 RETURN_ERROR(status); 1001 } 1002 1003 return B_OK; 1004 } 1005 1006 1007 status_t 1008 Equation::GetNextMatching(Volume *volume, TreeIterator *iterator, 1009 struct dirent *dirent, size_t bufferSize) 1010 { 1011 while (true) { 1012 union value indexValue; 1013 uint16 keyLength; 1014 uint16 duplicate; 1015 off_t offset; 1016 1017 status_t status = iterator->GetNextEntry(&indexValue, &keyLength, 1018 (uint16)sizeof(indexValue), &offset, &duplicate); 1019 if (status < B_OK) 1020 return status; 1021 1022 // only compare against the index entry when this is the correct 1023 // index for the equation 1024 if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) { 1025 // They aren't equal? let the operation decide what to do 1026 // Since we always start at the beginning of the index (or the correct 1027 // position), only some needs to be stopped if the entry doesn't fit. 1028 if (fOp == OP_LESS_THAN 1029 || fOp == OP_LESS_THAN_OR_EQUAL 1030 || (fOp == OP_EQUAL && !fIsPattern)) 1031 return B_ENTRY_NOT_FOUND; 1032 1033 if (duplicate > 0) 1034 iterator->SkipDuplicates(); 1035 continue; 1036 } 1037 1038 Vnode vnode(volume, offset); 1039 Inode *inode; 1040 if ((status = vnode.Get(&inode)) != B_OK) { 1041 REPORT_ERROR(status); 1042 FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute)); 1043 // try with next 1044 continue; 1045 } 1046 1047 // ToDo: check user permissions here - but which one?! 1048 // we could filter out all those where we don't have 1049 // read access... (we should check for every parent 1050 // directory if the X_OK is allowed) 1051 // Although it's quite expensive to open all parents, 1052 // it's likely that the application that runs the 1053 // query will do something similar (and we don't have 1054 // to do it for root, either). 1055 1056 // go up in the tree until a &&-operator is found, and check if the 1057 // inode matches with the rest of the expression - we don't have to 1058 // check ||-operators for that 1059 Term *term = this; 1060 status = MATCH_OK; 1061 1062 if (!fHasIndex) 1063 status = Match(inode); 1064 1065 while (term != NULL && status == MATCH_OK) { 1066 Operator *parent = (Operator *)term->Parent(); 1067 if (parent == NULL) 1068 break; 1069 1070 if (parent->Op() == OP_AND) { 1071 // choose the other child of the parent 1072 Term *other = parent->Right(); 1073 if (other == term) 1074 other = parent->Left(); 1075 1076 if (other == NULL) { 1077 FATAL(("&&-operator has only one child... (parent = %p)\n", parent)); 1078 break; 1079 } 1080 status = other->Match(inode); 1081 if (status < 0) { 1082 REPORT_ERROR(status); 1083 status = NO_MATCH; 1084 } 1085 } 1086 term = (Term *)parent; 1087 } 1088 1089 if (status == MATCH_OK) { 1090 dirent->d_dev = volume->ID(); 1091 dirent->d_ino = offset; 1092 dirent->d_pdev = volume->ID(); 1093 dirent->d_pino = volume->ToVnode(inode->Parent()); 1094 1095 if (inode->GetName(dirent->d_name) < B_OK) 1096 FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber())); 1097 1098 #ifdef KEEP_WRONG_DIRENT_RECLEN 1099 // ToDo: The available file systems in BeOS apparently don't set the 1100 // correct d_reclen - we are copying that behaviour if requested, but 1101 // if it doesn't break compatibility, we will remove it. 1102 dirent->d_reclen = strlen(dirent->d_name); 1103 #else 1104 dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name); 1105 #endif 1106 } 1107 1108 if (status == MATCH_OK) 1109 return B_OK; 1110 } 1111 RETURN_ERROR(B_ERROR); 1112 } 1113 1114 1115 // #pragma mark - 1116 1117 1118 Operator::Operator(Term *left, int8 op, Term *right) 1119 : Term(op), 1120 fLeft(left), 1121 fRight(right) 1122 { 1123 if (left) 1124 left->SetParent(this); 1125 if (right) 1126 right->SetParent(this); 1127 } 1128 1129 1130 Operator::~Operator() 1131 { 1132 delete fLeft; 1133 delete fRight; 1134 } 1135 1136 1137 status_t 1138 Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size) 1139 { 1140 if (fOp == OP_AND) { 1141 status_t status = fLeft->Match(inode, attribute, type, key, size); 1142 if (status != MATCH_OK) 1143 return status; 1144 1145 return fRight->Match(inode, attribute, type, key, size); 1146 } else { 1147 // choose the term with the better score for OP_OR 1148 if (fRight->Score() > fLeft->Score()) { 1149 status_t status = fRight->Match(inode, attribute, type, key, size); 1150 if (status != NO_MATCH) 1151 return status; 1152 } 1153 return fLeft->Match(inode, attribute, type, key, size); 1154 } 1155 } 1156 1157 1158 void 1159 Operator::Complement() 1160 { 1161 if (fOp == OP_AND) 1162 fOp = OP_OR; 1163 else 1164 fOp = OP_AND; 1165 1166 fLeft->Complement(); 1167 fRight->Complement(); 1168 } 1169 1170 1171 void 1172 Operator::CalculateScore(Index &index) 1173 { 1174 fLeft->CalculateScore(index); 1175 fRight->CalculateScore(index); 1176 } 1177 1178 1179 int32 1180 Operator::Score() const 1181 { 1182 if (fOp == OP_AND) { 1183 // return the one with the better score 1184 if (fRight->Score() > fLeft->Score()) 1185 return fRight->Score(); 1186 1187 return fLeft->Score(); 1188 } 1189 1190 // for OP_OR, be honest, and return the one with the worse score 1191 if (fRight->Score() < fLeft->Score()) 1192 return fRight->Score(); 1193 1194 return fLeft->Score(); 1195 } 1196 1197 1198 status_t 1199 Operator::InitCheck() 1200 { 1201 if (fOp != OP_AND && fOp != OP_OR 1202 || fLeft == NULL || fLeft->InitCheck() < B_OK 1203 || fRight == NULL || fRight->InitCheck() < B_OK) 1204 return B_ERROR; 1205 1206 return B_OK; 1207 } 1208 1209 1210 #if 0 1211 Term * 1212 Operator::Copy() const 1213 { 1214 if (fEquation != NULL) { 1215 Equation *equation = new Equation(*fEquation); 1216 if (equation == NULL) 1217 return NULL; 1218 1219 Term *term = new Term(equation); 1220 if (term == NULL) 1221 delete equation; 1222 1223 return term; 1224 } 1225 1226 Term *left = NULL, *right = NULL; 1227 1228 if (fLeft != NULL && (left = fLeft->Copy()) == NULL) 1229 return NULL; 1230 if (fRight != NULL && (right = fRight->Copy()) == NULL) { 1231 delete left; 1232 return NULL; 1233 } 1234 1235 Term *term = new Term(left,fOp,right); 1236 if (term == NULL) { 1237 delete left; 1238 delete right; 1239 return NULL; 1240 } 1241 return term; 1242 } 1243 #endif 1244 1245 1246 // #pragma mark - 1247 1248 #ifdef DEBUG 1249 void 1250 Operator::PrintToStream() 1251 { 1252 D(__out("( ")); 1253 if (fLeft != NULL) 1254 fLeft->PrintToStream(); 1255 1256 char *op; 1257 switch (fOp) { 1258 case OP_OR: op = "OR"; break; 1259 case OP_AND: op = "AND"; break; 1260 default: op = "?"; break; 1261 } 1262 D(__out(" %s ",op)); 1263 1264 if (fRight != NULL) 1265 fRight->PrintToStream(); 1266 1267 D(__out(" )")); 1268 } 1269 1270 1271 void 1272 Equation::PrintToStream() 1273 { 1274 char *symbol = "???"; 1275 switch (fOp) { 1276 case OP_EQUAL: symbol = "=="; break; 1277 case OP_UNEQUAL: symbol = "!="; break; 1278 case OP_GREATER_THAN: symbol = ">"; break; 1279 case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break; 1280 case OP_LESS_THAN: symbol = "<"; break; 1281 case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break; 1282 } 1283 D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString)); 1284 } 1285 1286 #endif /* DEBUG */ 1287 1288 // #pragma mark - 1289 1290 1291 Expression::Expression(char *expr) 1292 { 1293 if (expr == NULL) 1294 return; 1295 1296 fTerm = ParseOr(&expr); 1297 if (fTerm != NULL && fTerm->InitCheck() < B_OK) { 1298 FATAL(("Corrupt tree in expression!\n")); 1299 delete fTerm; 1300 fTerm = NULL; 1301 } 1302 D(if (fTerm != NULL) { 1303 fTerm->PrintToStream(); 1304 D(__out("\n")); 1305 if (*expr != '\0') 1306 PRINT(("Unexpected end of string: \"%s\"!\n", expr)); 1307 }); 1308 fPosition = expr; 1309 } 1310 1311 1312 Expression::~Expression() 1313 { 1314 delete fTerm; 1315 } 1316 1317 1318 Term * 1319 Expression::ParseEquation(char **expr) 1320 { 1321 skipWhitespace(expr); 1322 1323 bool _not = false; 1324 if (**expr == '!') { 1325 skipWhitespace(expr, 1); 1326 if (**expr != '(') 1327 return NULL; 1328 1329 _not = true; 1330 } 1331 1332 if (**expr == ')') { 1333 // shouldn't be handled here 1334 return NULL; 1335 } else if (**expr == '(') { 1336 skipWhitespace(expr, 1); 1337 1338 Term *term = ParseOr(expr); 1339 1340 skipWhitespace(expr); 1341 1342 if (**expr != ')') { 1343 delete term; 1344 return NULL; 1345 } 1346 1347 // If the term is negated, we just complement the tree, to get 1348 // rid of the not, a.k.a. DeMorgan's Law. 1349 if (_not) 1350 term->Complement(); 1351 1352 skipWhitespace(expr, 1); 1353 1354 return term; 1355 } 1356 1357 Equation *equation = new Equation(expr); 1358 if (equation == NULL || equation->InitCheck() < B_OK) { 1359 delete equation; 1360 return NULL; 1361 } 1362 return equation; 1363 } 1364 1365 1366 Term * 1367 Expression::ParseAnd(char **expr) 1368 { 1369 Term *left = ParseEquation(expr); 1370 if (left == NULL) 1371 return NULL; 1372 1373 while (IsOperator(expr,'&')) { 1374 Term *right = ParseAnd(expr); 1375 Term *newParent = NULL; 1376 1377 if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) { 1378 delete left; 1379 delete right; 1380 1381 return NULL; 1382 } 1383 left = newParent; 1384 } 1385 1386 return left; 1387 } 1388 1389 1390 Term * 1391 Expression::ParseOr(char **expr) 1392 { 1393 Term *left = ParseAnd(expr); 1394 if (left == NULL) 1395 return NULL; 1396 1397 while (IsOperator(expr,'|')) { 1398 Term *right = ParseAnd(expr); 1399 Term *newParent = NULL; 1400 1401 if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) { 1402 delete left; 1403 delete right; 1404 1405 return NULL; 1406 } 1407 left = newParent; 1408 } 1409 1410 return left; 1411 } 1412 1413 1414 bool 1415 Expression::IsOperator(char **expr, char op) 1416 { 1417 char *string = *expr; 1418 1419 if (*string == op && *(string + 1) == op) { 1420 *expr += 2; 1421 return true; 1422 } 1423 return false; 1424 } 1425 1426 1427 status_t 1428 Expression::InitCheck() 1429 { 1430 if (fTerm == NULL) 1431 return B_BAD_VALUE; 1432 1433 return B_OK; 1434 } 1435 1436 1437 // #pragma mark - 1438 1439 1440 Query::Query(Volume *volume, Expression *expression, uint32 flags) 1441 : 1442 fVolume(volume), 1443 fExpression(expression), 1444 fCurrent(NULL), 1445 fIterator(NULL), 1446 fIndex(volume), 1447 fFlags(flags), 1448 fPort(-1) 1449 { 1450 // if the expression has a valid root pointer, the whole tree has 1451 // already passed the sanity check, so that we don't have to check 1452 // every pointer 1453 if (volume == NULL || expression == NULL || expression->Root() == NULL) 1454 return; 1455 1456 // create index on the stack and delete it afterwards 1457 fExpression->Root()->CalculateScore(fIndex); 1458 fIndex.Unset(); 1459 1460 Stack<Term *> stack; 1461 stack.Push(fExpression->Root()); 1462 1463 Term *term; 1464 while (stack.Pop(&term)) { 1465 if (term->Op() < OP_EQUATION) { 1466 Operator *op = (Operator *)term; 1467 1468 if (op->Op() == OP_OR) { 1469 stack.Push(op->Left()); 1470 stack.Push(op->Right()); 1471 } else { 1472 // For OP_AND, we can use the scoring system to decide which path to add 1473 if (op->Right()->Score() > op->Left()->Score()) 1474 stack.Push(op->Right()); 1475 else 1476 stack.Push(op->Left()); 1477 } 1478 } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK) 1479 FATAL(("Unknown term on stack or stack error")); 1480 } 1481 1482 if (fFlags & B_LIVE_QUERY) 1483 volume->AddQuery(this); 1484 } 1485 1486 1487 Query::~Query() 1488 { 1489 if (fFlags & B_LIVE_QUERY) 1490 fVolume->RemoveQuery(this); 1491 } 1492 1493 1494 status_t 1495 Query::GetNextEntry(struct dirent *dirent, size_t size) 1496 { 1497 // If we don't have an equation to use yet/anymore, get a new one 1498 // from the stack 1499 while (true) { 1500 if (fIterator == NULL) { 1501 if (!fStack.Pop(&fCurrent) 1502 || fCurrent == NULL 1503 || fCurrent->PrepareQuery(fVolume, fIndex, &fIterator, 1504 fFlags & B_QUERY_NON_INDEXED) < B_OK) 1505 return B_ENTRY_NOT_FOUND; 1506 } 1507 if (fCurrent == NULL) 1508 RETURN_ERROR(B_ERROR); 1509 1510 status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size); 1511 if (status < B_OK) { 1512 delete fIterator; 1513 fIterator = NULL; 1514 fCurrent = NULL; 1515 } else { 1516 // only return if we have another entry 1517 return B_OK; 1518 } 1519 } 1520 } 1521 1522 1523 void 1524 Query::SetLiveMode(port_id port, int32 token) 1525 { 1526 fPort = port; 1527 fToken = token; 1528 1529 if ((fFlags & B_LIVE_QUERY) == 0) { 1530 // you can decide at any point to set the live query mode, 1531 // only live queries have to be updated by attribute changes 1532 fFlags |= B_LIVE_QUERY; 1533 fVolume->AddQuery(this); 1534 } 1535 } 1536 1537 1538 void 1539 Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 1540 size_t oldLength, const uint8 *newKey, size_t newLength) 1541 { 1542 if (fPort < 0 || fExpression == NULL || attribute == NULL) 1543 return; 1544 1545 // ToDo: check if the attribute is part of the query at all... 1546 1547 status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength); 1548 status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength); 1549 1550 int32 op; 1551 if (oldStatus == MATCH_OK && newStatus == MATCH_OK) { 1552 // only send out a notification if the name was changed 1553 if (oldKey == NULL || strcmp(attribute, "name")) 1554 return; 1555 1556 send_notification(fPort, fToken, B_QUERY_UPDATE, B_ENTRY_REMOVED, fVolume->ID(), 0, 1557 fVolume->ToVnode(inode->Parent()), 0, inode->ID(), (const char *)oldKey); 1558 op = B_ENTRY_CREATED; 1559 } else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) { 1560 // nothing has changed 1561 return; 1562 } else if (oldStatus == MATCH_OK && newStatus != MATCH_OK) 1563 op = B_ENTRY_REMOVED; 1564 else 1565 op = B_ENTRY_CREATED; 1566 1567 // if "value" is NULL, send_notification() crashes... 1568 const char *value = (const char *)newKey; 1569 if (type != B_STRING_TYPE || value == NULL) 1570 value = ""; 1571 1572 send_notification(fPort, fToken, B_QUERY_UPDATE, op, fVolume->ID(), 0, 1573 fVolume->ToVnode(inode->Parent()), 0, inode->ID(), value); 1574 } 1575 1576