1 /* Query - query parsing and evaluation 2 ** 3 ** Initial version by Axel Dörfler, axeld@pinc-software.de 4 ** The pattern matching is roughly based on code originally written 5 ** by J. Kercheval, and on code written by Kenneth Almquist, though 6 ** it shares no code. 7 ** 8 ** This file may be used under the terms of the OpenBeOS License. 9 */ 10 11 12 #include "Query.h" 13 #include "cpp.h" 14 #include "bfs.h" 15 #include "Debug.h" 16 #include "Stack.h" 17 #include "Volume.h" 18 #include "Inode.h" 19 #include "BPlusTree.h" 20 #include "Index.h" 21 22 #include <SupportDefs.h> 23 #include <TypeConstants.h> 24 #include <AppDefs.h> 25 26 #include <malloc.h> 27 #include <stdio.h> 28 #include <string.h> 29 30 31 // The parser has a very static design, but it will do what is required. 32 // 33 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator 34 // precedence, that is =,!=,>,<,>=,<= .. && .. ||. 35 // Apparently, the "!" (not) can only be used with brackets. 36 // 37 // If you think that there are too few NULL pointer checks in some places 38 // of the code, just read the beginning of the query constructor. 39 // The API is not fully available, just the Query and the Expression class 40 // are. 41 42 43 enum ops { 44 OP_NONE, 45 46 OP_AND, 47 OP_OR, 48 49 OP_EQUATION, 50 51 OP_EQUAL, 52 OP_UNEQUAL, 53 OP_GREATER_THAN, 54 OP_LESS_THAN, 55 OP_GREATER_THAN_OR_EQUAL, 56 OP_LESS_THAN_OR_EQUAL, 57 }; 58 59 enum match { 60 NO_MATCH = 0, 61 MATCH_OK = 1, 62 63 MATCH_BAD_PATTERN = -2, 64 MATCH_INVALID_CHARACTER 65 }; 66 67 // return values from isValidPattern() 68 enum { 69 PATTERN_INVALID_ESCAPE = -3, 70 PATTERN_INVALID_RANGE, 71 PATTERN_INVALID_SET 72 }; 73 74 union value { 75 int64 Int64; 76 uint64 Uint64; 77 int32 Int32; 78 uint32 Uint32; 79 float Float; 80 double Double; 81 char String[INODE_FILE_NAME_LENGTH]; 82 }; 83 84 // B_MIME_STRING_TYPE is defined in storage/Mime.h, but we 85 // don't need the whole file here; the type can't change anyway 86 #ifndef _MIME_H 87 # define B_MIME_STRING_TYPE 'MIMS' 88 #endif 89 90 class Term { 91 public: 92 Term(int8 op) : fOp(op), fParent(NULL) {} 93 94 int8 Op() const { return fOp; } 95 96 void SetParent(Term *parent) { fParent = parent; } 97 Term *Parent() const { return fParent; } 98 99 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0, 100 const uint8 *key = NULL,size_t size = 0) = 0; 101 virtual void Complement() = 0; 102 103 virtual void CalculateScore(Index &index) = 0; 104 virtual int32 Score() const = 0; 105 106 virtual status_t InitCheck() = 0; 107 108 #ifdef DEBUG 109 virtual void PrintToStream() = 0; 110 #endif 111 112 protected: 113 int8 fOp; 114 Term *fParent; 115 }; 116 117 // Although an Equation object is quite independent from the volume on which 118 // the query is run, there are some dependencies that are produced while 119 // querying: 120 // The type/size of the value, the score, and if it has an index or not. 121 // So you could run more than one query on the same volume, but it might return 122 // wrong values when it runs concurrently on another volume. 123 // That's not an issue right now, because we run single-threaded and don't use 124 // queries more than once. 125 126 class Equation : public Term { 127 public: 128 Equation(char **expr); 129 ~Equation(); 130 131 virtual status_t InitCheck(); 132 133 status_t ParseQuotedString(char **_start,char **_end); 134 char *CopyString(char *start, char *end); 135 136 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,const uint8 *key = NULL,size_t size = 0); 137 virtual void Complement(); 138 139 status_t PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator); 140 status_t GetNextMatching(Volume *volume,TreeIterator *iterator,struct dirent *dirent,size_t bufferSize); 141 142 virtual void CalculateScore(Index &index); 143 virtual int32 Score() const { return fScore; } 144 145 #ifdef DEBUG 146 virtual void PrintToStream(); 147 #endif 148 149 private: 150 Equation(const Equation &); 151 Equation &operator=(const Equation &); 152 // no implementation 153 154 status_t ConvertValue(type_code type); 155 bool CompareTo(const uint8 *value, uint16 size); 156 uint8 *Value() const { return (uint8 *)&fValue; } 157 status_t MatchEmptyString(); 158 159 char *fAttribute; 160 char *fString; 161 union value fValue; 162 type_code fType; 163 size_t fSize; 164 bool fIsPattern; 165 bool fIsSpecialTime; 166 167 int32 fScore; 168 bool fHasIndex; 169 }; 170 171 class Operator : public Term { 172 public: 173 Operator(Term *,int8,Term *); 174 ~Operator(); 175 176 Term *Left() const { return fLeft; } 177 Term *Right() const { return fRight; } 178 179 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 180 const uint8 *key = NULL, size_t size = 0); 181 virtual void Complement(); 182 183 virtual void CalculateScore(Index &index); 184 virtual int32 Score() const; 185 186 virtual status_t InitCheck(); 187 188 //Term *Copy() const; 189 #ifdef DEBUG 190 virtual void PrintToStream(); 191 #endif 192 193 private: 194 Operator(const Operator &); 195 Operator &operator=(const Operator &); 196 // no implementation 197 198 Term *fLeft,*fRight; 199 }; 200 201 202 //--------------------------------- 203 204 205 void 206 skipWhitespace(char **expr, int32 skip = 0) 207 { 208 char *string = (*expr) + skip; 209 while (*string == ' ' || *string == '\t') string++; 210 *expr = string; 211 } 212 213 214 void 215 skipWhitespaceReverse(char **expr,char *stop) 216 { 217 char *string = *expr; 218 while (string > stop && (*string == ' ' || *string == '\t')) string--; 219 *expr = string; 220 } 221 222 223 // #pragma mark - 224 225 226 uint32 227 utf8ToUnicode(char **string) 228 { 229 uint8 *bytes = (uint8 *)*string; 230 int32 length; 231 uint8 mask = 0x1f; 232 233 switch (bytes[0] & 0xf0) { 234 case 0xc0: 235 case 0xd0: length = 2; break; 236 case 0xe0: length = 3; break; 237 case 0xf0: 238 mask = 0x0f; 239 length = 4; 240 break; 241 default: 242 // valid 1-byte character 243 // and invalid characters 244 (*string)++; 245 return bytes[0]; 246 } 247 uint32 c = bytes[0] & mask; 248 int32 i = 1; 249 for (;i < length && (bytes[i] & 0x80) > 0;i++) 250 c = (c << 6) | (bytes[i] & 0x3f); 251 252 if (i < length) { 253 // invalid character 254 (*string)++; 255 return (uint32)bytes[0]; 256 } 257 *string += length; 258 return c; 259 } 260 261 262 int32 263 getFirstPatternSymbol(char *string) 264 { 265 char c; 266 267 for (int32 index = 0;(c = *string++);index++) { 268 if (c == '*' || c == '?' || c == '[') 269 return index; 270 } 271 return -1; 272 } 273 274 275 bool 276 isPattern(char *string) 277 { 278 return getFirstPatternSymbol(string) >= 0 ? true : false; 279 } 280 281 282 status_t 283 isValidPattern(char *pattern) 284 { 285 while (*pattern) { 286 switch (*pattern++) { 287 case '\\': 288 // the escape character must not be at the end of the pattern 289 if (!*pattern++) 290 return PATTERN_INVALID_ESCAPE; 291 break; 292 293 case '[': 294 if (pattern[0] == ']' || !pattern[0]) 295 return PATTERN_INVALID_SET; 296 297 while (*pattern != ']') { 298 if (*pattern == '\\' && !*++pattern) 299 return PATTERN_INVALID_ESCAPE; 300 301 if (!*pattern) 302 return PATTERN_INVALID_SET; 303 304 if (pattern[0] == '-' && pattern[1] == '-') 305 return PATTERN_INVALID_RANGE; 306 307 pattern++; 308 } 309 break; 310 } 311 } 312 return B_OK; 313 } 314 315 316 /** Matches the string against the given wildcard pattern. 317 * Returns either MATCH_OK, or NO_MATCH when everything went fine, 318 * or values < 0 (see enum at the top of Query.cpp) if an error 319 * occurs 320 */ 321 322 status_t 323 matchString(char *pattern, char *string) 324 { 325 while (*pattern) { 326 // end of string == valid end of pattern? 327 if (!string[0]) { 328 while (pattern[0] == '*') 329 pattern++; 330 return !pattern[0] ? MATCH_OK : NO_MATCH; 331 } 332 333 switch (*pattern++) { 334 case '?': 335 { 336 // match exactly one UTF-8 character; we are 337 // not interested in the result 338 utf8ToUnicode(&string); 339 break; 340 } 341 342 case '*': 343 { 344 // compact pattern 345 while (true) { 346 if (pattern[0] == '?') { 347 if (!*++string) 348 return NO_MATCH; 349 } else if (pattern[0] != '*') 350 break; 351 352 pattern++; 353 } 354 355 // if the pattern is done, we have matched the string 356 if (!pattern[0]) 357 return MATCH_OK; 358 359 while(true) { 360 // we have removed all occurences of '*' and '?' 361 if (pattern[0] == string[0] 362 || pattern[0] == '[' 363 || pattern[0] == '\\') { 364 status_t status = matchString(pattern,string); 365 if (status < B_OK || status == MATCH_OK) 366 return status; 367 } 368 369 // we could be nice here and just jump to the next 370 // UTF-8 character - but we wouldn't gain that much 371 // and it'd be slower (since we're checking for 372 // equality before entering the recursion) 373 if (!*++string) 374 return NO_MATCH; 375 } 376 break; 377 } 378 379 case '[': 380 { 381 bool invert = false; 382 if (pattern[0] == '^' || pattern[0] == '!') { 383 invert = true; 384 pattern++; 385 } 386 387 if (!pattern[0] || pattern[0] == ']') 388 return MATCH_BAD_PATTERN; 389 390 uint32 c = utf8ToUnicode(&string); 391 bool matched = false; 392 393 while (pattern[0] != ']') { 394 if (!pattern[0]) 395 return MATCH_BAD_PATTERN; 396 397 if (pattern[0] == '\\') 398 pattern++; 399 400 uint32 first = utf8ToUnicode(&pattern); 401 402 // Does this character match, or is this a range? 403 if (first == c) { 404 matched = true; 405 break; 406 } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) { 407 pattern++; 408 409 if (pattern[0] == '\\') { 410 pattern++; 411 if (!pattern[0]) 412 return MATCH_BAD_PATTERN; 413 } 414 uint32 last = utf8ToUnicode(&pattern); 415 416 if (c >= first && c <= last) { 417 matched = true; 418 break; 419 } 420 } 421 } 422 423 if (invert) 424 matched = !matched; 425 426 if (matched) { 427 while (pattern[0] != ']') { 428 if (!pattern[0]) 429 return MATCH_BAD_PATTERN; 430 pattern++; 431 } 432 pattern++; 433 break; 434 } 435 return NO_MATCH; 436 } 437 438 case '\\': 439 if (!pattern[0]) 440 return MATCH_BAD_PATTERN; 441 // supposed to fall through 442 default: 443 if (pattern[-1] != string[0]) 444 return NO_MATCH; 445 string++; 446 break; 447 } 448 } 449 450 if (string[0]) 451 return NO_MATCH; 452 453 return MATCH_OK; 454 } 455 456 457 // #pragma mark - 458 459 460 Equation::Equation(char **expr) 461 : Term(OP_EQUATION), 462 fAttribute(NULL), 463 fString(NULL), 464 fType(0), 465 fIsPattern(false) 466 { 467 char *string = *expr; 468 char *start = string; 469 char *end = NULL; 470 471 // Since the equation is the integral part of any query, we're just parsing 472 // the whole thing here. 473 // The whitespace at the start is already removed in Expression::ParseEquation() 474 475 if (*start == '"' || *start == '\'') { 476 // string is quoted (start has to be on the beginning of a string) 477 if (ParseQuotedString(&start, &end) < B_OK) 478 return; 479 480 // set string to a valid start of the equation symbol 481 string = end + 2; 482 skipWhitespace(&string); 483 if (*string != '=' && *string != '<' && *string != '>' && *string != '!') { 484 *expr = string; 485 return; 486 } 487 } else { 488 // search the (in)equation for the actual equation symbol (and for other operators 489 // in case the equation is malformed) 490 while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!' 491 && *string != '&' && *string != '|') 492 string++; 493 494 // get the attribute string (and trim whitespace), in case 495 // the string was not quoted 496 end = string - 1; 497 skipWhitespaceReverse(&end, start); 498 } 499 500 // attribute string is empty (which is not allowed) 501 if (start > end) 502 return; 503 504 // at this point, "start" points to the beginning of the string, "end" points 505 // to the last character of the string, and "string" points to the first 506 // character of the equation symbol 507 508 // test for the right symbol (as this doesn't need any memory) 509 switch (*string) { 510 case '=': 511 fOp = OP_EQUAL; 512 break; 513 case '>': 514 fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN; 515 break; 516 case '<': 517 fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN; 518 break; 519 case '!': 520 if (*(string + 1) != '=') 521 return; 522 fOp = OP_UNEQUAL; 523 break; 524 525 // any invalid characters will be rejected 526 default: 527 *expr = string; 528 return; 529 } 530 // lets change "start" to point to the first character after the symbol 531 if (*(string + 1) == '=') 532 string++; 533 string++; 534 skipWhitespace(&string); 535 536 // allocate & copy the attribute string 537 538 fAttribute = CopyString(start, end); 539 if (fAttribute == NULL) 540 return; 541 542 start = string; 543 if (*start == '"' || *start == '\'') { 544 // string is quoted (start has to be on the beginning of a string) 545 if (ParseQuotedString(&start, &end) < B_OK) 546 return; 547 548 string = end + 2; 549 skipWhitespace(&string); 550 } else { 551 while (*string && *string != '&' && *string != '|' && *string != ')') 552 string++; 553 554 end = string - 1; 555 skipWhitespaceReverse(&end, start); 556 } 557 558 // at this point, "start" will point to the first character of the value, 559 // "end" will point to its last character, and "start" to the first non- 560 // whitespace character after the value string 561 562 fString = CopyString(start, end); 563 if (fString == NULL) 564 return; 565 566 // patterns are only allowed for these operations (and strings) 567 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) { 568 fIsPattern = isPattern(fString); 569 if (fIsPattern && isValidPattern(fString) < B_OK) { 570 // we only want to have valid patterns; setting fString 571 // to NULL will cause InitCheck() to fail 572 free(fString); 573 fString = NULL; 574 } 575 } 576 577 // The special time flag is set if the time values are shifted 578 // 64-bit values to reduce the number of duplicates. 579 // We have to be able to compare them against unshifted values 580 // later. The only index which needs this is the last_modified 581 // index, but we may want to open that feature for other indices, 582 // too one day. 583 fIsSpecialTime = !strcmp(fAttribute, "last_modified"); 584 585 *expr = string; 586 } 587 588 589 Equation::~Equation() 590 { 591 if (fAttribute != NULL) 592 free(fAttribute); 593 if (fString != NULL) 594 free(fString); 595 } 596 597 598 status_t 599 Equation::InitCheck() 600 { 601 if (fAttribute == NULL 602 || fString == NULL 603 || fOp == OP_NONE) 604 return B_BAD_VALUE; 605 606 return B_OK; 607 } 608 609 610 status_t 611 Equation::ParseQuotedString(char **_start, char **_end) 612 { 613 char *start = *_start; 614 char quote = *start++; 615 char *end = start; 616 617 for (;*end && *end != quote;end++) { 618 if (*end == '\\') 619 end++; 620 } 621 if (*end == '\0') 622 return B_BAD_VALUE; 623 624 *_start = start; 625 *_end = end - 1; 626 627 return B_OK; 628 } 629 630 631 char * 632 Equation::CopyString(char *start, char *end) 633 { 634 // end points to the last character of the string - and the length 635 // also has to include the null-termination 636 int32 length = end + 2 - start; 637 // just to make sure; since that's the max. attribute name length and 638 // the max. string in an index, it make sense to have it that way 639 if (length > INODE_FILE_NAME_LENGTH || length <= 0) 640 return NULL; 641 642 char *copy = (char *)malloc(length); 643 if (copy == NULL) 644 return NULL; 645 646 memcpy(copy,start,length - 1); 647 copy[length - 1] = '\0'; 648 649 return copy; 650 } 651 652 653 status_t 654 Equation::ConvertValue(type_code type) 655 { 656 // Has the type already been converted? 657 if (type == fType) 658 return B_OK; 659 660 char *string = fString; 661 662 switch (type) { 663 case B_MIME_STRING_TYPE: 664 type = B_STRING_TYPE; 665 // supposed to fall through 666 case B_STRING_TYPE: 667 strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH); 668 fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0'; 669 fSize = strlen(fValue.String); 670 break; 671 case B_INT32_TYPE: 672 fValue.Int32 = strtol(string, &string, 0); 673 fSize = sizeof(int32); 674 break; 675 case B_UINT32_TYPE: 676 fValue.Int32 = strtoul(string, &string, 0); 677 fSize = sizeof(uint32); 678 break; 679 case B_INT64_TYPE: 680 fValue.Int64 = strtoll(string, &string, 0); 681 fSize = sizeof(int64); 682 break; 683 case B_UINT64_TYPE: 684 fValue.Uint64 = strtoull(string, &string, 0); 685 fSize = sizeof(uint64); 686 break; 687 case B_FLOAT_TYPE: 688 fValue.Float = strtod(string, &string); 689 fSize = sizeof(float); 690 break; 691 case B_DOUBLE_TYPE: 692 fValue.Double = strtod(string, &string); 693 fSize = sizeof(double); 694 break; 695 default: 696 FATAL(("query value conversion to 0x%lx requested!\n", type)); 697 // should we fail here or just do a safety int32 conversion? 698 return B_ERROR; 699 } 700 701 fType = type; 702 703 // patterns are only allowed for string types 704 if (fType != B_STRING_TYPE && fIsPattern) 705 fIsPattern = false; 706 707 return B_OK; 708 } 709 710 711 /** Returns true when the key matches the equation. You have to 712 * call ConvertValue() before this one. 713 */ 714 715 bool 716 Equation::CompareTo(const uint8 *value, uint16 size) 717 { 718 int32 compare; 719 720 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL 721 if (fIsPattern) { 722 // we have already validated the pattern, so we don't check for failing 723 // here - if something is broken, and matchString() returns an error, 724 // we just don't match 725 compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1; 726 } else if (fIsSpecialTime) { 727 // the index is a shifted int64 index, but we have to match 728 // against an unshifted value (i.e. the last_modified index) 729 int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT; 730 compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64)); 731 } else 732 compare = compareKeys(fType, value, size, Value(), fSize); 733 734 switch (fOp) { 735 case OP_EQUAL: 736 return compare == 0; 737 case OP_UNEQUAL: 738 return compare != 0; 739 case OP_LESS_THAN: 740 return compare < 0; 741 case OP_LESS_THAN_OR_EQUAL: 742 return compare <= 0; 743 case OP_GREATER_THAN: 744 return compare > 0; 745 case OP_GREATER_THAN_OR_EQUAL: 746 return compare >= 0; 747 } 748 FATAL(("Unknown/Unsupported operation: %d\n", fOp)); 749 return false; 750 } 751 752 753 void 754 Equation::Complement() 755 { 756 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) { 757 FATAL(("op out of range!")); 758 return; 759 }); 760 761 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL, 762 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN}; 763 fOp = complementOp[fOp - OP_EQUAL]; 764 } 765 766 767 status_t 768 Equation::MatchEmptyString() 769 { 770 // there is no matching attribute, we will just bail out if we 771 // already know that our value is not of a string type. 772 // If not, it will be converted to a string - and then be compared with "". 773 // That's why we have to call ConvertValue() here - but it will be 774 // a cheap call for the next time 775 // Should we do this only for OP_UNEQUAL? 776 if (fType != 0 && fType != B_STRING_TYPE) 777 return NO_MATCH; 778 779 status_t status = ConvertValue(B_STRING_TYPE); 780 if (status == B_OK) 781 status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH; 782 783 return status; 784 } 785 786 787 /** Matches the inode's attribute value with the equation. 788 * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong 789 */ 790 791 status_t 792 Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size) 793 { 794 // get a pointer to the attribute in question 795 union value value; 796 uint8 *buffer; 797 bool locked = false; 798 799 // first, check if we are matching for a live query and use that value 800 if (attributeName != NULL && !strcmp(fAttribute, attributeName)) { 801 if (key == NULL) { 802 if (type == B_STRING_TYPE) 803 return MatchEmptyString(); 804 805 return NO_MATCH; 806 } 807 buffer = const_cast<uint8 *>(key); 808 } else if (!strcmp(fAttribute, "name")) { 809 // we need to lock before accessing Inode::Name() 810 inode->SmallDataLock().Lock(); 811 locked = true; 812 813 // if not, check for "fake" attributes, "name", "size", "last_modified", 814 buffer = (uint8 *)inode->Name(); 815 if (buffer == NULL) { 816 inode->SmallDataLock().Unlock(); 817 return B_ERROR; 818 } 819 820 type = B_STRING_TYPE; 821 size = strlen((const char *)buffer); 822 } else if (!strcmp(fAttribute,"size")) { 823 buffer = (uint8 *)&inode->Node()->data.size; 824 type = B_INT64_TYPE; 825 } else if (!strcmp(fAttribute,"last_modified")) { 826 buffer = (uint8 *)&inode->Node()->last_modified_time; 827 type = B_INT64_TYPE; 828 } else { 829 // then for attributes in the small_data section, and finally for the 830 // real attributes 831 Inode *attribute; 832 833 inode->SmallDataLock().Lock(); 834 small_data *smallData = inode->FindSmallData(fAttribute); 835 if (smallData != NULL) { 836 buffer = smallData->Data(); 837 type = smallData->type; 838 size = smallData->data_size; 839 locked = true; 840 } else { 841 // needed to unlock the small_data section as fast as possible 842 inode->SmallDataLock().Unlock(); 843 844 if (inode->GetAttribute(fAttribute, &attribute) == B_OK) { 845 buffer = (uint8 *)&value; 846 type = attribute->Node()->type; 847 size = attribute->Size(); 848 849 if (size > INODE_FILE_NAME_LENGTH) 850 size = INODE_FILE_NAME_LENGTH; 851 852 if (attribute->ReadAt(0, buffer, &size) < B_OK) { 853 inode->ReleaseAttribute(attribute); 854 return B_IO_ERROR; 855 } 856 inode->ReleaseAttribute(attribute); 857 } else 858 return MatchEmptyString(); 859 } 860 } 861 // prepare own value for use, if it is possible to convert it 862 status_t status = ConvertValue(type); 863 if (status == B_OK) 864 status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH; 865 866 if (locked) 867 inode->SmallDataLock().Unlock(); 868 869 RETURN_ERROR(status); 870 } 871 872 873 void 874 Equation::CalculateScore(Index &index) 875 { 876 // As always, these values could be tuned and refined. 877 // And the code could also need some real world testing :-) 878 879 // do we have to operate on a "foreign" index? 880 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) { 881 fScore = 0; 882 return; 883 } 884 885 // if we have a pattern, how much does it help our search? 886 if (fIsPattern) 887 fScore = getFirstPatternSymbol(fString) << 3; 888 else { 889 // Score by operator 890 if (fOp == OP_EQUAL) 891 // higher than pattern="255 chars+*" 892 fScore = 2048; 893 else 894 // the pattern search is regarded cheaper when you have at 895 // least one character to set your index to 896 fScore = 5; 897 } 898 899 // take index size into account (1024 is the current node size 900 // in our B+trees) 901 // 2048 * 2048 == 4194304 is the maximum score (for an empty 902 // tree, since the header + 1 node are already 2048 bytes) 903 fScore = fScore * ((2048 * 1024LL) / index.Node()->Size()); 904 } 905 906 907 status_t 908 Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator) 909 { 910 type_code type; 911 status_t status = index.SetTo(fAttribute); 912 913 // special case for OP_UNEQUAL - it will always operate through the whole index 914 // but we need the call to the original index to get the correct type 915 if (status < B_OK || fOp == OP_UNEQUAL) { 916 // Try to get an index that holds all files (name) 917 // Also sets the default type for all attributes without index 918 // to string. 919 type = status < B_OK ? B_STRING_TYPE : index.Type(); 920 921 if (index.SetTo("name") < B_OK) 922 return B_ENTRY_NOT_FOUND; 923 924 fHasIndex = false; 925 } else { 926 fHasIndex = true; 927 type = index.Type(); 928 } 929 930 if (ConvertValue(type) < B_OK) 931 return B_BAD_VALUE; 932 933 BPlusTree *tree; 934 if (index.Node()->GetTree(&tree) < B_OK) 935 return B_ERROR; 936 937 *iterator = new TreeIterator(tree); 938 if (*iterator == NULL) 939 return B_NO_MEMORY; 940 941 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL 942 || fIsPattern) 943 && fHasIndex) { 944 // set iterator to the exact position 945 946 int32 keySize = index.KeySize(); 947 948 // at this point, fIsPattern is only true if it's a string type, and fOp 949 // is either OP_EQUAL or OP_UNEQUAL 950 if (fIsPattern) { 951 // let's see if we can use the beginning of the key for positioning 952 // the iterator and adjust the key size; if not, just leave the 953 // iterator at the start and return success 954 keySize = getFirstPatternSymbol(fString); 955 if (keySize <= 0) 956 return B_OK; 957 } 958 959 if (keySize == 0) { 960 if (fType == B_STRING_TYPE) 961 keySize = strlen(fValue.String); 962 else 963 RETURN_ERROR(B_ENTRY_NOT_FOUND); 964 } 965 966 if (fIsSpecialTime) { 967 // we have to find the first matching shifted value 968 off_t value = fValue.Int64 << INODE_TIME_SHIFT; 969 status = (*iterator)->Find((uint8 *)&value, keySize); 970 if (status == B_ENTRY_NOT_FOUND) 971 return B_OK; 972 } else { 973 status = (*iterator)->Find(Value(),keySize); 974 if (fOp == OP_EQUAL && !fIsPattern) 975 return status; 976 else if (status == B_ENTRY_NOT_FOUND 977 && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL)) 978 return B_OK; 979 } 980 981 RETURN_ERROR(status); 982 } 983 984 return B_OK; 985 } 986 987 988 status_t 989 Equation::GetNextMatching(Volume *volume, TreeIterator *iterator, 990 struct dirent *dirent, size_t bufferSize) 991 { 992 while (true) { 993 union value indexValue; 994 uint16 keyLength; 995 uint16 duplicate; 996 off_t offset; 997 998 status_t status = iterator->GetNextEntry(&indexValue, &keyLength, 999 (uint16)sizeof(indexValue), &offset, &duplicate); 1000 if (status < B_OK) 1001 return status; 1002 1003 // only compare against the index entry when this is the correct 1004 // index for the equation 1005 if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) { 1006 // They aren't equal? let the operation decide what to do 1007 // Since we always start at the beginning of the index (or the correct 1008 // position), only some needs to be stopped if the entry doesn't fit. 1009 if (fOp == OP_LESS_THAN 1010 || fOp == OP_LESS_THAN_OR_EQUAL 1011 || (fOp == OP_EQUAL && !fIsPattern)) 1012 return B_ENTRY_NOT_FOUND; 1013 1014 if (duplicate > 0) 1015 iterator->SkipDuplicates(); 1016 continue; 1017 } 1018 1019 Inode *inode; 1020 if ((status = get_vnode(volume->ID(), offset, (void **)&inode)) != B_OK) { 1021 REPORT_ERROR(status); 1022 FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute)); 1023 // try with next 1024 continue; 1025 } 1026 1027 // ToDo: check user permissions here - but which one?! 1028 // we could filter out all those where we don't have 1029 // read access... (we should check for every parent 1030 // directory if the X_OK is allowed) 1031 // Although it's quite expensive to open all parents, 1032 // it's likely that the application that runs the 1033 // query will do something similar (and we don't have 1034 // to do it for root, either). 1035 1036 // go up in the tree until a &&-operator is found, and check if the 1037 // inode matches with the rest of the expression - we don't have to 1038 // check ||-operators for that 1039 Term *term = this; 1040 status = MATCH_OK; 1041 1042 if (!fHasIndex) 1043 status = Match(inode); 1044 1045 while (term != NULL && status == MATCH_OK) { 1046 Operator *parent = (Operator *)term->Parent(); 1047 if (parent == NULL) 1048 break; 1049 1050 if (parent->Op() == OP_AND) { 1051 // choose the other child of the parent 1052 Term *other = parent->Right(); 1053 if (other == term) 1054 other = parent->Left(); 1055 1056 if (other == NULL) { 1057 FATAL(("&&-operator has only one child... (parent = %p)\n", parent)); 1058 break; 1059 } 1060 status = other->Match(inode); 1061 if (status < 0) { 1062 REPORT_ERROR(status); 1063 status = NO_MATCH; 1064 } 1065 } 1066 term = (Term *)parent; 1067 } 1068 1069 if (status == MATCH_OK) { 1070 dirent->d_dev = volume->ID(); 1071 dirent->d_ino = offset; 1072 dirent->d_pdev = volume->ID(); 1073 dirent->d_pino = volume->ToVnode(inode->Parent()); 1074 1075 if (inode->GetName(dirent->d_name) < B_OK) 1076 FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber())); 1077 1078 #ifdef KEEP_WRONG_DIRENT_RECLEN 1079 // ToDo: The available file systems in BeOS apparently don't set the 1080 // correct d_reclen - we are copying that behaviour if requested, but 1081 // if it doesn't break compatibility, we will remove it. 1082 dirent->d_reclen = strlen(dirent->d_name); 1083 #else 1084 dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name); 1085 #endif 1086 } 1087 1088 put_vnode(volume->ID(), inode->ID()); 1089 1090 if (status == MATCH_OK) 1091 return B_OK; 1092 } 1093 RETURN_ERROR(B_ERROR); 1094 } 1095 1096 1097 // #pragma mark - 1098 1099 1100 Operator::Operator(Term *left, int8 op, Term *right) 1101 : Term(op), 1102 fLeft(left), 1103 fRight(right) 1104 { 1105 if (left) 1106 left->SetParent(this); 1107 if (right) 1108 right->SetParent(this); 1109 } 1110 1111 1112 Operator::~Operator() 1113 { 1114 delete fLeft; 1115 delete fRight; 1116 } 1117 1118 1119 status_t 1120 Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size) 1121 { 1122 if (fOp == OP_AND) { 1123 status_t status = fLeft->Match(inode, attribute, type, key, size); 1124 if (status != MATCH_OK) 1125 return status; 1126 1127 return fRight->Match(inode, attribute, type, key, size); 1128 } else { 1129 // choose the term with the better score for OP_OR 1130 if (fRight->Score() > fLeft->Score()) { 1131 status_t status = fRight->Match(inode, attribute, type, key, size); 1132 if (status != NO_MATCH) 1133 return status; 1134 } 1135 return fLeft->Match(inode, attribute, type, key, size); 1136 } 1137 } 1138 1139 1140 void 1141 Operator::Complement() 1142 { 1143 if (fOp == OP_AND) 1144 fOp = OP_OR; 1145 else 1146 fOp = OP_AND; 1147 1148 fLeft->Complement(); 1149 fRight->Complement(); 1150 } 1151 1152 1153 void 1154 Operator::CalculateScore(Index &index) 1155 { 1156 fLeft->CalculateScore(index); 1157 fRight->CalculateScore(index); 1158 } 1159 1160 1161 int32 1162 Operator::Score() const 1163 { 1164 if (fOp == OP_AND) { 1165 // return the one with the better score 1166 if (fRight->Score() > fLeft->Score()) 1167 return fRight->Score(); 1168 1169 return fLeft->Score(); 1170 } 1171 1172 // for OP_OR, be honest, and return the one with the worse score 1173 if (fRight->Score() < fLeft->Score()) 1174 return fRight->Score(); 1175 1176 return fLeft->Score(); 1177 } 1178 1179 1180 status_t 1181 Operator::InitCheck() 1182 { 1183 if (fOp != OP_AND && fOp != OP_OR 1184 || fLeft == NULL || fLeft->InitCheck() < B_OK 1185 || fRight == NULL || fRight->InitCheck() < B_OK) 1186 return B_ERROR; 1187 1188 return B_OK; 1189 } 1190 1191 1192 #if 0 1193 Term * 1194 Operator::Copy() const 1195 { 1196 if (fEquation != NULL) { 1197 Equation *equation = new Equation(*fEquation); 1198 if (equation == NULL) 1199 return NULL; 1200 1201 Term *term = new Term(equation); 1202 if (term == NULL) 1203 delete equation; 1204 1205 return term; 1206 } 1207 1208 Term *left = NULL, *right = NULL; 1209 1210 if (fLeft != NULL && (left = fLeft->Copy()) == NULL) 1211 return NULL; 1212 if (fRight != NULL && (right = fRight->Copy()) == NULL) { 1213 delete left; 1214 return NULL; 1215 } 1216 1217 Term *term = new Term(left,fOp,right); 1218 if (term == NULL) { 1219 delete left; 1220 delete right; 1221 return NULL; 1222 } 1223 return term; 1224 } 1225 #endif 1226 1227 1228 // #pragma mark - 1229 1230 #ifdef DEBUG 1231 void 1232 Operator::PrintToStream() 1233 { 1234 D(__out("( ")); 1235 if (fLeft != NULL) 1236 fLeft->PrintToStream(); 1237 1238 char *op; 1239 switch (fOp) { 1240 case OP_OR: op = "OR"; break; 1241 case OP_AND: op = "AND"; break; 1242 default: op = "?"; break; 1243 } 1244 D(__out(" %s ",op)); 1245 1246 if (fRight != NULL) 1247 fRight->PrintToStream(); 1248 1249 D(__out(" )")); 1250 } 1251 1252 1253 void 1254 Equation::PrintToStream() 1255 { 1256 char *symbol = "???"; 1257 switch (fOp) { 1258 case OP_EQUAL: symbol = "=="; break; 1259 case OP_UNEQUAL: symbol = "!="; break; 1260 case OP_GREATER_THAN: symbol = ">"; break; 1261 case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break; 1262 case OP_LESS_THAN: symbol = "<"; break; 1263 case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break; 1264 } 1265 D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString)); 1266 } 1267 1268 #endif /* DEBUG */ 1269 1270 // #pragma mark - 1271 1272 1273 Expression::Expression(char *expr) 1274 { 1275 if (expr == NULL) 1276 return; 1277 1278 fTerm = ParseOr(&expr); 1279 if (fTerm != NULL && fTerm->InitCheck() < B_OK) { 1280 FATAL(("Corrupt tree in expression!\n")); 1281 delete fTerm; 1282 fTerm = NULL; 1283 } 1284 D(if (fTerm != NULL) { 1285 fTerm->PrintToStream(); 1286 D(__out("\n")); 1287 if (*expr != '\0') 1288 PRINT(("Unexpected end of string: \"%s\"!\n", expr)); 1289 }); 1290 fPosition = expr; 1291 } 1292 1293 1294 Expression::~Expression() 1295 { 1296 delete fTerm; 1297 } 1298 1299 1300 Term * 1301 Expression::ParseEquation(char **expr) 1302 { 1303 skipWhitespace(expr); 1304 1305 bool not = false; 1306 if (**expr == '!') { 1307 skipWhitespace(expr, 1); 1308 if (**expr != '(') 1309 return NULL; 1310 1311 not = true; 1312 } 1313 1314 if (**expr == ')') { 1315 // shouldn't be handled here 1316 return NULL; 1317 } else if (**expr == '(') { 1318 skipWhitespace(expr, 1); 1319 1320 Term *term = ParseOr(expr); 1321 1322 skipWhitespace(expr); 1323 1324 if (**expr != ')') { 1325 delete term; 1326 return NULL; 1327 } 1328 1329 // If the term is negated, we just complement the tree, to get 1330 // rid of the not, a.k.a. DeMorgan's Law. 1331 if (not) 1332 term->Complement(); 1333 1334 skipWhitespace(expr, 1); 1335 1336 return term; 1337 } 1338 1339 Equation *equation = new Equation(expr); 1340 if (equation == NULL || equation->InitCheck() < B_OK) { 1341 delete equation; 1342 return NULL; 1343 } 1344 return equation; 1345 } 1346 1347 1348 Term * 1349 Expression::ParseAnd(char **expr) 1350 { 1351 Term *left = ParseEquation(expr); 1352 if (left == NULL) 1353 return NULL; 1354 1355 while (IsOperator(expr,'&')) { 1356 Term *right = ParseAnd(expr); 1357 Term *newParent = NULL; 1358 1359 if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) { 1360 delete left; 1361 delete right; 1362 1363 return NULL; 1364 } 1365 left = newParent; 1366 } 1367 1368 return left; 1369 } 1370 1371 1372 Term * 1373 Expression::ParseOr(char **expr) 1374 { 1375 Term *left = ParseAnd(expr); 1376 if (left == NULL) 1377 return NULL; 1378 1379 while (IsOperator(expr,'|')) { 1380 Term *right = ParseAnd(expr); 1381 Term *newParent = NULL; 1382 1383 if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) { 1384 delete left; 1385 delete right; 1386 1387 return NULL; 1388 } 1389 left = newParent; 1390 } 1391 1392 return left; 1393 } 1394 1395 1396 bool 1397 Expression::IsOperator(char **expr, char op) 1398 { 1399 char *string = *expr; 1400 1401 if (*string == op && *(string + 1) == op) { 1402 *expr += 2; 1403 return true; 1404 } 1405 return false; 1406 } 1407 1408 1409 status_t 1410 Expression::InitCheck() 1411 { 1412 if (fTerm == NULL) 1413 return B_BAD_VALUE; 1414 1415 return B_OK; 1416 } 1417 1418 1419 // #pragma mark - 1420 1421 1422 Query::Query(Volume *volume, Expression *expression) 1423 : 1424 fVolume(volume), 1425 fExpression(expression), 1426 fCurrent(NULL), 1427 fIterator(NULL), 1428 fIndex(volume), 1429 fPort(-1) 1430 { 1431 // if the expression has a valid root pointer, the whole tree has 1432 // already passed the sanity check, so that we don't have to check 1433 // every pointer 1434 if (volume == NULL || expression == NULL || expression->Root() == NULL) 1435 return; 1436 1437 // create index on the stack and delete it afterwards 1438 fExpression->Root()->CalculateScore(fIndex); 1439 fIndex.Unset(); 1440 1441 Stack<Term *> stack; 1442 stack.Push(fExpression->Root()); 1443 1444 Term *term; 1445 while (stack.Pop(&term)) { 1446 if (term->Op() < OP_EQUATION) { 1447 Operator *op = (Operator *)term; 1448 1449 if (op->Op() == OP_OR) { 1450 stack.Push(op->Left()); 1451 stack.Push(op->Right()); 1452 } else { 1453 // For OP_AND, we can use the scoring system to decide which path to add 1454 if (op->Right()->Score() > op->Left()->Score()) 1455 stack.Push(op->Right()); 1456 else 1457 stack.Push(op->Left()); 1458 } 1459 } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK) 1460 FATAL(("Unknown term on stack or stack error")); 1461 } 1462 1463 volume->AddQuery(this); 1464 } 1465 1466 1467 Query::~Query() 1468 { 1469 fVolume->RemoveQuery(this); 1470 } 1471 1472 1473 status_t 1474 Query::GetNextEntry(struct dirent *dirent, size_t size) 1475 { 1476 // If we don't have an equation to use yet/anymore, get a new one 1477 // from the stack 1478 while (true) { 1479 if (fIterator == NULL) { 1480 if (!fStack.Pop(&fCurrent) 1481 || fCurrent == NULL 1482 || fCurrent->PrepareQuery(fVolume, fIndex, &fIterator) < B_OK) 1483 return B_ENTRY_NOT_FOUND; 1484 } 1485 if (fCurrent == NULL) 1486 RETURN_ERROR(B_ERROR); 1487 1488 status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size); 1489 if (status < B_OK) { 1490 delete fIterator; 1491 fIterator = NULL; 1492 fCurrent = NULL; 1493 } else { 1494 // only return if we have another entry 1495 return B_OK; 1496 } 1497 } 1498 } 1499 1500 1501 void 1502 Query::SetLiveMode(port_id port, int32 token) 1503 { 1504 fPort = port; 1505 fToken = token; 1506 } 1507 1508 1509 void 1510 Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 1511 size_t oldLength, const uint8 *newKey, size_t newLength) 1512 { 1513 if (fPort < 0 || fExpression == NULL || attribute == NULL) 1514 return; 1515 1516 // ToDo: check if the attribute is part of the query at all... 1517 1518 status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength); 1519 status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength); 1520 int32 op; 1521 if (oldStatus == MATCH_OK && newStatus == MATCH_OK) { 1522 // only send out a notification if the name was changed 1523 if (oldKey == NULL || strcmp(attribute,"name")) 1524 return; 1525 1526 send_notification(fPort, fToken, B_QUERY_UPDATE, B_ENTRY_REMOVED, fVolume->ID(), 0, 1527 fVolume->ToVnode(inode->Parent()), 0, inode->ID(), (const char *)oldKey); 1528 op = B_ENTRY_CREATED; 1529 } else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) { 1530 // nothing has changed 1531 return; 1532 } else if (oldStatus == MATCH_OK && newStatus != MATCH_OK) 1533 op = B_ENTRY_REMOVED; 1534 else 1535 op = B_ENTRY_CREATED; 1536 1537 // if "value" is NULL, send_notification() crashes... 1538 const char *value = (const char *)newKey; 1539 if (type != B_STRING_TYPE || value == NULL) 1540 value = ""; 1541 1542 send_notification(fPort, fToken, B_QUERY_UPDATE, op, fVolume->ID(), 0, 1543 fVolume->ToVnode(inode->Parent()), 0, inode->ID(), value); 1544 } 1545 1546