1 /* Query - query parsing and evaluation 2 ** 3 ** Initial version by Axel Dörfler, axeld@pinc-software.de 4 ** The pattern matching is roughly based on code originally written 5 ** by J. Kercheval, and on code written by Kenneth Almquist, though 6 ** it shares no code. 7 ** 8 ** This file may be used under the terms of the OpenBeOS License. 9 */ 10 11 12 #include "Query.h" 13 #include "cpp.h" 14 #include "bfs.h" 15 #include "Debug.h" 16 #include "Stack.h" 17 #include "Volume.h" 18 #include "Inode.h" 19 #include "BPlusTree.h" 20 #include "Index.h" 21 22 #include <SupportDefs.h> 23 #include <TypeConstants.h> 24 #include <AppDefs.h> 25 26 #include <malloc.h> 27 #include <stdio.h> 28 #include <string.h> 29 30 31 // The parser has a very static design, but it will do what is required. 32 // 33 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator 34 // precedence, that is =,!=,>,<,>=,<= .. && .. ||. 35 // Apparently, the "!" (not) can only be used with brackets. 36 // 37 // If you think that there are too few NULL pointer checks in some places 38 // of the code, just read the beginning of the query constructor. 39 // The API is not fully available, just the Query and the Expression class 40 // are. 41 42 43 enum ops { 44 OP_NONE, 45 46 OP_AND, 47 OP_OR, 48 49 OP_EQUATION, 50 51 OP_EQUAL, 52 OP_UNEQUAL, 53 OP_GREATER_THAN, 54 OP_LESS_THAN, 55 OP_GREATER_THAN_OR_EQUAL, 56 OP_LESS_THAN_OR_EQUAL, 57 }; 58 59 enum match { 60 NO_MATCH = 0, 61 MATCH_OK = 1, 62 63 MATCH_BAD_PATTERN = -2, 64 MATCH_INVALID_CHARACTER 65 }; 66 67 // return values from isValidPattern() 68 enum { 69 PATTERN_INVALID_ESCAPE = -3, 70 PATTERN_INVALID_RANGE, 71 PATTERN_INVALID_SET 72 }; 73 74 union value { 75 int64 Int64; 76 uint64 Uint64; 77 int32 Int32; 78 uint32 Uint32; 79 float Float; 80 double Double; 81 char String[INODE_FILE_NAME_LENGTH]; 82 }; 83 84 class Term { 85 public: 86 Term(int8 op) : fOp(op), fParent(NULL) {} 87 88 int8 Op() const { return fOp; } 89 90 void SetParent(Term *parent) { fParent = parent; } 91 Term *Parent() const { return fParent; } 92 93 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0, 94 const uint8 *key = NULL,size_t size = 0) = 0; 95 virtual void Complement() = 0; 96 97 virtual void CalculateScore(Index &index) = 0; 98 virtual int32 Score() const = 0; 99 100 virtual status_t InitCheck() = 0; 101 102 #ifdef DEBUG 103 virtual void PrintToStream() = 0; 104 #endif 105 106 protected: 107 int8 fOp; 108 Term *fParent; 109 }; 110 111 // Although an Equation object is quite independent from the volume on which 112 // the query is run, there are some dependencies that are produced while 113 // querying: 114 // The type/size of the value, the score, and if it has an index or not. 115 // So you could run more than one query on the same volume, but it might return 116 // wrong values when it runs concurrently on another volume. 117 // That's not an issue right now, because we run single-threaded and don't use 118 // queries more than once. 119 120 class Equation : public Term { 121 public: 122 Equation(char **expr); 123 ~Equation(); 124 125 virtual status_t InitCheck(); 126 127 status_t ParseQuotedString(char **_start,char **_end); 128 char *CopyString(char *start, char *end); 129 130 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,const uint8 *key = NULL,size_t size = 0); 131 virtual void Complement(); 132 133 status_t PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator); 134 status_t GetNextMatching(Volume *volume,TreeIterator *iterator,struct dirent *dirent,size_t bufferSize); 135 136 virtual void CalculateScore(Index &index); 137 virtual int32 Score() const { return fScore; } 138 139 #ifdef DEBUG 140 virtual void PrintToStream(); 141 #endif 142 143 private: 144 Equation(const Equation &); 145 Equation &operator=(const Equation &); 146 // no implementation 147 148 status_t ConvertValue(type_code type); 149 bool CompareTo(const uint8 *value, uint16 size); 150 uint8 *Value() const { return (uint8 *)&fValue; } 151 status_t MatchEmptyString(); 152 153 char *fAttribute; 154 char *fString; 155 union value fValue; 156 type_code fType; 157 size_t fSize; 158 bool fIsPattern; 159 bool fIsSpecialTime; 160 161 int32 fScore; 162 bool fHasIndex; 163 }; 164 165 class Operator : public Term { 166 public: 167 Operator(Term *,int8,Term *); 168 ~Operator(); 169 170 Term *Left() const { return fLeft; } 171 Term *Right() const { return fRight; } 172 173 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 174 const uint8 *key = NULL, size_t size = 0); 175 virtual void Complement(); 176 177 virtual void CalculateScore(Index &index); 178 virtual int32 Score() const; 179 180 virtual status_t InitCheck(); 181 182 //Term *Copy() const; 183 #ifdef DEBUG 184 virtual void PrintToStream(); 185 #endif 186 187 private: 188 Operator(const Operator &); 189 Operator &operator=(const Operator &); 190 // no implementation 191 192 Term *fLeft,*fRight; 193 }; 194 195 196 //--------------------------------- 197 198 199 void 200 skipWhitespace(char **expr, int32 skip = 0) 201 { 202 char *string = (*expr) + skip; 203 while (*string == ' ' || *string == '\t') string++; 204 *expr = string; 205 } 206 207 208 void 209 skipWhitespaceReverse(char **expr,char *stop) 210 { 211 char *string = *expr; 212 while (string > stop && (*string == ' ' || *string == '\t')) string--; 213 *expr = string; 214 } 215 216 217 // #pragma mark - 218 219 220 uint32 221 utf8ToUnicode(char **string) 222 { 223 uint8 *bytes = (uint8 *)*string; 224 int32 length; 225 uint8 mask = 0x1f; 226 227 switch (bytes[0] & 0xf0) { 228 case 0xc0: 229 case 0xd0: length = 2; break; 230 case 0xe0: length = 3; break; 231 case 0xf0: 232 mask = 0x0f; 233 length = 4; 234 break; 235 default: 236 // valid 1-byte character 237 // and invalid characters 238 (*string)++; 239 return bytes[0]; 240 } 241 uint32 c = bytes[0] & mask; 242 int32 i = 1; 243 for (;i < length && (bytes[i] & 0x80) > 0;i++) 244 c = (c << 6) | (bytes[i] & 0x3f); 245 246 if (i < length) { 247 // invalid character 248 (*string)++; 249 return (uint32)bytes[0]; 250 } 251 *string += length; 252 return c; 253 } 254 255 256 int32 257 getFirstPatternSymbol(char *string) 258 { 259 char c; 260 261 for (int32 index = 0;(c = *string++);index++) { 262 if (c == '*' || c == '?' || c == '[') 263 return index; 264 } 265 return -1; 266 } 267 268 269 bool 270 isPattern(char *string) 271 { 272 return getFirstPatternSymbol(string) >= 0 ? true : false; 273 } 274 275 276 status_t 277 isValidPattern(char *pattern) 278 { 279 while (*pattern) { 280 switch (*pattern++) { 281 case '\\': 282 // the escape character must not be at the end of the pattern 283 if (!*pattern++) 284 return PATTERN_INVALID_ESCAPE; 285 break; 286 287 case '[': 288 if (pattern[0] == ']' || !pattern[0]) 289 return PATTERN_INVALID_SET; 290 291 while (*pattern != ']') { 292 if (*pattern == '\\' && !*++pattern) 293 return PATTERN_INVALID_ESCAPE; 294 295 if (!*pattern) 296 return PATTERN_INVALID_SET; 297 298 if (pattern[0] == '-' && pattern[1] == '-') 299 return PATTERN_INVALID_RANGE; 300 301 pattern++; 302 } 303 break; 304 } 305 } 306 return B_OK; 307 } 308 309 310 /** Matches the string against the given wildcard pattern. 311 * Returns either MATCH_OK, or NO_MATCH when everything went fine, 312 * or values < 0 (see enum at the top of Query.cpp) if an error 313 * occurs 314 */ 315 316 status_t 317 matchString(char *pattern,char *string) 318 { 319 while (*pattern) { 320 // end of string == valid end of pattern? 321 if (!string[0]) { 322 while (pattern[0] == '*') 323 pattern++; 324 return !pattern[0] ? MATCH_OK : NO_MATCH; 325 } 326 327 switch (*pattern++) { 328 case '?': 329 { 330 // match exactly one UTF-8 character; we are 331 // not interested in the result 332 utf8ToUnicode(&string); 333 break; 334 } 335 336 case '*': 337 { 338 // compact pattern 339 while (true) { 340 if (pattern[0] == '?') { 341 if (!*++string) 342 return NO_MATCH; 343 } else if (pattern[0] != '*') 344 break; 345 346 pattern++; 347 } 348 349 // if the pattern is done, we have matched the string 350 if (!pattern[0]) 351 return MATCH_OK; 352 353 while(true) { 354 // we have removed all occurences of '*' and '?' 355 if (pattern[0] == string[0] 356 || pattern[0] == '[' 357 || pattern[0] == '\\') { 358 status_t status = matchString(pattern,string); 359 if (status < B_OK || status == MATCH_OK) 360 return status; 361 } 362 363 // we could be nice here and just jump to the next 364 // UTF-8 character - but we wouldn't gain that much 365 // and it'd be slower (since we're checking for 366 // equality before entering the recursion) 367 if (!*++string) 368 return NO_MATCH; 369 } 370 break; 371 } 372 373 case '[': 374 { 375 bool invert = false; 376 if (pattern[0] == '^' || pattern[0] == '!') { 377 invert = true; 378 pattern++; 379 } 380 381 if (!pattern[0] || pattern[0] == ']') 382 return MATCH_BAD_PATTERN; 383 384 uint32 c = utf8ToUnicode(&string); 385 bool matched = false; 386 387 while (pattern[0] != ']') { 388 if (!pattern[0]) 389 return MATCH_BAD_PATTERN; 390 391 if (pattern[0] == '\\') 392 pattern++; 393 394 uint32 first = utf8ToUnicode(&pattern); 395 396 // Does this character match, or is this a range? 397 if (first == c) { 398 matched = true; 399 break; 400 } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) { 401 pattern++; 402 403 if (pattern[0] == '\\') { 404 pattern++; 405 if (!pattern[0]) 406 return MATCH_BAD_PATTERN; 407 } 408 uint32 last = utf8ToUnicode(&pattern); 409 410 if (c >= first && c <= last) { 411 matched = true; 412 break; 413 } 414 } 415 } 416 417 if (invert) 418 matched = !matched; 419 420 if (matched) { 421 while (pattern[0] != ']') { 422 if (!pattern[0]) 423 return MATCH_BAD_PATTERN; 424 pattern++; 425 } 426 pattern++; 427 break; 428 } 429 return NO_MATCH; 430 } 431 432 case '\\': 433 if (!pattern[0]) 434 return MATCH_BAD_PATTERN; 435 // supposed to fall through 436 default: 437 if (pattern[-1] != string[0]) 438 return NO_MATCH; 439 string++; 440 break; 441 } 442 } 443 444 if (string[0]) 445 return NO_MATCH; 446 447 return MATCH_OK; 448 } 449 450 451 // #pragma mark - 452 453 454 Equation::Equation(char **expr) 455 : Term(OP_EQUATION), 456 fAttribute(NULL), 457 fString(NULL), 458 fType(0), 459 fIsPattern(false) 460 { 461 char *string = *expr; 462 char *start = string; 463 char *end = NULL; 464 465 // Since the equation is the integral part of any query, we're just parsing 466 // the whole thing here. 467 // The whitespace at the start is already removed in Expression::ParseEquation() 468 469 if (*start == '"' || *start == '\'') { 470 // string is quoted (start has to be on the beginning of a string) 471 if (ParseQuotedString(&start,&end) < B_OK) 472 return; 473 474 // set string to a valid start of the equation symbol 475 string = end + 2; 476 skipWhitespace(&string); 477 if (*string != '=' && *string != '<' && *string != '>' && *string != '!') { 478 *expr = string; 479 return; 480 } 481 } else { 482 // search the (in)equation for the actual equation symbol (and for other operators 483 // in case the equation is malformed) 484 while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!' 485 && *string != '&' && *string != '|') 486 string++; 487 488 // get the attribute string (and trim whitespace), in case 489 // the string was not quoted 490 end = string - 1; 491 skipWhitespaceReverse(&end,start); 492 } 493 494 // attribute string is empty (which is not allowed) 495 if (start > end) 496 return; 497 498 // at this point, "start" points to the beginning of the string, "end" points 499 // to the last character of the string, and "string" points to the first 500 // character of the equation symbol 501 502 // test for the right symbol (as this doesn't need any memory) 503 switch (*string) { 504 case '=': 505 fOp = OP_EQUAL; 506 break; 507 case '>': 508 fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN; 509 break; 510 case '<': 511 fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN; 512 break; 513 case '!': 514 if (*(string + 1) != '=') 515 return; 516 fOp = OP_UNEQUAL; 517 break; 518 519 // any invalid characters will be rejected 520 default: 521 *expr = string; 522 return; 523 } 524 // lets change "start" to point to the first character after the symbol 525 if (*(string + 1) == '=') 526 string++; 527 string++; 528 skipWhitespace(&string); 529 530 // allocate & copy the attribute string 531 532 fAttribute = CopyString(start,end); 533 if (fAttribute == NULL) 534 return; 535 536 start = string; 537 if (*start == '"' || *start == '\'') { 538 // string is quoted (start has to be on the beginning of a string) 539 if (ParseQuotedString(&start,&end) < B_OK) 540 return; 541 542 string = end + 2; 543 skipWhitespace(&string); 544 } else { 545 while (*string && *string != '&' && *string != '|' && *string != ')') 546 string++; 547 548 end = string - 1; 549 skipWhitespaceReverse(&end,start); 550 } 551 552 // at this point, "start" will point to the first character of the value, 553 // "end" will point to its last character, and "start" to the first non- 554 // whitespace character after the value string 555 556 fString = CopyString(start,end); 557 if (fString == NULL) 558 return; 559 560 // patterns are only allowed for these operations (and strings) 561 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) { 562 fIsPattern = isPattern(fString); 563 if (fIsPattern && isValidPattern(fString) < B_OK) { 564 // we only want to have valid patterns; setting fString 565 // to NULL will cause InitCheck() to fail 566 free(fString); 567 fString = NULL; 568 } 569 } 570 571 // The special time flag is set if the time values are shifted 572 // 64-bit values to reduce the number of duplicates. 573 // We have to be able to compare them against unshifted values 574 // later. The only index which needs this is the last_modified 575 // index, but we may want to open that feature for other indices, 576 // too one day. 577 fIsSpecialTime = !strcmp(fAttribute,"last_modified"); 578 579 *expr = string; 580 } 581 582 583 Equation::~Equation() 584 { 585 if (fAttribute != NULL) 586 free(fAttribute); 587 if (fString != NULL) 588 free(fString); 589 } 590 591 592 status_t 593 Equation::InitCheck() 594 { 595 if (fAttribute == NULL 596 || fString == NULL 597 || fOp == OP_NONE) 598 return B_BAD_VALUE; 599 600 return B_OK; 601 } 602 603 604 status_t 605 Equation::ParseQuotedString(char **_start, char **_end) 606 { 607 char *start = *_start; 608 char quote = *start++; 609 char *end = start; 610 611 for (;*end && *end != quote;end++) { 612 if (*end == '\\') 613 end++; 614 } 615 if (*end == '\0') 616 return B_BAD_VALUE; 617 618 *_start = start; 619 *_end = end - 1; 620 621 return B_OK; 622 } 623 624 625 char * 626 Equation::CopyString(char *start,char *end) 627 { 628 // end points to the last character of the string - and the length 629 // also has to include the null-termination 630 int32 length = end + 2 - start; 631 // just to make sure; since that's the max. attribute name length and 632 // the max. string in an index, it make sense to have it that way 633 if (length > INODE_FILE_NAME_LENGTH || length <= 0) 634 return NULL; 635 636 char *copy = (char *)malloc(length); 637 if (copy == NULL) 638 return NULL; 639 640 memcpy(copy,start,length - 1); 641 copy[length - 1] = '\0'; 642 643 return copy; 644 } 645 646 647 status_t 648 Equation::ConvertValue(type_code type) 649 { 650 // Has the type already been converted? 651 if (type == fType) 652 return B_OK; 653 654 fType = type; 655 char *string = fString; 656 657 switch (type) { 658 // B_MIME_STRING_TYPE is defined in Mime.h which I didn't want to include just for that 659 case 'MIMS': 660 type = B_STRING_TYPE; 661 // supposed to fall through 662 case B_STRING_TYPE: 663 strncpy(fValue.String,string,INODE_FILE_NAME_LENGTH); 664 fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0'; 665 fSize = strlen(fValue.String); 666 break; 667 case B_INT32_TYPE: 668 fValue.Int32 = strtol(string,&string,0); 669 fSize = sizeof(int32); 670 break; 671 case B_UINT32_TYPE: 672 fValue.Int32 = strtoul(string,&string,0); 673 fSize = sizeof(uint32); 674 break; 675 case B_INT64_TYPE: 676 fValue.Int64 = strtoll(string,&string,0); 677 fSize = sizeof(int64); 678 break; 679 case B_UINT64_TYPE: 680 fValue.Uint64 = strtoull(string,&string,0); 681 fSize = sizeof(uint64); 682 break; 683 case B_FLOAT_TYPE: 684 fValue.Float = strtod(string,&string); 685 fSize = sizeof(float); 686 break; 687 case B_DOUBLE_TYPE: 688 fValue.Double = strtod(string,&string); 689 fSize = sizeof(double); 690 break; 691 default: 692 FATAL(("query value conversion to 0x%lx requested!\n",type)); 693 // should we fail here or just do a safety int32 conversion? 694 return B_ERROR; 695 } 696 697 // patterns are only allowed for string types 698 if (fType != B_STRING_TYPE && fIsPattern) 699 fIsPattern = false; 700 701 return B_OK; 702 } 703 704 705 /** Returns true when the key matches the equation. You have to 706 * call ConvertValue() before this one. 707 */ 708 709 bool 710 Equation::CompareTo(const uint8 *value,uint16 size) 711 { 712 int32 compare; 713 714 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL 715 if (fIsPattern) { 716 // we have already validated the pattern, so we don't check for failing 717 // here - if something is broken, and matchString() returns an error, 718 // we just don't match 719 compare = matchString(fValue.String,(char *)value) == MATCH_OK ? 0 : 1; 720 } else if (fIsSpecialTime) { 721 // the index is a shifted int64 index, but we have to match 722 // against an unshifted value (i.e. the last_modified index) 723 int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT; 724 compare = compareKeys(fType,&timeValue,sizeof(int64),&fValue.Int64,sizeof(int64)); 725 } else 726 compare = compareKeys(fType,value,size,Value(),fSize); 727 728 switch (fOp) { 729 case OP_EQUAL: 730 return compare == 0; 731 case OP_UNEQUAL: 732 return compare != 0; 733 case OP_LESS_THAN: 734 return compare < 0; 735 case OP_LESS_THAN_OR_EQUAL: 736 return compare <= 0; 737 case OP_GREATER_THAN: 738 return compare > 0; 739 case OP_GREATER_THAN_OR_EQUAL: 740 return compare >= 0; 741 } 742 FATAL(("Unknown/Unsupported operation: %d\n",fOp)); 743 return false; 744 } 745 746 747 void 748 Equation::Complement() 749 { 750 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) { 751 FATAL(("op out of range!")); 752 return; 753 }); 754 755 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL, 756 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN}; 757 fOp = complementOp[fOp - OP_EQUAL]; 758 } 759 760 761 status_t 762 Equation::MatchEmptyString() 763 { 764 // there is no matching attribute, we will just bail out if we 765 // already know that our value is not of a string type. 766 // If not, it will be converted to a string - and then be compared with "". 767 // That's why we have to call ConvertValue() here - but it will be 768 // a cheap call for the next time 769 // Should we do this only for OP_UNEQUAL? 770 if (fType != 0 && fType != B_STRING_TYPE) 771 return NO_MATCH; 772 773 status_t status = ConvertValue(B_STRING_TYPE); 774 if (status == B_OK) 775 status = CompareTo((const uint8 *)"",fSize) ? MATCH_OK : NO_MATCH; 776 777 return status; 778 } 779 780 781 /** Matches the inode's attribute value with the equation. 782 * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong 783 */ 784 785 status_t 786 Equation::Match(Inode *inode,const char *attributeName,int32 type,const uint8 *key,size_t size) 787 { 788 // get a pointer to the attribute in question 789 union value value; 790 uint8 *buffer; 791 792 // first, check if we are matching for a live query and use that value 793 if (attributeName != NULL && !strcmp(fAttribute,attributeName)) { 794 if (key == NULL) { 795 if (type == B_STRING_TYPE) 796 return MatchEmptyString(); 797 798 return NO_MATCH; 799 } 800 buffer = const_cast<uint8 *>(key); 801 } else if (!strcmp(fAttribute,"name")) { 802 // if not, check for "fake" attributes, "name", "size", "last_modified", 803 buffer = (uint8 *)inode->Name(); 804 if (buffer == NULL) 805 return B_ERROR; 806 807 type = B_STRING_TYPE; 808 size = strlen((const char *)buffer); 809 } else if (!strcmp(fAttribute,"size")) { 810 buffer = (uint8 *)&inode->Node()->data.size; 811 type = B_INT64_TYPE; 812 } else if (!strcmp(fAttribute,"last_modified")) { 813 buffer = (uint8 *)&inode->Node()->last_modified_time; 814 type = B_INT64_TYPE; 815 } else { 816 // then for attributes in the small_data section, and finally for the 817 // real attributes 818 Inode *attribute; 819 820 inode->SmallDataLock().Lock(); 821 small_data *smallData = inode->FindSmallData(fAttribute); 822 if (smallData != NULL) { 823 buffer = smallData->Data(); 824 type = smallData->type; 825 size = smallData->data_size; 826 inode->SmallDataLock().Unlock(); 827 } else { 828 // needed to unlock the small_data section as fast as possible 829 inode->SmallDataLock().Unlock(); 830 831 if (inode->GetAttribute(fAttribute,&attribute) == B_OK) { 832 buffer = (uint8 *)&value; 833 type = attribute->Node()->type; 834 size = attribute->Size(); 835 836 if (size > INODE_FILE_NAME_LENGTH) 837 size = INODE_FILE_NAME_LENGTH; 838 839 if (attribute->ReadAt(0,buffer,&size) < B_OK) { 840 inode->ReleaseAttribute(attribute); 841 return B_IO_ERROR; 842 } 843 inode->ReleaseAttribute(attribute); 844 } else 845 return MatchEmptyString(); 846 } 847 } 848 // prepare own value for use, if it is possible to convert it 849 status_t status = ConvertValue(type); 850 if (status == B_OK) 851 status = CompareTo(buffer,size) ? MATCH_OK : NO_MATCH; 852 853 RETURN_ERROR(status); 854 } 855 856 857 void 858 Equation::CalculateScore(Index &index) 859 { 860 // As always, these values could be tuned and refined. 861 // And the code could also need some real world testing :-) 862 863 // do we have to operate on a "foreign" index? 864 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) { 865 fScore = 0; 866 return; 867 } 868 869 // if we have a pattern, how much does it help our search? 870 if (fIsPattern) 871 fScore = getFirstPatternSymbol(fString) << 3; 872 else { 873 // Score by operator 874 if (fOp == OP_EQUAL) 875 // higher than pattern="255 chars+*" 876 fScore = 2048; 877 else 878 // the pattern search is regarded cheaper when you have at 879 // least one character to set your index to 880 fScore = 5; 881 } 882 883 // take index size into account (1024 is the current node size 884 // in our B+trees) 885 // 2048 * 2048 == 4194304 is the maximum score (for an empty 886 // tree, since the header + 1 node are already 2048 bytes) 887 fScore = fScore * ((2048 * 1024LL) / index.Node()->Size()); 888 } 889 890 891 status_t 892 Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator) 893 { 894 type_code type; 895 status_t status = index.SetTo(fAttribute); 896 897 // special case for OP_UNEQUAL - it will always operate through the whole index 898 // but we need the call to the original index to get the correct type 899 if (status < B_OK || fOp == OP_UNEQUAL) { 900 // Try to get an index that holds all files (name) 901 // Also sets the default type for all attributes without index 902 // to string. 903 type = status < B_OK ? B_STRING_TYPE : index.Type(); 904 905 if (index.SetTo("name") < B_OK) 906 return B_ENTRY_NOT_FOUND; 907 908 fHasIndex = false; 909 } else { 910 fHasIndex = true; 911 type = index.Type(); 912 } 913 914 if (ConvertValue(type) < B_OK) 915 return B_BAD_VALUE; 916 917 BPlusTree *tree; 918 if (index.Node()->GetTree(&tree) < B_OK) 919 return B_ERROR; 920 921 *iterator = new TreeIterator(tree); 922 if (*iterator == NULL) 923 return B_NO_MEMORY; 924 925 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL 926 || fIsPattern) 927 && fHasIndex) { 928 // set iterator to the exact position 929 930 int32 keySize = index.KeySize(); 931 932 // at this point, fIsPattern is only true if it's a string type, and fOp 933 // is either OP_EQUAL or OP_UNEQUAL 934 if (fIsPattern) { 935 // let's see if we can use the beginning of the key for positioning 936 // the iterator and adjust the key size; if not, just leave the 937 // iterator at the start and return success 938 keySize = getFirstPatternSymbol(fString); 939 if (keySize <= 0) 940 return B_OK; 941 } 942 943 if (keySize == 0) { 944 if (fType == B_STRING_TYPE) 945 keySize = strlen(fValue.String); 946 else 947 RETURN_ERROR(B_ENTRY_NOT_FOUND); 948 } 949 950 if (fIsSpecialTime) { 951 // we have to find the first matching shifted value 952 off_t value = fValue.Int64 << INODE_TIME_SHIFT; 953 status = (*iterator)->Find((uint8 *)&value,keySize); 954 if (status == B_ENTRY_NOT_FOUND) 955 return B_OK; 956 } else { 957 status = (*iterator)->Find(Value(),keySize); 958 if (fOp == OP_EQUAL && !fIsPattern) 959 return status; 960 else if (status == B_ENTRY_NOT_FOUND && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL)) 961 return B_OK; 962 } 963 964 RETURN_ERROR(status); 965 } 966 967 return B_OK; 968 } 969 970 971 status_t 972 Equation::GetNextMatching(Volume *volume, TreeIterator *iterator, 973 struct dirent *dirent, size_t bufferSize) 974 { 975 while (true) { 976 union value indexValue; 977 uint16 keyLength; 978 uint16 duplicate; 979 off_t offset; 980 981 status_t status = iterator->GetNextEntry(&indexValue,&keyLength,(uint16)sizeof(indexValue),&offset,&duplicate); 982 if (status < B_OK) 983 return status; 984 985 // only compare against the index entry when this is the correct 986 // index for the equation 987 if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue,keyLength)) { 988 // They aren't equal? let the operation decide what to do 989 // Since we always start at the beginning of the index (or the correct 990 // position), only some needs to be stopped if the entry doesn't fit. 991 if (fOp == OP_LESS_THAN 992 || fOp == OP_LESS_THAN_OR_EQUAL 993 || (fOp == OP_EQUAL && !fIsPattern)) 994 return B_ENTRY_NOT_FOUND; 995 996 if (duplicate > 0) 997 iterator->SkipDuplicates(); 998 continue; 999 } 1000 1001 Inode *inode; 1002 if ((status = get_vnode(volume->ID(),offset,(void **)&inode)) != B_OK) { 1003 REPORT_ERROR(status); 1004 FATAL(("could not get inode %Ld in index \"%s\"!\n",offset,fAttribute)); 1005 // try with next 1006 continue; 1007 } 1008 1009 // check user permissions here - but which one?! 1010 // we could filter out all those where we don't have 1011 // read access... (we should check for every parent 1012 // directory if the X_OK is allowed) 1013 // Although it's quite expensive to open all parents, 1014 // it's likely that the application that runs the 1015 // query will do something similar (and we don't have 1016 // to do it for root, either). 1017 1018 // go up in the tree until a &&-operator is found, and check if the 1019 // inode matches with the rest of the expression - we don't have to 1020 // check ||-operators for that 1021 Term *term = this; 1022 status = MATCH_OK; 1023 1024 if (!fHasIndex) 1025 status = Match(inode); 1026 1027 while (term != NULL && status == MATCH_OK) { 1028 Operator *parent = (Operator *)term->Parent(); 1029 if (parent == NULL) 1030 break; 1031 1032 if (parent->Op() == OP_AND) { 1033 // choose the other child of the parent 1034 Term *other = parent->Right(); 1035 if (other == term) 1036 other = parent->Left(); 1037 1038 if (other == NULL) { 1039 FATAL(("&&-operator has only one child... (parent = %p)\n",parent)); 1040 break; 1041 } 1042 status = other->Match(inode); 1043 if (status < 0) { 1044 REPORT_ERROR(status); 1045 status = NO_MATCH; 1046 } 1047 } 1048 term = (Term *)parent; 1049 } 1050 1051 if (status == MATCH_OK) { 1052 dirent->d_dev = volume->ID(); 1053 dirent->d_ino = offset; 1054 dirent->d_pdev = volume->ID(); 1055 dirent->d_pino = volume->ToVnode(inode->Parent()); 1056 strcpy(dirent->d_name,inode->Name()); 1057 1058 #ifdef KEEP_WRONG_DIRENT_RECLEN 1059 // ToDo: The available file systems in BeOS apparently don't set the 1060 // correct d_reclen - we are copying that behaviour if requested, but 1061 // if it doesn't break compatibility, we will remove it. 1062 dirent->d_reclen = strlen(dirent->d_name); 1063 #else 1064 dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name); 1065 #endif 1066 } 1067 1068 put_vnode(volume->ID(), inode->ID()); 1069 1070 if (status == MATCH_OK) 1071 return B_OK; 1072 } 1073 RETURN_ERROR(B_ERROR); 1074 } 1075 1076 1077 // #pragma mark - 1078 1079 1080 Operator::Operator(Term *left, int8 op, Term *right) 1081 : Term(op), 1082 fLeft(left), 1083 fRight(right) 1084 { 1085 if (left) 1086 left->SetParent(this); 1087 if (right) 1088 right->SetParent(this); 1089 } 1090 1091 1092 Operator::~Operator() 1093 { 1094 delete fLeft; 1095 delete fRight; 1096 } 1097 1098 1099 status_t 1100 Operator::Match(Inode *inode,const char *attribute,int32 type,const uint8 *key,size_t size) 1101 { 1102 if (fOp == OP_AND) { 1103 status_t status = fLeft->Match(inode,attribute,type,key,size); 1104 if (status != MATCH_OK) 1105 return status; 1106 1107 return fRight->Match(inode,attribute,type,key,size); 1108 } else { 1109 // choose the term with the better score for OP_OR 1110 if (fRight->Score() > fLeft->Score()) { 1111 status_t status = fRight->Match(inode,attribute,type,key,size); 1112 if (status != NO_MATCH) 1113 return status; 1114 } 1115 return fLeft->Match(inode,attribute,type,key,size); 1116 } 1117 } 1118 1119 1120 void 1121 Operator::Complement() 1122 { 1123 if (fOp == OP_AND) 1124 fOp = OP_OR; 1125 else 1126 fOp = OP_AND; 1127 1128 fLeft->Complement(); 1129 fRight->Complement(); 1130 } 1131 1132 1133 void 1134 Operator::CalculateScore(Index &index) 1135 { 1136 fLeft->CalculateScore(index); 1137 fRight->CalculateScore(index); 1138 } 1139 1140 1141 int32 1142 Operator::Score() const 1143 { 1144 if (fOp == OP_AND) { 1145 // return the one with the better score 1146 if (fRight->Score() > fLeft->Score()) 1147 return fRight->Score(); 1148 1149 return fLeft->Score(); 1150 } 1151 1152 // for OP_OR, be honest, and return the one with the worse score 1153 if (fRight->Score() < fLeft->Score()) 1154 return fRight->Score(); 1155 1156 return fLeft->Score(); 1157 } 1158 1159 1160 status_t 1161 Operator::InitCheck() 1162 { 1163 if (fOp != OP_AND && fOp != OP_OR 1164 || fLeft == NULL || fLeft->InitCheck() < B_OK 1165 || fRight == NULL || fRight->InitCheck() < B_OK) 1166 return B_ERROR; 1167 1168 return B_OK; 1169 } 1170 1171 1172 #if 0 1173 Term * 1174 Operator::Copy() const 1175 { 1176 if (fEquation != NULL) { 1177 Equation *equation = new Equation(*fEquation); 1178 if (equation == NULL) 1179 return NULL; 1180 1181 Term *term = new Term(equation); 1182 if (term == NULL) 1183 delete equation; 1184 1185 return term; 1186 } 1187 1188 Term *left = NULL, *right = NULL; 1189 1190 if (fLeft != NULL && (left = fLeft->Copy()) == NULL) 1191 return NULL; 1192 if (fRight != NULL && (right = fRight->Copy()) == NULL) { 1193 delete left; 1194 return NULL; 1195 } 1196 1197 Term *term = new Term(left,fOp,right); 1198 if (term == NULL) { 1199 delete left; 1200 delete right; 1201 return NULL; 1202 } 1203 return term; 1204 } 1205 #endif 1206 1207 1208 // #pragma mark - 1209 1210 #ifdef DEBUG 1211 void 1212 Operator::PrintToStream() 1213 { 1214 D(__out("( ")); 1215 if (fLeft != NULL) 1216 fLeft->PrintToStream(); 1217 1218 char *op; 1219 switch (fOp) { 1220 case OP_OR: op = "OR"; break; 1221 case OP_AND: op = "AND"; break; 1222 default: op = "?"; break; 1223 } 1224 D(__out(" %s ",op)); 1225 1226 if (fRight != NULL) 1227 fRight->PrintToStream(); 1228 1229 D(__out(" )")); 1230 } 1231 1232 1233 void 1234 Equation::PrintToStream() 1235 { 1236 char *symbol = "???"; 1237 switch (fOp) { 1238 case OP_EQUAL: symbol = "=="; break; 1239 case OP_UNEQUAL: symbol = "!="; break; 1240 case OP_GREATER_THAN: symbol = ">"; break; 1241 case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break; 1242 case OP_LESS_THAN: symbol = "<"; break; 1243 case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break; 1244 } 1245 D(__out("[\"%s\" %s \"%s\"]",fAttribute,symbol,fString)); 1246 } 1247 1248 #endif /* DEBUG */ 1249 1250 // #pragma mark - 1251 1252 1253 Expression::Expression(char *expr) 1254 { 1255 if (expr == NULL) 1256 return; 1257 1258 fTerm = ParseOr(&expr); 1259 if (fTerm != NULL && fTerm->InitCheck() < B_OK) { 1260 FATAL(("Corrupt tree in expression!\n")); 1261 delete fTerm; 1262 fTerm = NULL; 1263 } 1264 D(if (fTerm != NULL) { 1265 fTerm->PrintToStream(); 1266 D(__out("\n")); 1267 if (*expr != '\0') 1268 PRINT(("Unexpected end of string: \"%s\"!\n",expr)); 1269 }); 1270 fPosition = expr; 1271 } 1272 1273 1274 Expression::~Expression() 1275 { 1276 delete fTerm; 1277 } 1278 1279 1280 Term * 1281 Expression::ParseEquation(char **expr) 1282 { 1283 skipWhitespace(expr); 1284 1285 bool not = false; 1286 if (**expr == '!') { 1287 skipWhitespace(expr, 1); 1288 if (**expr != '(') 1289 return NULL; 1290 1291 not = true; 1292 } 1293 1294 if (**expr == ')') { 1295 // shouldn't be handled here 1296 return NULL; 1297 } else if (**expr == '(') { 1298 skipWhitespace(expr, 1); 1299 1300 Term *term = ParseOr(expr); 1301 1302 skipWhitespace(expr); 1303 1304 if (**expr != ')') { 1305 delete term; 1306 return NULL; 1307 } 1308 1309 // If the term is negated, we just complement the tree, to get 1310 // rid of the not, a.k.a. DeMorgan's Law. 1311 if (not) 1312 term->Complement(); 1313 1314 skipWhitespace(expr, 1); 1315 1316 return term; 1317 } 1318 1319 Equation *equation = new Equation(expr); 1320 if (equation == NULL || equation->InitCheck() < B_OK) { 1321 delete equation; 1322 return NULL; 1323 } 1324 return equation; 1325 } 1326 1327 1328 Term * 1329 Expression::ParseAnd(char **expr) 1330 { 1331 Term *left = ParseEquation(expr); 1332 if (left == NULL) 1333 return NULL; 1334 1335 while (IsOperator(expr,'&')) { 1336 Term *right = ParseAnd(expr); 1337 Term *newParent = NULL; 1338 1339 if (right == NULL || (newParent = new Operator(left,OP_AND,right)) == NULL) { 1340 delete left; 1341 delete right; 1342 1343 return NULL; 1344 } 1345 left = newParent; 1346 } 1347 1348 return left; 1349 } 1350 1351 1352 Term * 1353 Expression::ParseOr(char **expr) 1354 { 1355 Term *left = ParseAnd(expr); 1356 if (left == NULL) 1357 return NULL; 1358 1359 while (IsOperator(expr,'|')) { 1360 Term *right = ParseAnd(expr); 1361 Term *newParent = NULL; 1362 1363 if (right == NULL || (newParent = new Operator(left,OP_OR,right)) == NULL) { 1364 delete left; 1365 delete right; 1366 1367 return NULL; 1368 } 1369 left = newParent; 1370 } 1371 1372 return left; 1373 } 1374 1375 1376 bool 1377 Expression::IsOperator(char **expr, char op) 1378 { 1379 char *string = *expr; 1380 1381 if (*string == op && *(string + 1) == op) { 1382 *expr += 2; 1383 return true; 1384 } 1385 return false; 1386 } 1387 1388 1389 status_t 1390 Expression::InitCheck() 1391 { 1392 if (fTerm == NULL) 1393 return B_BAD_VALUE; 1394 1395 return B_OK; 1396 } 1397 1398 1399 // #pragma mark - 1400 1401 1402 Query::Query(Volume *volume,Expression *expression) 1403 : 1404 fVolume(volume), 1405 fExpression(expression), 1406 fCurrent(NULL), 1407 fIterator(NULL), 1408 fIndex(volume), 1409 fPort(-1) 1410 { 1411 // if the expression has a valid root pointer, the whole tree has 1412 // already passed the sanity check, so that we don't have to check 1413 // every pointer 1414 if (volume == NULL || expression == NULL || expression->Root() == NULL) 1415 return; 1416 1417 // create index on the stack and delete it afterwards 1418 fExpression->Root()->CalculateScore(fIndex); 1419 fIndex.Unset(); 1420 1421 Stack<Term *> stack; 1422 stack.Push(fExpression->Root()); 1423 1424 Term *term; 1425 while (stack.Pop(&term)) { 1426 if (term->Op() < OP_EQUATION) { 1427 Operator *op = (Operator *)term; 1428 1429 if (op->Op() == OP_OR) { 1430 stack.Push(op->Left()); 1431 stack.Push(op->Right()); 1432 } else { 1433 // For OP_AND, we can use the scoring system to decide which path to add 1434 if (op->Right()->Score() > op->Left()->Score()) 1435 stack.Push(op->Right()); 1436 else 1437 stack.Push(op->Left()); 1438 } 1439 } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK) 1440 FATAL(("Unknown term on stack or stack error")); 1441 } 1442 1443 volume->AddQuery(this); 1444 } 1445 1446 1447 Query::~Query() 1448 { 1449 fVolume->RemoveQuery(this); 1450 } 1451 1452 1453 status_t 1454 Query::GetNextEntry(struct dirent *dirent, size_t size) 1455 { 1456 // If we don't have an equation to use yet/anymore, get a new one 1457 // from the stack 1458 while (true) { 1459 if (fIterator == NULL) { 1460 if (!fStack.Pop(&fCurrent) 1461 || fCurrent == NULL 1462 || fCurrent->PrepareQuery(fVolume,fIndex,&fIterator) < B_OK) 1463 return B_ENTRY_NOT_FOUND; 1464 } 1465 if (fCurrent == NULL) 1466 RETURN_ERROR(B_ERROR); 1467 1468 status_t status = fCurrent->GetNextMatching(fVolume,fIterator,dirent,size); 1469 if (status < B_OK) { 1470 delete fIterator; 1471 fIterator = NULL; 1472 fCurrent = NULL; 1473 } else { 1474 // only return if we have another entry 1475 return B_OK; 1476 } 1477 } 1478 } 1479 1480 1481 void 1482 Query::SetLiveMode(port_id port,int32 token) 1483 { 1484 fPort = port; 1485 fToken = token; 1486 } 1487 1488 1489 void 1490 Query::LiveUpdate(Inode *inode,const char *attribute,int32 type,const uint8 *oldKey,size_t oldLength,const uint8 *newKey,size_t newLength) 1491 { 1492 if (fPort < 0 || fExpression == NULL || attribute == NULL) 1493 return; 1494 1495 // ToDo: check if the attribute is part of the query at all... 1496 1497 status_t oldStatus = fExpression->Root()->Match(inode,attribute,type,oldKey,oldLength); 1498 status_t newStatus = fExpression->Root()->Match(inode,attribute,type,newKey,newLength); 1499 int32 op; 1500 if (oldStatus == MATCH_OK && newStatus == MATCH_OK) { 1501 // only send out a notification if the name was changed 1502 if (oldKey == NULL || strcmp(attribute,"name")) 1503 return; 1504 1505 send_notification(fPort,fToken,B_QUERY_UPDATE,B_ENTRY_REMOVED,fVolume->ID(),0,fVolume->ToVnode(inode->Parent()),0,inode->ID(),(const char *)oldKey); 1506 op = B_ENTRY_CREATED; 1507 } else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) { 1508 // nothing has changed 1509 return; 1510 } else if (oldStatus == MATCH_OK && newStatus != MATCH_OK) 1511 op = B_ENTRY_REMOVED; 1512 else 1513 op = B_ENTRY_CREATED; 1514 1515 // if "value" is NULL, send_notification() crashes... 1516 const char *value = (const char *)newKey; 1517 if (type != B_STRING_TYPE || value == NULL) 1518 value = ""; 1519 1520 send_notification(fPort,fToken,B_QUERY_UPDATE,op,fVolume->ID(),0,fVolume->ToVnode(inode->Parent()),0,inode->ID(),value); 1521 } 1522 1523