1 /* Query - query parsing and evaluation 2 * 3 * The pattern matching is roughly based on code originally written 4 * by J. Kercheval, and on code written by Kenneth Almquist, though 5 * it shares no code. 6 * 7 * Copyright 2001-2006, Axel Dörfler, axeld@pinc-software.de. 8 * This file may be used under the terms of the MIT License. 9 */ 10 11 12 #include "Query.h" 13 #include "bfs.h" 14 #include "Debug.h" 15 #include "Volume.h" 16 #include "Inode.h" 17 #include "BPlusTree.h" 18 #include "Index.h" 19 20 #include <util/kernel_cpp.h> 21 #include <SupportDefs.h> 22 #include <NodeMonitor.h> 23 #include <TypeConstants.h> 24 #include <AppDefs.h> 25 #include <fs_query.h> 26 27 #include <malloc.h> 28 #include <stdio.h> 29 #include <string.h> 30 31 32 // The parser has a very static design, but it will do what is required. 33 // 34 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator 35 // precedence, that is =,!=,>,<,>=,<= .. && .. ||. 36 // Apparently, the "!" (not) can only be used with brackets. 37 // 38 // If you think that there are too few NULL pointer checks in some places 39 // of the code, just read the beginning of the query constructor. 40 // The API is not fully available, just the Query and the Expression class 41 // are. 42 43 44 enum ops { 45 OP_NONE, 46 47 OP_AND, 48 OP_OR, 49 50 OP_EQUATION, 51 52 OP_EQUAL, 53 OP_UNEQUAL, 54 OP_GREATER_THAN, 55 OP_LESS_THAN, 56 OP_GREATER_THAN_OR_EQUAL, 57 OP_LESS_THAN_OR_EQUAL, 58 }; 59 60 enum match { 61 NO_MATCH = 0, 62 MATCH_OK = 1, 63 64 MATCH_BAD_PATTERN = -2, 65 MATCH_INVALID_CHARACTER 66 }; 67 68 // return values from isValidPattern() 69 enum { 70 PATTERN_INVALID_ESCAPE = -3, 71 PATTERN_INVALID_RANGE, 72 PATTERN_INVALID_SET 73 }; 74 75 union value { 76 int64 Int64; 77 uint64 Uint64; 78 int32 Int32; 79 uint32 Uint32; 80 float Float; 81 double Double; 82 char String[INODE_FILE_NAME_LENGTH]; 83 }; 84 85 // B_MIME_STRING_TYPE is defined in storage/Mime.h, but we 86 // don't need the whole file here; the type can't change anyway 87 #ifndef _MIME_H 88 # define B_MIME_STRING_TYPE 'MIMS' 89 #endif 90 91 class Term { 92 public: 93 Term(int8 op) : fOp(op), fParent(NULL) {} 94 virtual ~Term() {} 95 96 int8 Op() const { return fOp; } 97 98 void SetParent(Term *parent) { fParent = parent; } 99 Term *Parent() const { return fParent; } 100 101 virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0, 102 const uint8 *key = NULL,size_t size = 0) = 0; 103 virtual void Complement() = 0; 104 105 virtual void CalculateScore(Index &index) = 0; 106 virtual int32 Score() const = 0; 107 108 virtual status_t InitCheck() = 0; 109 110 #ifdef DEBUG 111 virtual void PrintToStream() = 0; 112 #endif 113 114 protected: 115 int8 fOp; 116 Term *fParent; 117 }; 118 119 // Although an Equation object is quite independent from the volume on which 120 // the query is run, there are some dependencies that are produced while 121 // querying: 122 // The type/size of the value, the score, and if it has an index or not. 123 // So you could run more than one query on the same volume, but it might return 124 // wrong values when it runs concurrently on another volume. 125 // That's not an issue right now, because we run single-threaded and don't use 126 // queries more than once. 127 128 class Equation : public Term { 129 public: 130 Equation(char **expr); 131 virtual ~Equation(); 132 133 virtual status_t InitCheck(); 134 135 status_t ParseQuotedString(char **_start, char **_end); 136 char *CopyString(char *start, char *end); 137 138 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 139 const uint8 *key = NULL, size_t size = 0); 140 virtual void Complement(); 141 142 status_t PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator, 143 bool queryNonIndexed); 144 status_t GetNextMatching(Volume *volume, TreeIterator *iterator, 145 struct dirent *dirent, size_t bufferSize); 146 147 virtual void CalculateScore(Index &index); 148 virtual int32 Score() const { return fScore; } 149 150 #ifdef DEBUG 151 virtual void PrintToStream(); 152 #endif 153 154 private: 155 Equation(const Equation &); 156 Equation &operator=(const Equation &); 157 // no implementation 158 159 status_t ConvertValue(type_code type); 160 bool CompareTo(const uint8 *value, uint16 size); 161 uint8 *Value() const { return (uint8 *)&fValue; } 162 status_t MatchEmptyString(); 163 164 char *fAttribute; 165 char *fString; 166 union value fValue; 167 type_code fType; 168 size_t fSize; 169 bool fIsPattern; 170 bool fIsSpecialTime; 171 172 int32 fScore; 173 bool fHasIndex; 174 }; 175 176 class Operator : public Term { 177 public: 178 Operator(Term *,int8,Term *); 179 virtual ~Operator(); 180 181 Term *Left() const { return fLeft; } 182 Term *Right() const { return fRight; } 183 184 virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0, 185 const uint8 *key = NULL, size_t size = 0); 186 virtual void Complement(); 187 188 virtual void CalculateScore(Index &index); 189 virtual int32 Score() const; 190 191 virtual status_t InitCheck(); 192 193 //Term *Copy() const; 194 #ifdef DEBUG 195 virtual void PrintToStream(); 196 #endif 197 198 private: 199 Operator(const Operator &); 200 Operator &operator=(const Operator &); 201 // no implementation 202 203 Term *fLeft,*fRight; 204 }; 205 206 207 //--------------------------------- 208 209 210 void 211 skipWhitespace(char **expr, int32 skip = 0) 212 { 213 char *string = (*expr) + skip; 214 while (*string == ' ' || *string == '\t') string++; 215 *expr = string; 216 } 217 218 219 void 220 skipWhitespaceReverse(char **expr,char *stop) 221 { 222 char *string = *expr; 223 while (string > stop && (*string == ' ' || *string == '\t')) string--; 224 *expr = string; 225 } 226 227 228 // #pragma mark - 229 230 231 uint32 232 utf8ToUnicode(char **string) 233 { 234 uint8 *bytes = (uint8 *)*string; 235 int32 length; 236 uint8 mask = 0x1f; 237 238 switch (bytes[0] & 0xf0) { 239 case 0xc0: 240 case 0xd0: length = 2; break; 241 case 0xe0: length = 3; break; 242 case 0xf0: 243 mask = 0x0f; 244 length = 4; 245 break; 246 default: 247 // valid 1-byte character 248 // and invalid characters 249 (*string)++; 250 return bytes[0]; 251 } 252 uint32 c = bytes[0] & mask; 253 int32 i = 1; 254 for (;i < length && (bytes[i] & 0x80) > 0;i++) 255 c = (c << 6) | (bytes[i] & 0x3f); 256 257 if (i < length) { 258 // invalid character 259 (*string)++; 260 return (uint32)bytes[0]; 261 } 262 *string += length; 263 return c; 264 } 265 266 267 int32 268 getFirstPatternSymbol(char *string) 269 { 270 char c; 271 272 for (int32 index = 0;(c = *string++);index++) { 273 if (c == '*' || c == '?' || c == '[') 274 return index; 275 } 276 return -1; 277 } 278 279 280 bool 281 isPattern(char *string) 282 { 283 return getFirstPatternSymbol(string) >= 0 ? true : false; 284 } 285 286 287 status_t 288 isValidPattern(char *pattern) 289 { 290 while (*pattern) { 291 switch (*pattern++) { 292 case '\\': 293 // the escape character must not be at the end of the pattern 294 if (!*pattern++) 295 return PATTERN_INVALID_ESCAPE; 296 break; 297 298 case '[': 299 if (pattern[0] == ']' || !pattern[0]) 300 return PATTERN_INVALID_SET; 301 302 while (*pattern != ']') { 303 if (*pattern == '\\' && !*++pattern) 304 return PATTERN_INVALID_ESCAPE; 305 306 if (!*pattern) 307 return PATTERN_INVALID_SET; 308 309 if (pattern[0] == '-' && pattern[1] == '-') 310 return PATTERN_INVALID_RANGE; 311 312 pattern++; 313 } 314 break; 315 } 316 } 317 return B_OK; 318 } 319 320 321 /** Matches the string against the given wildcard pattern. 322 * Returns either MATCH_OK, or NO_MATCH when everything went fine, 323 * or values < 0 (see enum at the top of Query.cpp) if an error 324 * occurs 325 */ 326 327 status_t 328 matchString(char *pattern, char *string) 329 { 330 while (*pattern) { 331 // end of string == valid end of pattern? 332 if (!string[0]) { 333 while (pattern[0] == '*') 334 pattern++; 335 return !pattern[0] ? MATCH_OK : NO_MATCH; 336 } 337 338 switch (*pattern++) { 339 case '?': 340 { 341 // match exactly one UTF-8 character; we are 342 // not interested in the result 343 utf8ToUnicode(&string); 344 break; 345 } 346 347 case '*': 348 { 349 // compact pattern 350 while (true) { 351 if (pattern[0] == '?') { 352 if (!*++string) 353 return NO_MATCH; 354 } else if (pattern[0] != '*') 355 break; 356 357 pattern++; 358 } 359 360 // if the pattern is done, we have matched the string 361 if (!pattern[0]) 362 return MATCH_OK; 363 364 while(true) { 365 // we have removed all occurences of '*' and '?' 366 if (pattern[0] == string[0] 367 || pattern[0] == '[' 368 || pattern[0] == '\\') { 369 status_t status = matchString(pattern,string); 370 if (status < B_OK || status == MATCH_OK) 371 return status; 372 } 373 374 // we could be nice here and just jump to the next 375 // UTF-8 character - but we wouldn't gain that much 376 // and it'd be slower (since we're checking for 377 // equality before entering the recursion) 378 if (!*++string) 379 return NO_MATCH; 380 } 381 break; 382 } 383 384 case '[': 385 { 386 bool invert = false; 387 if (pattern[0] == '^' || pattern[0] == '!') { 388 invert = true; 389 pattern++; 390 } 391 392 if (!pattern[0] || pattern[0] == ']') 393 return MATCH_BAD_PATTERN; 394 395 uint32 c = utf8ToUnicode(&string); 396 bool matched = false; 397 398 while (pattern[0] != ']') { 399 if (!pattern[0]) 400 return MATCH_BAD_PATTERN; 401 402 if (pattern[0] == '\\') 403 pattern++; 404 405 uint32 first = utf8ToUnicode(&pattern); 406 407 // Does this character match, or is this a range? 408 if (first == c) { 409 matched = true; 410 break; 411 } else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) { 412 pattern++; 413 414 if (pattern[0] == '\\') { 415 pattern++; 416 if (!pattern[0]) 417 return MATCH_BAD_PATTERN; 418 } 419 uint32 last = utf8ToUnicode(&pattern); 420 421 if (c >= first && c <= last) { 422 matched = true; 423 break; 424 } 425 } 426 } 427 428 if (invert) 429 matched = !matched; 430 431 if (matched) { 432 while (pattern[0] != ']') { 433 if (!pattern[0]) 434 return MATCH_BAD_PATTERN; 435 pattern++; 436 } 437 pattern++; 438 break; 439 } 440 return NO_MATCH; 441 } 442 443 case '\\': 444 if (!pattern[0]) 445 return MATCH_BAD_PATTERN; 446 // supposed to fall through 447 default: 448 if (pattern[-1] != string[0]) 449 return NO_MATCH; 450 string++; 451 break; 452 } 453 } 454 455 if (string[0]) 456 return NO_MATCH; 457 458 return MATCH_OK; 459 } 460 461 462 // #pragma mark - 463 464 465 Equation::Equation(char **expr) 466 : Term(OP_EQUATION), 467 fAttribute(NULL), 468 fString(NULL), 469 fType(0), 470 fIsPattern(false) 471 { 472 char *string = *expr; 473 char *start = string; 474 char *end = NULL; 475 476 // Since the equation is the integral part of any query, we're just parsing 477 // the whole thing here. 478 // The whitespace at the start is already removed in Expression::ParseEquation() 479 480 if (*start == '"' || *start == '\'') { 481 // string is quoted (start has to be on the beginning of a string) 482 if (ParseQuotedString(&start, &end) < B_OK) 483 return; 484 485 // set string to a valid start of the equation symbol 486 string = end + 2; 487 skipWhitespace(&string); 488 if (*string != '=' && *string != '<' && *string != '>' && *string != '!') { 489 *expr = string; 490 return; 491 } 492 } else { 493 // search the (in)equation for the actual equation symbol (and for other operators 494 // in case the equation is malformed) 495 while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!' 496 && *string != '&' && *string != '|') 497 string++; 498 499 // get the attribute string (and trim whitespace), in case 500 // the string was not quoted 501 end = string - 1; 502 skipWhitespaceReverse(&end, start); 503 } 504 505 // attribute string is empty (which is not allowed) 506 if (start > end) 507 return; 508 509 // at this point, "start" points to the beginning of the string, "end" points 510 // to the last character of the string, and "string" points to the first 511 // character of the equation symbol 512 513 // test for the right symbol (as this doesn't need any memory) 514 switch (*string) { 515 case '=': 516 fOp = OP_EQUAL; 517 break; 518 case '>': 519 fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN; 520 break; 521 case '<': 522 fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN; 523 break; 524 case '!': 525 if (*(string + 1) != '=') 526 return; 527 fOp = OP_UNEQUAL; 528 break; 529 530 // any invalid characters will be rejected 531 default: 532 *expr = string; 533 return; 534 } 535 // lets change "start" to point to the first character after the symbol 536 if (*(string + 1) == '=') 537 string++; 538 string++; 539 skipWhitespace(&string); 540 541 // allocate & copy the attribute string 542 543 fAttribute = CopyString(start, end); 544 if (fAttribute == NULL) 545 return; 546 547 start = string; 548 if (*start == '"' || *start == '\'') { 549 // string is quoted (start has to be on the beginning of a string) 550 if (ParseQuotedString(&start, &end) < B_OK) 551 return; 552 553 string = end + 2; 554 skipWhitespace(&string); 555 } else { 556 while (*string && *string != '&' && *string != '|' && *string != ')') 557 string++; 558 559 end = string - 1; 560 skipWhitespaceReverse(&end, start); 561 } 562 563 // at this point, "start" will point to the first character of the value, 564 // "end" will point to its last character, and "start" to the first non- 565 // whitespace character after the value string 566 567 fString = CopyString(start, end); 568 if (fString == NULL) 569 return; 570 571 // patterns are only allowed for these operations (and strings) 572 if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) { 573 fIsPattern = isPattern(fString); 574 if (fIsPattern && isValidPattern(fString) < B_OK) { 575 // we only want to have valid patterns; setting fString 576 // to NULL will cause InitCheck() to fail 577 free(fString); 578 fString = NULL; 579 } 580 } 581 582 // The special time flag is set if the time values are shifted 583 // 64-bit values to reduce the number of duplicates. 584 // We have to be able to compare them against unshifted values 585 // later. The only index which needs this is the last_modified 586 // index, but we may want to open that feature for other indices, 587 // too one day. 588 fIsSpecialTime = !strcmp(fAttribute, "last_modified"); 589 590 *expr = string; 591 } 592 593 594 Equation::~Equation() 595 { 596 if (fAttribute != NULL) 597 free(fAttribute); 598 if (fString != NULL) 599 free(fString); 600 } 601 602 603 status_t 604 Equation::InitCheck() 605 { 606 if (fAttribute == NULL 607 || fString == NULL 608 || fOp == OP_NONE) 609 return B_BAD_VALUE; 610 611 return B_OK; 612 } 613 614 615 status_t 616 Equation::ParseQuotedString(char **_start, char **_end) 617 { 618 char *start = *_start; 619 char quote = *start++; 620 char *end = start; 621 622 for (;*end && *end != quote;end++) { 623 if (*end == '\\') 624 end++; 625 } 626 if (*end == '\0') 627 return B_BAD_VALUE; 628 629 *_start = start; 630 *_end = end - 1; 631 632 return B_OK; 633 } 634 635 636 char * 637 Equation::CopyString(char *start, char *end) 638 { 639 // end points to the last character of the string - and the length 640 // also has to include the null-termination 641 int32 length = end + 2 - start; 642 // just to make sure; since that's the max. attribute name length and 643 // the max. string in an index, it make sense to have it that way 644 if (length > INODE_FILE_NAME_LENGTH || length <= 0) 645 return NULL; 646 647 char *copy = (char *)malloc(length); 648 if (copy == NULL) 649 return NULL; 650 651 memcpy(copy,start,length - 1); 652 copy[length - 1] = '\0'; 653 654 return copy; 655 } 656 657 658 status_t 659 Equation::ConvertValue(type_code type) 660 { 661 // Has the type already been converted? 662 if (type == fType) 663 return B_OK; 664 665 char *string = fString; 666 667 switch (type) { 668 case B_MIME_STRING_TYPE: 669 type = B_STRING_TYPE; 670 // supposed to fall through 671 case B_STRING_TYPE: 672 strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH); 673 fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0'; 674 fSize = strlen(fValue.String); 675 break; 676 case B_INT32_TYPE: 677 fValue.Int32 = strtol(string, &string, 0); 678 fSize = sizeof(int32); 679 break; 680 case B_UINT32_TYPE: 681 fValue.Int32 = strtoul(string, &string, 0); 682 fSize = sizeof(uint32); 683 break; 684 case B_INT64_TYPE: 685 fValue.Int64 = strtoll(string, &string, 0); 686 fSize = sizeof(int64); 687 break; 688 case B_UINT64_TYPE: 689 fValue.Uint64 = strtoull(string, &string, 0); 690 fSize = sizeof(uint64); 691 break; 692 case B_FLOAT_TYPE: 693 fValue.Float = strtod(string, &string); 694 fSize = sizeof(float); 695 break; 696 case B_DOUBLE_TYPE: 697 fValue.Double = strtod(string, &string); 698 fSize = sizeof(double); 699 break; 700 default: 701 FATAL(("query value conversion to 0x%lx requested!\n", type)); 702 // should we fail here or just do a safety int32 conversion? 703 return B_ERROR; 704 } 705 706 fType = type; 707 708 // patterns are only allowed for string types 709 if (fType != B_STRING_TYPE && fIsPattern) 710 fIsPattern = false; 711 712 return B_OK; 713 } 714 715 716 /** Returns true when the key matches the equation. You have to 717 * call ConvertValue() before this one. 718 */ 719 720 bool 721 Equation::CompareTo(const uint8 *value, uint16 size) 722 { 723 int32 compare; 724 725 // fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL 726 if (fIsPattern) { 727 // we have already validated the pattern, so we don't check for failing 728 // here - if something is broken, and matchString() returns an error, 729 // we just don't match 730 compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1; 731 } else if (fIsSpecialTime) { 732 // the index is a shifted int64 index, but we have to match 733 // against an unshifted value (i.e. the last_modified index) 734 int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT; 735 compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64)); 736 } else 737 compare = compareKeys(fType, value, size, Value(), fSize); 738 739 switch (fOp) { 740 case OP_EQUAL: 741 return compare == 0; 742 case OP_UNEQUAL: 743 return compare != 0; 744 case OP_LESS_THAN: 745 return compare < 0; 746 case OP_LESS_THAN_OR_EQUAL: 747 return compare <= 0; 748 case OP_GREATER_THAN: 749 return compare > 0; 750 case OP_GREATER_THAN_OR_EQUAL: 751 return compare >= 0; 752 } 753 FATAL(("Unknown/Unsupported operation: %d\n", fOp)); 754 return false; 755 } 756 757 758 void 759 Equation::Complement() 760 { 761 D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) { 762 FATAL(("op out of range!")); 763 return; 764 }); 765 766 int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL, 767 OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN}; 768 fOp = complementOp[fOp - OP_EQUAL]; 769 } 770 771 772 status_t 773 Equation::MatchEmptyString() 774 { 775 // there is no matching attribute, we will just bail out if we 776 // already know that our value is not of a string type. 777 // If not, it will be converted to a string - and then be compared with "". 778 // That's why we have to call ConvertValue() here - but it will be 779 // a cheap call for the next time 780 // Should we do this only for OP_UNEQUAL? 781 if (fType != 0 && fType != B_STRING_TYPE) 782 return NO_MATCH; 783 784 status_t status = ConvertValue(B_STRING_TYPE); 785 if (status == B_OK) 786 status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH; 787 788 return status; 789 } 790 791 792 /** Matches the inode's attribute value with the equation. 793 * Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong 794 */ 795 796 status_t 797 Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size) 798 { 799 // get a pointer to the attribute in question 800 NodeGetter nodeGetter(inode->GetVolume()); 801 union value value; 802 uint8 *buffer = (uint8 *)&value; 803 bool locked = false; 804 805 // first, check if we are matching for a live query and use that value 806 if (attributeName != NULL && !strcmp(fAttribute, attributeName)) { 807 if (key == NULL) { 808 if (type == B_STRING_TYPE) 809 return MatchEmptyString(); 810 811 return NO_MATCH; 812 } 813 buffer = const_cast<uint8 *>(key); 814 } else if (!strcmp(fAttribute, "name")) { 815 // we need to lock before accessing Inode::Name() 816 nodeGetter.SetToNode(inode); 817 818 inode->SmallDataLock().Lock(); 819 locked = true; 820 821 // if not, check for "fake" attributes, "name", "size", "last_modified", 822 buffer = (uint8 *)inode->Name(nodeGetter.Node()); 823 if (buffer == NULL) { 824 inode->SmallDataLock().Unlock(); 825 return B_ERROR; 826 } 827 828 type = B_STRING_TYPE; 829 size = strlen((const char *)buffer); 830 } else if (!strcmp(fAttribute, "size")) { 831 #ifdef BFS_NATIVE_ENDIAN 832 buffer = (uint8 *)&inode->Node().data.size; 833 #else 834 value.Int64 = inode->Size(); 835 #endif 836 type = B_INT64_TYPE; 837 } else if (!strcmp(fAttribute, "last_modified")) { 838 #ifdef BFS_NATIVE_ENDIAN 839 buffer = (uint8 *)&inode->Node().last_modified_time; 840 #else 841 value.Int64 = inode->Node().LastModifiedTime(); 842 #endif 843 type = B_INT64_TYPE; 844 } else { 845 // then for attributes in the small_data section, and finally for the 846 // real attributes 847 nodeGetter.SetToNode(inode); 848 Inode *attribute; 849 850 inode->SmallDataLock().Lock(); 851 small_data *smallData = inode->FindSmallData(nodeGetter.Node(), fAttribute); 852 if (smallData != NULL) { 853 buffer = smallData->Data(); 854 type = smallData->type; 855 size = smallData->data_size; 856 locked = true; 857 } else { 858 // needed to unlock the small_data section as fast as possible 859 inode->SmallDataLock().Unlock(); 860 nodeGetter.Unset(); 861 862 if (inode->GetAttribute(fAttribute, &attribute) == B_OK) { 863 buffer = (uint8 *)&value; 864 type = attribute->Type(); 865 size = attribute->Size(); 866 867 if (size > INODE_FILE_NAME_LENGTH) 868 size = INODE_FILE_NAME_LENGTH; 869 870 if (attribute->ReadAt(0, buffer, &size) < B_OK) { 871 inode->ReleaseAttribute(attribute); 872 return B_IO_ERROR; 873 } 874 inode->ReleaseAttribute(attribute); 875 } else 876 return MatchEmptyString(); 877 } 878 } 879 // prepare own value for use, if it is possible to convert it 880 status_t status = ConvertValue(type); 881 if (status == B_OK) 882 status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH; 883 884 if (locked) 885 inode->SmallDataLock().Unlock(); 886 887 RETURN_ERROR(status); 888 } 889 890 891 void 892 Equation::CalculateScore(Index &index) 893 { 894 // As always, these values could be tuned and refined. 895 // And the code could also need some real world testing :-) 896 897 // do we have to operate on a "foreign" index? 898 if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) { 899 fScore = 0; 900 return; 901 } 902 903 // if we have a pattern, how much does it help our search? 904 if (fIsPattern) 905 fScore = getFirstPatternSymbol(fString) << 3; 906 else { 907 // Score by operator 908 if (fOp == OP_EQUAL) 909 // higher than pattern="255 chars+*" 910 fScore = 2048; 911 else 912 // the pattern search is regarded cheaper when you have at 913 // least one character to set your index to 914 fScore = 5; 915 } 916 917 // take index size into account (1024 is the current node size 918 // in our B+trees) 919 // 2048 * 2048 == 4194304 is the maximum score (for an empty 920 // tree, since the header + 1 node are already 2048 bytes) 921 fScore = fScore * ((2048 * 1024LL) / index.Node()->Size()); 922 } 923 924 925 status_t 926 Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator, bool queryNonIndexed) 927 { 928 status_t status = index.SetTo(fAttribute); 929 930 // if we should query attributes without an index, we can just proceed here 931 if (status < B_OK && !queryNonIndexed) 932 return B_ENTRY_NOT_FOUND; 933 934 type_code type; 935 936 // special case for OP_UNEQUAL - it will always operate through the whole index 937 // but we need the call to the original index to get the correct type 938 if (status < B_OK || fOp == OP_UNEQUAL) { 939 // Try to get an index that holds all files (name) 940 // Also sets the default type for all attributes without index 941 // to string. 942 type = status < B_OK ? B_STRING_TYPE : index.Type(); 943 944 if (index.SetTo("name") < B_OK) 945 return B_ENTRY_NOT_FOUND; 946 947 fHasIndex = false; 948 } else { 949 fHasIndex = true; 950 type = index.Type(); 951 } 952 953 if (ConvertValue(type) < B_OK) 954 return B_BAD_VALUE; 955 956 BPlusTree *tree; 957 if (index.Node()->GetTree(&tree) < B_OK) 958 return B_ERROR; 959 960 *iterator = new TreeIterator(tree); 961 if (*iterator == NULL) 962 return B_NO_MEMORY; 963 964 if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL 965 || fIsPattern) 966 && fHasIndex) { 967 // set iterator to the exact position 968 969 int32 keySize = index.KeySize(); 970 971 // at this point, fIsPattern is only true if it's a string type, and fOp 972 // is either OP_EQUAL or OP_UNEQUAL 973 if (fIsPattern) { 974 // let's see if we can use the beginning of the key for positioning 975 // the iterator and adjust the key size; if not, just leave the 976 // iterator at the start and return success 977 keySize = getFirstPatternSymbol(fString); 978 if (keySize <= 0) 979 return B_OK; 980 } 981 982 if (keySize == 0) { 983 // B_STRING_TYPE doesn't have a fixed length, so it was set 984 // to 0 before - we compute the correct value here 985 if (fType == B_STRING_TYPE) { 986 keySize = strlen(fValue.String); 987 988 // The empty string is a special case - we normally don't check 989 // for the trailing null byte, in the case for the empty string 990 // we do it explicitly, because there can't be keys in the B+tree 991 // with a length of zero 992 if (keySize == 0) 993 keySize = 1; 994 } else 995 RETURN_ERROR(B_ENTRY_NOT_FOUND); 996 } 997 998 if (fIsSpecialTime) { 999 // we have to find the first matching shifted value 1000 off_t value = fValue.Int64 << INODE_TIME_SHIFT; 1001 status = (*iterator)->Find((uint8 *)&value, keySize); 1002 if (status == B_ENTRY_NOT_FOUND) 1003 return B_OK; 1004 } else { 1005 status = (*iterator)->Find(Value(), keySize); 1006 if (fOp == OP_EQUAL && !fIsPattern) 1007 return status; 1008 else if (status == B_ENTRY_NOT_FOUND 1009 && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL)) 1010 return B_OK; 1011 } 1012 1013 RETURN_ERROR(status); 1014 } 1015 1016 return B_OK; 1017 } 1018 1019 1020 status_t 1021 Equation::GetNextMatching(Volume *volume, TreeIterator *iterator, 1022 struct dirent *dirent, size_t bufferSize) 1023 { 1024 while (true) { 1025 union value indexValue; 1026 uint16 keyLength; 1027 uint16 duplicate; 1028 off_t offset; 1029 1030 status_t status = iterator->GetNextEntry(&indexValue, &keyLength, 1031 (uint16)sizeof(indexValue), &offset, &duplicate); 1032 if (status < B_OK) 1033 return status; 1034 1035 // only compare against the index entry when this is the correct 1036 // index for the equation 1037 if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) { 1038 // They aren't equal? let the operation decide what to do 1039 // Since we always start at the beginning of the index (or the correct 1040 // position), only some needs to be stopped if the entry doesn't fit. 1041 if (fOp == OP_LESS_THAN 1042 || fOp == OP_LESS_THAN_OR_EQUAL 1043 || (fOp == OP_EQUAL && !fIsPattern)) 1044 return B_ENTRY_NOT_FOUND; 1045 1046 if (duplicate > 0) 1047 iterator->SkipDuplicates(); 1048 continue; 1049 } 1050 1051 Vnode vnode(volume, offset); 1052 Inode *inode; 1053 if ((status = vnode.Get(&inode)) != B_OK) { 1054 REPORT_ERROR(status); 1055 FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute)); 1056 // try with next 1057 continue; 1058 } 1059 1060 // ToDo: check user permissions here - but which one?! 1061 // we could filter out all those where we don't have 1062 // read access... (we should check for every parent 1063 // directory if the X_OK is allowed) 1064 // Although it's quite expensive to open all parents, 1065 // it's likely that the application that runs the 1066 // query will do something similar (and we don't have 1067 // to do it for root, either). 1068 1069 // go up in the tree until a &&-operator is found, and check if the 1070 // inode matches with the rest of the expression - we don't have to 1071 // check ||-operators for that 1072 Term *term = this; 1073 status = MATCH_OK; 1074 1075 if (!fHasIndex) 1076 status = Match(inode); 1077 1078 while (term != NULL && status == MATCH_OK) { 1079 Operator *parent = (Operator *)term->Parent(); 1080 if (parent == NULL) 1081 break; 1082 1083 if (parent->Op() == OP_AND) { 1084 // choose the other child of the parent 1085 Term *other = parent->Right(); 1086 if (other == term) 1087 other = parent->Left(); 1088 1089 if (other == NULL) { 1090 FATAL(("&&-operator has only one child... (parent = %p)\n", parent)); 1091 break; 1092 } 1093 status = other->Match(inode); 1094 if (status < 0) { 1095 REPORT_ERROR(status); 1096 status = NO_MATCH; 1097 } 1098 } 1099 term = (Term *)parent; 1100 } 1101 1102 if (status == MATCH_OK) { 1103 dirent->d_dev = volume->ID(); 1104 dirent->d_ino = offset; 1105 dirent->d_pdev = volume->ID(); 1106 dirent->d_pino = volume->ToVnode(inode->Parent()); 1107 1108 if (inode->GetName(dirent->d_name) < B_OK) 1109 FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber())); 1110 1111 dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name); 1112 } 1113 1114 if (status == MATCH_OK) 1115 return B_OK; 1116 } 1117 RETURN_ERROR(B_ERROR); 1118 } 1119 1120 1121 // #pragma mark - 1122 1123 1124 Operator::Operator(Term *left, int8 op, Term *right) 1125 : Term(op), 1126 fLeft(left), 1127 fRight(right) 1128 { 1129 if (left) 1130 left->SetParent(this); 1131 if (right) 1132 right->SetParent(this); 1133 } 1134 1135 1136 Operator::~Operator() 1137 { 1138 delete fLeft; 1139 delete fRight; 1140 } 1141 1142 1143 status_t 1144 Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size) 1145 { 1146 if (fOp == OP_AND) { 1147 status_t status = fLeft->Match(inode, attribute, type, key, size); 1148 if (status != MATCH_OK) 1149 return status; 1150 1151 return fRight->Match(inode, attribute, type, key, size); 1152 } else { 1153 // choose the term with the better score for OP_OR 1154 if (fRight->Score() > fLeft->Score()) { 1155 status_t status = fRight->Match(inode, attribute, type, key, size); 1156 if (status != NO_MATCH) 1157 return status; 1158 } 1159 return fLeft->Match(inode, attribute, type, key, size); 1160 } 1161 } 1162 1163 1164 void 1165 Operator::Complement() 1166 { 1167 if (fOp == OP_AND) 1168 fOp = OP_OR; 1169 else 1170 fOp = OP_AND; 1171 1172 fLeft->Complement(); 1173 fRight->Complement(); 1174 } 1175 1176 1177 void 1178 Operator::CalculateScore(Index &index) 1179 { 1180 fLeft->CalculateScore(index); 1181 fRight->CalculateScore(index); 1182 } 1183 1184 1185 int32 1186 Operator::Score() const 1187 { 1188 if (fOp == OP_AND) { 1189 // return the one with the better score 1190 if (fRight->Score() > fLeft->Score()) 1191 return fRight->Score(); 1192 1193 return fLeft->Score(); 1194 } 1195 1196 // for OP_OR, be honest, and return the one with the worse score 1197 if (fRight->Score() < fLeft->Score()) 1198 return fRight->Score(); 1199 1200 return fLeft->Score(); 1201 } 1202 1203 1204 status_t 1205 Operator::InitCheck() 1206 { 1207 if (fOp != OP_AND && fOp != OP_OR 1208 || fLeft == NULL || fLeft->InitCheck() < B_OK 1209 || fRight == NULL || fRight->InitCheck() < B_OK) 1210 return B_ERROR; 1211 1212 return B_OK; 1213 } 1214 1215 1216 #if 0 1217 Term * 1218 Operator::Copy() const 1219 { 1220 if (fEquation != NULL) { 1221 Equation *equation = new Equation(*fEquation); 1222 if (equation == NULL) 1223 return NULL; 1224 1225 Term *term = new Term(equation); 1226 if (term == NULL) 1227 delete equation; 1228 1229 return term; 1230 } 1231 1232 Term *left = NULL, *right = NULL; 1233 1234 if (fLeft != NULL && (left = fLeft->Copy()) == NULL) 1235 return NULL; 1236 if (fRight != NULL && (right = fRight->Copy()) == NULL) { 1237 delete left; 1238 return NULL; 1239 } 1240 1241 Term *term = new Term(left,fOp,right); 1242 if (term == NULL) { 1243 delete left; 1244 delete right; 1245 return NULL; 1246 } 1247 return term; 1248 } 1249 #endif 1250 1251 1252 // #pragma mark - 1253 1254 #ifdef DEBUG 1255 void 1256 Operator::PrintToStream() 1257 { 1258 D(__out("( ")); 1259 if (fLeft != NULL) 1260 fLeft->PrintToStream(); 1261 1262 char *op; 1263 switch (fOp) { 1264 case OP_OR: op = "OR"; break; 1265 case OP_AND: op = "AND"; break; 1266 default: op = "?"; break; 1267 } 1268 D(__out(" %s ",op)); 1269 1270 if (fRight != NULL) 1271 fRight->PrintToStream(); 1272 1273 D(__out(" )")); 1274 } 1275 1276 1277 void 1278 Equation::PrintToStream() 1279 { 1280 char *symbol = "???"; 1281 switch (fOp) { 1282 case OP_EQUAL: symbol = "=="; break; 1283 case OP_UNEQUAL: symbol = "!="; break; 1284 case OP_GREATER_THAN: symbol = ">"; break; 1285 case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break; 1286 case OP_LESS_THAN: symbol = "<"; break; 1287 case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break; 1288 } 1289 D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString)); 1290 } 1291 1292 #endif /* DEBUG */ 1293 1294 // #pragma mark - 1295 1296 1297 Expression::Expression(char *expr) 1298 { 1299 if (expr == NULL) 1300 return; 1301 1302 fTerm = ParseOr(&expr); 1303 if (fTerm != NULL && fTerm->InitCheck() < B_OK) { 1304 FATAL(("Corrupt tree in expression!\n")); 1305 delete fTerm; 1306 fTerm = NULL; 1307 } 1308 D(if (fTerm != NULL) { 1309 fTerm->PrintToStream(); 1310 D(__out("\n")); 1311 if (*expr != '\0') 1312 PRINT(("Unexpected end of string: \"%s\"!\n", expr)); 1313 }); 1314 fPosition = expr; 1315 } 1316 1317 1318 Expression::~Expression() 1319 { 1320 delete fTerm; 1321 } 1322 1323 1324 Term * 1325 Expression::ParseEquation(char **expr) 1326 { 1327 skipWhitespace(expr); 1328 1329 bool _not = false; 1330 if (**expr == '!') { 1331 skipWhitespace(expr, 1); 1332 if (**expr != '(') 1333 return NULL; 1334 1335 _not = true; 1336 } 1337 1338 if (**expr == ')') { 1339 // shouldn't be handled here 1340 return NULL; 1341 } else if (**expr == '(') { 1342 skipWhitespace(expr, 1); 1343 1344 Term *term = ParseOr(expr); 1345 1346 skipWhitespace(expr); 1347 1348 if (**expr != ')') { 1349 delete term; 1350 return NULL; 1351 } 1352 1353 // If the term is negated, we just complement the tree, to get 1354 // rid of the not, a.k.a. DeMorgan's Law. 1355 if (_not) 1356 term->Complement(); 1357 1358 skipWhitespace(expr, 1); 1359 1360 return term; 1361 } 1362 1363 Equation *equation = new Equation(expr); 1364 if (equation == NULL || equation->InitCheck() < B_OK) { 1365 delete equation; 1366 return NULL; 1367 } 1368 return equation; 1369 } 1370 1371 1372 Term * 1373 Expression::ParseAnd(char **expr) 1374 { 1375 Term *left = ParseEquation(expr); 1376 if (left == NULL) 1377 return NULL; 1378 1379 while (IsOperator(expr,'&')) { 1380 Term *right = ParseAnd(expr); 1381 Term *newParent = NULL; 1382 1383 if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) { 1384 delete left; 1385 delete right; 1386 1387 return NULL; 1388 } 1389 left = newParent; 1390 } 1391 1392 return left; 1393 } 1394 1395 1396 Term * 1397 Expression::ParseOr(char **expr) 1398 { 1399 Term *left = ParseAnd(expr); 1400 if (left == NULL) 1401 return NULL; 1402 1403 while (IsOperator(expr,'|')) { 1404 Term *right = ParseAnd(expr); 1405 Term *newParent = NULL; 1406 1407 if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) { 1408 delete left; 1409 delete right; 1410 1411 return NULL; 1412 } 1413 left = newParent; 1414 } 1415 1416 return left; 1417 } 1418 1419 1420 bool 1421 Expression::IsOperator(char **expr, char op) 1422 { 1423 char *string = *expr; 1424 1425 if (*string == op && *(string + 1) == op) { 1426 *expr += 2; 1427 return true; 1428 } 1429 return false; 1430 } 1431 1432 1433 status_t 1434 Expression::InitCheck() 1435 { 1436 if (fTerm == NULL) 1437 return B_BAD_VALUE; 1438 1439 return B_OK; 1440 } 1441 1442 1443 // #pragma mark - 1444 1445 1446 Query::Query(Volume *volume, Expression *expression, uint32 flags) 1447 : 1448 fVolume(volume), 1449 fExpression(expression), 1450 fCurrent(NULL), 1451 fIterator(NULL), 1452 fIndex(volume), 1453 fFlags(flags), 1454 fPort(-1) 1455 { 1456 // if the expression has a valid root pointer, the whole tree has 1457 // already passed the sanity check, so that we don't have to check 1458 // every pointer 1459 if (volume == NULL || expression == NULL || expression->Root() == NULL) 1460 return; 1461 1462 // create index on the stack and delete it afterwards 1463 fExpression->Root()->CalculateScore(fIndex); 1464 fIndex.Unset(); 1465 1466 Rewind(); 1467 1468 if (fFlags & B_LIVE_QUERY) 1469 volume->AddQuery(this); 1470 } 1471 1472 1473 Query::~Query() 1474 { 1475 if (fFlags & B_LIVE_QUERY) 1476 fVolume->RemoveQuery(this); 1477 } 1478 1479 1480 status_t 1481 Query::Rewind() 1482 { 1483 // free previous stuff 1484 1485 fStack.MakeEmpty(); 1486 1487 delete fIterator; 1488 fIterator = NULL; 1489 fCurrent = NULL; 1490 1491 // put the whole expression on the stack 1492 1493 Stack<Term *> stack; 1494 stack.Push(fExpression->Root()); 1495 1496 Term *term; 1497 while (stack.Pop(&term)) { 1498 if (term->Op() < OP_EQUATION) { 1499 Operator *op = (Operator *)term; 1500 1501 if (op->Op() == OP_OR) { 1502 stack.Push(op->Left()); 1503 stack.Push(op->Right()); 1504 } else { 1505 // For OP_AND, we can use the scoring system to decide which path to add 1506 if (op->Right()->Score() > op->Left()->Score()) 1507 stack.Push(op->Right()); 1508 else 1509 stack.Push(op->Left()); 1510 } 1511 } else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK) 1512 FATAL(("Unknown term on stack or stack error")); 1513 } 1514 1515 return B_OK; 1516 } 1517 1518 1519 status_t 1520 Query::GetNextEntry(struct dirent *dirent, size_t size) 1521 { 1522 // If we don't have an equation to use yet/anymore, get a new one 1523 // from the stack 1524 while (true) { 1525 if (fIterator == NULL) { 1526 if (!fStack.Pop(&fCurrent) 1527 || fCurrent == NULL 1528 || fCurrent->PrepareQuery(fVolume, fIndex, &fIterator, 1529 fFlags & B_QUERY_NON_INDEXED) < B_OK) 1530 return B_ENTRY_NOT_FOUND; 1531 } 1532 if (fCurrent == NULL) 1533 RETURN_ERROR(B_ERROR); 1534 1535 status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size); 1536 if (status < B_OK) { 1537 delete fIterator; 1538 fIterator = NULL; 1539 fCurrent = NULL; 1540 } else { 1541 // only return if we have another entry 1542 return B_OK; 1543 } 1544 } 1545 } 1546 1547 1548 void 1549 Query::SetLiveMode(port_id port, int32 token) 1550 { 1551 fPort = port; 1552 fToken = token; 1553 1554 if ((fFlags & B_LIVE_QUERY) == 0) { 1555 // you can decide at any point to set the live query mode, 1556 // only live queries have to be updated by attribute changes 1557 fFlags |= B_LIVE_QUERY; 1558 fVolume->AddQuery(this); 1559 } 1560 } 1561 1562 1563 void 1564 Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey, 1565 size_t oldLength, const uint8 *newKey, size_t newLength) 1566 { 1567 if (fPort < 0 || fExpression == NULL || attribute == NULL) 1568 return; 1569 1570 // ToDo: check if the attribute is part of the query at all... 1571 1572 status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength); 1573 status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength); 1574 1575 const char *name = NULL; 1576 bool entryCreated; 1577 1578 if (oldStatus != MATCH_OK) { 1579 if (newStatus != MATCH_OK) { 1580 // nothing has changed 1581 return; 1582 } 1583 entryCreated = true; 1584 } else if (newStatus != MATCH_OK) { 1585 // entry got removed 1586 entryCreated = false; 1587 } else { 1588 // the entry stays in the query - only notify in case the name of the inode was changed 1589 if (oldKey == NULL || strcmp(attribute, "name")) 1590 return; 1591 1592 notify_query_entry_removed(fPort, fToken, fVolume->ID(), 1593 fVolume->ToVnode(inode->Parent()), (const char *)oldKey, inode->ID()); 1594 name = (const char *)newKey; 1595 entryCreated = true; 1596 } 1597 1598 // we may need to get the name of the inode 1599 1600 char nameBuffer[B_FILE_NAME_LENGTH]; 1601 1602 if (name == NULL) { 1603 if (strcmp(attribute, "name")) { 1604 if (inode->GetName(nameBuffer) != B_OK) 1605 nameBuffer[0] = '\0'; 1606 name = nameBuffer; 1607 } else { 1608 // a shortcut to prevent having to scan the attribute section 1609 name = (const char *)newKey; 1610 } 1611 } 1612 1613 // notify query listeners 1614 1615 if (entryCreated) { 1616 notify_query_entry_created(fPort, fToken, fVolume->ID(), 1617 fVolume->ToVnode(inode->Parent()), name, inode->ID()); 1618 } else { 1619 notify_query_entry_removed(fPort, fToken, fVolume->ID(), 1620 fVolume->ToVnode(inode->Parent()), name, inode->ID()); 1621 } 1622 } 1623 1624