xref: /haiku/src/add-ons/kernel/file_systems/bfs/Query.cpp (revision da199a7e781d23415247118bb116b833b0025337)
1 /* Query - query parsing and evaluation
2 **
3 ** Initial version by Axel Dörfler, axeld@pinc-software.de
4 ** The pattern matching is roughly based on code originally written
5 ** by J. Kercheval, and on code written by Kenneth Almquist, though
6 ** it shares no code.
7 **
8 ** This file may be used under the terms of the OpenBeOS License.
9 */
10 
11 
12 #include "Query.h"
13 #include "cpp.h"
14 #include "bfs.h"
15 #include "Debug.h"
16 #include "Stack.h"
17 #include "Volume.h"
18 #include "Inode.h"
19 #include "BPlusTree.h"
20 #include "Index.h"
21 
22 #include <SupportDefs.h>
23 #include <TypeConstants.h>
24 #include <AppDefs.h>
25 
26 #include <malloc.h>
27 #include <stdio.h>
28 #include <string.h>
29 
30 
31 // The parser has a very static design, but it will do what is required.
32 //
33 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator
34 // precedence, that is =,!=,>,<,>=,<= .. && .. ||.
35 // Apparently, the "!" (not) can only be used with brackets.
36 //
37 // If you think that there are too few NULL pointer checks in some places
38 // of the code, just read the beginning of the query constructor.
39 // The API is not fully available, just the Query and the Expression class
40 // are.
41 
42 
43 enum ops {
44 	OP_NONE,
45 
46 	OP_AND,
47 	OP_OR,
48 
49 	OP_EQUATION,
50 
51 	OP_EQUAL,
52 	OP_UNEQUAL,
53 	OP_GREATER_THAN,
54 	OP_LESS_THAN,
55 	OP_GREATER_THAN_OR_EQUAL,
56 	OP_LESS_THAN_OR_EQUAL,
57 };
58 
59 enum match {
60 	NO_MATCH = 0,
61 	MATCH_OK = 1,
62 
63 	MATCH_BAD_PATTERN = -2,
64 	MATCH_INVALID_CHARACTER
65 };
66 
67 // return values from isValidPattern()
68 enum {
69 	PATTERN_INVALID_ESCAPE = -3,
70 	PATTERN_INVALID_RANGE,
71 	PATTERN_INVALID_SET
72 };
73 
74 union value {
75 	int64	Int64;
76 	uint64	Uint64;
77 	int32	Int32;
78 	uint32	Uint32;
79 	float	Float;
80 	double	Double;
81 	char	String[INODE_FILE_NAME_LENGTH];
82 };
83 
84 class Term {
85 	public:
86 		Term(int8 op) : fOp(op), fParent(NULL) {}
87 
88 		int8		Op() const { return fOp; }
89 
90 		void		SetParent(Term *parent) { fParent = parent; }
91 		Term		*Parent() const { return fParent; }
92 
93 		virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,
94 				const uint8 *key = NULL,size_t size = 0) = 0;
95 		virtual void Complement() = 0;
96 
97 		virtual void CalculateScore(Index &index) = 0;
98 		virtual int32 Score() const = 0;
99 
100 		virtual status_t InitCheck() = 0;
101 
102 #ifdef DEBUG
103 		virtual void	PrintToStream() = 0;
104 #endif
105 
106 	protected:
107 		int8	fOp;
108 		Term	*fParent;
109 };
110 
111 // Although an Equation object is quite independent from the volume on which
112 // the query is run, there are some dependencies that are produced while
113 // querying:
114 // The type/size of the value, the score, and if it has an index or not.
115 // So you could run more than one query on the same volume, but it might return
116 // wrong values when it runs concurrently on another volume.
117 // That's not an issue right now, because we run single-threaded and don't use
118 // queries more than once.
119 
120 class Equation : public Term {
121 	public:
122 		Equation(char **expr);
123 		~Equation();
124 
125 		virtual status_t InitCheck();
126 
127 		status_t	ParseQuotedString(char **_start,char **_end);
128 		char		*CopyString(char *start, char *end);
129 
130 		virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,const uint8 *key = NULL,size_t size = 0);
131 		virtual void Complement();
132 
133 		status_t	PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator);
134 		status_t	GetNextMatching(Volume *volume,TreeIterator *iterator,struct dirent *dirent,size_t bufferSize);
135 
136 		virtual void CalculateScore(Index &index);
137 		virtual int32 Score() const { return fScore; }
138 
139 #ifdef DEBUG
140 		virtual void PrintToStream();
141 #endif
142 
143 	private:
144 		Equation(const Equation &);
145 		Equation &operator=(const Equation &);
146 			// no implementation
147 
148 		status_t	ConvertValue(type_code type);
149 		bool		CompareTo(const uint8 *value, uint16 size);
150 		uint8		*Value() const { return (uint8 *)&fValue; }
151 		status_t	MatchEmptyString();
152 
153 		char		*fAttribute;
154 		char		*fString;
155 		union value fValue;
156 		type_code	fType;
157 		size_t		fSize;
158 		bool		fIsPattern;
159 		bool		fIsSpecialTime;
160 
161 		int32		fScore;
162 		bool		fHasIndex;
163 };
164 
165 class Operator : public Term {
166 	public:
167 		Operator(Term *,int8,Term *);
168 		~Operator();
169 
170 		Term		*Left() const { return fLeft; }
171 		Term		*Right() const { return fRight; }
172 
173 		virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0,
174 			const uint8 *key = NULL, size_t size = 0);
175 		virtual void Complement();
176 
177 		virtual void CalculateScore(Index &index);
178 		virtual int32 Score() const;
179 
180 		virtual status_t InitCheck();
181 
182 		//Term		*Copy() const;
183 #ifdef DEBUG
184 		virtual void PrintToStream();
185 #endif
186 
187 	private:
188 		Operator(const Operator &);
189 		Operator &operator=(const Operator &);
190 			// no implementation
191 
192 		Term		*fLeft,*fRight;
193 };
194 
195 
196 //---------------------------------
197 
198 
199 void
200 skipWhitespace(char **expr, int32 skip = 0)
201 {
202 	char *string = (*expr) + skip;
203 	while (*string == ' ' || *string == '\t') string++;
204 	*expr = string;
205 }
206 
207 
208 void
209 skipWhitespaceReverse(char **expr,char *stop)
210 {
211 	char *string = *expr;
212 	while (string > stop && (*string == ' ' || *string == '\t')) string--;
213 	*expr = string;
214 }
215 
216 
217 //	#pragma mark -
218 
219 
220 uint32
221 utf8ToUnicode(char **string)
222 {
223 	uint8 *bytes = (uint8 *)*string;
224 	int32 length;
225 	uint8 mask = 0x1f;
226 
227 	switch (bytes[0] & 0xf0) {
228 		case 0xc0:
229 		case 0xd0:	length = 2; break;
230 		case 0xe0:	length = 3; break;
231 		case 0xf0:
232 			mask = 0x0f;
233 			length = 4;
234 			break;
235 		default:
236 			// valid 1-byte character
237 			// and invalid characters
238 			(*string)++;
239 			return bytes[0];
240 	}
241 	uint32 c = bytes[0] & mask;
242 	int32 i = 1;
243 	for (;i < length && (bytes[i] & 0x80) > 0;i++)
244 		c = (c << 6) | (bytes[i] & 0x3f);
245 
246 	if (i < length) {
247 		// invalid character
248 		(*string)++;
249 		return (uint32)bytes[0];
250 	}
251 	*string += length;
252 	return c;
253 }
254 
255 
256 int32
257 getFirstPatternSymbol(char *string)
258 {
259 	char c;
260 
261 	for (int32 index = 0;(c = *string++);index++) {
262 		if (c == '*' || c == '?' || c == '[')
263 			return index;
264 	}
265 	return -1;
266 }
267 
268 
269 bool
270 isPattern(char *string)
271 {
272 	return getFirstPatternSymbol(string) >= 0 ? true : false;
273 }
274 
275 
276 status_t
277 isValidPattern(char *pattern)
278 {
279 	while (*pattern) {
280 		switch (*pattern++) {
281 			case '\\':
282 				// the escape character must not be at the end of the pattern
283 				if (!*pattern++)
284 					return PATTERN_INVALID_ESCAPE;
285 				break;
286 
287 			case '[':
288 				if (pattern[0] == ']' || !pattern[0])
289 					return PATTERN_INVALID_SET;
290 
291 				while (*pattern != ']') {
292 					if (*pattern == '\\' && !*++pattern)
293 						return PATTERN_INVALID_ESCAPE;
294 
295 					if (!*pattern)
296 						return PATTERN_INVALID_SET;
297 
298 					if (pattern[0] == '-' && pattern[1] == '-')
299 						return PATTERN_INVALID_RANGE;
300 
301 					pattern++;
302 				}
303 				break;
304 		}
305 	}
306 	return B_OK;
307 }
308 
309 
310 /**	Matches the string against the given wildcard pattern.
311  *	Returns either MATCH_OK, or NO_MATCH when everything went fine,
312  *	or values < 0 (see enum at the top of Query.cpp) if an error
313  *	occurs
314  */
315 
316 status_t
317 matchString(char *pattern,char *string)
318 {
319 	while (*pattern) {
320 		// end of string == valid end of pattern?
321 		if (!string[0]) {
322 			while (pattern[0] == '*')
323 				pattern++;
324 			return !pattern[0] ? MATCH_OK : NO_MATCH;
325 		}
326 
327 		switch (*pattern++) {
328 			case '?':
329 			{
330 				// match exactly one UTF-8 character; we are
331 				// not interested in the result
332 				utf8ToUnicode(&string);
333 				break;
334 			}
335 
336 			case '*':
337 			{
338 				// compact pattern
339 				while (true) {
340 					if (pattern[0] == '?') {
341 						if (!*++string)
342 							return NO_MATCH;
343 					} else if (pattern[0] != '*')
344 						break;
345 
346 					pattern++;
347 				}
348 
349 				// if the pattern is done, we have matched the string
350 				if (!pattern[0])
351 					return MATCH_OK;
352 
353 				while(true) {
354 					// we have removed all occurences of '*' and '?'
355 					if (pattern[0] == string[0]
356 						|| pattern[0] == '['
357 						|| pattern[0] == '\\') {
358 						status_t status = matchString(pattern,string);
359 						if (status < B_OK || status == MATCH_OK)
360 							return status;
361 					}
362 
363 					// we could be nice here and just jump to the next
364 					// UTF-8 character - but we wouldn't gain that much
365 					// and it'd be slower (since we're checking for
366 					// equality before entering the recursion)
367 					if (!*++string)
368 						return NO_MATCH;
369 				}
370 				break;
371 			}
372 
373 			case '[':
374 			{
375 				bool invert = false;
376 				if (pattern[0] == '^' || pattern[0] == '!') {
377 					invert = true;
378 					pattern++;
379 				}
380 
381 				if (!pattern[0] || pattern[0] == ']')
382 					return MATCH_BAD_PATTERN;
383 
384 				uint32 c = utf8ToUnicode(&string);
385 				bool matched = false;
386 
387 				while (pattern[0] != ']') {
388 					if (!pattern[0])
389 						return MATCH_BAD_PATTERN;
390 
391 					if (pattern[0] == '\\')
392 						pattern++;
393 
394 					uint32 first = utf8ToUnicode(&pattern);
395 
396 					// Does this character match, or is this a range?
397 					if (first == c) {
398 						matched = true;
399 						break;
400 					} else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) {
401 						pattern++;
402 
403 						if (pattern[0] == '\\') {
404 							pattern++;
405 							if (!pattern[0])
406 								return MATCH_BAD_PATTERN;
407 						}
408 						uint32 last = utf8ToUnicode(&pattern);
409 
410 						if (c >= first && c <= last) {
411 							matched = true;
412 							break;
413 						}
414 					}
415 				}
416 
417 				if (invert)
418 					matched = !matched;
419 
420 				if (matched) {
421 					while (pattern[0] != ']') {
422 						if (!pattern[0])
423 							return MATCH_BAD_PATTERN;
424 						pattern++;
425 					}
426 					pattern++;
427 					break;
428 				}
429 				return NO_MATCH;
430 			}
431 
432             case '\\':
433 				if (!pattern[0])
434 					return MATCH_BAD_PATTERN;
435 				// supposed to fall through
436 			default:
437 				if (pattern[-1] != string[0])
438 					return NO_MATCH;
439 				string++;
440 				break;
441 		}
442 	}
443 
444 	if (string[0])
445 		return NO_MATCH;
446 
447 	return MATCH_OK;
448 }
449 
450 
451 //	#pragma mark -
452 
453 
454 Equation::Equation(char **expr)
455 	: Term(OP_EQUATION),
456 	fAttribute(NULL),
457 	fString(NULL),
458 	fType(0),
459 	fIsPattern(false)
460 {
461 	char *string = *expr;
462 	char *start = string;
463 	char *end = NULL;
464 
465 	// Since the equation is the integral part of any query, we're just parsing
466 	// the whole thing here.
467 	// The whitespace at the start is already removed in Expression::ParseEquation()
468 
469 	if (*start == '"' || *start == '\'') {
470 		// string is quoted (start has to be on the beginning of a string)
471 		if (ParseQuotedString(&start,&end) < B_OK)
472 			return;
473 
474 		// set string to a valid start of the equation symbol
475 		string = end + 2;
476 		skipWhitespace(&string);
477 		if (*string != '=' && *string != '<' && *string != '>' && *string != '!') {
478 			*expr = string;
479 			return;
480 		}
481 	} else {
482 		// search the (in)equation for the actual equation symbol (and for other operators
483 		// in case the equation is malformed)
484 		while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!'
485 			&& *string != '&' && *string != '|')
486 			string++;
487 
488 		// get the attribute string	(and trim whitespace), in case
489 		// the string was not quoted
490 		end = string - 1;
491 		skipWhitespaceReverse(&end,start);
492 	}
493 
494 	// attribute string is empty (which is not allowed)
495 	if (start > end)
496 		return;
497 
498 	// at this point, "start" points to the beginning of the string, "end" points
499 	// to the last character of the string, and "string" points to the first
500 	// character of the equation symbol
501 
502 	// test for the right symbol (as this doesn't need any memory)
503 	switch (*string) {
504 		case '=':
505 			fOp = OP_EQUAL;
506 			break;
507 		case '>':
508 			fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN;
509 			break;
510 		case '<':
511 			fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN;
512 			break;
513 		case '!':
514 			if (*(string + 1) != '=')
515 				return;
516 			fOp = OP_UNEQUAL;
517 			break;
518 
519 		// any invalid characters will be rejected
520 		default:
521 			*expr = string;
522 			return;
523 	}
524 	// lets change "start" to point to the first character after the symbol
525 	if (*(string + 1) == '=')
526 		string++;
527 	string++;
528 	skipWhitespace(&string);
529 
530 	// allocate & copy the attribute string
531 
532 	fAttribute = CopyString(start,end);
533 	if (fAttribute == NULL)
534 		return;
535 
536 	start = string;
537 	if (*start == '"' || *start == '\'') {
538 		// string is quoted (start has to be on the beginning of a string)
539 		if (ParseQuotedString(&start,&end) < B_OK)
540 			return;
541 
542 		string = end + 2;
543 		skipWhitespace(&string);
544 	} else {
545 		while (*string && *string != '&' && *string != '|' && *string != ')')
546 			string++;
547 
548 		end = string - 1;
549 		skipWhitespaceReverse(&end,start);
550 	}
551 
552 	// at this point, "start" will point to the first character of the value,
553 	// "end" will point to its last character, and "start" to the first non-
554 	// whitespace character after the value string
555 
556 	fString = CopyString(start,end);
557 	if (fString == NULL)
558 		return;
559 
560 	// patterns are only allowed for these operations (and strings)
561 	if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) {
562 		fIsPattern = isPattern(fString);
563 		if (fIsPattern && isValidPattern(fString) < B_OK) {
564 			// we only want to have valid patterns; setting fString
565 			// to NULL will cause InitCheck() to fail
566 			free(fString);
567 			fString = NULL;
568 		}
569 	}
570 
571 	// The special time flag is set if the time values are shifted
572 	// 64-bit values to reduce the number of duplicates.
573 	// We have to be able to compare them against unshifted values
574 	// later. The only index which needs this is the last_modified
575 	// index, but we may want to open that feature for other indices,
576 	// too one day.
577 	fIsSpecialTime = !strcmp(fAttribute,"last_modified");
578 
579 	*expr = string;
580 }
581 
582 
583 Equation::~Equation()
584 {
585 	if (fAttribute != NULL)
586 		free(fAttribute);
587 	if (fString != NULL)
588 		free(fString);
589 }
590 
591 
592 status_t
593 Equation::InitCheck()
594 {
595 	if (fAttribute == NULL
596 		|| fString == NULL
597 		|| fOp == OP_NONE)
598 		return B_BAD_VALUE;
599 
600 	return B_OK;
601 }
602 
603 
604 status_t
605 Equation::ParseQuotedString(char **_start, char **_end)
606 {
607 	char *start = *_start;
608 	char quote = *start++;
609 	char *end = start;
610 
611 	for (;*end && *end != quote;end++) {
612 		if (*end == '\\')
613 			end++;
614 	}
615 	if (*end == '\0')
616 		return B_BAD_VALUE;
617 
618 	*_start = start;
619 	*_end = end - 1;
620 
621 	return B_OK;
622 }
623 
624 
625 char *
626 Equation::CopyString(char *start,char *end)
627 {
628 	// end points to the last character of the string - and the length
629 	// also has to include the null-termination
630 	int32 length = end + 2 - start;
631 	// just to make sure; since that's the max. attribute name length and
632 	// the max. string in an index, it make sense to have it that way
633 	if (length > INODE_FILE_NAME_LENGTH || length <= 0)
634 		return NULL;
635 
636 	char *copy = (char *)malloc(length);
637 	if (copy == NULL)
638 		return NULL;
639 
640 	memcpy(copy,start,length - 1);
641 	copy[length - 1] = '\0';
642 
643 	return copy;
644 }
645 
646 
647 status_t
648 Equation::ConvertValue(type_code type)
649 {
650 	// Has the type already been converted?
651 	if (type == fType)
652 		return B_OK;
653 
654 	fType = type;
655 	char *string = fString;
656 
657 	switch (type) {
658 		// B_MIME_STRING_TYPE is defined in Mime.h which I didn't want to include just for that
659 		case 'MIMS':
660 			type = B_STRING_TYPE;
661 			// supposed to fall through
662 		case B_STRING_TYPE:
663 			strncpy(fValue.String,string,INODE_FILE_NAME_LENGTH);
664 			fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0';
665 			fSize = strlen(fValue.String);
666 			break;
667 		case B_INT32_TYPE:
668 			fValue.Int32 = strtol(string,&string,0);
669 			fSize = sizeof(int32);
670 			break;
671 		case B_UINT32_TYPE:
672 			fValue.Int32 = strtoul(string,&string,0);
673 			fSize = sizeof(uint32);
674 			break;
675 		case B_INT64_TYPE:
676 			fValue.Int64 = strtoll(string,&string,0);
677 			fSize = sizeof(int64);
678 			break;
679 		case B_UINT64_TYPE:
680 			fValue.Uint64 = strtoull(string,&string,0);
681 			fSize = sizeof(uint64);
682 			break;
683 		case B_FLOAT_TYPE:
684 			fValue.Float = strtod(string,&string);
685 			fSize = sizeof(float);
686 			break;
687 		case B_DOUBLE_TYPE:
688 			fValue.Double = strtod(string,&string);
689 			fSize = sizeof(double);
690 			break;
691 		default:
692 			FATAL(("query value conversion to 0x%lx requested!\n",type));
693 			// should we fail here or just do a safety int32 conversion?
694 			return B_ERROR;
695 	}
696 
697 	// patterns are only allowed for string types
698 	if (fType != B_STRING_TYPE && fIsPattern)
699 		fIsPattern = false;
700 
701 	return B_OK;
702 }
703 
704 
705 /**	Returns true when the key matches the equation. You have to
706  *	call ConvertValue() before this one.
707  */
708 
709 bool
710 Equation::CompareTo(const uint8 *value,uint16 size)
711 {
712 	int32 compare;
713 
714 	// fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL
715 	if (fIsPattern) {
716 		// we have already validated the pattern, so we don't check for failing
717 		// here - if something is broken, and matchString() returns an error,
718 		// we just don't match
719 		compare = matchString(fValue.String,(char *)value) == MATCH_OK ? 0 : 1;
720 	} else if (fIsSpecialTime) {
721 		// the index is a shifted int64 index, but we have to match
722 		// against an unshifted value (i.e. the last_modified index)
723 		int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT;
724 		compare = compareKeys(fType,&timeValue,sizeof(int64),&fValue.Int64,sizeof(int64));
725 	} else
726 		compare = compareKeys(fType,value,size,Value(),fSize);
727 
728 	switch (fOp) {
729 		case OP_EQUAL:
730 			return compare == 0;
731 		case OP_UNEQUAL:
732 			return compare != 0;
733 		case OP_LESS_THAN:
734 			return compare < 0;
735 		case OP_LESS_THAN_OR_EQUAL:
736 			return compare <= 0;
737 		case OP_GREATER_THAN:
738 			return compare > 0;
739 		case OP_GREATER_THAN_OR_EQUAL:
740 			return compare >= 0;
741 	}
742 	FATAL(("Unknown/Unsupported operation: %d\n",fOp));
743 	return false;
744 }
745 
746 
747 void
748 Equation::Complement()
749 {
750 	D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) {
751 		FATAL(("op out of range!"));
752 		return;
753 	});
754 
755 	int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL,
756 			OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN};
757 	fOp = complementOp[fOp - OP_EQUAL];
758 }
759 
760 
761 status_t
762 Equation::MatchEmptyString()
763 {
764 	// there is no matching attribute, we will just bail out if we
765 	// already know that our value is not of a string type.
766 	// If not, it will be converted to a string - and then be compared with "".
767 	// That's why we have to call ConvertValue() here - but it will be
768 	// a cheap call for the next time
769 	// Should we do this only for OP_UNEQUAL?
770 	if (fType != 0 && fType != B_STRING_TYPE)
771 		return NO_MATCH;
772 
773 	status_t status = ConvertValue(B_STRING_TYPE);
774 	if (status == B_OK)
775 		status = CompareTo((const uint8 *)"",fSize) ? MATCH_OK : NO_MATCH;
776 
777 	return status;
778 }
779 
780 
781 /**	Matches the inode's attribute value with the equation.
782  *	Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong
783  */
784 
785 status_t
786 Equation::Match(Inode *inode,const char *attributeName,int32 type,const uint8 *key,size_t size)
787 {
788 	// get a pointer to the attribute in question
789 	union value value;
790 	uint8 *buffer;
791 
792 	// first, check if we are matching for a live query and use that value
793 	if (attributeName != NULL && !strcmp(fAttribute,attributeName)) {
794 		if (key == NULL) {
795 			if (type == B_STRING_TYPE)
796 				return MatchEmptyString();
797 
798 			return NO_MATCH;
799 		}
800 		buffer = const_cast<uint8 *>(key);
801 	} else if (!strcmp(fAttribute,"name")) {
802 		// if not, check for "fake" attributes, "name", "size", "last_modified",
803 		buffer = (uint8 *)inode->Name();
804 		if (buffer == NULL)
805 			return B_ERROR;
806 
807 		type = B_STRING_TYPE;
808 		size = strlen((const char *)buffer);
809 	} else if (!strcmp(fAttribute,"size")) {
810 		buffer = (uint8 *)&inode->Node()->data.size;
811 		type = B_INT64_TYPE;
812 	} else if (!strcmp(fAttribute,"last_modified")) {
813 		buffer = (uint8 *)&inode->Node()->last_modified_time;
814 		type = B_INT64_TYPE;
815 	} else {
816 		// then for attributes in the small_data section, and finally for the
817 		// real attributes
818 		Inode *attribute;
819 
820 		inode->SmallDataLock().Lock();
821 		small_data *smallData = inode->FindSmallData(fAttribute);
822 		if (smallData != NULL) {
823 			buffer = smallData->Data();
824 			type = smallData->type;
825 			size = smallData->data_size;
826 			inode->SmallDataLock().Unlock();
827 		} else {
828 			// needed to unlock the small_data section as fast as possible
829 			inode->SmallDataLock().Unlock();
830 
831 			if (inode->GetAttribute(fAttribute,&attribute) == B_OK) {
832 				buffer = (uint8 *)&value;
833 				type = attribute->Node()->type;
834 				size = attribute->Size();
835 
836 				if (size > INODE_FILE_NAME_LENGTH)
837 					size = INODE_FILE_NAME_LENGTH;
838 
839 				if (attribute->ReadAt(0,buffer,&size) < B_OK) {
840 					inode->ReleaseAttribute(attribute);
841 					return B_IO_ERROR;
842 				}
843 				inode->ReleaseAttribute(attribute);
844 			} else
845 				return MatchEmptyString();
846 		}
847 	}
848 	// prepare own value for use, if it is possible to convert it
849 	status_t status = ConvertValue(type);
850 	if (status == B_OK)
851 		status = CompareTo(buffer,size) ? MATCH_OK : NO_MATCH;
852 
853 	RETURN_ERROR(status);
854 }
855 
856 
857 void
858 Equation::CalculateScore(Index &index)
859 {
860 	// As always, these values could be tuned and refined.
861 	// And the code could also need some real world testing :-)
862 
863 	// do we have to operate on a "foreign" index?
864 	if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) {
865 		fScore = 0;
866 		return;
867 	}
868 
869 	// if we have a pattern, how much does it help our search?
870 	if (fIsPattern)
871 		fScore = getFirstPatternSymbol(fString) << 3;
872 	else {
873 		// Score by operator
874 		if (fOp == OP_EQUAL)
875 			// higher than pattern="255 chars+*"
876 			fScore = 2048;
877 		else
878 			// the pattern search is regarded cheaper when you have at
879 			// least one character to set your index to
880 			fScore = 5;
881 	}
882 
883 	// take index size into account (1024 is the current node size
884 	// in our B+trees)
885 	// 2048 * 2048 == 4194304 is the maximum score (for an empty
886 	// tree, since the header + 1 node are already 2048 bytes)
887 	fScore = fScore * ((2048 * 1024LL) / index.Node()->Size());
888 }
889 
890 
891 status_t
892 Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator)
893 {
894 	type_code type;
895 	status_t status = index.SetTo(fAttribute);
896 
897 	// special case for OP_UNEQUAL - it will always operate through the whole index
898 	// but we need the call to the original index to get the correct type
899 	if (status < B_OK || fOp == OP_UNEQUAL) {
900 		// Try to get an index that holds all files (name)
901 		// Also sets the default type for all attributes without index
902 		// to string.
903 		type = status < B_OK ? B_STRING_TYPE : index.Type();
904 
905 		if (index.SetTo("name") < B_OK)
906 			return B_ENTRY_NOT_FOUND;
907 
908 		fHasIndex = false;
909 	} else {
910 		fHasIndex = true;
911 		type = index.Type();
912 	}
913 
914 	if (ConvertValue(type) < B_OK)
915 		return B_BAD_VALUE;
916 
917 	BPlusTree *tree;
918 	if (index.Node()->GetTree(&tree) < B_OK)
919 		return B_ERROR;
920 
921 	*iterator = new TreeIterator(tree);
922 	if (*iterator == NULL)
923 		return B_NO_MEMORY;
924 
925 	if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL
926 		|| fIsPattern)
927 		&& fHasIndex) {
928 		// set iterator to the exact position
929 
930 		int32 keySize = index.KeySize();
931 
932 		// at this point, fIsPattern is only true if it's a string type, and fOp
933 		// is either OP_EQUAL or OP_UNEQUAL
934 		if (fIsPattern) {
935 			// let's see if we can use the beginning of the key for positioning
936 			// the iterator and adjust the key size; if not, just leave the
937 			// iterator at the start and return success
938 			keySize = getFirstPatternSymbol(fString);
939 			if (keySize <= 0)
940 				return B_OK;
941 		}
942 
943 		if (keySize == 0) {
944 			if (fType == B_STRING_TYPE)
945 				keySize = strlen(fValue.String);
946 			else
947 				RETURN_ERROR(B_ENTRY_NOT_FOUND);
948 		}
949 
950 		if (fIsSpecialTime) {
951 			// we have to find the first matching shifted value
952 			off_t value = fValue.Int64 << INODE_TIME_SHIFT;
953 			status = (*iterator)->Find((uint8 *)&value,keySize);
954 			if (status == B_ENTRY_NOT_FOUND)
955 				return B_OK;
956 		} else {
957 			status = (*iterator)->Find(Value(),keySize);
958 			if (fOp == OP_EQUAL && !fIsPattern)
959 				return status;
960 			else if (status == B_ENTRY_NOT_FOUND && (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL))
961 				return B_OK;
962 		}
963 
964 		RETURN_ERROR(status);
965 	}
966 
967 	return B_OK;
968 }
969 
970 
971 status_t
972 Equation::GetNextMatching(Volume *volume, TreeIterator *iterator,
973 		struct dirent *dirent, size_t bufferSize)
974 {
975 	while (true) {
976 		union value indexValue;
977 		uint16 keyLength;
978 		uint16 duplicate;
979 		off_t offset;
980 
981 		status_t status = iterator->GetNextEntry(&indexValue,&keyLength,(uint16)sizeof(indexValue),&offset,&duplicate);
982 		if (status < B_OK)
983 			return status;
984 
985 		// only compare against the index entry when this is the correct
986 		// index for the equation
987 		if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue,keyLength)) {
988 			// They aren't equal? let the operation decide what to do
989 			// Since we always start at the beginning of the index (or the correct
990 			// position), only some needs to be stopped if the entry doesn't fit.
991 			if (fOp == OP_LESS_THAN
992 				|| fOp == OP_LESS_THAN_OR_EQUAL
993 				|| (fOp == OP_EQUAL && !fIsPattern))
994 				return B_ENTRY_NOT_FOUND;
995 
996 			if (duplicate > 0)
997 				iterator->SkipDuplicates();
998 			continue;
999 		}
1000 
1001 		Inode *inode;
1002 		if ((status = get_vnode(volume->ID(),offset,(void **)&inode)) != B_OK) {
1003 			REPORT_ERROR(status);
1004 			FATAL(("could not get inode %Ld in index \"%s\"!\n",offset,fAttribute));
1005 			// try with next
1006 			continue;
1007 		}
1008 
1009 		// check user permissions here - but which one?!
1010 		// we could filter out all those where we don't have
1011 		// read access... (we should check for every parent
1012 		// directory if the X_OK is allowed)
1013 		// Although it's quite expensive to open all parents,
1014 		// it's likely that the application that runs the
1015 		// query will do something similar (and we don't have
1016 		// to do it for root, either).
1017 
1018 		// go up in the tree until a &&-operator is found, and check if the
1019 		// inode matches with the rest of the expression - we don't have to
1020 		// check ||-operators for that
1021 		Term *term = this;
1022 		status = MATCH_OK;
1023 
1024 		if (!fHasIndex)
1025 			status = Match(inode);
1026 
1027 		while (term != NULL && status == MATCH_OK) {
1028 			Operator *parent = (Operator *)term->Parent();
1029 			if (parent == NULL)
1030 				break;
1031 
1032 			if (parent->Op() == OP_AND) {
1033 				// choose the other child of the parent
1034 				Term *other = parent->Right();
1035 				if (other == term)
1036 					other = parent->Left();
1037 
1038 				if (other == NULL) {
1039 					FATAL(("&&-operator has only one child... (parent = %p)\n",parent));
1040 					break;
1041 				}
1042 				status = other->Match(inode);
1043 				if (status < 0) {
1044 					REPORT_ERROR(status);
1045 					status = NO_MATCH;
1046 				}
1047 			}
1048 			term = (Term *)parent;
1049 		}
1050 
1051 		if (status == MATCH_OK) {
1052 			dirent->d_dev = volume->ID();
1053 			dirent->d_ino = offset;
1054 			dirent->d_pdev = volume->ID();
1055 			dirent->d_pino = volume->ToVnode(inode->Parent());
1056 			strcpy(dirent->d_name,inode->Name());
1057 
1058 #ifdef KEEP_WRONG_DIRENT_RECLEN
1059 			// ToDo: The available file systems in BeOS apparently don't set the
1060 			// correct d_reclen - we are copying that behaviour if requested, but
1061 			// if it doesn't break compatibility, we will remove it.
1062 			dirent->d_reclen = strlen(dirent->d_name);
1063 #else
1064 			dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name);
1065 #endif
1066 		}
1067 
1068 		put_vnode(volume->ID(), inode->ID());
1069 
1070 		if (status == MATCH_OK)
1071 			return B_OK;
1072 	}
1073 	RETURN_ERROR(B_ERROR);
1074 }
1075 
1076 
1077 //	#pragma mark -
1078 
1079 
1080 Operator::Operator(Term *left, int8 op, Term *right)
1081 	: Term(op),
1082 	fLeft(left),
1083 	fRight(right)
1084 {
1085 	if (left)
1086 		left->SetParent(this);
1087 	if (right)
1088 		right->SetParent(this);
1089 }
1090 
1091 
1092 Operator::~Operator()
1093 {
1094 	delete fLeft;
1095 	delete fRight;
1096 }
1097 
1098 
1099 status_t
1100 Operator::Match(Inode *inode,const char *attribute,int32 type,const uint8 *key,size_t size)
1101 {
1102 	if (fOp == OP_AND) {
1103 		status_t status = fLeft->Match(inode,attribute,type,key,size);
1104 		if (status != MATCH_OK)
1105 			return status;
1106 
1107 		return fRight->Match(inode,attribute,type,key,size);
1108 	} else {
1109 		// choose the term with the better score for OP_OR
1110 		if (fRight->Score() > fLeft->Score()) {
1111 			status_t status = fRight->Match(inode,attribute,type,key,size);
1112 			if (status != NO_MATCH)
1113 				return status;
1114 		}
1115 		return fLeft->Match(inode,attribute,type,key,size);
1116 	}
1117 }
1118 
1119 
1120 void
1121 Operator::Complement()
1122 {
1123 	if (fOp == OP_AND)
1124 		fOp = OP_OR;
1125 	else
1126 		fOp = OP_AND;
1127 
1128 	fLeft->Complement();
1129 	fRight->Complement();
1130 }
1131 
1132 
1133 void
1134 Operator::CalculateScore(Index &index)
1135 {
1136 	fLeft->CalculateScore(index);
1137 	fRight->CalculateScore(index);
1138 }
1139 
1140 
1141 int32
1142 Operator::Score() const
1143 {
1144 	if (fOp == OP_AND) {
1145 		// return the one with the better score
1146 		if (fRight->Score() > fLeft->Score())
1147 			return fRight->Score();
1148 
1149 		return fLeft->Score();
1150 	}
1151 
1152 	// for OP_OR, be honest, and return the one with the worse score
1153 	if (fRight->Score() < fLeft->Score())
1154 		return fRight->Score();
1155 
1156 	return fLeft->Score();
1157 }
1158 
1159 
1160 status_t
1161 Operator::InitCheck()
1162 {
1163 	if (fOp != OP_AND && fOp != OP_OR
1164 		|| fLeft == NULL || fLeft->InitCheck() < B_OK
1165 		|| fRight == NULL || fRight->InitCheck() < B_OK)
1166 		return B_ERROR;
1167 
1168 	return B_OK;
1169 }
1170 
1171 
1172 #if 0
1173 Term *
1174 Operator::Copy() const
1175 {
1176 	if (fEquation != NULL) {
1177 		Equation *equation = new Equation(*fEquation);
1178 		if (equation == NULL)
1179 			return NULL;
1180 
1181 		Term *term = new Term(equation);
1182 		if (term == NULL)
1183 			delete equation;
1184 
1185 		return term;
1186 	}
1187 
1188 	Term *left = NULL, *right = NULL;
1189 
1190 	if (fLeft != NULL && (left = fLeft->Copy()) == NULL)
1191 		return NULL;
1192 	if (fRight != NULL && (right = fRight->Copy()) == NULL) {
1193 		delete left;
1194 		return NULL;
1195 	}
1196 
1197 	Term *term = new Term(left,fOp,right);
1198 	if (term == NULL) {
1199 		delete left;
1200 		delete right;
1201 		return NULL;
1202 	}
1203 	return term;
1204 }
1205 #endif
1206 
1207 
1208 //	#pragma mark -
1209 
1210 #ifdef DEBUG
1211 void
1212 Operator::PrintToStream()
1213 {
1214 	D(__out("( "));
1215 	if (fLeft != NULL)
1216 		fLeft->PrintToStream();
1217 
1218 	char *op;
1219 	switch (fOp) {
1220 		case OP_OR: op = "OR"; break;
1221 		case OP_AND: op = "AND"; break;
1222 		default: op = "?"; break;
1223 	}
1224 	D(__out(" %s ",op));
1225 
1226 	if (fRight != NULL)
1227 		fRight->PrintToStream();
1228 
1229 	D(__out(" )"));
1230 }
1231 
1232 
1233 void
1234 Equation::PrintToStream()
1235 {
1236 	char *symbol = "???";
1237 	switch (fOp) {
1238 		case OP_EQUAL: symbol = "=="; break;
1239 		case OP_UNEQUAL: symbol = "!="; break;
1240 		case OP_GREATER_THAN: symbol = ">"; break;
1241 		case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break;
1242 		case OP_LESS_THAN: symbol = "<"; break;
1243 		case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break;
1244 	}
1245 	D(__out("[\"%s\" %s \"%s\"]",fAttribute,symbol,fString));
1246 }
1247 
1248 #endif	/* DEBUG */
1249 
1250 //	#pragma mark -
1251 
1252 
1253 Expression::Expression(char *expr)
1254 {
1255 	if (expr == NULL)
1256 		return;
1257 
1258 	fTerm = ParseOr(&expr);
1259 	if (fTerm != NULL && fTerm->InitCheck() < B_OK) {
1260 		FATAL(("Corrupt tree in expression!\n"));
1261 		delete fTerm;
1262 		fTerm = NULL;
1263 	}
1264 	D(if (fTerm != NULL) {
1265 		fTerm->PrintToStream();
1266 		D(__out("\n"));
1267 		if (*expr != '\0')
1268 			PRINT(("Unexpected end of string: \"%s\"!\n",expr));
1269 	});
1270 	fPosition = expr;
1271 }
1272 
1273 
1274 Expression::~Expression()
1275 {
1276 	delete fTerm;
1277 }
1278 
1279 
1280 Term *
1281 Expression::ParseEquation(char **expr)
1282 {
1283 	skipWhitespace(expr);
1284 
1285 	bool not = false;
1286 	if (**expr == '!') {
1287 		skipWhitespace(expr, 1);
1288 		if (**expr != '(')
1289 			return NULL;
1290 
1291 		not = true;
1292 	}
1293 
1294 	if (**expr == ')') {
1295 		// shouldn't be handled here
1296 		return NULL;
1297 	} else if (**expr == '(') {
1298 		skipWhitespace(expr, 1);
1299 
1300 		Term *term = ParseOr(expr);
1301 
1302 		skipWhitespace(expr);
1303 
1304 		if (**expr != ')') {
1305 			delete term;
1306 			return NULL;
1307 		}
1308 
1309 		// If the term is negated, we just complement the tree, to get
1310 		// rid of the not, a.k.a. DeMorgan's Law.
1311 		if (not)
1312 			term->Complement();
1313 
1314 		skipWhitespace(expr, 1);
1315 
1316 		return term;
1317 	}
1318 
1319 	Equation *equation = new Equation(expr);
1320 	if (equation == NULL || equation->InitCheck() < B_OK) {
1321 		delete equation;
1322 		return NULL;
1323 	}
1324 	return equation;
1325 }
1326 
1327 
1328 Term *
1329 Expression::ParseAnd(char **expr)
1330 {
1331 	Term *left = ParseEquation(expr);
1332 	if (left == NULL)
1333 		return NULL;
1334 
1335 	while (IsOperator(expr,'&')) {
1336 		Term *right = ParseAnd(expr);
1337 		Term *newParent = NULL;
1338 
1339 		if (right == NULL || (newParent = new Operator(left,OP_AND,right)) == NULL) {
1340 			delete left;
1341 			delete right;
1342 
1343 			return NULL;
1344 		}
1345 		left = newParent;
1346 	}
1347 
1348 	return left;
1349 }
1350 
1351 
1352 Term *
1353 Expression::ParseOr(char **expr)
1354 {
1355 	Term *left = ParseAnd(expr);
1356 	if (left == NULL)
1357 		return NULL;
1358 
1359 	while (IsOperator(expr,'|')) {
1360 		Term *right = ParseAnd(expr);
1361 		Term *newParent = NULL;
1362 
1363 		if (right == NULL || (newParent = new Operator(left,OP_OR,right)) == NULL) {
1364 			delete left;
1365 			delete right;
1366 
1367 			return NULL;
1368 		}
1369 		left = newParent;
1370 	}
1371 
1372 	return left;
1373 }
1374 
1375 
1376 bool
1377 Expression::IsOperator(char **expr, char op)
1378 {
1379 	char *string = *expr;
1380 
1381 	if (*string == op && *(string + 1) == op) {
1382 		*expr += 2;
1383 		return true;
1384 	}
1385 	return false;
1386 }
1387 
1388 
1389 status_t
1390 Expression::InitCheck()
1391 {
1392 	if (fTerm == NULL)
1393 		return B_BAD_VALUE;
1394 
1395 	return B_OK;
1396 }
1397 
1398 
1399 //	#pragma mark -
1400 
1401 
1402 Query::Query(Volume *volume,Expression *expression)
1403 	:
1404 	fVolume(volume),
1405 	fExpression(expression),
1406 	fCurrent(NULL),
1407 	fIterator(NULL),
1408 	fIndex(volume),
1409 	fPort(-1)
1410 {
1411 	// if the expression has a valid root pointer, the whole tree has
1412 	// already passed the sanity check, so that we don't have to check
1413 	// every pointer
1414 	if (volume == NULL || expression == NULL || expression->Root() == NULL)
1415 		return;
1416 
1417 	// create index on the stack and delete it afterwards
1418 	fExpression->Root()->CalculateScore(fIndex);
1419 	fIndex.Unset();
1420 
1421 	Stack<Term *> stack;
1422 	stack.Push(fExpression->Root());
1423 
1424 	Term *term;
1425 	while (stack.Pop(&term)) {
1426 		if (term->Op() < OP_EQUATION) {
1427 			Operator *op = (Operator *)term;
1428 
1429 			if (op->Op() == OP_OR) {
1430 				stack.Push(op->Left());
1431 				stack.Push(op->Right());
1432 			} else {
1433 				// For OP_AND, we can use the scoring system to decide which path to add
1434 				if (op->Right()->Score() > op->Left()->Score())
1435 					stack.Push(op->Right());
1436 				else
1437 					stack.Push(op->Left());
1438 			}
1439 		} else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK)
1440 			FATAL(("Unknown term on stack or stack error"));
1441 	}
1442 
1443 	volume->AddQuery(this);
1444 }
1445 
1446 
1447 Query::~Query()
1448 {
1449 	fVolume->RemoveQuery(this);
1450 }
1451 
1452 
1453 status_t
1454 Query::GetNextEntry(struct dirent *dirent, size_t size)
1455 {
1456 	// If we don't have an equation to use yet/anymore, get a new one
1457 	// from the stack
1458 	while (true) {
1459 		if (fIterator == NULL) {
1460 			if (!fStack.Pop(&fCurrent)
1461 				|| fCurrent == NULL
1462 				|| fCurrent->PrepareQuery(fVolume,fIndex,&fIterator) < B_OK)
1463 				return B_ENTRY_NOT_FOUND;
1464 		}
1465 		if (fCurrent == NULL)
1466 			RETURN_ERROR(B_ERROR);
1467 
1468 		status_t status = fCurrent->GetNextMatching(fVolume,fIterator,dirent,size);
1469 		if (status < B_OK) {
1470 			delete fIterator;
1471 			fIterator = NULL;
1472 			fCurrent = NULL;
1473 		} else {
1474 			// only return if we have another entry
1475 			return B_OK;
1476 		}
1477 	}
1478 }
1479 
1480 
1481 void
1482 Query::SetLiveMode(port_id port,int32 token)
1483 {
1484 	fPort = port;
1485 	fToken = token;
1486 }
1487 
1488 
1489 void
1490 Query::LiveUpdate(Inode *inode,const char *attribute,int32 type,const uint8 *oldKey,size_t oldLength,const uint8 *newKey,size_t newLength)
1491 {
1492 	if (fPort < 0 || fExpression == NULL || attribute == NULL)
1493 		return;
1494 
1495 	// ToDo: check if the attribute is part of the query at all...
1496 
1497 	status_t oldStatus = fExpression->Root()->Match(inode,attribute,type,oldKey,oldLength);
1498 	status_t newStatus = fExpression->Root()->Match(inode,attribute,type,newKey,newLength);
1499 	int32 op;
1500 	if (oldStatus == MATCH_OK && newStatus == MATCH_OK) {
1501 		// only send out a notification if the name was changed
1502 		if (oldKey == NULL || strcmp(attribute,"name"))
1503 			return;
1504 
1505 		send_notification(fPort,fToken,B_QUERY_UPDATE,B_ENTRY_REMOVED,fVolume->ID(),0,fVolume->ToVnode(inode->Parent()),0,inode->ID(),(const char *)oldKey);
1506 		op = B_ENTRY_CREATED;
1507 	} else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) {
1508 		// nothing has changed
1509 		return;
1510 	} else if (oldStatus == MATCH_OK && newStatus != MATCH_OK)
1511 		op = B_ENTRY_REMOVED;
1512 	else
1513 		op = B_ENTRY_CREATED;
1514 
1515 	// if "value" is NULL, send_notification() crashes...
1516 	const char *value = (const char *)newKey;
1517 	if (type != B_STRING_TYPE || value == NULL)
1518 		value = "";
1519 
1520 	send_notification(fPort,fToken,B_QUERY_UPDATE,op,fVolume->ID(),0,fVolume->ToVnode(inode->Parent()),0,inode->ID(),value);
1521 }
1522 
1523