xref: /haiku/src/add-ons/kernel/file_systems/bfs/Query.cpp (revision 7120e97489acbf17d86d3f33e3b2e68974fd4b23)
1 /* Query - query parsing and evaluation
2 **
3 ** Initial version by Axel Dörfler, axeld@pinc-software.de
4 ** The pattern matching is roughly based on code originally written
5 ** by J. Kercheval, and on code written by Kenneth Almquist, though
6 ** it shares no code.
7 **
8 ** This file may be used under the terms of the OpenBeOS License.
9 */
10 
11 
12 #include "Query.h"
13 #include "cpp.h"
14 #include "bfs.h"
15 #include "Debug.h"
16 #include "Stack.h"
17 #include "Volume.h"
18 #include "Inode.h"
19 #include "BPlusTree.h"
20 #include "Index.h"
21 
22 #include <SupportDefs.h>
23 #include <TypeConstants.h>
24 #include <AppDefs.h>
25 
26 #include <malloc.h>
27 #include <stdio.h>
28 #include <string.h>
29 
30 
31 // The parser has a very static design, but it will do what is required.
32 //
33 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator
34 // precedence, that is =,!=,>,<,>=,<= .. && .. ||.
35 // Apparently, the "!" (not) can only be used with brackets.
36 //
37 // If you think that there are too few NULL pointer checks in some places
38 // of the code, just read the beginning of the query constructor.
39 // The API is not fully available, just the Query and the Expression class
40 // are.
41 
42 
43 enum ops {
44 	OP_NONE,
45 
46 	OP_AND,
47 	OP_OR,
48 
49 	OP_EQUATION,
50 
51 	OP_EQUAL,
52 	OP_UNEQUAL,
53 	OP_GREATER_THAN,
54 	OP_LESS_THAN,
55 	OP_GREATER_THAN_OR_EQUAL,
56 	OP_LESS_THAN_OR_EQUAL,
57 };
58 
59 enum match {
60 	NO_MATCH = 0,
61 	MATCH_OK = 1,
62 
63 	MATCH_BAD_PATTERN = -2,
64 	MATCH_INVALID_CHARACTER
65 };
66 
67 // return values from isValidPattern()
68 enum {
69 	PATTERN_INVALID_ESCAPE = -3,
70 	PATTERN_INVALID_RANGE,
71 	PATTERN_INVALID_SET
72 };
73 
74 union value {
75 	int64	Int64;
76 	uint64	Uint64;
77 	int32	Int32;
78 	uint32	Uint32;
79 	float	Float;
80 	double	Double;
81 	char	String[INODE_FILE_NAME_LENGTH];
82 };
83 
84 // B_MIME_STRING_TYPE is defined in storage/Mime.h, but we
85 // don't need the whole file here; the type can't change anyway
86 #ifndef _MIME_H
87 #	define B_MIME_STRING_TYPE 'MIMS'
88 #endif
89 
90 class Term {
91 	public:
92 		Term(int8 op) : fOp(op), fParent(NULL) {}
93 
94 		int8		Op() const { return fOp; }
95 
96 		void		SetParent(Term *parent) { fParent = parent; }
97 		Term		*Parent() const { return fParent; }
98 
99 		virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,
100 				const uint8 *key = NULL,size_t size = 0) = 0;
101 		virtual void Complement() = 0;
102 
103 		virtual void CalculateScore(Index &index) = 0;
104 		virtual int32 Score() const = 0;
105 
106 		virtual status_t InitCheck() = 0;
107 
108 #ifdef DEBUG
109 		virtual void	PrintToStream() = 0;
110 #endif
111 
112 	protected:
113 		int8	fOp;
114 		Term	*fParent;
115 };
116 
117 // Although an Equation object is quite independent from the volume on which
118 // the query is run, there are some dependencies that are produced while
119 // querying:
120 // The type/size of the value, the score, and if it has an index or not.
121 // So you could run more than one query on the same volume, but it might return
122 // wrong values when it runs concurrently on another volume.
123 // That's not an issue right now, because we run single-threaded and don't use
124 // queries more than once.
125 
126 class Equation : public Term {
127 	public:
128 		Equation(char **expr);
129 		~Equation();
130 
131 		virtual status_t InitCheck();
132 
133 		status_t	ParseQuotedString(char **_start,char **_end);
134 		char		*CopyString(char *start, char *end);
135 
136 		virtual status_t Match(Inode *inode,const char *attribute = NULL,int32 type = 0,const uint8 *key = NULL,size_t size = 0);
137 		virtual void Complement();
138 
139 		status_t	PrepareQuery(Volume *volume, Index &index, TreeIterator **iterator);
140 		status_t	GetNextMatching(Volume *volume,TreeIterator *iterator,struct dirent *dirent,size_t bufferSize);
141 
142 		virtual void CalculateScore(Index &index);
143 		virtual int32 Score() const { return fScore; }
144 
145 #ifdef DEBUG
146 		virtual void PrintToStream();
147 #endif
148 
149 	private:
150 		Equation(const Equation &);
151 		Equation &operator=(const Equation &);
152 			// no implementation
153 
154 		status_t	ConvertValue(type_code type);
155 		bool		CompareTo(const uint8 *value, uint16 size);
156 		uint8		*Value() const { return (uint8 *)&fValue; }
157 		status_t	MatchEmptyString();
158 
159 		char		*fAttribute;
160 		char		*fString;
161 		union value fValue;
162 		type_code	fType;
163 		size_t		fSize;
164 		bool		fIsPattern;
165 		bool		fIsSpecialTime;
166 
167 		int32		fScore;
168 		bool		fHasIndex;
169 };
170 
171 class Operator : public Term {
172 	public:
173 		Operator(Term *,int8,Term *);
174 		~Operator();
175 
176 		Term		*Left() const { return fLeft; }
177 		Term		*Right() const { return fRight; }
178 
179 		virtual status_t Match(Inode *inode, const char *attribute = NULL, int32 type = 0,
180 			const uint8 *key = NULL, size_t size = 0);
181 		virtual void Complement();
182 
183 		virtual void CalculateScore(Index &index);
184 		virtual int32 Score() const;
185 
186 		virtual status_t InitCheck();
187 
188 		//Term		*Copy() const;
189 #ifdef DEBUG
190 		virtual void PrintToStream();
191 #endif
192 
193 	private:
194 		Operator(const Operator &);
195 		Operator &operator=(const Operator &);
196 			// no implementation
197 
198 		Term		*fLeft,*fRight;
199 };
200 
201 
202 //---------------------------------
203 
204 
205 void
206 skipWhitespace(char **expr, int32 skip = 0)
207 {
208 	char *string = (*expr) + skip;
209 	while (*string == ' ' || *string == '\t') string++;
210 	*expr = string;
211 }
212 
213 
214 void
215 skipWhitespaceReverse(char **expr,char *stop)
216 {
217 	char *string = *expr;
218 	while (string > stop && (*string == ' ' || *string == '\t')) string--;
219 	*expr = string;
220 }
221 
222 
223 //	#pragma mark -
224 
225 
226 uint32
227 utf8ToUnicode(char **string)
228 {
229 	uint8 *bytes = (uint8 *)*string;
230 	int32 length;
231 	uint8 mask = 0x1f;
232 
233 	switch (bytes[0] & 0xf0) {
234 		case 0xc0:
235 		case 0xd0:	length = 2; break;
236 		case 0xe0:	length = 3; break;
237 		case 0xf0:
238 			mask = 0x0f;
239 			length = 4;
240 			break;
241 		default:
242 			// valid 1-byte character
243 			// and invalid characters
244 			(*string)++;
245 			return bytes[0];
246 	}
247 	uint32 c = bytes[0] & mask;
248 	int32 i = 1;
249 	for (;i < length && (bytes[i] & 0x80) > 0;i++)
250 		c = (c << 6) | (bytes[i] & 0x3f);
251 
252 	if (i < length) {
253 		// invalid character
254 		(*string)++;
255 		return (uint32)bytes[0];
256 	}
257 	*string += length;
258 	return c;
259 }
260 
261 
262 int32
263 getFirstPatternSymbol(char *string)
264 {
265 	char c;
266 
267 	for (int32 index = 0;(c = *string++);index++) {
268 		if (c == '*' || c == '?' || c == '[')
269 			return index;
270 	}
271 	return -1;
272 }
273 
274 
275 bool
276 isPattern(char *string)
277 {
278 	return getFirstPatternSymbol(string) >= 0 ? true : false;
279 }
280 
281 
282 status_t
283 isValidPattern(char *pattern)
284 {
285 	while (*pattern) {
286 		switch (*pattern++) {
287 			case '\\':
288 				// the escape character must not be at the end of the pattern
289 				if (!*pattern++)
290 					return PATTERN_INVALID_ESCAPE;
291 				break;
292 
293 			case '[':
294 				if (pattern[0] == ']' || !pattern[0])
295 					return PATTERN_INVALID_SET;
296 
297 				while (*pattern != ']') {
298 					if (*pattern == '\\' && !*++pattern)
299 						return PATTERN_INVALID_ESCAPE;
300 
301 					if (!*pattern)
302 						return PATTERN_INVALID_SET;
303 
304 					if (pattern[0] == '-' && pattern[1] == '-')
305 						return PATTERN_INVALID_RANGE;
306 
307 					pattern++;
308 				}
309 				break;
310 		}
311 	}
312 	return B_OK;
313 }
314 
315 
316 /**	Matches the string against the given wildcard pattern.
317  *	Returns either MATCH_OK, or NO_MATCH when everything went fine,
318  *	or values < 0 (see enum at the top of Query.cpp) if an error
319  *	occurs
320  */
321 
322 status_t
323 matchString(char *pattern, char *string)
324 {
325 	while (*pattern) {
326 		// end of string == valid end of pattern?
327 		if (!string[0]) {
328 			while (pattern[0] == '*')
329 				pattern++;
330 			return !pattern[0] ? MATCH_OK : NO_MATCH;
331 		}
332 
333 		switch (*pattern++) {
334 			case '?':
335 			{
336 				// match exactly one UTF-8 character; we are
337 				// not interested in the result
338 				utf8ToUnicode(&string);
339 				break;
340 			}
341 
342 			case '*':
343 			{
344 				// compact pattern
345 				while (true) {
346 					if (pattern[0] == '?') {
347 						if (!*++string)
348 							return NO_MATCH;
349 					} else if (pattern[0] != '*')
350 						break;
351 
352 					pattern++;
353 				}
354 
355 				// if the pattern is done, we have matched the string
356 				if (!pattern[0])
357 					return MATCH_OK;
358 
359 				while(true) {
360 					// we have removed all occurences of '*' and '?'
361 					if (pattern[0] == string[0]
362 						|| pattern[0] == '['
363 						|| pattern[0] == '\\') {
364 						status_t status = matchString(pattern,string);
365 						if (status < B_OK || status == MATCH_OK)
366 							return status;
367 					}
368 
369 					// we could be nice here and just jump to the next
370 					// UTF-8 character - but we wouldn't gain that much
371 					// and it'd be slower (since we're checking for
372 					// equality before entering the recursion)
373 					if (!*++string)
374 						return NO_MATCH;
375 				}
376 				break;
377 			}
378 
379 			case '[':
380 			{
381 				bool invert = false;
382 				if (pattern[0] == '^' || pattern[0] == '!') {
383 					invert = true;
384 					pattern++;
385 				}
386 
387 				if (!pattern[0] || pattern[0] == ']')
388 					return MATCH_BAD_PATTERN;
389 
390 				uint32 c = utf8ToUnicode(&string);
391 				bool matched = false;
392 
393 				while (pattern[0] != ']') {
394 					if (!pattern[0])
395 						return MATCH_BAD_PATTERN;
396 
397 					if (pattern[0] == '\\')
398 						pattern++;
399 
400 					uint32 first = utf8ToUnicode(&pattern);
401 
402 					// Does this character match, or is this a range?
403 					if (first == c) {
404 						matched = true;
405 						break;
406 					} else if (pattern[0] == '-' && pattern[1] != ']' && pattern[1]) {
407 						pattern++;
408 
409 						if (pattern[0] == '\\') {
410 							pattern++;
411 							if (!pattern[0])
412 								return MATCH_BAD_PATTERN;
413 						}
414 						uint32 last = utf8ToUnicode(&pattern);
415 
416 						if (c >= first && c <= last) {
417 							matched = true;
418 							break;
419 						}
420 					}
421 				}
422 
423 				if (invert)
424 					matched = !matched;
425 
426 				if (matched) {
427 					while (pattern[0] != ']') {
428 						if (!pattern[0])
429 							return MATCH_BAD_PATTERN;
430 						pattern++;
431 					}
432 					pattern++;
433 					break;
434 				}
435 				return NO_MATCH;
436 			}
437 
438             case '\\':
439 				if (!pattern[0])
440 					return MATCH_BAD_PATTERN;
441 				// supposed to fall through
442 			default:
443 				if (pattern[-1] != string[0])
444 					return NO_MATCH;
445 				string++;
446 				break;
447 		}
448 	}
449 
450 	if (string[0])
451 		return NO_MATCH;
452 
453 	return MATCH_OK;
454 }
455 
456 
457 //	#pragma mark -
458 
459 
460 Equation::Equation(char **expr)
461 	: Term(OP_EQUATION),
462 	fAttribute(NULL),
463 	fString(NULL),
464 	fType(0),
465 	fIsPattern(false)
466 {
467 	char *string = *expr;
468 	char *start = string;
469 	char *end = NULL;
470 
471 	// Since the equation is the integral part of any query, we're just parsing
472 	// the whole thing here.
473 	// The whitespace at the start is already removed in Expression::ParseEquation()
474 
475 	if (*start == '"' || *start == '\'') {
476 		// string is quoted (start has to be on the beginning of a string)
477 		if (ParseQuotedString(&start, &end) < B_OK)
478 			return;
479 
480 		// set string to a valid start of the equation symbol
481 		string = end + 2;
482 		skipWhitespace(&string);
483 		if (*string != '=' && *string != '<' && *string != '>' && *string != '!') {
484 			*expr = string;
485 			return;
486 		}
487 	} else {
488 		// search the (in)equation for the actual equation symbol (and for other operators
489 		// in case the equation is malformed)
490 		while (*string && *string != '=' && *string != '<' && *string != '>' && *string != '!'
491 			&& *string != '&' && *string != '|')
492 			string++;
493 
494 		// get the attribute string	(and trim whitespace), in case
495 		// the string was not quoted
496 		end = string - 1;
497 		skipWhitespaceReverse(&end, start);
498 	}
499 
500 	// attribute string is empty (which is not allowed)
501 	if (start > end)
502 		return;
503 
504 	// at this point, "start" points to the beginning of the string, "end" points
505 	// to the last character of the string, and "string" points to the first
506 	// character of the equation symbol
507 
508 	// test for the right symbol (as this doesn't need any memory)
509 	switch (*string) {
510 		case '=':
511 			fOp = OP_EQUAL;
512 			break;
513 		case '>':
514 			fOp = *(string + 1) == '=' ? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN;
515 			break;
516 		case '<':
517 			fOp = *(string + 1) == '=' ? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN;
518 			break;
519 		case '!':
520 			if (*(string + 1) != '=')
521 				return;
522 			fOp = OP_UNEQUAL;
523 			break;
524 
525 		// any invalid characters will be rejected
526 		default:
527 			*expr = string;
528 			return;
529 	}
530 	// lets change "start" to point to the first character after the symbol
531 	if (*(string + 1) == '=')
532 		string++;
533 	string++;
534 	skipWhitespace(&string);
535 
536 	// allocate & copy the attribute string
537 
538 	fAttribute = CopyString(start, end);
539 	if (fAttribute == NULL)
540 		return;
541 
542 	start = string;
543 	if (*start == '"' || *start == '\'') {
544 		// string is quoted (start has to be on the beginning of a string)
545 		if (ParseQuotedString(&start, &end) < B_OK)
546 			return;
547 
548 		string = end + 2;
549 		skipWhitespace(&string);
550 	} else {
551 		while (*string && *string != '&' && *string != '|' && *string != ')')
552 			string++;
553 
554 		end = string - 1;
555 		skipWhitespaceReverse(&end, start);
556 	}
557 
558 	// at this point, "start" will point to the first character of the value,
559 	// "end" will point to its last character, and "start" to the first non-
560 	// whitespace character after the value string
561 
562 	fString = CopyString(start, end);
563 	if (fString == NULL)
564 		return;
565 
566 	// patterns are only allowed for these operations (and strings)
567 	if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) {
568 		fIsPattern = isPattern(fString);
569 		if (fIsPattern && isValidPattern(fString) < B_OK) {
570 			// we only want to have valid patterns; setting fString
571 			// to NULL will cause InitCheck() to fail
572 			free(fString);
573 			fString = NULL;
574 		}
575 	}
576 
577 	// The special time flag is set if the time values are shifted
578 	// 64-bit values to reduce the number of duplicates.
579 	// We have to be able to compare them against unshifted values
580 	// later. The only index which needs this is the last_modified
581 	// index, but we may want to open that feature for other indices,
582 	// too one day.
583 	fIsSpecialTime = !strcmp(fAttribute, "last_modified");
584 
585 	*expr = string;
586 }
587 
588 
589 Equation::~Equation()
590 {
591 	if (fAttribute != NULL)
592 		free(fAttribute);
593 	if (fString != NULL)
594 		free(fString);
595 }
596 
597 
598 status_t
599 Equation::InitCheck()
600 {
601 	if (fAttribute == NULL
602 		|| fString == NULL
603 		|| fOp == OP_NONE)
604 		return B_BAD_VALUE;
605 
606 	return B_OK;
607 }
608 
609 
610 status_t
611 Equation::ParseQuotedString(char **_start, char **_end)
612 {
613 	char *start = *_start;
614 	char quote = *start++;
615 	char *end = start;
616 
617 	for (;*end && *end != quote;end++) {
618 		if (*end == '\\')
619 			end++;
620 	}
621 	if (*end == '\0')
622 		return B_BAD_VALUE;
623 
624 	*_start = start;
625 	*_end = end - 1;
626 
627 	return B_OK;
628 }
629 
630 
631 char *
632 Equation::CopyString(char *start, char *end)
633 {
634 	// end points to the last character of the string - and the length
635 	// also has to include the null-termination
636 	int32 length = end + 2 - start;
637 	// just to make sure; since that's the max. attribute name length and
638 	// the max. string in an index, it make sense to have it that way
639 	if (length > INODE_FILE_NAME_LENGTH || length <= 0)
640 		return NULL;
641 
642 	char *copy = (char *)malloc(length);
643 	if (copy == NULL)
644 		return NULL;
645 
646 	memcpy(copy,start,length - 1);
647 	copy[length - 1] = '\0';
648 
649 	return copy;
650 }
651 
652 
653 status_t
654 Equation::ConvertValue(type_code type)
655 {
656 	// Has the type already been converted?
657 	if (type == fType)
658 		return B_OK;
659 
660 	char *string = fString;
661 
662 	switch (type) {
663 		case B_MIME_STRING_TYPE:
664 			type = B_STRING_TYPE;
665 			// supposed to fall through
666 		case B_STRING_TYPE:
667 			strncpy(fValue.String, string, INODE_FILE_NAME_LENGTH);
668 			fValue.String[INODE_FILE_NAME_LENGTH - 1] = '\0';
669 			fSize = strlen(fValue.String);
670 			break;
671 		case B_INT32_TYPE:
672 			fValue.Int32 = strtol(string, &string, 0);
673 			fSize = sizeof(int32);
674 			break;
675 		case B_UINT32_TYPE:
676 			fValue.Int32 = strtoul(string, &string, 0);
677 			fSize = sizeof(uint32);
678 			break;
679 		case B_INT64_TYPE:
680 			fValue.Int64 = strtoll(string, &string, 0);
681 			fSize = sizeof(int64);
682 			break;
683 		case B_UINT64_TYPE:
684 			fValue.Uint64 = strtoull(string, &string, 0);
685 			fSize = sizeof(uint64);
686 			break;
687 		case B_FLOAT_TYPE:
688 			fValue.Float = strtod(string, &string);
689 			fSize = sizeof(float);
690 			break;
691 		case B_DOUBLE_TYPE:
692 			fValue.Double = strtod(string, &string);
693 			fSize = sizeof(double);
694 			break;
695 		default:
696 			FATAL(("query value conversion to 0x%lx requested!\n", type));
697 			// should we fail here or just do a safety int32 conversion?
698 			return B_ERROR;
699 	}
700 
701 	fType = type;
702 
703 	// patterns are only allowed for string types
704 	if (fType != B_STRING_TYPE && fIsPattern)
705 		fIsPattern = false;
706 
707 	return B_OK;
708 }
709 
710 
711 /**	Returns true when the key matches the equation. You have to
712  *	call ConvertValue() before this one.
713  */
714 
715 bool
716 Equation::CompareTo(const uint8 *value, uint16 size)
717 {
718 	int32 compare;
719 
720 	// fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or OP_UNEQUAL
721 	if (fIsPattern) {
722 		// we have already validated the pattern, so we don't check for failing
723 		// here - if something is broken, and matchString() returns an error,
724 		// we just don't match
725 		compare = matchString(fValue.String, (char *)value) == MATCH_OK ? 0 : 1;
726 	} else if (fIsSpecialTime) {
727 		// the index is a shifted int64 index, but we have to match
728 		// against an unshifted value (i.e. the last_modified index)
729 		int64 timeValue = *(int64 *)value >> INODE_TIME_SHIFT;
730 		compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64, sizeof(int64));
731 	} else
732 		compare = compareKeys(fType, value, size, Value(), fSize);
733 
734 	switch (fOp) {
735 		case OP_EQUAL:
736 			return compare == 0;
737 		case OP_UNEQUAL:
738 			return compare != 0;
739 		case OP_LESS_THAN:
740 			return compare < 0;
741 		case OP_LESS_THAN_OR_EQUAL:
742 			return compare <= 0;
743 		case OP_GREATER_THAN:
744 			return compare > 0;
745 		case OP_GREATER_THAN_OR_EQUAL:
746 			return compare >= 0;
747 	}
748 	FATAL(("Unknown/Unsupported operation: %d\n", fOp));
749 	return false;
750 }
751 
752 
753 void
754 Equation::Complement()
755 {
756 	D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) {
757 		FATAL(("op out of range!"));
758 		return;
759 	});
760 
761 	int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL,
762 			OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN};
763 	fOp = complementOp[fOp - OP_EQUAL];
764 }
765 
766 
767 status_t
768 Equation::MatchEmptyString()
769 {
770 	// there is no matching attribute, we will just bail out if we
771 	// already know that our value is not of a string type.
772 	// If not, it will be converted to a string - and then be compared with "".
773 	// That's why we have to call ConvertValue() here - but it will be
774 	// a cheap call for the next time
775 	// Should we do this only for OP_UNEQUAL?
776 	if (fType != 0 && fType != B_STRING_TYPE)
777 		return NO_MATCH;
778 
779 	status_t status = ConvertValue(B_STRING_TYPE);
780 	if (status == B_OK)
781 		status = CompareTo((const uint8 *)"", fSize) ? MATCH_OK : NO_MATCH;
782 
783 	return status;
784 }
785 
786 
787 /**	Matches the inode's attribute value with the equation.
788  *	Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went wrong
789  */
790 
791 status_t
792 Equation::Match(Inode *inode, const char *attributeName, int32 type, const uint8 *key, size_t size)
793 {
794 	// get a pointer to the attribute in question
795 	union value value;
796 	uint8 *buffer;
797 	bool locked = false;
798 
799 	// first, check if we are matching for a live query and use that value
800 	if (attributeName != NULL && !strcmp(fAttribute, attributeName)) {
801 		if (key == NULL) {
802 			if (type == B_STRING_TYPE)
803 				return MatchEmptyString();
804 
805 			return NO_MATCH;
806 		}
807 		buffer = const_cast<uint8 *>(key);
808 	} else if (!strcmp(fAttribute, "name")) {
809 		// we need to lock before accessing Inode::Name()
810 		inode->SmallDataLock().Lock();
811 		locked = true;
812 
813 		// if not, check for "fake" attributes, "name", "size", "last_modified",
814 		buffer = (uint8 *)inode->Name();
815 		if (buffer == NULL) {
816 			inode->SmallDataLock().Unlock();
817 			return B_ERROR;
818 		}
819 
820 		type = B_STRING_TYPE;
821 		size = strlen((const char *)buffer);
822 	} else if (!strcmp(fAttribute,"size")) {
823 		buffer = (uint8 *)&inode->Node()->data.size;
824 		type = B_INT64_TYPE;
825 	} else if (!strcmp(fAttribute,"last_modified")) {
826 		buffer = (uint8 *)&inode->Node()->last_modified_time;
827 		type = B_INT64_TYPE;
828 	} else {
829 		// then for attributes in the small_data section, and finally for the
830 		// real attributes
831 		Inode *attribute;
832 
833 		inode->SmallDataLock().Lock();
834 		small_data *smallData = inode->FindSmallData(fAttribute);
835 		if (smallData != NULL) {
836 			buffer = smallData->Data();
837 			type = smallData->type;
838 			size = smallData->data_size;
839 			locked = true;
840 		} else {
841 			// needed to unlock the small_data section as fast as possible
842 			inode->SmallDataLock().Unlock();
843 
844 			if (inode->GetAttribute(fAttribute, &attribute) == B_OK) {
845 				buffer = (uint8 *)&value;
846 				type = attribute->Node()->type;
847 				size = attribute->Size();
848 
849 				if (size > INODE_FILE_NAME_LENGTH)
850 					size = INODE_FILE_NAME_LENGTH;
851 
852 				if (attribute->ReadAt(0, buffer, &size) < B_OK) {
853 					inode->ReleaseAttribute(attribute);
854 					return B_IO_ERROR;
855 				}
856 				inode->ReleaseAttribute(attribute);
857 			} else
858 				return MatchEmptyString();
859 		}
860 	}
861 	// prepare own value for use, if it is possible to convert it
862 	status_t status = ConvertValue(type);
863 	if (status == B_OK)
864 		status = CompareTo(buffer, size) ? MATCH_OK : NO_MATCH;
865 
866 	if (locked)
867 		inode->SmallDataLock().Unlock();
868 
869 	RETURN_ERROR(status);
870 }
871 
872 
873 void
874 Equation::CalculateScore(Index &index)
875 {
876 	// As always, these values could be tuned and refined.
877 	// And the code could also need some real world testing :-)
878 
879 	// do we have to operate on a "foreign" index?
880 	if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) {
881 		fScore = 0;
882 		return;
883 	}
884 
885 	// if we have a pattern, how much does it help our search?
886 	if (fIsPattern)
887 		fScore = getFirstPatternSymbol(fString) << 3;
888 	else {
889 		// Score by operator
890 		if (fOp == OP_EQUAL)
891 			// higher than pattern="255 chars+*"
892 			fScore = 2048;
893 		else
894 			// the pattern search is regarded cheaper when you have at
895 			// least one character to set your index to
896 			fScore = 5;
897 	}
898 
899 	// take index size into account (1024 is the current node size
900 	// in our B+trees)
901 	// 2048 * 2048 == 4194304 is the maximum score (for an empty
902 	// tree, since the header + 1 node are already 2048 bytes)
903 	fScore = fScore * ((2048 * 1024LL) / index.Node()->Size());
904 }
905 
906 
907 status_t
908 Equation::PrepareQuery(Volume */*volume*/, Index &index, TreeIterator **iterator)
909 {
910 	type_code type;
911 	status_t status = index.SetTo(fAttribute);
912 
913 	// special case for OP_UNEQUAL - it will always operate through the whole index
914 	// but we need the call to the original index to get the correct type
915 	if (status < B_OK || fOp == OP_UNEQUAL) {
916 		// Try to get an index that holds all files (name)
917 		// Also sets the default type for all attributes without index
918 		// to string.
919 		type = status < B_OK ? B_STRING_TYPE : index.Type();
920 
921 		if (index.SetTo("name") < B_OK)
922 			return B_ENTRY_NOT_FOUND;
923 
924 		fHasIndex = false;
925 	} else {
926 		fHasIndex = true;
927 		type = index.Type();
928 	}
929 
930 	if (ConvertValue(type) < B_OK)
931 		return B_BAD_VALUE;
932 
933 	BPlusTree *tree;
934 	if (index.Node()->GetTree(&tree) < B_OK)
935 		return B_ERROR;
936 
937 	*iterator = new TreeIterator(tree);
938 	if (*iterator == NULL)
939 		return B_NO_MEMORY;
940 
941 	if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL
942 		|| fIsPattern)
943 		&& fHasIndex) {
944 		// set iterator to the exact position
945 
946 		int32 keySize = index.KeySize();
947 
948 		// at this point, fIsPattern is only true if it's a string type, and fOp
949 		// is either OP_EQUAL or OP_UNEQUAL
950 		if (fIsPattern) {
951 			// let's see if we can use the beginning of the key for positioning
952 			// the iterator and adjust the key size; if not, just leave the
953 			// iterator at the start and return success
954 			keySize = getFirstPatternSymbol(fString);
955 			if (keySize <= 0)
956 				return B_OK;
957 		}
958 
959 		if (keySize == 0) {
960 			if (fType == B_STRING_TYPE)
961 				keySize = strlen(fValue.String);
962 			else
963 				RETURN_ERROR(B_ENTRY_NOT_FOUND);
964 		}
965 
966 		if (fIsSpecialTime) {
967 			// we have to find the first matching shifted value
968 			off_t value = fValue.Int64 << INODE_TIME_SHIFT;
969 			status = (*iterator)->Find((uint8 *)&value, keySize);
970 			if (status == B_ENTRY_NOT_FOUND)
971 				return B_OK;
972 		} else {
973 			status = (*iterator)->Find(Value(),keySize);
974 			if (fOp == OP_EQUAL && !fIsPattern)
975 				return status;
976 			else if (status == B_ENTRY_NOT_FOUND
977 				&& (fIsPattern || fOp == OP_GREATER_THAN || fOp == OP_GREATER_THAN_OR_EQUAL))
978 				return B_OK;
979 		}
980 
981 		RETURN_ERROR(status);
982 	}
983 
984 	return B_OK;
985 }
986 
987 
988 status_t
989 Equation::GetNextMatching(Volume *volume, TreeIterator *iterator,
990 	struct dirent *dirent, size_t bufferSize)
991 {
992 	while (true) {
993 		union value indexValue;
994 		uint16 keyLength;
995 		uint16 duplicate;
996 		off_t offset;
997 
998 		status_t status = iterator->GetNextEntry(&indexValue, &keyLength,
999 			(uint16)sizeof(indexValue), &offset, &duplicate);
1000 		if (status < B_OK)
1001 			return status;
1002 
1003 		// only compare against the index entry when this is the correct
1004 		// index for the equation
1005 		if (fHasIndex && duplicate < 2 && !CompareTo((uint8 *)&indexValue, keyLength)) {
1006 			// They aren't equal? let the operation decide what to do
1007 			// Since we always start at the beginning of the index (or the correct
1008 			// position), only some needs to be stopped if the entry doesn't fit.
1009 			if (fOp == OP_LESS_THAN
1010 				|| fOp == OP_LESS_THAN_OR_EQUAL
1011 				|| (fOp == OP_EQUAL && !fIsPattern))
1012 				return B_ENTRY_NOT_FOUND;
1013 
1014 			if (duplicate > 0)
1015 				iterator->SkipDuplicates();
1016 			continue;
1017 		}
1018 
1019 		Inode *inode;
1020 		if ((status = get_vnode(volume->ID(), offset, (void **)&inode)) != B_OK) {
1021 			REPORT_ERROR(status);
1022 			FATAL(("could not get inode %Ld in index \"%s\"!\n", offset, fAttribute));
1023 			// try with next
1024 			continue;
1025 		}
1026 
1027 		// ToDo: check user permissions here - but which one?!
1028 		// we could filter out all those where we don't have
1029 		// read access... (we should check for every parent
1030 		// directory if the X_OK is allowed)
1031 		// Although it's quite expensive to open all parents,
1032 		// it's likely that the application that runs the
1033 		// query will do something similar (and we don't have
1034 		// to do it for root, either).
1035 
1036 		// go up in the tree until a &&-operator is found, and check if the
1037 		// inode matches with the rest of the expression - we don't have to
1038 		// check ||-operators for that
1039 		Term *term = this;
1040 		status = MATCH_OK;
1041 
1042 		if (!fHasIndex)
1043 			status = Match(inode);
1044 
1045 		while (term != NULL && status == MATCH_OK) {
1046 			Operator *parent = (Operator *)term->Parent();
1047 			if (parent == NULL)
1048 				break;
1049 
1050 			if (parent->Op() == OP_AND) {
1051 				// choose the other child of the parent
1052 				Term *other = parent->Right();
1053 				if (other == term)
1054 					other = parent->Left();
1055 
1056 				if (other == NULL) {
1057 					FATAL(("&&-operator has only one child... (parent = %p)\n", parent));
1058 					break;
1059 				}
1060 				status = other->Match(inode);
1061 				if (status < 0) {
1062 					REPORT_ERROR(status);
1063 					status = NO_MATCH;
1064 				}
1065 			}
1066 			term = (Term *)parent;
1067 		}
1068 
1069 		if (status == MATCH_OK) {
1070 			dirent->d_dev = volume->ID();
1071 			dirent->d_ino = offset;
1072 			dirent->d_pdev = volume->ID();
1073 			dirent->d_pino = volume->ToVnode(inode->Parent());
1074 
1075 			if (inode->GetName(dirent->d_name) < B_OK)
1076 				FATAL(("inode %Ld in query has no name!\n", inode->BlockNumber()));
1077 
1078 #ifdef KEEP_WRONG_DIRENT_RECLEN
1079 			// ToDo: The available file systems in BeOS apparently don't set the
1080 			// correct d_reclen - we are copying that behaviour if requested, but
1081 			// if it doesn't break compatibility, we will remove it.
1082 			dirent->d_reclen = strlen(dirent->d_name);
1083 #else
1084 			dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name);
1085 #endif
1086 		}
1087 
1088 		put_vnode(volume->ID(), inode->ID());
1089 
1090 		if (status == MATCH_OK)
1091 			return B_OK;
1092 	}
1093 	RETURN_ERROR(B_ERROR);
1094 }
1095 
1096 
1097 //	#pragma mark -
1098 
1099 
1100 Operator::Operator(Term *left, int8 op, Term *right)
1101 	: Term(op),
1102 	fLeft(left),
1103 	fRight(right)
1104 {
1105 	if (left)
1106 		left->SetParent(this);
1107 	if (right)
1108 		right->SetParent(this);
1109 }
1110 
1111 
1112 Operator::~Operator()
1113 {
1114 	delete fLeft;
1115 	delete fRight;
1116 }
1117 
1118 
1119 status_t
1120 Operator::Match(Inode *inode, const char *attribute, int32 type, const uint8 *key, size_t size)
1121 {
1122 	if (fOp == OP_AND) {
1123 		status_t status = fLeft->Match(inode, attribute, type, key, size);
1124 		if (status != MATCH_OK)
1125 			return status;
1126 
1127 		return fRight->Match(inode, attribute, type, key, size);
1128 	} else {
1129 		// choose the term with the better score for OP_OR
1130 		if (fRight->Score() > fLeft->Score()) {
1131 			status_t status = fRight->Match(inode, attribute, type, key, size);
1132 			if (status != NO_MATCH)
1133 				return status;
1134 		}
1135 		return fLeft->Match(inode, attribute, type, key, size);
1136 	}
1137 }
1138 
1139 
1140 void
1141 Operator::Complement()
1142 {
1143 	if (fOp == OP_AND)
1144 		fOp = OP_OR;
1145 	else
1146 		fOp = OP_AND;
1147 
1148 	fLeft->Complement();
1149 	fRight->Complement();
1150 }
1151 
1152 
1153 void
1154 Operator::CalculateScore(Index &index)
1155 {
1156 	fLeft->CalculateScore(index);
1157 	fRight->CalculateScore(index);
1158 }
1159 
1160 
1161 int32
1162 Operator::Score() const
1163 {
1164 	if (fOp == OP_AND) {
1165 		// return the one with the better score
1166 		if (fRight->Score() > fLeft->Score())
1167 			return fRight->Score();
1168 
1169 		return fLeft->Score();
1170 	}
1171 
1172 	// for OP_OR, be honest, and return the one with the worse score
1173 	if (fRight->Score() < fLeft->Score())
1174 		return fRight->Score();
1175 
1176 	return fLeft->Score();
1177 }
1178 
1179 
1180 status_t
1181 Operator::InitCheck()
1182 {
1183 	if (fOp != OP_AND && fOp != OP_OR
1184 		|| fLeft == NULL || fLeft->InitCheck() < B_OK
1185 		|| fRight == NULL || fRight->InitCheck() < B_OK)
1186 		return B_ERROR;
1187 
1188 	return B_OK;
1189 }
1190 
1191 
1192 #if 0
1193 Term *
1194 Operator::Copy() const
1195 {
1196 	if (fEquation != NULL) {
1197 		Equation *equation = new Equation(*fEquation);
1198 		if (equation == NULL)
1199 			return NULL;
1200 
1201 		Term *term = new Term(equation);
1202 		if (term == NULL)
1203 			delete equation;
1204 
1205 		return term;
1206 	}
1207 
1208 	Term *left = NULL, *right = NULL;
1209 
1210 	if (fLeft != NULL && (left = fLeft->Copy()) == NULL)
1211 		return NULL;
1212 	if (fRight != NULL && (right = fRight->Copy()) == NULL) {
1213 		delete left;
1214 		return NULL;
1215 	}
1216 
1217 	Term *term = new Term(left,fOp,right);
1218 	if (term == NULL) {
1219 		delete left;
1220 		delete right;
1221 		return NULL;
1222 	}
1223 	return term;
1224 }
1225 #endif
1226 
1227 
1228 //	#pragma mark -
1229 
1230 #ifdef DEBUG
1231 void
1232 Operator::PrintToStream()
1233 {
1234 	D(__out("( "));
1235 	if (fLeft != NULL)
1236 		fLeft->PrintToStream();
1237 
1238 	char *op;
1239 	switch (fOp) {
1240 		case OP_OR: op = "OR"; break;
1241 		case OP_AND: op = "AND"; break;
1242 		default: op = "?"; break;
1243 	}
1244 	D(__out(" %s ",op));
1245 
1246 	if (fRight != NULL)
1247 		fRight->PrintToStream();
1248 
1249 	D(__out(" )"));
1250 }
1251 
1252 
1253 void
1254 Equation::PrintToStream()
1255 {
1256 	char *symbol = "???";
1257 	switch (fOp) {
1258 		case OP_EQUAL: symbol = "=="; break;
1259 		case OP_UNEQUAL: symbol = "!="; break;
1260 		case OP_GREATER_THAN: symbol = ">"; break;
1261 		case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break;
1262 		case OP_LESS_THAN: symbol = "<"; break;
1263 		case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break;
1264 	}
1265 	D(__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString));
1266 }
1267 
1268 #endif	/* DEBUG */
1269 
1270 //	#pragma mark -
1271 
1272 
1273 Expression::Expression(char *expr)
1274 {
1275 	if (expr == NULL)
1276 		return;
1277 
1278 	fTerm = ParseOr(&expr);
1279 	if (fTerm != NULL && fTerm->InitCheck() < B_OK) {
1280 		FATAL(("Corrupt tree in expression!\n"));
1281 		delete fTerm;
1282 		fTerm = NULL;
1283 	}
1284 	D(if (fTerm != NULL) {
1285 		fTerm->PrintToStream();
1286 		D(__out("\n"));
1287 		if (*expr != '\0')
1288 			PRINT(("Unexpected end of string: \"%s\"!\n", expr));
1289 	});
1290 	fPosition = expr;
1291 }
1292 
1293 
1294 Expression::~Expression()
1295 {
1296 	delete fTerm;
1297 }
1298 
1299 
1300 Term *
1301 Expression::ParseEquation(char **expr)
1302 {
1303 	skipWhitespace(expr);
1304 
1305 	bool not = false;
1306 	if (**expr == '!') {
1307 		skipWhitespace(expr, 1);
1308 		if (**expr != '(')
1309 			return NULL;
1310 
1311 		not = true;
1312 	}
1313 
1314 	if (**expr == ')') {
1315 		// shouldn't be handled here
1316 		return NULL;
1317 	} else if (**expr == '(') {
1318 		skipWhitespace(expr, 1);
1319 
1320 		Term *term = ParseOr(expr);
1321 
1322 		skipWhitespace(expr);
1323 
1324 		if (**expr != ')') {
1325 			delete term;
1326 			return NULL;
1327 		}
1328 
1329 		// If the term is negated, we just complement the tree, to get
1330 		// rid of the not, a.k.a. DeMorgan's Law.
1331 		if (not)
1332 			term->Complement();
1333 
1334 		skipWhitespace(expr, 1);
1335 
1336 		return term;
1337 	}
1338 
1339 	Equation *equation = new Equation(expr);
1340 	if (equation == NULL || equation->InitCheck() < B_OK) {
1341 		delete equation;
1342 		return NULL;
1343 	}
1344 	return equation;
1345 }
1346 
1347 
1348 Term *
1349 Expression::ParseAnd(char **expr)
1350 {
1351 	Term *left = ParseEquation(expr);
1352 	if (left == NULL)
1353 		return NULL;
1354 
1355 	while (IsOperator(expr,'&')) {
1356 		Term *right = ParseAnd(expr);
1357 		Term *newParent = NULL;
1358 
1359 		if (right == NULL || (newParent = new Operator(left, OP_AND, right)) == NULL) {
1360 			delete left;
1361 			delete right;
1362 
1363 			return NULL;
1364 		}
1365 		left = newParent;
1366 	}
1367 
1368 	return left;
1369 }
1370 
1371 
1372 Term *
1373 Expression::ParseOr(char **expr)
1374 {
1375 	Term *left = ParseAnd(expr);
1376 	if (left == NULL)
1377 		return NULL;
1378 
1379 	while (IsOperator(expr,'|')) {
1380 		Term *right = ParseAnd(expr);
1381 		Term *newParent = NULL;
1382 
1383 		if (right == NULL || (newParent = new Operator(left, OP_OR, right)) == NULL) {
1384 			delete left;
1385 			delete right;
1386 
1387 			return NULL;
1388 		}
1389 		left = newParent;
1390 	}
1391 
1392 	return left;
1393 }
1394 
1395 
1396 bool
1397 Expression::IsOperator(char **expr, char op)
1398 {
1399 	char *string = *expr;
1400 
1401 	if (*string == op && *(string + 1) == op) {
1402 		*expr += 2;
1403 		return true;
1404 	}
1405 	return false;
1406 }
1407 
1408 
1409 status_t
1410 Expression::InitCheck()
1411 {
1412 	if (fTerm == NULL)
1413 		return B_BAD_VALUE;
1414 
1415 	return B_OK;
1416 }
1417 
1418 
1419 //	#pragma mark -
1420 
1421 
1422 Query::Query(Volume *volume, Expression *expression)
1423 	:
1424 	fVolume(volume),
1425 	fExpression(expression),
1426 	fCurrent(NULL),
1427 	fIterator(NULL),
1428 	fIndex(volume),
1429 	fPort(-1)
1430 {
1431 	// if the expression has a valid root pointer, the whole tree has
1432 	// already passed the sanity check, so that we don't have to check
1433 	// every pointer
1434 	if (volume == NULL || expression == NULL || expression->Root() == NULL)
1435 		return;
1436 
1437 	// create index on the stack and delete it afterwards
1438 	fExpression->Root()->CalculateScore(fIndex);
1439 	fIndex.Unset();
1440 
1441 	Stack<Term *> stack;
1442 	stack.Push(fExpression->Root());
1443 
1444 	Term *term;
1445 	while (stack.Pop(&term)) {
1446 		if (term->Op() < OP_EQUATION) {
1447 			Operator *op = (Operator *)term;
1448 
1449 			if (op->Op() == OP_OR) {
1450 				stack.Push(op->Left());
1451 				stack.Push(op->Right());
1452 			} else {
1453 				// For OP_AND, we can use the scoring system to decide which path to add
1454 				if (op->Right()->Score() > op->Left()->Score())
1455 					stack.Push(op->Right());
1456 				else
1457 					stack.Push(op->Left());
1458 			}
1459 		} else if (term->Op() == OP_EQUATION || fStack.Push((Equation *)term) < B_OK)
1460 			FATAL(("Unknown term on stack or stack error"));
1461 	}
1462 
1463 	volume->AddQuery(this);
1464 }
1465 
1466 
1467 Query::~Query()
1468 {
1469 	fVolume->RemoveQuery(this);
1470 }
1471 
1472 
1473 status_t
1474 Query::GetNextEntry(struct dirent *dirent, size_t size)
1475 {
1476 	// If we don't have an equation to use yet/anymore, get a new one
1477 	// from the stack
1478 	while (true) {
1479 		if (fIterator == NULL) {
1480 			if (!fStack.Pop(&fCurrent)
1481 				|| fCurrent == NULL
1482 				|| fCurrent->PrepareQuery(fVolume, fIndex, &fIterator) < B_OK)
1483 				return B_ENTRY_NOT_FOUND;
1484 		}
1485 		if (fCurrent == NULL)
1486 			RETURN_ERROR(B_ERROR);
1487 
1488 		status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent, size);
1489 		if (status < B_OK) {
1490 			delete fIterator;
1491 			fIterator = NULL;
1492 			fCurrent = NULL;
1493 		} else {
1494 			// only return if we have another entry
1495 			return B_OK;
1496 		}
1497 	}
1498 }
1499 
1500 
1501 void
1502 Query::SetLiveMode(port_id port, int32 token)
1503 {
1504 	fPort = port;
1505 	fToken = token;
1506 }
1507 
1508 
1509 void
1510 Query::LiveUpdate(Inode *inode, const char *attribute, int32 type, const uint8 *oldKey,
1511 	size_t oldLength, const uint8 *newKey, size_t newLength)
1512 {
1513 	if (fPort < 0 || fExpression == NULL || attribute == NULL)
1514 		return;
1515 
1516 	// ToDo: check if the attribute is part of the query at all...
1517 
1518 	status_t oldStatus = fExpression->Root()->Match(inode, attribute, type, oldKey, oldLength);
1519 	status_t newStatus = fExpression->Root()->Match(inode, attribute, type, newKey, newLength);
1520 	int32 op;
1521 	if (oldStatus == MATCH_OK && newStatus == MATCH_OK) {
1522 		// only send out a notification if the name was changed
1523 		if (oldKey == NULL || strcmp(attribute,"name"))
1524 			return;
1525 
1526 		send_notification(fPort, fToken, B_QUERY_UPDATE, B_ENTRY_REMOVED, fVolume->ID(), 0,
1527 			fVolume->ToVnode(inode->Parent()), 0, inode->ID(), (const char *)oldKey);
1528 		op = B_ENTRY_CREATED;
1529 	} else if (oldStatus != MATCH_OK && newStatus != MATCH_OK) {
1530 		// nothing has changed
1531 		return;
1532 	} else if (oldStatus == MATCH_OK && newStatus != MATCH_OK)
1533 		op = B_ENTRY_REMOVED;
1534 	else
1535 		op = B_ENTRY_CREATED;
1536 
1537 	// if "value" is NULL, send_notification() crashes...
1538 	const char *value = (const char *)newKey;
1539 	if (type != B_STRING_TYPE || value == NULL)
1540 		value = "";
1541 
1542 	send_notification(fPort, fToken, B_QUERY_UPDATE, op, fVolume->ID(), 0,
1543 		fVolume->ToVnode(inode->Parent()), 0, inode->ID(), value);
1544 }
1545 
1546