xref: /haiku/src/add-ons/kernel/file_systems/bfs/Query.cpp (revision a5c0d1a80e18f50987966fda2005210092d7671b)
1 /*
2  * Copyright 2001-2020, Axel Dörfler, axeld@pinc-software.de.
3  * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de>
4  * This file may be used under the terms of the MIT License.
5  */
6 
7 
8 /*!	Query parsing and evaluation
9 
10 	The pattern matching is roughly based on code originally written
11 	by J. Kercheval, and on code written by Kenneth Almquist, though
12 	it shares no code.
13 */
14 
15 
16 #include "Query.h"
17 
18 #include <file_systems/QueryParserUtils.h>
19 #include <query_private.h>
20 
21 #include "BPlusTree.h"
22 #include "bfs.h"
23 #include "Debug.h"
24 #include "Index.h"
25 #include "Inode.h"
26 #include "Volume.h"
27 
28 
29 // The parser has a very static design, but it will do what is required.
30 //
31 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator
32 // precedence, that is =,!=,>,<,>=,<= .. && .. ||.
33 // Apparently, the "!" (not) can only be used with brackets.
34 //
35 // If you think that there are too few NULL pointer checks in some places
36 // of the code, just read the beginning of the query constructor.
37 // The API is not fully available, just the Query and the Expression class
38 // are.
39 
40 
41 using namespace QueryParser;
42 
43 
44 enum ops {
45 	OP_NONE,
46 
47 	OP_AND,
48 	OP_OR,
49 
50 	OP_EQUATION,
51 		// is only used for invalid equations
52 
53 	OP_EQUAL,
54 	OP_UNEQUAL,
55 	OP_GREATER_THAN,
56 	OP_LESS_THAN,
57 	OP_GREATER_THAN_OR_EQUAL,
58 	OP_LESS_THAN_OR_EQUAL,
59 };
60 
61 
62 union value {
63 	int64	Int64;
64 	uint64	Uint64;
65 	int32	Int32;
66 	uint32	Uint32;
67 	float	Float;
68 	double	Double;
69 	char	String[MAX_INDEX_KEY_LENGTH + 1];
70 };
71 
72 
73 /*!	Abstract base class for the operator/equation classes.
74 */
75 class Term {
76 public:
77 								Term(int8 op) : fOp(op), fParent(NULL) {}
78 	virtual						~Term() {}
79 
80 			int8				Op() const { return fOp; }
81 
82 			void				SetParent(Term* parent) { fParent = parent; }
83 			Term*				Parent() const { return fParent; }
84 
85 	virtual	status_t			Match(Inode* inode,
86 									const char* attribute = NULL,
87 									int32 type = 0, const uint8* key = NULL,
88 									size_t size = 0) = 0;
89 	virtual	void				Complement() = 0;
90 
91 	virtual	void				CalculateScore(Index& index) = 0;
92 	virtual	int32				Score() const = 0;
93 
94 	virtual	status_t			InitCheck() = 0;
95 
96 #ifdef DEBUG
97 	virtual	void				PrintToStream() = 0;
98 #endif
99 
100 protected:
101 			int8				fOp;
102 			Term*				fParent;
103 };
104 
105 
106 /*!	An Equation object represents an "attribute-equation operator-value" pair.
107 
108 	Although an Equation object is quite independent from the volume on which
109 	the query is run, there are some dependencies that are produced while
110 	querying:
111 	The type/size of the value, the score, and if it has an index or not.
112 	So you could run more than one query on the same volume, but it might return
113 	wrong values when it runs concurrently on another volume.
114 	That's not an issue right now, because we run single-threaded and don't use
115 	queries more than once.
116 */
117 class Equation : public Term {
118 public:
119 								Equation(char** _expression);
120 	virtual						~Equation();
121 
122 	virtual	status_t			InitCheck();
123 
124 	virtual	status_t			Match(Inode* inode,
125 									const char* attribute = NULL,
126 									int32 type = 0, const uint8* key = NULL,
127 									size_t size = 0);
128 	virtual void				Complement();
129 
130 			status_t			PrepareQuery(Volume* volume, Index& index,
131 									TreeIterator** iterator,
132 									bool queryNonIndexed);
133 			status_t			GetNextMatching(Volume* volume,
134 									TreeIterator* iterator,
135 									struct dirent* dirent, size_t bufferSize);
136 
137 	virtual	void				CalculateScore(Index &index);
138 	virtual	int32				Score() const { return fScore; }
139 
140 #ifdef DEBUG
141 	virtual	void				PrintToStream();
142 #endif
143 
144 private:
145 								Equation(const Equation& other);
146 								Equation& operator=(const Equation& other);
147 									// no implementation
148 
149 			status_t			_ParseQuotedString(char** _start, char** _end);
150 			char*				_CopyString(char* start, char* end);
151 	inline	bool				_IsEquationChar(char c) const;
152 	inline	bool				_IsOperatorChar(char c) const;
153 			status_t			_ConvertValue(type_code type);
154 			bool				_CompareTo(const uint8* value, uint16 size);
155 			uint8*				_Value() const { return (uint8*)&fValue; }
156 
157 private:
158 			char*				fAttribute;
159 			char*				fString;
160 			union value			fValue;
161 			type_code			fType;
162 			size_t				fSize;
163 			bool				fIsPattern;
164 			bool				fIsSpecialTime;
165 
166 			int32				fScore;
167 			bool				fHasIndex;
168 };
169 
170 
171 /*!	The Operator class does not represent a generic operator, but only those
172 	that combine two equations, namely "or", and "and".
173 */
174 class Operator : public Term {
175 public:
176 								Operator(Term* left, int8 op, Term* right);
177 	virtual						~Operator();
178 
179 			Term*				Left() const { return fLeft; }
180 			Term*				Right() const { return fRight; }
181 
182 	virtual	status_t			Match(Inode* inode,
183 									const char* attribute = NULL,
184 									int32 type = 0, const uint8* key = NULL,
185 									size_t size = 0);
186 	virtual	void				Complement();
187 
188 	virtual	void				CalculateScore(Index& index);
189 	virtual	int32				Score() const;
190 
191 	virtual	status_t			InitCheck();
192 
193 #ifdef DEBUG
194 	virtual	void				PrintToStream();
195 #endif
196 
197 private:
198 								Operator(const Operator& other);
199 								Operator& operator=(const Operator& other);
200 									// no implementation
201 
202 private:
203 			Term*				fLeft;
204 			Term*				fRight;
205 };
206 
207 
208 //	#pragma mark -
209 
210 
211 Equation::Equation(char** _expression)
212 	:
213 	Term(OP_EQUATION),
214 	fAttribute(NULL),
215 	fString(NULL),
216 	fType(0),
217 	fIsPattern(false)
218 {
219 	char* string = *_expression;
220 	char* start = string;
221 	char* end = NULL;
222 
223 	// Since the equation is the integral part of any query, we're just parsing
224 	// the whole thing here.
225 	// The whitespace at the start is already removed in
226 	// Expression::ParseEquation()
227 
228 	if (*start == '"' || *start == '\'') {
229 		// string is quoted (start has to be on the beginning of a string)
230 		if (_ParseQuotedString(&start, &end) < B_OK)
231 			return;
232 
233 		// set string to a valid start of the equation symbol
234 		string = end + 2;
235 		skipWhitespace(&string);
236 		if (!_IsEquationChar(string[0])) {
237 			*_expression = string;
238 			return;
239 		}
240 	} else {
241 		// search the (in)equation for the actual equation symbol (and for other operators
242 		// in case the equation is malformed)
243 		while (string[0] != 0 && !_IsOperatorChar(string[0])
244 			&& !_IsEquationChar(string[0])) {
245 			if (string[0] == '\\' && string[1] != 0)
246 				string++;
247 			string++;
248 		}
249 
250 		// get the attribute string	(and trim whitespace), in case
251 		// the string was not quoted
252 		end = string - 1;
253 		skipWhitespaceReverse(&end, start);
254 	}
255 
256 	// attribute string is empty (which is not allowed)
257 	if (start > end)
258 		return;
259 
260 	// At this point, "start" points to the beginning of the string, "end"
261 	// points to the last character of the string, and "string" points to the
262 	// first character of the equation symbol
263 
264 	// test for the right symbol (as this doesn't need any memory)
265 	switch (*string) {
266 		case '=':
267 			fOp = OP_EQUAL;
268 			break;
269 		case '>':
270 			fOp = *(string + 1) == '='
271 				? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN;
272 			break;
273 		case '<':
274 			fOp = *(string + 1) == '='
275 				? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN;
276 			break;
277 		case '!':
278 			if (*(string + 1) != '=')
279 				return;
280 			fOp = OP_UNEQUAL;
281 			break;
282 
283 		// any invalid characters will be rejected
284 		default:
285 			*_expression = string;
286 			return;
287 	}
288 
289 	// lets change "start" to point to the first character after the symbol
290 	if (*(string + 1) == '=')
291 		string++;
292 	string++;
293 	skipWhitespace(&string);
294 
295 	// allocate & copy the attribute string
296 
297 	fAttribute = _CopyString(start, end);
298 	if (fAttribute == NULL)
299 		return;
300 
301 	start = string;
302 	if (*start == '"' || *start == '\'') {
303 		// string is quoted (start has to be on the beginning of a string)
304 		if (_ParseQuotedString(&start, &end) < B_OK)
305 			return;
306 
307 		string = end + 2;
308 		skipWhitespace(&string);
309 	} else {
310 		while (string[0] && !_IsOperatorChar(string[0]) && string[0] != ')')
311 			string++;
312 
313 		end = string - 1;
314 		skipWhitespaceReverse(&end, start);
315 	}
316 
317 	// At this point, "start" will point to the first character of the value,
318 	// "end" will point to its last character, and "start" to the first non-
319 	// whitespace character after the value string.
320 
321 	fString = _CopyString(start, end);
322 	if (fString == NULL)
323 		return;
324 
325 	// Patterns are only allowed for these operations (and strings)
326 	if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) {
327 		fIsPattern = isPattern(fString);
328 		if (fIsPattern && isValidPattern(fString) < B_OK) {
329 			// Only valid patterns are allowed; setting fString
330 			// to NULL will cause InitCheck() to fail
331 			free(fString);
332 			fString = NULL;
333 		}
334 	}
335 
336 	// The special time flag is set if the time values are shifted
337 	// 64-bit values to reduce the number of duplicates.
338 	// We have to be able to compare them against unshifted values
339 	// later. The only index which needs this is the last_modified
340 	// index, but we may want to open that feature for other indices,
341 	// too one day.
342 	fIsSpecialTime = !strcmp(fAttribute, "last_modified");
343 
344 	*_expression = string;
345 }
346 
347 
348 Equation::~Equation()
349 {
350 	free(fAttribute);
351 	free(fString);
352 }
353 
354 
355 status_t
356 Equation::InitCheck()
357 {
358 	if (fAttribute == NULL || fString == NULL || fOp == OP_NONE)
359 		return B_BAD_VALUE;
360 
361 	return B_OK;
362 }
363 
364 
365 /*!	Matches the inode's attribute value with the equation.
366 	Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went
367 	wrong.
368 */
369 status_t
370 Equation::Match(Inode* inode, const char* attributeName, int32 type,
371 	const uint8* key, size_t size)
372 {
373 	// get a pointer to the attribute in question
374 	NodeGetter nodeGetter(inode->GetVolume());
375 	union value value;
376 	uint8* buffer = (uint8*)&value;
377 	bool locked = false;
378 
379 	// first, check if we are matching for a live query and use that value
380 	if (attributeName != NULL && !strcmp(fAttribute, attributeName)) {
381 		if (key == NULL)
382 			return NO_MATCH;
383 
384 		buffer = const_cast<uint8*>(key);
385 	} else if (!strcmp(fAttribute, "name")) {
386 		// we need to lock before accessing Inode::Name()
387 		status_t status = nodeGetter.SetTo(inode);
388 		if (status != B_OK)
389 			return status;
390 
391 		recursive_lock_lock(&inode->SmallDataLock());
392 		locked = true;
393 
394 		// if not, check for "fake" attributes ("name", "size", "last_modified")
395 		buffer = (uint8*)inode->Name(nodeGetter.Node());
396 		if (buffer == NULL) {
397 			recursive_lock_unlock(&inode->SmallDataLock());
398 			return B_ERROR;
399 		}
400 
401 		type = B_STRING_TYPE;
402 		size = strlen((const char*)buffer);
403 	} else if (!strcmp(fAttribute, "size")) {
404 #ifdef BFS_NATIVE_ENDIAN
405 		buffer = (uint8*)&inode->Node().data.size;
406 #else
407 		value.Int64 = inode->Size();
408 #endif
409 		type = B_INT64_TYPE;
410 	} else if (!strcmp(fAttribute, "last_modified")) {
411 #ifdef BFS_NATIVE_ENDIAN
412 		buffer = (uint8*)&inode->Node().last_modified_time;
413 #else
414 		value.Int64 = inode->Node().LastModifiedTime();
415 #endif
416 		type = B_INT64_TYPE;
417 	} else {
418 		// then for attributes in the small_data section, and finally for the
419 		// real attributes
420 		status_t status = nodeGetter.SetTo(inode);
421 		if (status != B_OK)
422 			return status;
423 
424 		Inode* attribute;
425 
426 		recursive_lock_lock(&inode->SmallDataLock());
427 		small_data* smallData = inode->FindSmallData(nodeGetter.Node(),
428 			fAttribute);
429 		if (smallData != NULL) {
430 			buffer = smallData->Data();
431 			type = smallData->type;
432 			size = smallData->data_size;
433 			locked = true;
434 		} else {
435 			// needed to unlock the small_data section as fast as possible
436 			recursive_lock_unlock(&inode->SmallDataLock());
437 			nodeGetter.Unset();
438 
439 			if (inode->GetAttribute(fAttribute, &attribute) == B_OK) {
440 				buffer = (uint8*)&value;
441 				type = attribute->Type();
442 				size = attribute->Size();
443 
444 				if (size > MAX_INDEX_KEY_LENGTH)
445 					size = MAX_INDEX_KEY_LENGTH;
446 
447 				if (attribute->ReadAt(0, buffer, &size) < B_OK) {
448 					inode->ReleaseAttribute(attribute);
449 					return B_IO_ERROR;
450 				}
451 				inode->ReleaseAttribute(attribute);
452 			} else
453 				return NO_MATCH;
454 		}
455 	}
456 	// prepare own value for use, if it is possible to convert it
457 	status_t status = _ConvertValue(type);
458 	if (status == B_OK)
459 		status = _CompareTo(buffer, size) ? MATCH_OK : NO_MATCH;
460 
461 	if (locked)
462 		recursive_lock_unlock(&inode->SmallDataLock());
463 
464 	RETURN_ERROR(status);
465 }
466 
467 
468 void
469 Equation::Complement()
470 {
471 	D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) {
472 		FATAL(("op out of range!"));
473 		return;
474 	});
475 
476 	int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL,
477 			OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN};
478 	fOp = complementOp[fOp - OP_EQUAL];
479 }
480 
481 
482 status_t
483 Equation::PrepareQuery(Volume* /*volume*/, Index& index,
484 	TreeIterator** iterator, bool queryNonIndexed)
485 {
486 	status_t status = index.SetTo(fAttribute);
487 
488 	// if we should query attributes without an index, we can just proceed here
489 	if (status != B_OK && !queryNonIndexed)
490 		return B_ENTRY_NOT_FOUND;
491 
492 	type_code type;
493 
494 	// Special case for OP_UNEQUAL - it will always operate through the whole
495 	// index but we need the call to the original index to get the correct type
496 	if (status != B_OK || fOp == OP_UNEQUAL) {
497 		// Try to get an index that holds all files (name)
498 		// Also sets the default type for all attributes without index
499 		// to string.
500 		type = status < B_OK ? B_STRING_TYPE : index.Type();
501 
502 		if (index.SetTo("name") != B_OK)
503 			return B_ENTRY_NOT_FOUND;
504 
505 		fHasIndex = false;
506 	} else {
507 		fHasIndex = true;
508 		type = index.Type();
509 	}
510 
511 	if (_ConvertValue(type) < B_OK)
512 		return B_BAD_VALUE;
513 
514 	BPlusTree* tree = index.Node()->Tree();
515 	if (tree == NULL)
516 		return B_ERROR;
517 
518 	*iterator = new(std::nothrow) TreeIterator(tree);
519 	if (*iterator == NULL)
520 		return B_NO_MEMORY;
521 
522 	if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN
523 			|| fOp == OP_GREATER_THAN_OR_EQUAL || fIsPattern)
524 		&& fHasIndex) {
525 		// set iterator to the exact position
526 
527 		int32 keySize = index.KeySize();
528 
529 		// At this point, fIsPattern is only true if it's a string type, and fOp
530 		// is either OP_EQUAL or OP_UNEQUAL
531 		if (fIsPattern) {
532 			// let's see if we can use the beginning of the key for positioning
533 			// the iterator and adjust the key size; if not, just leave the
534 			// iterator at the start and return success
535 			keySize = getFirstPatternSymbol(fString);
536 			if (keySize <= 0)
537 				return B_OK;
538 		}
539 
540 		if (keySize == 0) {
541 			// B_STRING_TYPE doesn't have a fixed length, so it was set
542 			// to 0 before - we compute the correct value here
543 			if (fType == B_STRING_TYPE) {
544 				keySize = strlen(fValue.String);
545 
546 				// The empty string is a special case - we normally don't check
547 				// for the trailing null byte, in the case for the empty string
548 				// we do it explicitly, because there can't be keys in the
549 				// B+tree with a length of zero
550 				if (keySize == 0)
551 					keySize = 1;
552 			} else
553 				RETURN_ERROR(B_ENTRY_NOT_FOUND);
554 		}
555 
556 		if (fIsSpecialTime) {
557 			// we have to find the first matching shifted value
558 			off_t value = fValue.Int64 << INODE_TIME_SHIFT;
559 			status = (*iterator)->Find((uint8*)&value, keySize);
560 			if (status == B_ENTRY_NOT_FOUND)
561 				return B_OK;
562 		} else {
563 			status = (*iterator)->Find(_Value(), keySize);
564 			if (fOp == OP_EQUAL && !fIsPattern)
565 				return status;
566 			else if (status == B_ENTRY_NOT_FOUND
567 				&& (fIsPattern || fOp == OP_GREATER_THAN
568 					|| fOp == OP_GREATER_THAN_OR_EQUAL))
569 				return B_OK;
570 		}
571 
572 		RETURN_ERROR(status);
573 	}
574 
575 	return B_OK;
576 }
577 
578 
579 status_t
580 Equation::GetNextMatching(Volume* volume, TreeIterator* iterator,
581 	struct dirent* dirent, size_t bufferSize)
582 {
583 	while (true) {
584 		union value indexValue;
585 		uint16 keyLength;
586 		uint16 duplicate;
587 		off_t offset;
588 
589 		status_t status = iterator->GetNextEntry(&indexValue, &keyLength,
590 			(uint16)sizeof(indexValue), &offset, &duplicate);
591 		if (status != B_OK)
592 			return status;
593 
594 		// only compare against the index entry when this is the correct
595 		// index for the equation
596 		if (fHasIndex && duplicate < 2
597 			&& !_CompareTo((uint8*)&indexValue, keyLength)) {
598 			// They aren't equal? Let the operation decide what to do. Since
599 			// we always start at the beginning of the index (or the correct
600 			// position), only some needs to be stopped if the entry doesn't
601 			// fit.
602 			if (fOp == OP_LESS_THAN
603 				|| fOp == OP_LESS_THAN_OR_EQUAL
604 				|| (fOp == OP_EQUAL && !fIsPattern))
605 				return B_ENTRY_NOT_FOUND;
606 
607 			if (duplicate > 0)
608 				iterator->SkipDuplicates();
609 			continue;
610 		}
611 
612 		Vnode vnode(volume, offset);
613 		Inode* inode;
614 		if ((status = vnode.Get(&inode)) != B_OK) {
615 			REPORT_ERROR(status);
616 			FATAL(("could not get inode %" B_PRIdOFF " in index \"%s\"!\n",
617 				offset, fAttribute));
618 			// try with next
619 			continue;
620 		}
621 
622 		// TODO: check user permissions here - but which one?!
623 		// we could filter out all those where we don't have
624 		// read access... (we should check for every parent
625 		// directory if the X_OK is allowed)
626 		// Although it's quite expensive to open all parents,
627 		// it's likely that the application that runs the
628 		// query will do something similar (and we don't have
629 		// to do it for root, either).
630 
631 		// go up in the tree until a &&-operator is found, and check if the
632 		// inode matches with the rest of the expression - we don't have to
633 		// check ||-operators for that
634 		Term* term = this;
635 		status = MATCH_OK;
636 
637 		if (!fHasIndex)
638 			status = Match(inode);
639 
640 		while (term != NULL && status == MATCH_OK) {
641 			Operator* parent = (Operator*)term->Parent();
642 			if (parent == NULL)
643 				break;
644 
645 			if (parent->Op() == OP_AND) {
646 				// choose the other child of the parent
647 				Term* other = parent->Right();
648 				if (other == term)
649 					other = parent->Left();
650 
651 				if (other == NULL) {
652 					FATAL(("&&-operator has only one child... (parent = %p)\n",
653 						parent));
654 					break;
655 				}
656 				status = other->Match(inode);
657 				if (status < 0) {
658 					REPORT_ERROR(status);
659 					status = NO_MATCH;
660 				}
661 			}
662 			term = (Term*)parent;
663 		}
664 
665 		if (status == MATCH_OK) {
666 			dirent->d_dev = volume->ID();
667 			dirent->d_ino = offset;
668 			dirent->d_pdev = volume->ID();
669 			dirent->d_pino = volume->ToVnode(inode->Parent());
670 			dirent->d_reclen = offsetof(struct dirent, d_name);
671 
672 			if (inode->GetName(dirent->d_name) < B_OK) {
673 				FATAL(("inode %" B_PRIdOFF " in query has no name!\n",
674 					inode->BlockNumber()));
675 			} else {
676 				dirent->d_reclen += strlen(dirent->d_name) + 1;
677 			}
678 		}
679 
680 		if (status == MATCH_OK)
681 			return B_OK;
682 	}
683 	RETURN_ERROR(B_ERROR);
684 }
685 
686 
687 void
688 Equation::CalculateScore(Index &index)
689 {
690 	// As always, these values could be tuned and refined.
691 	// And the code could also need some real world testing :-)
692 
693 	// do we have to operate on a "foreign" index?
694 	if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) {
695 		fScore = 0;
696 		return;
697 	}
698 
699 	// if we have a pattern, how much does it help our search?
700 	if (fIsPattern)
701 		fScore = getFirstPatternSymbol(fString) << 3;
702 	else {
703 		// Score by operator
704 		if (fOp == OP_EQUAL)
705 			// higher than pattern="255 chars+*"
706 			fScore = 2048;
707 		else
708 			// the pattern search is regarded cheaper when you have at
709 			// least one character to set your index to
710 			fScore = 5;
711 	}
712 
713 	// take index size into account (1024 is the current node size
714 	// in our B+trees)
715 	// 2048 * 2048 == 4194304 is the maximum score (for an empty
716 	// tree, since the header + 1 node are already 2048 bytes)
717 	fScore = fScore * ((2048 * 1024LL) / index.Node()->Size());
718 }
719 
720 
721 status_t
722 Equation::_ParseQuotedString(char** _start, char** _end)
723 {
724 	char* start = *_start;
725 	char quote = *start++;
726 	char* end = start;
727 
728 	for (; *end && *end != quote; end++) {
729 		if (*end == '\\')
730 			end++;
731 	}
732 	if (*end == '\0')
733 		return B_BAD_VALUE;
734 
735 	*_start = start;
736 	*_end = end - 1;
737 
738 	return B_OK;
739 }
740 
741 
742 char*
743 Equation::_CopyString(char* start, char* end)
744 {
745 	// end points to the last character of the string - and the length
746 	// also has to include the null-termination
747 	int32 length = end + 2 - start;
748 	// just to make sure; since that's the max. attribute name length and
749 	// the max. string in an index, it make sense to have it that way
750 	if (length > MAX_INDEX_KEY_LENGTH + 1 || length <= 0)
751 		return NULL;
752 
753 	char* copy = (char*)malloc(length);
754 	if (copy == NULL)
755 		return NULL;
756 
757 	// Filter out remaining escaping slashes
758 	for (int32 i = 0; i < length; i++) {
759 		char c = start++[0];
760 		if (c == '\\' && i < length) {
761 			length--;
762 			i--;
763 			continue;
764 		}
765 		copy[i] = c;
766 	}
767 	copy[length - 1] = '\0';
768 
769 	return copy;
770 }
771 
772 
773 bool
774 Equation::_IsEquationChar(char c) const
775 {
776 	return c == '=' || c == '<' || c == '>' || c == '!';
777 }
778 
779 
780 bool
781 Equation::_IsOperatorChar(char c) const
782 {
783 	return c == '&' || c == '|';
784 }
785 
786 
787 status_t
788 Equation::_ConvertValue(type_code type)
789 {
790 	// Has the type already been converted?
791 	if (type == fType)
792 		return B_OK;
793 
794 	char* string = fString;
795 
796 	switch (type) {
797 		case B_MIME_STRING_TYPE:
798 			type = B_STRING_TYPE;
799 			// supposed to fall through
800 		case B_STRING_TYPE:
801 			strncpy(fValue.String, string, MAX_INDEX_KEY_LENGTH + 1);
802 			fValue.String[MAX_INDEX_KEY_LENGTH] = '\0';
803 			fSize = strlen(fValue.String);
804 			break;
805 		case B_TIME_TYPE:
806 			type = B_INT32_TYPE;
807 			// supposed to fall through
808 		case B_INT32_TYPE:
809 			fValue.Int32 = strtol(string, &string, 0);
810 			fSize = sizeof(int32);
811 			break;
812 		case B_UINT32_TYPE:
813 			fValue.Int32 = strtoul(string, &string, 0);
814 			fSize = sizeof(uint32);
815 			break;
816 		case B_INT64_TYPE:
817 			fValue.Int64 = strtoll(string, &string, 0);
818 			fSize = sizeof(int64);
819 			break;
820 		case B_UINT64_TYPE:
821 			fValue.Uint64 = strtoull(string, &string, 0);
822 			fSize = sizeof(uint64);
823 			break;
824 		case B_FLOAT_TYPE:
825 			fValue.Float = strtod(string, &string);
826 			fSize = sizeof(float);
827 			break;
828 		case B_DOUBLE_TYPE:
829 			fValue.Double = strtod(string, &string);
830 			fSize = sizeof(double);
831 			break;
832 		default:
833 			FATAL(("query value conversion to 0x%x requested!\n", (int)type));
834 			// should we fail here or just do a safety int32 conversion?
835 			return B_ERROR;
836 	}
837 
838 	fType = type;
839 
840 	// patterns are only allowed for string types
841 	if (fType != B_STRING_TYPE && fIsPattern)
842 		fIsPattern = false;
843 
844 	return B_OK;
845 }
846 
847 
848 /*!	Returns true when the key matches the equation. You have to
849 	call ConvertValue() before this one.
850 */
851 bool
852 Equation::_CompareTo(const uint8* value, uint16 size)
853 {
854 	int32 compare;
855 
856 	// fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or
857 	// OP_UNEQUAL
858 	if (fIsPattern) {
859 		// we have already validated the pattern, so we don't check for failing
860 		// here - if something is broken, and matchString() returns an error,
861 		// we just don't match
862 		compare = matchString(fValue.String, (char*)value) == MATCH_OK ? 0 : 1;
863 	} else if (fIsSpecialTime) {
864 		// the index is a shifted int64 index, but we have to match
865 		// against an unshifted value (i.e. the last_modified index)
866 		int64 timeValue = *(int64*)value >> INODE_TIME_SHIFT;
867 		compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64,
868 			sizeof(int64));
869 	} else
870 		compare = compareKeys(fType, value, size, _Value(), fSize);
871 
872 	switch (fOp) {
873 		case OP_EQUAL:
874 			return compare == 0;
875 		case OP_UNEQUAL:
876 			return compare != 0;
877 		case OP_LESS_THAN:
878 			return compare < 0;
879 		case OP_LESS_THAN_OR_EQUAL:
880 			return compare <= 0;
881 		case OP_GREATER_THAN:
882 			return compare > 0;
883 		case OP_GREATER_THAN_OR_EQUAL:
884 			return compare >= 0;
885 	}
886 	FATAL(("Unknown/Unsupported operation: %d\n", fOp));
887 	return false;
888 }
889 
890 
891 //	#pragma mark -
892 
893 
894 Operator::Operator(Term* left, int8 op, Term* right)
895 	:
896 	Term(op),
897 	fLeft(left),
898 	fRight(right)
899 {
900 	if (left)
901 		left->SetParent(this);
902 	if (right)
903 		right->SetParent(this);
904 }
905 
906 
907 Operator::~Operator()
908 {
909 	delete fLeft;
910 	delete fRight;
911 }
912 
913 
914 status_t
915 Operator::Match(Inode* inode, const char* attribute, int32 type,
916 	const uint8* key, size_t size)
917 {
918 	if (fOp == OP_AND) {
919 		status_t status = fLeft->Match(inode, attribute, type, key, size);
920 		if (status != MATCH_OK)
921 			return status;
922 
923 		return fRight->Match(inode, attribute, type, key, size);
924 	} else {
925 		// choose the term with the better score for OP_OR
926 		Term* first;
927 		Term* second;
928 		if (fRight->Score() > fLeft->Score()) {
929 			first = fLeft;
930 			second = fRight;
931 		} else {
932 			first = fRight;
933 			second = fLeft;
934 		}
935 
936 		status_t status = first->Match(inode, attribute, type, key, size);
937 		if (status != NO_MATCH)
938 			return status;
939 
940 		return second->Match(inode, attribute, type, key, size);
941 	}
942 }
943 
944 
945 void
946 Operator::Complement()
947 {
948 	if (fOp == OP_AND)
949 		fOp = OP_OR;
950 	else
951 		fOp = OP_AND;
952 
953 	fLeft->Complement();
954 	fRight->Complement();
955 }
956 
957 
958 void
959 Operator::CalculateScore(Index &index)
960 {
961 	fLeft->CalculateScore(index);
962 	fRight->CalculateScore(index);
963 }
964 
965 
966 int32
967 Operator::Score() const
968 {
969 	if (fOp == OP_AND) {
970 		// return the one with the better score
971 		if (fRight->Score() > fLeft->Score())
972 			return fRight->Score();
973 
974 		return fLeft->Score();
975 	}
976 
977 	// for OP_OR, be honest, and return the one with the worse score
978 	if (fRight->Score() < fLeft->Score())
979 		return fRight->Score();
980 
981 	return fLeft->Score();
982 }
983 
984 
985 status_t
986 Operator::InitCheck()
987 {
988 	if ((fOp != OP_AND && fOp != OP_OR)
989 		|| fLeft == NULL || fLeft->InitCheck() < B_OK
990 		|| fRight == NULL || fRight->InitCheck() < B_OK)
991 		return B_ERROR;
992 
993 	return B_OK;
994 }
995 
996 
997 #if 0
998 Term*
999 Operator::Copy() const
1000 {
1001 	if (fEquation != NULL) {
1002 		Equation* equation = new(std::nothrow) Equation(*fEquation);
1003 		if (equation == NULL)
1004 			return NULL;
1005 
1006 		Term* term = new(std::nothrow) Term(equation);
1007 		if (term == NULL)
1008 			delete equation;
1009 
1010 		return term;
1011 	}
1012 
1013 	Term* left = NULL;
1014 	Term* right = NULL;
1015 
1016 	if (fLeft != NULL && (left = fLeft->Copy()) == NULL)
1017 		return NULL;
1018 	if (fRight != NULL && (right = fRight->Copy()) == NULL) {
1019 		delete left;
1020 		return NULL;
1021 	}
1022 
1023 	Term* term = new(std::nothrow) Term(left, fOp, right);
1024 	if (term == NULL) {
1025 		delete left;
1026 		delete right;
1027 		return NULL;
1028 	}
1029 	return term;
1030 }
1031 #endif
1032 
1033 
1034 //	#pragma mark -
1035 
1036 #ifdef DEBUG
1037 
1038 void
1039 Operator::PrintToStream()
1040 {
1041 	__out("( ");
1042 	if (fLeft != NULL)
1043 		fLeft->PrintToStream();
1044 
1045 	const char* op;
1046 	switch (fOp) {
1047 		case OP_OR: op = "OR"; break;
1048 		case OP_AND: op = "AND"; break;
1049 		default: op = "?"; break;
1050 	}
1051 	__out(" %s ", op);
1052 
1053 	if (fRight != NULL)
1054 		fRight->PrintToStream();
1055 
1056 	__out(" )");
1057 }
1058 
1059 
1060 void
1061 Equation::PrintToStream()
1062 {
1063 	const char* symbol = "???";
1064 	switch (fOp) {
1065 		case OP_EQUAL: symbol = "=="; break;
1066 		case OP_UNEQUAL: symbol = "!="; break;
1067 		case OP_GREATER_THAN: symbol = ">"; break;
1068 		case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break;
1069 		case OP_LESS_THAN: symbol = "<"; break;
1070 		case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break;
1071 	}
1072 	__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString);
1073 }
1074 
1075 #endif	// DEBUG
1076 
1077 //	#pragma mark -
1078 
1079 
1080 Expression::Expression(char* expr)
1081 {
1082 	if (expr == NULL)
1083 		return;
1084 
1085 	fTerm = ParseOr(&expr);
1086 	if (fTerm != NULL && fTerm->InitCheck() < B_OK) {
1087 		FATAL(("Corrupt tree in expression!\n"));
1088 		delete fTerm;
1089 		fTerm = NULL;
1090 	}
1091 	D(if (fTerm != NULL) {
1092 		fTerm->PrintToStream();
1093 		D(__out("\n"));
1094 		if (*expr != '\0')
1095 			PRINT(("Unexpected end of string: \"%s\"!\n", expr));
1096 	});
1097 	fPosition = expr;
1098 }
1099 
1100 
1101 Expression::~Expression()
1102 {
1103 	delete fTerm;
1104 }
1105 
1106 
1107 Term*
1108 Expression::ParseEquation(char** expr)
1109 {
1110 	skipWhitespace(expr);
1111 
1112 	bool _not = false;
1113 	if (**expr == '!') {
1114 		skipWhitespace(expr, 1);
1115 		if (**expr != '(')
1116 			return NULL;
1117 
1118 		_not = true;
1119 	}
1120 
1121 	if (**expr == ')') {
1122 		// shouldn't be handled here
1123 		return NULL;
1124 	} else if (**expr == '(') {
1125 		skipWhitespace(expr, 1);
1126 
1127 		Term* term = ParseOr(expr);
1128 
1129 		skipWhitespace(expr);
1130 
1131 		if (**expr != ')') {
1132 			delete term;
1133 			return NULL;
1134 		}
1135 
1136 		// If the term is negated, we just complement the tree, to get
1137 		// rid of the not, a.k.a. DeMorgan's Law.
1138 		if (_not)
1139 			term->Complement();
1140 
1141 		skipWhitespace(expr, 1);
1142 
1143 		return term;
1144 	}
1145 
1146 	Equation* equation = new(std::nothrow) Equation(expr);
1147 	if (equation == NULL || equation->InitCheck() < B_OK) {
1148 		delete equation;
1149 		return NULL;
1150 	}
1151 	return equation;
1152 }
1153 
1154 
1155 Term*
1156 Expression::ParseAnd(char** expr)
1157 {
1158 	Term* left = ParseEquation(expr);
1159 	if (left == NULL)
1160 		return NULL;
1161 
1162 	while (IsOperator(expr, '&')) {
1163 		Term* right = ParseAnd(expr);
1164 		Term* newParent = NULL;
1165 
1166 		if (right == NULL || (newParent = new(std::nothrow) Operator(left,
1167 				OP_AND, right)) == NULL) {
1168 			delete left;
1169 			delete right;
1170 
1171 			return NULL;
1172 		}
1173 		left = newParent;
1174 	}
1175 
1176 	return left;
1177 }
1178 
1179 
1180 Term*
1181 Expression::ParseOr(char** expr)
1182 {
1183 	Term* left = ParseAnd(expr);
1184 	if (left == NULL)
1185 		return NULL;
1186 
1187 	while (IsOperator(expr, '|')) {
1188 		Term* right = ParseAnd(expr);
1189 		Term* newParent = NULL;
1190 
1191 		if (right == NULL || (newParent = new(std::nothrow) Operator(left,
1192 				OP_OR, right)) == NULL) {
1193 			delete left;
1194 			delete right;
1195 
1196 			return NULL;
1197 		}
1198 		left = newParent;
1199 	}
1200 
1201 	return left;
1202 }
1203 
1204 
1205 bool
1206 Expression::IsOperator(char** expr, char op)
1207 {
1208 	char* string = *expr;
1209 
1210 	if (*string == op && *(string + 1) == op) {
1211 		*expr += 2;
1212 		return true;
1213 	}
1214 	return false;
1215 }
1216 
1217 
1218 status_t
1219 Expression::InitCheck()
1220 {
1221 	if (fTerm == NULL)
1222 		return B_BAD_VALUE;
1223 
1224 	return B_OK;
1225 }
1226 
1227 
1228 //	#pragma mark -
1229 
1230 
1231 Query::Query(Volume* volume, Expression* expression, uint32 flags)
1232 	:
1233 	fVolume(volume),
1234 	fExpression(expression),
1235 	fCurrent(NULL),
1236 	fIterator(NULL),
1237 	fIndex(volume),
1238 	fFlags(flags),
1239 	fPort(-1)
1240 {
1241 	// If the expression has a valid root pointer, the whole tree has
1242 	// already passed the sanity check, so that we don't have to check
1243 	// every pointer
1244 	if (volume == NULL || expression == NULL || expression->Root() == NULL)
1245 		return;
1246 
1247 	// create index on the stack and delete it afterwards
1248 	fExpression->Root()->CalculateScore(fIndex);
1249 	fIndex.Unset();
1250 
1251 	Rewind();
1252 
1253 	if ((fFlags & B_LIVE_QUERY) != 0)
1254 		volume->AddQuery(this);
1255 }
1256 
1257 
1258 Query::~Query()
1259 {
1260 	if ((fFlags & B_LIVE_QUERY) != 0)
1261 		fVolume->RemoveQuery(this);
1262 }
1263 
1264 
1265 status_t
1266 Query::Rewind()
1267 {
1268 	// free previous stuff
1269 
1270 	fStack.MakeEmpty();
1271 
1272 	delete fIterator;
1273 	fIterator = NULL;
1274 	fCurrent = NULL;
1275 
1276 	// put the whole expression on the stack
1277 
1278 	Stack<Term*> stack;
1279 	stack.Push(fExpression->Root());
1280 
1281 	Term* term;
1282 	while (stack.Pop(&term)) {
1283 		if (term->Op() < OP_EQUATION) {
1284 			Operator* op = (Operator*)term;
1285 
1286 			if (op->Op() == OP_OR) {
1287 				stack.Push(op->Left());
1288 				stack.Push(op->Right());
1289 			} else {
1290 				// For OP_AND, we can use the scoring system to decide which
1291 				// path to add
1292 				if (op->Right()->Score() > op->Left()->Score())
1293 					stack.Push(op->Right());
1294 				else
1295 					stack.Push(op->Left());
1296 			}
1297 		} else if (term->Op() == OP_EQUATION
1298 			|| fStack.Push((Equation*)term) != B_OK)
1299 			FATAL(("Unknown term on stack or stack error"));
1300 	}
1301 
1302 	return B_OK;
1303 }
1304 
1305 
1306 status_t
1307 Query::GetNextEntry(struct dirent* dirent, size_t size)
1308 {
1309 	// If we don't have an equation to use yet/anymore, get a new one
1310 	// from the stack
1311 	while (true) {
1312 		if (fIterator == NULL) {
1313 			if (!fStack.Pop(&fCurrent)
1314 				|| fCurrent == NULL)
1315 				return B_ENTRY_NOT_FOUND;
1316 
1317 			status_t status = fCurrent->PrepareQuery(fVolume, fIndex,
1318 				&fIterator, fFlags & B_QUERY_NON_INDEXED);
1319 			if (status == B_ENTRY_NOT_FOUND) {
1320 				// try next equation
1321 				continue;
1322 			}
1323 
1324 			if (status != B_OK)
1325 				return status;
1326 		}
1327 		if (fCurrent == NULL)
1328 			RETURN_ERROR(B_ERROR);
1329 
1330 		status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent,
1331 			size);
1332 		if (status != B_OK) {
1333 			delete fIterator;
1334 			fIterator = NULL;
1335 			fCurrent = NULL;
1336 		} else {
1337 			// only return if we have another entry
1338 			return B_OK;
1339 		}
1340 	}
1341 }
1342 
1343 
1344 void
1345 Query::SetLiveMode(port_id port, int32 token)
1346 {
1347 	fPort = port;
1348 	fToken = token;
1349 
1350 	if ((fFlags & B_LIVE_QUERY) == 0) {
1351 		// you can decide at any point to set the live query mode,
1352 		// only live queries have to be updated by attribute changes
1353 		fFlags |= B_LIVE_QUERY;
1354 		fVolume->AddQuery(this);
1355 	}
1356 }
1357 
1358 
1359 void
1360 Query::LiveUpdate(Inode* inode, const char* attribute, int32 type,
1361 	const uint8* oldKey, size_t oldLength, const uint8* newKey,
1362 	size_t newLength)
1363 {
1364 	if (fPort < 0 || fExpression == NULL || attribute == NULL)
1365 		return;
1366 
1367 	// TODO: check if the attribute is part of the query at all...
1368 
1369 	status_t oldStatus = fExpression->Root()->Match(inode, attribute, type,
1370 		oldKey, oldLength);
1371 	status_t newStatus = fExpression->Root()->Match(inode, attribute, type,
1372 		newKey, newLength);
1373 
1374 	bool entryCreated = false;
1375 	bool stillInQuery = false;
1376 
1377 	if (oldStatus != MATCH_OK) {
1378 		if (newStatus != MATCH_OK) {
1379 			// nothing has changed
1380 			return;
1381 		}
1382 		entryCreated = true;
1383 	} else if (newStatus != MATCH_OK) {
1384 		// entry got removed
1385 		entryCreated = false;
1386 	} else if ((fFlags & B_ATTR_CHANGE_NOTIFICATION) != 0) {
1387 		// The entry stays in the query
1388 		stillInQuery = true;
1389 	} else
1390 		return;
1391 
1392 	// we may need to get the name of the inode
1393 
1394 	char nameBuffer[B_FILE_NAME_LENGTH];
1395 	const char* name;
1396 
1397 	if (strcmp(attribute, "name")) {
1398 		if (inode->GetName(nameBuffer) != B_OK)
1399 			nameBuffer[0] = '\0';
1400 		name = nameBuffer;
1401 	} else {
1402 		// a shortcut to prevent having to scan the attribute section
1403 		name = (const char*)newKey;
1404 	}
1405 
1406 	// notify query listeners
1407 
1408 	if (stillInQuery) {
1409 		notify_query_attr_changed(fPort, fToken, fVolume->ID(),
1410 			fVolume->ToVnode(inode->Parent()), name, inode->ID());
1411 	} else if (entryCreated) {
1412 		notify_query_entry_created(fPort, fToken, fVolume->ID(),
1413 			fVolume->ToVnode(inode->Parent()), name, inode->ID());
1414 	} else {
1415 		notify_query_entry_removed(fPort, fToken, fVolume->ID(),
1416 			fVolume->ToVnode(inode->Parent()), name, inode->ID());
1417 	}
1418 }
1419 
1420 
1421 void
1422 Query::LiveUpdateRenameMove(Inode* inode, ino_t oldDirectoryID,
1423 	const char* oldName, size_t oldLength, ino_t newDirectoryID,
1424 	const char* newName, size_t newLength)
1425 {
1426 	if (fPort < 0 || fExpression == NULL)
1427 		return;
1428 
1429 	// TODO: check if the attribute is part of the query at all...
1430 
1431 	status_t oldStatus = fExpression->Root()->Match(inode, "name",
1432 		B_STRING_TYPE, (const uint8*)oldName, oldLength);
1433 	status_t newStatus = fExpression->Root()->Match(inode, "name",
1434 		B_STRING_TYPE, (const uint8*)newName, newLength);
1435 
1436 	if (oldStatus != MATCH_OK || oldStatus != newStatus)
1437 		return;
1438 
1439 	// The entry stays in the query, notify query listeners about the rename
1440 	// or move
1441 
1442 	notify_query_entry_removed(fPort, fToken, fVolume->ID(),
1443 		oldDirectoryID, oldName, inode->ID());
1444 
1445 	notify_query_entry_created(fPort, fToken, fVolume->ID(),
1446 		newDirectoryID, newName, inode->ID());
1447 }
1448