xref: /haiku/src/add-ons/kernel/file_systems/bfs/Query.cpp (revision 5e7964b0a929555415798dea3373db9ac4611caa)
1 /*
2  * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de.
3  * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de>
4  * This file may be used under the terms of the MIT License.
5  */
6 
7 
8 /*!	Query parsing and evaluation
9 
10 	The pattern matching is roughly based on code originally written
11 	by J. Kercheval, and on code written by Kenneth Almquist, though
12 	it shares no code.
13 */
14 
15 
16 #include "Query.h"
17 
18 #include <file_systems/QueryParserUtils.h>
19 #include <query_private.h>
20 
21 #include "BPlusTree.h"
22 #include "bfs.h"
23 #include "Debug.h"
24 #include "Index.h"
25 #include "Inode.h"
26 #include "Volume.h"
27 
28 
29 // The parser has a very static design, but it will do what is required.
30 //
31 // ParseOr(), ParseAnd(), ParseEquation() are guarantying the operator
32 // precedence, that is =,!=,>,<,>=,<= .. && .. ||.
33 // Apparently, the "!" (not) can only be used with brackets.
34 //
35 // If you think that there are too few NULL pointer checks in some places
36 // of the code, just read the beginning of the query constructor.
37 // The API is not fully available, just the Query and the Expression class
38 // are.
39 
40 
41 using namespace QueryParser;
42 
43 
44 enum ops {
45 	OP_NONE,
46 
47 	OP_AND,
48 	OP_OR,
49 
50 	OP_EQUATION,
51 		// is only used for invalid equations
52 
53 	OP_EQUAL,
54 	OP_UNEQUAL,
55 	OP_GREATER_THAN,
56 	OP_LESS_THAN,
57 	OP_GREATER_THAN_OR_EQUAL,
58 	OP_LESS_THAN_OR_EQUAL,
59 };
60 
61 
62 union value {
63 	int64	Int64;
64 	uint64	Uint64;
65 	int32	Int32;
66 	uint32	Uint32;
67 	float	Float;
68 	double	Double;
69 	char	String[MAX_INDEX_KEY_LENGTH + 1];
70 };
71 
72 
73 /*!	Abstract base class for the operator/equation classes.
74 */
75 class Term {
76 public:
77 								Term(int8 op) : fOp(op), fParent(NULL) {}
78 	virtual						~Term() {}
79 
80 			int8				Op() const { return fOp; }
81 
82 			void				SetParent(Term* parent) { fParent = parent; }
83 			Term*				Parent() const { return fParent; }
84 
85 	virtual	status_t			Match(Inode* inode,
86 									const char* attribute = NULL,
87 									int32 type = 0, const uint8* key = NULL,
88 									size_t size = 0) = 0;
89 	virtual	void				Complement() = 0;
90 
91 	virtual	void				CalculateScore(Index& index) = 0;
92 	virtual	int32				Score() const = 0;
93 
94 	virtual	status_t			InitCheck() = 0;
95 
96 #ifdef DEBUG
97 	virtual	void				PrintToStream() = 0;
98 #endif
99 
100 protected:
101 			int8				fOp;
102 			Term*				fParent;
103 };
104 
105 
106 /*!	An Equation object represents an "attribute-equation operator-value" pair.
107 
108 	Although an Equation object is quite independent from the volume on which
109 	the query is run, there are some dependencies that are produced while
110 	querying:
111 	The type/size of the value, the score, and if it has an index or not.
112 	So you could run more than one query on the same volume, but it might return
113 	wrong values when it runs concurrently on another volume.
114 	That's not an issue right now, because we run single-threaded and don't use
115 	queries more than once.
116 */
117 class Equation : public Term {
118 public:
119 								Equation(char** _expression);
120 	virtual						~Equation();
121 
122 	virtual	status_t			InitCheck();
123 
124 	virtual	status_t			Match(Inode* inode,
125 									const char* attribute = NULL,
126 									int32 type = 0, const uint8* key = NULL,
127 									size_t size = 0);
128 	virtual void				Complement();
129 
130 			status_t			PrepareQuery(Volume* volume, Index& index,
131 									TreeIterator** iterator,
132 									bool queryNonIndexed);
133 			status_t			GetNextMatching(Volume* volume,
134 									TreeIterator* iterator,
135 									struct dirent* dirent, size_t bufferSize);
136 
137 	virtual	void				CalculateScore(Index &index);
138 	virtual	int32				Score() const { return fScore; }
139 
140 #ifdef DEBUG
141 	virtual	void				PrintToStream();
142 #endif
143 
144 private:
145 								Equation(const Equation& other);
146 								Equation& operator=(const Equation& other);
147 									// no implementation
148 
149 			status_t			_ParseQuotedString(char** _start, char** _end);
150 			char*				_CopyString(char* start, char* end);
151 	inline	bool				_IsEquationChar(char c) const;
152 	inline	bool				_IsOperatorChar(char c) const;
153 			status_t			_ConvertValue(type_code type);
154 			bool				_CompareTo(const uint8* value, uint16 size);
155 			uint8*				_Value() const { return (uint8*)&fValue; }
156 
157 private:
158 			char*				fAttribute;
159 			char*				fString;
160 			union value			fValue;
161 			type_code			fType;
162 			size_t				fSize;
163 			bool				fIsPattern;
164 			bool				fIsSpecialTime;
165 
166 			int32				fScore;
167 			bool				fHasIndex;
168 };
169 
170 
171 /*!	The Operator class does not represent a generic operator, but only those
172 	that combine two equations, namely "or", and "and".
173 */
174 class Operator : public Term {
175 public:
176 								Operator(Term* left, int8 op, Term* right);
177 	virtual						~Operator();
178 
179 			Term*				Left() const { return fLeft; }
180 			Term*				Right() const { return fRight; }
181 
182 	virtual	status_t			Match(Inode* inode,
183 									const char* attribute = NULL,
184 									int32 type = 0, const uint8* key = NULL,
185 									size_t size = 0);
186 	virtual	void				Complement();
187 
188 	virtual	void				CalculateScore(Index& index);
189 	virtual	int32				Score() const;
190 
191 	virtual	status_t			InitCheck();
192 
193 #ifdef DEBUG
194 	virtual	void				PrintToStream();
195 #endif
196 
197 private:
198 								Operator(const Operator& other);
199 								Operator& operator=(const Operator& other);
200 									// no implementation
201 
202 private:
203 			Term*				fLeft;
204 			Term*				fRight;
205 };
206 
207 
208 //	#pragma mark -
209 
210 
211 Equation::Equation(char** _expression)
212 	:
213 	Term(OP_EQUATION),
214 	fAttribute(NULL),
215 	fString(NULL),
216 	fType(0),
217 	fIsPattern(false)
218 {
219 	char* string = *_expression;
220 	char* start = string;
221 	char* end = NULL;
222 
223 	// Since the equation is the integral part of any query, we're just parsing
224 	// the whole thing here.
225 	// The whitespace at the start is already removed in
226 	// Expression::ParseEquation()
227 
228 	if (*start == '"' || *start == '\'') {
229 		// string is quoted (start has to be on the beginning of a string)
230 		if (_ParseQuotedString(&start, &end) < B_OK)
231 			return;
232 
233 		// set string to a valid start of the equation symbol
234 		string = end + 2;
235 		skipWhitespace(&string);
236 		if (!_IsEquationChar(string[0])) {
237 			*_expression = string;
238 			return;
239 		}
240 	} else {
241 		// search the (in)equation for the actual equation symbol (and for other operators
242 		// in case the equation is malformed)
243 		while (string[0] != 0 && !_IsOperatorChar(string[0])
244 			&& !_IsEquationChar(string[0])) {
245 			if (string[0] == '\\' && string[1] != 0)
246 				string++;
247 			string++;
248 		}
249 
250 		// get the attribute string	(and trim whitespace), in case
251 		// the string was not quoted
252 		end = string - 1;
253 		skipWhitespaceReverse(&end, start);
254 	}
255 
256 	// attribute string is empty (which is not allowed)
257 	if (start > end)
258 		return;
259 
260 	// At this point, "start" points to the beginning of the string, "end"
261 	// points to the last character of the string, and "string" points to the
262 	// first character of the equation symbol
263 
264 	// test for the right symbol (as this doesn't need any memory)
265 	switch (*string) {
266 		case '=':
267 			fOp = OP_EQUAL;
268 			break;
269 		case '>':
270 			fOp = *(string + 1) == '='
271 				? OP_GREATER_THAN_OR_EQUAL : OP_GREATER_THAN;
272 			break;
273 		case '<':
274 			fOp = *(string + 1) == '='
275 				? OP_LESS_THAN_OR_EQUAL : OP_LESS_THAN;
276 			break;
277 		case '!':
278 			if (*(string + 1) != '=')
279 				return;
280 			fOp = OP_UNEQUAL;
281 			break;
282 
283 		// any invalid characters will be rejected
284 		default:
285 			*_expression = string;
286 			return;
287 	}
288 
289 	// lets change "start" to point to the first character after the symbol
290 	if (*(string + 1) == '=')
291 		string++;
292 	string++;
293 	skipWhitespace(&string);
294 
295 	// allocate & copy the attribute string
296 
297 	fAttribute = _CopyString(start, end);
298 	if (fAttribute == NULL)
299 		return;
300 
301 	start = string;
302 	if (*start == '"' || *start == '\'') {
303 		// string is quoted (start has to be on the beginning of a string)
304 		if (_ParseQuotedString(&start, &end) < B_OK)
305 			return;
306 
307 		string = end + 2;
308 		skipWhitespace(&string);
309 	} else {
310 		while (string[0] && !_IsOperatorChar(string[0]) && string[0] != ')')
311 			string++;
312 
313 		end = string - 1;
314 		skipWhitespaceReverse(&end, start);
315 	}
316 
317 	// At this point, "start" will point to the first character of the value,
318 	// "end" will point to its last character, and "start" to the first non-
319 	// whitespace character after the value string.
320 
321 	fString = _CopyString(start, end);
322 	if (fString == NULL)
323 		return;
324 
325 	// Patterns are only allowed for these operations (and strings)
326 	if (fOp == OP_EQUAL || fOp == OP_UNEQUAL) {
327 		fIsPattern = isPattern(fString);
328 		if (fIsPattern && isValidPattern(fString) < B_OK) {
329 			// Only valid patterns are allowed; setting fString
330 			// to NULL will cause InitCheck() to fail
331 			free(fString);
332 			fString = NULL;
333 		}
334 	}
335 
336 	// The special time flag is set if the time values are shifted
337 	// 64-bit values to reduce the number of duplicates.
338 	// We have to be able to compare them against unshifted values
339 	// later. The only index which needs this is the last_modified
340 	// index, but we may want to open that feature for other indices,
341 	// too one day.
342 	fIsSpecialTime = !strcmp(fAttribute, "last_modified");
343 
344 	*_expression = string;
345 }
346 
347 
348 Equation::~Equation()
349 {
350 	free(fAttribute);
351 	free(fString);
352 }
353 
354 
355 status_t
356 Equation::InitCheck()
357 {
358 	if (fAttribute == NULL || fString == NULL || fOp == OP_NONE)
359 		return B_BAD_VALUE;
360 
361 	return B_OK;
362 }
363 
364 
365 /*!	Matches the inode's attribute value with the equation.
366 	Returns MATCH_OK if it matches, NO_MATCH if not, < 0 if something went
367 	wrong.
368 */
369 status_t
370 Equation::Match(Inode* inode, const char* attributeName, int32 type,
371 	const uint8* key, size_t size)
372 {
373 	// get a pointer to the attribute in question
374 	NodeGetter nodeGetter(inode->GetVolume());
375 	union value value;
376 	uint8* buffer = (uint8*)&value;
377 	bool locked = false;
378 
379 	// first, check if we are matching for a live query and use that value
380 	if (attributeName != NULL && !strcmp(fAttribute, attributeName)) {
381 		if (key == NULL)
382 			return NO_MATCH;
383 
384 		buffer = const_cast<uint8*>(key);
385 	} else if (!strcmp(fAttribute, "name")) {
386 		// we need to lock before accessing Inode::Name()
387 		nodeGetter.SetToNode(inode);
388 		if (nodeGetter.Node() == NULL)
389 			return B_IO_ERROR;
390 
391 		recursive_lock_lock(&inode->SmallDataLock());
392 		locked = true;
393 
394 		// if not, check for "fake" attributes ("name", "size", "last_modified")
395 		buffer = (uint8*)inode->Name(nodeGetter.Node());
396 		if (buffer == NULL) {
397 			recursive_lock_unlock(&inode->SmallDataLock());
398 			return B_ERROR;
399 		}
400 
401 		type = B_STRING_TYPE;
402 		size = strlen((const char*)buffer);
403 	} else if (!strcmp(fAttribute, "size")) {
404 #ifdef BFS_NATIVE_ENDIAN
405 		buffer = (uint8*)&inode->Node().data.size;
406 #else
407 		value.Int64 = inode->Size();
408 #endif
409 		type = B_INT64_TYPE;
410 	} else if (!strcmp(fAttribute, "last_modified")) {
411 #ifdef BFS_NATIVE_ENDIAN
412 		buffer = (uint8*)&inode->Node().last_modified_time;
413 #else
414 		value.Int64 = inode->Node().LastModifiedTime();
415 #endif
416 		type = B_INT64_TYPE;
417 	} else {
418 		// then for attributes in the small_data section, and finally for the
419 		// real attributes
420 		nodeGetter.SetToNode(inode);
421 		if (nodeGetter.Node() == NULL)
422 			return B_IO_ERROR;
423 
424 		Inode* attribute;
425 
426 		recursive_lock_lock(&inode->SmallDataLock());
427 		small_data* smallData = inode->FindSmallData(nodeGetter.Node(),
428 			fAttribute);
429 		if (smallData != NULL) {
430 			buffer = smallData->Data();
431 			type = smallData->type;
432 			size = smallData->data_size;
433 			locked = true;
434 		} else {
435 			// needed to unlock the small_data section as fast as possible
436 			recursive_lock_unlock(&inode->SmallDataLock());
437 			nodeGetter.Unset();
438 
439 			if (inode->GetAttribute(fAttribute, &attribute) == B_OK) {
440 				buffer = (uint8*)&value;
441 				type = attribute->Type();
442 				size = attribute->Size();
443 
444 				if (size > MAX_INDEX_KEY_LENGTH)
445 					size = MAX_INDEX_KEY_LENGTH;
446 
447 				if (attribute->ReadAt(0, buffer, &size) < B_OK) {
448 					inode->ReleaseAttribute(attribute);
449 					return B_IO_ERROR;
450 				}
451 				inode->ReleaseAttribute(attribute);
452 			} else
453 				return NO_MATCH;
454 		}
455 	}
456 	// prepare own value for use, if it is possible to convert it
457 	status_t status = _ConvertValue(type);
458 	if (status == B_OK)
459 		status = _CompareTo(buffer, size) ? MATCH_OK : NO_MATCH;
460 
461 	if (locked)
462 		recursive_lock_unlock(&inode->SmallDataLock());
463 
464 	RETURN_ERROR(status);
465 }
466 
467 
468 void
469 Equation::Complement()
470 {
471 	D(if (fOp <= OP_EQUATION || fOp > OP_LESS_THAN_OR_EQUAL) {
472 		FATAL(("op out of range!"));
473 		return;
474 	});
475 
476 	int8 complementOp[] = {OP_UNEQUAL, OP_EQUAL, OP_LESS_THAN_OR_EQUAL,
477 			OP_GREATER_THAN_OR_EQUAL, OP_LESS_THAN, OP_GREATER_THAN};
478 	fOp = complementOp[fOp - OP_EQUAL];
479 }
480 
481 
482 status_t
483 Equation::PrepareQuery(Volume* /*volume*/, Index& index,
484 	TreeIterator** iterator, bool queryNonIndexed)
485 {
486 	status_t status = index.SetTo(fAttribute);
487 
488 	// if we should query attributes without an index, we can just proceed here
489 	if (status != B_OK && !queryNonIndexed)
490 		return B_ENTRY_NOT_FOUND;
491 
492 	type_code type;
493 
494 	// Special case for OP_UNEQUAL - it will always operate through the whole
495 	// index but we need the call to the original index to get the correct type
496 	if (status != B_OK || fOp == OP_UNEQUAL) {
497 		// Try to get an index that holds all files (name)
498 		// Also sets the default type for all attributes without index
499 		// to string.
500 		type = status < B_OK ? B_STRING_TYPE : index.Type();
501 
502 		if (index.SetTo("name") != B_OK)
503 			return B_ENTRY_NOT_FOUND;
504 
505 		fHasIndex = false;
506 	} else {
507 		fHasIndex = true;
508 		type = index.Type();
509 	}
510 
511 	if (_ConvertValue(type) < B_OK)
512 		return B_BAD_VALUE;
513 
514 	BPlusTree* tree = index.Node()->Tree();
515 	if (tree == NULL)
516 		return B_ERROR;
517 
518 	*iterator = new(std::nothrow) TreeIterator(tree);
519 	if (*iterator == NULL)
520 		return B_NO_MEMORY;
521 
522 	if ((fOp == OP_EQUAL || fOp == OP_GREATER_THAN
523 			|| fOp == OP_GREATER_THAN_OR_EQUAL || fIsPattern)
524 		&& fHasIndex) {
525 		// set iterator to the exact position
526 
527 		int32 keySize = index.KeySize();
528 
529 		// At this point, fIsPattern is only true if it's a string type, and fOp
530 		// is either OP_EQUAL or OP_UNEQUAL
531 		if (fIsPattern) {
532 			// let's see if we can use the beginning of the key for positioning
533 			// the iterator and adjust the key size; if not, just leave the
534 			// iterator at the start and return success
535 			keySize = getFirstPatternSymbol(fString);
536 			if (keySize <= 0)
537 				return B_OK;
538 		}
539 
540 		if (keySize == 0) {
541 			// B_STRING_TYPE doesn't have a fixed length, so it was set
542 			// to 0 before - we compute the correct value here
543 			if (fType == B_STRING_TYPE) {
544 				keySize = strlen(fValue.String);
545 
546 				// The empty string is a special case - we normally don't check
547 				// for the trailing null byte, in the case for the empty string
548 				// we do it explicitly, because there can't be keys in the
549 				// B+tree with a length of zero
550 				if (keySize == 0)
551 					keySize = 1;
552 			} else
553 				RETURN_ERROR(B_ENTRY_NOT_FOUND);
554 		}
555 
556 		if (fIsSpecialTime) {
557 			// we have to find the first matching shifted value
558 			off_t value = fValue.Int64 << INODE_TIME_SHIFT;
559 			status = (*iterator)->Find((uint8*)&value, keySize);
560 			if (status == B_ENTRY_NOT_FOUND)
561 				return B_OK;
562 		} else {
563 			status = (*iterator)->Find(_Value(), keySize);
564 			if (fOp == OP_EQUAL && !fIsPattern)
565 				return status;
566 			else if (status == B_ENTRY_NOT_FOUND
567 				&& (fIsPattern || fOp == OP_GREATER_THAN
568 					|| fOp == OP_GREATER_THAN_OR_EQUAL))
569 				return B_OK;
570 		}
571 
572 		RETURN_ERROR(status);
573 	}
574 
575 	return B_OK;
576 }
577 
578 
579 status_t
580 Equation::GetNextMatching(Volume* volume, TreeIterator* iterator,
581 	struct dirent* dirent, size_t bufferSize)
582 {
583 	while (true) {
584 		union value indexValue;
585 		uint16 keyLength;
586 		uint16 duplicate;
587 		off_t offset;
588 
589 		status_t status = iterator->GetNextEntry(&indexValue, &keyLength,
590 			(uint16)sizeof(indexValue), &offset, &duplicate);
591 		if (status != B_OK)
592 			return status;
593 
594 		// only compare against the index entry when this is the correct
595 		// index for the equation
596 		if (fHasIndex && duplicate < 2
597 			&& !_CompareTo((uint8*)&indexValue, keyLength)) {
598 			// They aren't equal? Let the operation decide what to do. Since
599 			// we always start at the beginning of the index (or the correct
600 			// position), only some needs to be stopped if the entry doesn't
601 			// fit.
602 			if (fOp == OP_LESS_THAN
603 				|| fOp == OP_LESS_THAN_OR_EQUAL
604 				|| (fOp == OP_EQUAL && !fIsPattern))
605 				return B_ENTRY_NOT_FOUND;
606 
607 			if (duplicate > 0)
608 				iterator->SkipDuplicates();
609 			continue;
610 		}
611 
612 		Vnode vnode(volume, offset);
613 		Inode* inode;
614 		if ((status = vnode.Get(&inode)) != B_OK) {
615 			REPORT_ERROR(status);
616 			FATAL(("could not get inode %" B_PRIdOFF " in index \"%s\"!\n",
617 				offset, fAttribute));
618 			// try with next
619 			continue;
620 		}
621 
622 		// TODO: check user permissions here - but which one?!
623 		// we could filter out all those where we don't have
624 		// read access... (we should check for every parent
625 		// directory if the X_OK is allowed)
626 		// Although it's quite expensive to open all parents,
627 		// it's likely that the application that runs the
628 		// query will do something similar (and we don't have
629 		// to do it for root, either).
630 
631 		// go up in the tree until a &&-operator is found, and check if the
632 		// inode matches with the rest of the expression - we don't have to
633 		// check ||-operators for that
634 		Term* term = this;
635 		status = MATCH_OK;
636 
637 		if (!fHasIndex)
638 			status = Match(inode);
639 
640 		while (term != NULL && status == MATCH_OK) {
641 			Operator* parent = (Operator*)term->Parent();
642 			if (parent == NULL)
643 				break;
644 
645 			if (parent->Op() == OP_AND) {
646 				// choose the other child of the parent
647 				Term* other = parent->Right();
648 				if (other == term)
649 					other = parent->Left();
650 
651 				if (other == NULL) {
652 					FATAL(("&&-operator has only one child... (parent = %p)\n",
653 						parent));
654 					break;
655 				}
656 				status = other->Match(inode);
657 				if (status < 0) {
658 					REPORT_ERROR(status);
659 					status = NO_MATCH;
660 				}
661 			}
662 			term = (Term*)parent;
663 		}
664 
665 		if (status == MATCH_OK) {
666 			dirent->d_dev = volume->ID();
667 			dirent->d_ino = offset;
668 			dirent->d_pdev = volume->ID();
669 			dirent->d_pino = volume->ToVnode(inode->Parent());
670 
671 			if (inode->GetName(dirent->d_name) < B_OK) {
672 				FATAL(("inode %" B_PRIdOFF " in query has no name!\n",
673 					inode->BlockNumber()));
674 			}
675 
676 			dirent->d_reclen = sizeof(struct dirent) + strlen(dirent->d_name);
677 		}
678 
679 		if (status == MATCH_OK)
680 			return B_OK;
681 	}
682 	RETURN_ERROR(B_ERROR);
683 }
684 
685 
686 void
687 Equation::CalculateScore(Index &index)
688 {
689 	// As always, these values could be tuned and refined.
690 	// And the code could also need some real world testing :-)
691 
692 	// do we have to operate on a "foreign" index?
693 	if (fOp == OP_UNEQUAL || index.SetTo(fAttribute) < B_OK) {
694 		fScore = 0;
695 		return;
696 	}
697 
698 	// if we have a pattern, how much does it help our search?
699 	if (fIsPattern)
700 		fScore = getFirstPatternSymbol(fString) << 3;
701 	else {
702 		// Score by operator
703 		if (fOp == OP_EQUAL)
704 			// higher than pattern="255 chars+*"
705 			fScore = 2048;
706 		else
707 			// the pattern search is regarded cheaper when you have at
708 			// least one character to set your index to
709 			fScore = 5;
710 	}
711 
712 	// take index size into account (1024 is the current node size
713 	// in our B+trees)
714 	// 2048 * 2048 == 4194304 is the maximum score (for an empty
715 	// tree, since the header + 1 node are already 2048 bytes)
716 	fScore = fScore * ((2048 * 1024LL) / index.Node()->Size());
717 }
718 
719 
720 status_t
721 Equation::_ParseQuotedString(char** _start, char** _end)
722 {
723 	char* start = *_start;
724 	char quote = *start++;
725 	char* end = start;
726 
727 	for (; *end && *end != quote; end++) {
728 		if (*end == '\\')
729 			end++;
730 	}
731 	if (*end == '\0')
732 		return B_BAD_VALUE;
733 
734 	*_start = start;
735 	*_end = end - 1;
736 
737 	return B_OK;
738 }
739 
740 
741 char*
742 Equation::_CopyString(char* start, char* end)
743 {
744 	// end points to the last character of the string - and the length
745 	// also has to include the null-termination
746 	int32 length = end + 2 - start;
747 	// just to make sure; since that's the max. attribute name length and
748 	// the max. string in an index, it make sense to have it that way
749 	if (length > MAX_INDEX_KEY_LENGTH + 1 || length <= 0)
750 		return NULL;
751 
752 	char* copy = (char*)malloc(length);
753 	if (copy == NULL)
754 		return NULL;
755 
756 	// Filter out remaining escaping slashes
757 	for (int32 i = 0; i < length; i++) {
758 		char c = start++[0];
759 		if (c == '\\' && i < length) {
760 			length--;
761 			i--;
762 			continue;
763 		}
764 		copy[i] = c;
765 	}
766 	copy[length - 1] = '\0';
767 
768 	return copy;
769 }
770 
771 
772 bool
773 Equation::_IsEquationChar(char c) const
774 {
775 	return c == '=' || c == '<' || c == '>' || c == '!';
776 }
777 
778 
779 bool
780 Equation::_IsOperatorChar(char c) const
781 {
782 	return c == '&' || c == '|';
783 }
784 
785 
786 status_t
787 Equation::_ConvertValue(type_code type)
788 {
789 	// Has the type already been converted?
790 	if (type == fType)
791 		return B_OK;
792 
793 	char* string = fString;
794 
795 	switch (type) {
796 		case B_MIME_STRING_TYPE:
797 			type = B_STRING_TYPE;
798 			// supposed to fall through
799 		case B_STRING_TYPE:
800 			strncpy(fValue.String, string, MAX_INDEX_KEY_LENGTH + 1);
801 			fValue.String[MAX_INDEX_KEY_LENGTH] = '\0';
802 			fSize = strlen(fValue.String);
803 			break;
804 		case B_TIME_TYPE:
805 			type = B_INT32_TYPE;
806 			// supposed to fall through
807 		case B_INT32_TYPE:
808 			fValue.Int32 = strtol(string, &string, 0);
809 			fSize = sizeof(int32);
810 			break;
811 		case B_UINT32_TYPE:
812 			fValue.Int32 = strtoul(string, &string, 0);
813 			fSize = sizeof(uint32);
814 			break;
815 		case B_INT64_TYPE:
816 			fValue.Int64 = strtoll(string, &string, 0);
817 			fSize = sizeof(int64);
818 			break;
819 		case B_UINT64_TYPE:
820 			fValue.Uint64 = strtoull(string, &string, 0);
821 			fSize = sizeof(uint64);
822 			break;
823 		case B_FLOAT_TYPE:
824 			fValue.Float = strtod(string, &string);
825 			fSize = sizeof(float);
826 			break;
827 		case B_DOUBLE_TYPE:
828 			fValue.Double = strtod(string, &string);
829 			fSize = sizeof(double);
830 			break;
831 		default:
832 			FATAL(("query value conversion to 0x%x requested!\n", (int)type));
833 			// should we fail here or just do a safety int32 conversion?
834 			return B_ERROR;
835 	}
836 
837 	fType = type;
838 
839 	// patterns are only allowed for string types
840 	if (fType != B_STRING_TYPE && fIsPattern)
841 		fIsPattern = false;
842 
843 	return B_OK;
844 }
845 
846 
847 /*!	Returns true when the key matches the equation. You have to
848 	call ConvertValue() before this one.
849 */
850 bool
851 Equation::_CompareTo(const uint8* value, uint16 size)
852 {
853 	int32 compare;
854 
855 	// fIsPattern is only true if it's a string type, and fOp OP_EQUAL, or
856 	// OP_UNEQUAL
857 	if (fIsPattern) {
858 		// we have already validated the pattern, so we don't check for failing
859 		// here - if something is broken, and matchString() returns an error,
860 		// we just don't match
861 		compare = matchString(fValue.String, (char*)value) == MATCH_OK ? 0 : 1;
862 	} else if (fIsSpecialTime) {
863 		// the index is a shifted int64 index, but we have to match
864 		// against an unshifted value (i.e. the last_modified index)
865 		int64 timeValue = *(int64*)value >> INODE_TIME_SHIFT;
866 		compare = compareKeys(fType, &timeValue, sizeof(int64), &fValue.Int64,
867 			sizeof(int64));
868 	} else
869 		compare = compareKeys(fType, value, size, _Value(), fSize);
870 
871 	switch (fOp) {
872 		case OP_EQUAL:
873 			return compare == 0;
874 		case OP_UNEQUAL:
875 			return compare != 0;
876 		case OP_LESS_THAN:
877 			return compare < 0;
878 		case OP_LESS_THAN_OR_EQUAL:
879 			return compare <= 0;
880 		case OP_GREATER_THAN:
881 			return compare > 0;
882 		case OP_GREATER_THAN_OR_EQUAL:
883 			return compare >= 0;
884 	}
885 	FATAL(("Unknown/Unsupported operation: %d\n", fOp));
886 	return false;
887 }
888 
889 
890 //	#pragma mark -
891 
892 
893 Operator::Operator(Term* left, int8 op, Term* right)
894 	:
895 	Term(op),
896 	fLeft(left),
897 	fRight(right)
898 {
899 	if (left)
900 		left->SetParent(this);
901 	if (right)
902 		right->SetParent(this);
903 }
904 
905 
906 Operator::~Operator()
907 {
908 	delete fLeft;
909 	delete fRight;
910 }
911 
912 
913 status_t
914 Operator::Match(Inode* inode, const char* attribute, int32 type,
915 	const uint8* key, size_t size)
916 {
917 	if (fOp == OP_AND) {
918 		status_t status = fLeft->Match(inode, attribute, type, key, size);
919 		if (status != MATCH_OK)
920 			return status;
921 
922 		return fRight->Match(inode, attribute, type, key, size);
923 	} else {
924 		// choose the term with the better score for OP_OR
925 		Term* first;
926 		Term* second;
927 		if (fRight->Score() > fLeft->Score()) {
928 			first = fLeft;
929 			second = fRight;
930 		} else {
931 			first = fRight;
932 			second = fLeft;
933 		}
934 
935 		status_t status = first->Match(inode, attribute, type, key, size);
936 		if (status != NO_MATCH)
937 			return status;
938 
939 		return second->Match(inode, attribute, type, key, size);
940 	}
941 }
942 
943 
944 void
945 Operator::Complement()
946 {
947 	if (fOp == OP_AND)
948 		fOp = OP_OR;
949 	else
950 		fOp = OP_AND;
951 
952 	fLeft->Complement();
953 	fRight->Complement();
954 }
955 
956 
957 void
958 Operator::CalculateScore(Index &index)
959 {
960 	fLeft->CalculateScore(index);
961 	fRight->CalculateScore(index);
962 }
963 
964 
965 int32
966 Operator::Score() const
967 {
968 	if (fOp == OP_AND) {
969 		// return the one with the better score
970 		if (fRight->Score() > fLeft->Score())
971 			return fRight->Score();
972 
973 		return fLeft->Score();
974 	}
975 
976 	// for OP_OR, be honest, and return the one with the worse score
977 	if (fRight->Score() < fLeft->Score())
978 		return fRight->Score();
979 
980 	return fLeft->Score();
981 }
982 
983 
984 status_t
985 Operator::InitCheck()
986 {
987 	if ((fOp != OP_AND && fOp != OP_OR)
988 		|| fLeft == NULL || fLeft->InitCheck() < B_OK
989 		|| fRight == NULL || fRight->InitCheck() < B_OK)
990 		return B_ERROR;
991 
992 	return B_OK;
993 }
994 
995 
996 #if 0
997 Term*
998 Operator::Copy() const
999 {
1000 	if (fEquation != NULL) {
1001 		Equation* equation = new(std::nothrow) Equation(*fEquation);
1002 		if (equation == NULL)
1003 			return NULL;
1004 
1005 		Term* term = new(std::nothrow) Term(equation);
1006 		if (term == NULL)
1007 			delete equation;
1008 
1009 		return term;
1010 	}
1011 
1012 	Term* left = NULL;
1013 	Term* right = NULL;
1014 
1015 	if (fLeft != NULL && (left = fLeft->Copy()) == NULL)
1016 		return NULL;
1017 	if (fRight != NULL && (right = fRight->Copy()) == NULL) {
1018 		delete left;
1019 		return NULL;
1020 	}
1021 
1022 	Term* term = new(std::nothrow) Term(left, fOp, right);
1023 	if (term == NULL) {
1024 		delete left;
1025 		delete right;
1026 		return NULL;
1027 	}
1028 	return term;
1029 }
1030 #endif
1031 
1032 
1033 //	#pragma mark -
1034 
1035 #ifdef DEBUG
1036 
1037 void
1038 Operator::PrintToStream()
1039 {
1040 	__out("( ");
1041 	if (fLeft != NULL)
1042 		fLeft->PrintToStream();
1043 
1044 	const char* op;
1045 	switch (fOp) {
1046 		case OP_OR: op = "OR"; break;
1047 		case OP_AND: op = "AND"; break;
1048 		default: op = "?"; break;
1049 	}
1050 	__out(" %s ", op);
1051 
1052 	if (fRight != NULL)
1053 		fRight->PrintToStream();
1054 
1055 	__out(" )");
1056 }
1057 
1058 
1059 void
1060 Equation::PrintToStream()
1061 {
1062 	const char* symbol = "???";
1063 	switch (fOp) {
1064 		case OP_EQUAL: symbol = "=="; break;
1065 		case OP_UNEQUAL: symbol = "!="; break;
1066 		case OP_GREATER_THAN: symbol = ">"; break;
1067 		case OP_GREATER_THAN_OR_EQUAL: symbol = ">="; break;
1068 		case OP_LESS_THAN: symbol = "<"; break;
1069 		case OP_LESS_THAN_OR_EQUAL: symbol = "<="; break;
1070 	}
1071 	__out("[\"%s\" %s \"%s\"]", fAttribute, symbol, fString);
1072 }
1073 
1074 #endif	// DEBUG
1075 
1076 //	#pragma mark -
1077 
1078 
1079 Expression::Expression(char* expr)
1080 {
1081 	if (expr == NULL)
1082 		return;
1083 
1084 	fTerm = ParseOr(&expr);
1085 	if (fTerm != NULL && fTerm->InitCheck() < B_OK) {
1086 		FATAL(("Corrupt tree in expression!\n"));
1087 		delete fTerm;
1088 		fTerm = NULL;
1089 	}
1090 	D(if (fTerm != NULL) {
1091 		fTerm->PrintToStream();
1092 		D(__out("\n"));
1093 		if (*expr != '\0')
1094 			PRINT(("Unexpected end of string: \"%s\"!\n", expr));
1095 	});
1096 	fPosition = expr;
1097 }
1098 
1099 
1100 Expression::~Expression()
1101 {
1102 	delete fTerm;
1103 }
1104 
1105 
1106 Term*
1107 Expression::ParseEquation(char** expr)
1108 {
1109 	skipWhitespace(expr);
1110 
1111 	bool _not = false;
1112 	if (**expr == '!') {
1113 		skipWhitespace(expr, 1);
1114 		if (**expr != '(')
1115 			return NULL;
1116 
1117 		_not = true;
1118 	}
1119 
1120 	if (**expr == ')') {
1121 		// shouldn't be handled here
1122 		return NULL;
1123 	} else if (**expr == '(') {
1124 		skipWhitespace(expr, 1);
1125 
1126 		Term* term = ParseOr(expr);
1127 
1128 		skipWhitespace(expr);
1129 
1130 		if (**expr != ')') {
1131 			delete term;
1132 			return NULL;
1133 		}
1134 
1135 		// If the term is negated, we just complement the tree, to get
1136 		// rid of the not, a.k.a. DeMorgan's Law.
1137 		if (_not)
1138 			term->Complement();
1139 
1140 		skipWhitespace(expr, 1);
1141 
1142 		return term;
1143 	}
1144 
1145 	Equation* equation = new(std::nothrow) Equation(expr);
1146 	if (equation == NULL || equation->InitCheck() < B_OK) {
1147 		delete equation;
1148 		return NULL;
1149 	}
1150 	return equation;
1151 }
1152 
1153 
1154 Term*
1155 Expression::ParseAnd(char** expr)
1156 {
1157 	Term* left = ParseEquation(expr);
1158 	if (left == NULL)
1159 		return NULL;
1160 
1161 	while (IsOperator(expr, '&')) {
1162 		Term* right = ParseAnd(expr);
1163 		Term* newParent = NULL;
1164 
1165 		if (right == NULL || (newParent = new(std::nothrow) Operator(left,
1166 				OP_AND, right)) == NULL) {
1167 			delete left;
1168 			delete right;
1169 
1170 			return NULL;
1171 		}
1172 		left = newParent;
1173 	}
1174 
1175 	return left;
1176 }
1177 
1178 
1179 Term*
1180 Expression::ParseOr(char** expr)
1181 {
1182 	Term* left = ParseAnd(expr);
1183 	if (left == NULL)
1184 		return NULL;
1185 
1186 	while (IsOperator(expr, '|')) {
1187 		Term* right = ParseAnd(expr);
1188 		Term* newParent = NULL;
1189 
1190 		if (right == NULL || (newParent = new(std::nothrow) Operator(left,
1191 				OP_OR, right)) == NULL) {
1192 			delete left;
1193 			delete right;
1194 
1195 			return NULL;
1196 		}
1197 		left = newParent;
1198 	}
1199 
1200 	return left;
1201 }
1202 
1203 
1204 bool
1205 Expression::IsOperator(char** expr, char op)
1206 {
1207 	char* string = *expr;
1208 
1209 	if (*string == op && *(string + 1) == op) {
1210 		*expr += 2;
1211 		return true;
1212 	}
1213 	return false;
1214 }
1215 
1216 
1217 status_t
1218 Expression::InitCheck()
1219 {
1220 	if (fTerm == NULL)
1221 		return B_BAD_VALUE;
1222 
1223 	return B_OK;
1224 }
1225 
1226 
1227 //	#pragma mark -
1228 
1229 
1230 Query::Query(Volume* volume, Expression* expression, uint32 flags)
1231 	:
1232 	fVolume(volume),
1233 	fExpression(expression),
1234 	fCurrent(NULL),
1235 	fIterator(NULL),
1236 	fIndex(volume),
1237 	fFlags(flags),
1238 	fPort(-1)
1239 {
1240 	// If the expression has a valid root pointer, the whole tree has
1241 	// already passed the sanity check, so that we don't have to check
1242 	// every pointer
1243 	if (volume == NULL || expression == NULL || expression->Root() == NULL)
1244 		return;
1245 
1246 	// create index on the stack and delete it afterwards
1247 	fExpression->Root()->CalculateScore(fIndex);
1248 	fIndex.Unset();
1249 
1250 	Rewind();
1251 
1252 	if ((fFlags & B_LIVE_QUERY) != 0)
1253 		volume->AddQuery(this);
1254 }
1255 
1256 
1257 Query::~Query()
1258 {
1259 	if ((fFlags & B_LIVE_QUERY) != 0)
1260 		fVolume->RemoveQuery(this);
1261 }
1262 
1263 
1264 status_t
1265 Query::Rewind()
1266 {
1267 	// free previous stuff
1268 
1269 	fStack.MakeEmpty();
1270 
1271 	delete fIterator;
1272 	fIterator = NULL;
1273 	fCurrent = NULL;
1274 
1275 	// put the whole expression on the stack
1276 
1277 	Stack<Term*> stack;
1278 	stack.Push(fExpression->Root());
1279 
1280 	Term* term;
1281 	while (stack.Pop(&term)) {
1282 		if (term->Op() < OP_EQUATION) {
1283 			Operator* op = (Operator*)term;
1284 
1285 			if (op->Op() == OP_OR) {
1286 				stack.Push(op->Left());
1287 				stack.Push(op->Right());
1288 			} else {
1289 				// For OP_AND, we can use the scoring system to decide which
1290 				// path to add
1291 				if (op->Right()->Score() > op->Left()->Score())
1292 					stack.Push(op->Right());
1293 				else
1294 					stack.Push(op->Left());
1295 			}
1296 		} else if (term->Op() == OP_EQUATION
1297 			|| fStack.Push((Equation*)term) != B_OK)
1298 			FATAL(("Unknown term on stack or stack error"));
1299 	}
1300 
1301 	return B_OK;
1302 }
1303 
1304 
1305 status_t
1306 Query::GetNextEntry(struct dirent* dirent, size_t size)
1307 {
1308 	// If we don't have an equation to use yet/anymore, get a new one
1309 	// from the stack
1310 	while (true) {
1311 		if (fIterator == NULL) {
1312 			if (!fStack.Pop(&fCurrent)
1313 				|| fCurrent == NULL)
1314 				return B_ENTRY_NOT_FOUND;
1315 
1316 			status_t status = fCurrent->PrepareQuery(fVolume, fIndex,
1317 				&fIterator, fFlags & B_QUERY_NON_INDEXED);
1318 			if (status == B_ENTRY_NOT_FOUND) {
1319 				// try next equation
1320 				continue;
1321 			}
1322 
1323 			if (status != B_OK)
1324 				return status;
1325 		}
1326 		if (fCurrent == NULL)
1327 			RETURN_ERROR(B_ERROR);
1328 
1329 		status_t status = fCurrent->GetNextMatching(fVolume, fIterator, dirent,
1330 			size);
1331 		if (status != B_OK) {
1332 			delete fIterator;
1333 			fIterator = NULL;
1334 			fCurrent = NULL;
1335 		} else {
1336 			// only return if we have another entry
1337 			return B_OK;
1338 		}
1339 	}
1340 }
1341 
1342 
1343 void
1344 Query::SetLiveMode(port_id port, int32 token)
1345 {
1346 	fPort = port;
1347 	fToken = token;
1348 
1349 	if ((fFlags & B_LIVE_QUERY) == 0) {
1350 		// you can decide at any point to set the live query mode,
1351 		// only live queries have to be updated by attribute changes
1352 		fFlags |= B_LIVE_QUERY;
1353 		fVolume->AddQuery(this);
1354 	}
1355 }
1356 
1357 
1358 void
1359 Query::LiveUpdate(Inode* inode, const char* attribute, int32 type,
1360 	const uint8* oldKey, size_t oldLength, const uint8* newKey,
1361 	size_t newLength)
1362 {
1363 	if (fPort < 0 || fExpression == NULL || attribute == NULL)
1364 		return;
1365 
1366 	// TODO: check if the attribute is part of the query at all...
1367 
1368 	status_t oldStatus = fExpression->Root()->Match(inode, attribute, type,
1369 		oldKey, oldLength);
1370 	status_t newStatus = fExpression->Root()->Match(inode, attribute, type,
1371 		newKey, newLength);
1372 
1373 	bool entryCreated = false;
1374 	bool stillInQuery = false;
1375 
1376 	if (oldStatus != MATCH_OK) {
1377 		if (newStatus != MATCH_OK) {
1378 			// nothing has changed
1379 			return;
1380 		}
1381 		entryCreated = true;
1382 	} else if (newStatus != MATCH_OK) {
1383 		// entry got removed
1384 		entryCreated = false;
1385 	} else if ((fFlags & B_ATTR_CHANGE_NOTIFICATION) != 0) {
1386 		// The entry stays in the query
1387 		stillInQuery = true;
1388 	} else
1389 		return;
1390 
1391 	// we may need to get the name of the inode
1392 
1393 	char nameBuffer[B_FILE_NAME_LENGTH];
1394 	const char* name;
1395 
1396 	if (strcmp(attribute, "name")) {
1397 		if (inode->GetName(nameBuffer) != B_OK)
1398 			nameBuffer[0] = '\0';
1399 		name = nameBuffer;
1400 	} else {
1401 		// a shortcut to prevent having to scan the attribute section
1402 		name = (const char*)newKey;
1403 	}
1404 
1405 	// notify query listeners
1406 
1407 	if (stillInQuery) {
1408 		notify_query_attr_changed(fPort, fToken, fVolume->ID(),
1409 			fVolume->ToVnode(inode->Parent()), name, inode->ID());
1410 	} else if (entryCreated) {
1411 		notify_query_entry_created(fPort, fToken, fVolume->ID(),
1412 			fVolume->ToVnode(inode->Parent()), name, inode->ID());
1413 	} else {
1414 		notify_query_entry_removed(fPort, fToken, fVolume->ID(),
1415 			fVolume->ToVnode(inode->Parent()), name, inode->ID());
1416 	}
1417 }
1418 
1419 
1420 void
1421 Query::LiveUpdateRenameMove(Inode* inode, ino_t oldDirectoryID,
1422 	const char* oldName, size_t oldLength, ino_t newDirectoryID,
1423 	const char* newName, size_t newLength)
1424 {
1425 	if (fPort < 0 || fExpression == NULL)
1426 		return;
1427 
1428 	// TODO: check if the attribute is part of the query at all...
1429 
1430 	status_t oldStatus = fExpression->Root()->Match(inode, "name",
1431 		B_STRING_TYPE, (const uint8*)oldName, oldLength);
1432 	status_t newStatus = fExpression->Root()->Match(inode, "name",
1433 		B_STRING_TYPE, (const uint8*)newName, newLength);
1434 
1435 	if (oldStatus != MATCH_OK || oldStatus != newStatus)
1436 		return;
1437 
1438 	// The entry stays in the query, notify query listeners about the rename
1439 	// or move
1440 
1441 	notify_query_entry_removed(fPort, fToken, fVolume->ID(),
1442 		oldDirectoryID, oldName, inode->ID());
1443 
1444 	notify_query_entry_created(fPort, fToken, fVolume->ID(),
1445 		newDirectoryID, newName, inode->ID());
1446 }
1447