xref: /haiku/src/kits/tracker/TrackerString.cpp (revision 820dca4df6c7bf955c46e8f6521b9408f50b2900)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 #include "TrackerString.h"
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 
40 TrackerString::TrackerString()
41 {
42 }
43 
44 
45 TrackerString::TrackerString(const char* string)
46 	:	BString(string)
47 {
48 }
49 
50 
51 TrackerString::TrackerString(const TrackerString &string)
52 	:	BString(string)
53 {
54 }
55 
56 
57 TrackerString::TrackerString(const char* string, int32 maxLength)
58 	:	BString(string, maxLength)
59 {
60 }
61 
62 
63 TrackerString::~TrackerString()
64 {
65 }
66 
67 
68 bool
69 TrackerString::Matches(const char* string, bool caseSensitivity,
70 	TrackerStringExpressionType expressionType) const
71 {
72 	switch (expressionType) {
73 		default:
74 		case kNone:
75 			return false;
76 
77 		case kStartsWith:
78 			return StartsWith(string, caseSensitivity);
79 
80 		case kEndsWith:
81 			return EndsWith(string, caseSensitivity);
82 
83 		case kContains:
84 			return Contains(string, caseSensitivity);
85 
86 		case kGlobMatch:
87 			return MatchesGlob(string, caseSensitivity);
88 
89 		case kRegexpMatch:
90 			return MatchesRegExp(string, caseSensitivity);
91 	}
92 }
93 
94 
95 bool
96 TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const
97 {
98 	BString patternString(pattern);
99 	BString textString(String());
100 
101 	if (caseSensitivity == false) {
102 		patternString.ToLower();
103 		textString.ToLower();
104 	}
105 
106 	RegExp expression(patternString);
107 
108 	if (expression.InitCheck() != B_OK)
109 		return false;
110 
111 	return expression.Matches(textString);
112 }
113 
114 
115 bool
116 TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const
117 {
118 	return StringMatchesPattern(String(), string, caseSensitivity);
119 }
120 
121 
122 bool
123 TrackerString::EndsWith(const char* string, bool caseSensitivity) const
124 {
125 	// If "string" is longer than "this",
126 	// we should simply return false
127 	int32 position = Length() - (int32)strlen(string);
128 	if (position < 0)
129 		return false;
130 
131 	if (caseSensitivity)
132 		return FindLast(string) == position;
133 	else
134 		return IFindLast(string) == position;
135 }
136 
137 
138 bool
139 TrackerString::StartsWith(const char* string, bool caseSensitivity) const
140 {
141 	if (caseSensitivity)
142 		return FindFirst(string) == 0;
143 	else
144 		return IFindFirst(string) == 0;
145 }
146 
147 
148 bool
149 TrackerString::Contains(const char* string, bool caseSensitivity) const
150 {
151 	if (caseSensitivity)
152 		return FindFirst(string) > -1;
153 	else
154 		return IFindFirst(string) > -1;
155 }
156 
157 
158 // About the ?Find* functions:
159 // The leading star here has been compliance with BString,
160 // simplicity and performance. Therefore unncessary copying
161 // has been avoided, as unncessary function calls.
162 // The copying has been avoided by implementing the
163 // ?Find*(const char*) functions rather than
164 // the ?Find*(TrackerString &) functions.
165 // The function calls has been avoided by
166 // inserting a check on the first character
167 // before calling the str*cmp functions.
168 
169 
170 int32
171 TrackerString::FindFirst(const BString &string) const
172 {
173 	return FindFirst(string.String(), 0);
174 }
175 
176 
177 int32
178 TrackerString::FindFirst(const char* string) const
179 {
180 	return FindFirst(string, 0);
181 }
182 
183 
184 int32
185 TrackerString::FindFirst(const BString &string, int32 fromOffset) const
186 {
187 	return FindFirst(string.String(), fromOffset);
188 }
189 
190 
191 int32
192 TrackerString::FindFirst(const char* string, int32 fromOffset) const
193 {
194 	if (!string)
195 		return -1;
196 
197 	int32 length = Length();
198 	uint32 stringLength = strlen(string);
199 
200 	// The following two checks are required to be compatible
201 	// with BString:
202 	if (length <= 0)
203 		return -1;
204 
205 	if (stringLength == 0)
206 		return fromOffset;
207 
208 	int32 stop = length - static_cast<int32>(stringLength);
209 	int32 start = MAX(0, MIN(fromOffset, stop));
210 	int32 position = -1;
211 
212 	for (int32 i = start; i <= stop; i++) {
213 		if (string[0] == ByteAt(i)) {
214 			// This check is to avoid mute str*cmp() calls. Performance.
215 			if (strncmp(string, String() + i, stringLength) == 0) {
216 				position = i;
217 				break;
218 			}
219 		}
220 	}
221 
222 	return position;
223 }
224 
225 
226 int32
227 TrackerString::FindFirst(char ch) const
228 {
229 	char string[2] = {ch, '\0'};
230 	return FindFirst(string, 0);
231 }
232 
233 
234 int32
235 TrackerString::FindFirst(char ch, int32 fromOffset) const
236 {
237 	char string[2] = {ch, '\0'};
238 	return FindFirst(string, fromOffset);
239 }
240 
241 
242 int32
243 TrackerString::FindLast(const BString &string) const
244 {
245 	return FindLast(string.String(), Length() - 1);
246 }
247 
248 
249 int32
250 TrackerString::FindLast(const char* string) const
251 {
252 	return FindLast(string, Length() - 1);
253 }
254 
255 
256 int32
257 TrackerString::FindLast(const BString &string, int32 beforeOffset) const
258 {
259 	return FindLast(string.String(), beforeOffset);
260 }
261 
262 
263 int32
264 TrackerString::FindLast(const char* string, int32 beforeOffset) const
265 {
266 	if (!string)
267 		return -1;
268 
269 	int32 length = Length();
270 	uint32 stringLength = strlen(string);
271 
272 	// The following two checks are required to be compatible
273 	// with BString:
274 	if (length <= 0)
275 		return -1;
276 
277 	if (stringLength == 0)
278 		return beforeOffset;
279 
280 	int32 start = MIN(beforeOffset,
281 		length - static_cast<int32>(stringLength));
282 	int32 stop = 0;
283 	int32 position = -1;
284 
285 	for (int32 i = start; i >= stop; i--) {
286 		if (string[0] == ByteAt(i)) {
287 			// This check is to avoid mute str*cmp() calls. Performance.
288 			if (strncmp(string, String() + i, stringLength) == 0) {
289 				position = i;
290 				break;
291 			}
292 		}
293 	}
294 
295 	return position;
296 }
297 
298 
299 int32
300 TrackerString::FindLast(char ch) const
301 {
302 	char string[2] = {ch, '\0'};
303 	return FindLast(string, Length() - 1);
304 }
305 
306 
307 int32
308 TrackerString::FindLast(char ch, int32 beforeOffset) const
309 {
310 	char string[2] = {ch, '\0'};
311 	return FindLast(string, beforeOffset);
312 }
313 
314 
315 int32
316 TrackerString::IFindFirst(const BString &string) const
317 {
318 	return IFindFirst(string.String(), 0);
319 }
320 
321 
322 int32
323 TrackerString::IFindFirst(const char* string) const
324 {
325 	return IFindFirst(string, 0);
326 }
327 
328 
329 int32
330 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const
331 {
332 	return IFindFirst(string.String(), fromOffset);
333 }
334 
335 
336 int32
337 TrackerString::IFindFirst(const char* string, int32 fromOffset) const
338 {
339 	if (!string)
340 		return -1;
341 
342 	int32 length = Length();
343 	uint32 stringLength = strlen(string);
344 
345 	// The following two checks are required to be compatible
346 	// with BString:
347 	if (length <= 0)
348 		return -1;
349 
350 	if (stringLength == 0)
351 		return fromOffset;
352 
353 	int32 stop = length - static_cast<int32>(stringLength);
354 	int32 start = MAX(0, MIN(fromOffset, stop));
355 	int32 position = -1;
356 
357 	for (int32 i = start; i <= stop; i++) {
358 		if (tolower(string[0]) == tolower(ByteAt(i))) {
359 			// This check is to avoid mute str*cmp() calls. Performance.
360 			if (strncasecmp(string, String() + i, stringLength) == 0) {
361 				position = i;
362 				break;
363 			}
364 		}
365 	}
366 
367 	return position;
368 }
369 
370 
371 int32
372 TrackerString::IFindLast(const BString &string) const
373 {
374 	return IFindLast(string.String(), Length() - 1);
375 }
376 
377 
378 int32
379 TrackerString::IFindLast(const char* string) const
380 {
381 	return IFindLast(string, Length() - 1);
382 }
383 
384 
385 int32
386 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const
387 {
388 	return IFindLast(string.String(), beforeOffset);
389 }
390 
391 
392 int32
393 TrackerString::IFindLast(const char* string, int32 beforeOffset) const
394 {
395 	if (!string)
396 		return -1;
397 
398 	int32 length = Length();
399 	uint32 stringLength = strlen(string);
400 
401 	// The following two checks are required to be compatible
402 	// with BString:
403 	if (length <= 0)
404 		return -1;
405 
406 	if (stringLength == 0)
407 		return beforeOffset;
408 
409 	int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength));
410 	int32 stop = 0;
411 	int32 position = -1;
412 
413 	for (int32 i = start; i >= stop; i--) {
414 		if (tolower(string[0]) == tolower(ByteAt(i))) {
415 			// This check is to avoid mute str*cmp() calls. Performance.
416 			if (strncasecmp(string, String() + i, stringLength) == 0) {
417 				position = i;
418 				break;
419 			}
420 		}
421 	}
422 
423 	return position;
424 }
425 
426 
427 // MatchesBracketExpression() assumes 'pattern' to point to the
428 // character following the initial '[' in a bracket expression.
429 // The reason is that an encountered '[' will be taken literally.
430 // (Makes it possible to match a '[' with the expression '[[]').
431 bool
432 TrackerString::MatchesBracketExpression(const char* string,
433 	const char* pattern, bool caseSensitivity) const
434 {
435 	bool GlyphMatch = IsStartOfGlyph(string[0]);
436 
437 	if (IsInsideGlyph(string[0]))
438 		return false;
439 
440 	char testChar = ConditionalToLower(string[0], caseSensitivity);
441 	bool match = false;
442 
443 	bool inverse = *pattern == '^' || *pattern == '!';
444 		// We allow both ^ and ! as a initial inverting character.
445 
446 	if (inverse)
447 		pattern++;
448 
449 	while (!match && *pattern != ']' && *pattern != '\0') {
450 		switch (*pattern) {
451 			case '-':
452 			{
453 				char start = ConditionalToLower(*(pattern - 1),
454 					caseSensitivity);
455 				char stop = ConditionalToLower(*(pattern + 1),
456 					caseSensitivity);
457 
458 				if (IsGlyph(start) || IsGlyph(stop))
459 					return false;
460 						// Not a valid range!
461 
462 				if ((islower(start) && islower(stop))
463 					|| (isupper(start) && isupper(stop))
464 					|| (isdigit(start) && isdigit(stop))) {
465 					// Make sure 'start' and 'stop' are of the same type.
466 					match = start <= testChar && testChar <= stop;
467 				} else {
468 					// If no valid range, we've got a syntax error.
469 					return false;
470 				}
471 
472 				break;
473 			}
474 
475 			default:
476 				if (GlyphMatch)
477 					match = UTF8CharsAreEqual(string, pattern);
478 				else
479 					match = CharsAreEqual(testChar, *pattern, caseSensitivity);
480 				break;
481 		}
482 
483 		if (!match) {
484 			pattern++;
485 			if (IsInsideGlyph(pattern[0]))
486 				pattern = MoveToEndOfGlyph(pattern);
487 		}
488 	}
489 
490 	// Consider an unmatched bracket a failure
491 	// (i.e. when detecting a '\0' instead of a ']'.)
492 	if (*pattern == '\0')
493 		return false;
494 
495 	return (match ^ inverse) != 0;
496 }
497 
498 
499 bool
500 TrackerString::StringMatchesPattern(const char* string, const char* pattern,
501 	bool caseSensitivity) const
502 {
503 	// One could do this dynamically, counting the number of *'s,
504 	// but then you have to free them at every exit of this
505 	// function, which is awkward and ugly.
506 	const int32 kWildCardMaximum = 100;
507 	const char* pStorage[kWildCardMaximum];
508 	const char* sStorage[kWildCardMaximum];
509 
510 	int32 patternLevel = 0;
511 
512 	if (string == NULL || pattern == NULL)
513 		return false;
514 
515 	while (*pattern != '\0') {
516 		switch (*pattern) {
517 			case '?':
518 				pattern++;
519 				string++;
520 				if (IsInsideGlyph(string[0]))
521 					string = MoveToEndOfGlyph(string);
522 
523 				break;
524 
525 			case '*':
526 			{
527 				// Collapse any ** and *? constructions:
528 				while (*pattern == '*' || *pattern == '?') {
529 					pattern++;
530 					if (*pattern == '?' && string != '\0') {
531 						string++;
532 						if (IsInsideGlyph(string[0]))
533 							string = MoveToEndOfGlyph(string);
534 					}
535 				}
536 
537 				if (*pattern == '\0') {
538 					// An ending * matches all strings.
539 					return true;
540 				}
541 
542 				bool match = false;
543 				const char* pBefore = pattern - 1;
544 
545 				if (*pattern == '[') {
546 					pattern++;
547 
548 					while (!match && *string != '\0') {
549 						match = MatchesBracketExpression(string++, pattern,
550 							caseSensitivity);
551 					}
552 
553 					while (*pattern != ']' && *pattern != '\0') {
554 						// Skip the rest of the bracket:
555 						pattern++;
556 					}
557 
558 					if (*pattern == '\0') {
559 						// Failure if no closing bracket;
560 						return false;
561 					}
562 				} else {
563 					// No bracket, just one character:
564 					while (!match && *string != '\0') {
565 						if (IsGlyph(string[0]))
566 							match = UTF8CharsAreEqual(string++, pattern);
567 						else {
568 							match = CharsAreEqual(*string++, *pattern,
569 								caseSensitivity);
570 						}
571 					}
572 				}
573 
574 				if (!match)
575 					return false;
576 				else {
577 					pStorage[patternLevel] = pBefore;
578 					if (IsInsideGlyph(string[0]))
579 						string = MoveToEndOfGlyph(string);
580 
581 					sStorage[patternLevel++] = string;
582 					if (patternLevel > kWildCardMaximum)
583 						return false;
584 
585 					pattern++;
586 					if (IsInsideGlyph(pattern[0]))
587 						pattern = MoveToEndOfGlyph(pattern);
588 				}
589 				break;
590 			}
591 
592 			case '[':
593 				pattern++;
594 
595 				if (!MatchesBracketExpression(string, pattern,
596 						caseSensitivity)) {
597 					if (patternLevel > 0) {
598 						pattern = pStorage[--patternLevel];
599 						string = sStorage[patternLevel];
600 					} else
601 						return false;
602 				} else {
603 					// Skip the rest of the bracket:
604 					while (*pattern != ']' && *pattern != '\0')
605 						pattern++;
606 
607 					// Failure if no closing bracket;
608 					if (*pattern == '\0')
609 						return false;
610 
611 					string++;
612 					if (IsInsideGlyph(string[0]))
613 						string = MoveToEndOfGlyph(string);
614 					pattern++;
615 				}
616 				break;
617 
618 			default:
619 			{
620 				bool equal = false;
621 				if (IsGlyph(string[0]))
622 					equal = UTF8CharsAreEqual(string, pattern);
623 				else
624 					equal = CharsAreEqual(*string, *pattern, caseSensitivity);
625 
626 				if (equal) {
627 					pattern++;
628 					if (IsInsideGlyph(pattern[0]))
629 						pattern = MoveToEndOfGlyph(pattern);
630 					string++;
631 					if (IsInsideGlyph(string[0]))
632 						string = MoveToEndOfGlyph(string);
633 				} else if (patternLevel > 0) {
634 						pattern = pStorage[--patternLevel];
635 						string = sStorage[patternLevel];
636 				} else
637 					return false;
638 
639 				break;
640 			}
641 		}
642 
643 		if (*pattern == '\0' && *string != '\0' && patternLevel > 0) {
644 			pattern = pStorage[--patternLevel];
645 			string = sStorage[patternLevel];
646 		}
647 	}
648 
649 	return *string == '\0' && *pattern == '\0';
650 }
651 
652 
653 bool
654 TrackerString::UTF8CharsAreEqual(const char* string1,
655 	const char* string2) const
656 {
657 	const char* s1 = string1;
658 	const char* s2 = string2;
659 
660 	if (IsStartOfGlyph(*s1) && *s1 == *s2) {
661 		s1++;
662 		s2++;
663 
664 		while (IsInsideGlyph(*s1) && *s1 == *s2) {
665 			s1++;
666 			s2++;
667 		}
668 
669 		return !IsInsideGlyph(*s1)
670 			&& !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1);
671 	} else
672 		return false;
673 }
674 
675 
676 const char*
677 TrackerString::MoveToEndOfGlyph(const char* string) const
678 {
679 	const char* ptr = string;
680 
681 	while (IsInsideGlyph(*ptr))
682 		ptr++;
683 
684 	return ptr;
685 }
686 
687 
688 bool
689 TrackerString::IsGlyph(char ch) const
690 {
691 	return (ch & 0x80) == 0x80;
692 }
693 
694 
695 bool
696 TrackerString::IsInsideGlyph(char ch) const
697 {
698 	return (ch & 0xC0) == 0x80;
699 }
700 
701 
702 bool
703 TrackerString::IsStartOfGlyph(char ch) const
704 {
705 	return (ch & 0xC0) == 0xC0;
706 }
707