xref: /haiku/src/kits/tracker/TrackerString.cpp (revision 7d6915b4d08ffe728cd38af02843d5e98ddfe0db)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 #include "TrackerString.h"
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 
41 
42 //	#pragma mark - TrackerString
43 
44 
45 TrackerString::TrackerString()
46 {
47 }
48 
49 
50 TrackerString::TrackerString(const char* string)
51 	:
52 	BString(string)
53 {
54 }
55 
56 
57 TrackerString::TrackerString(const TrackerString &string)
58 	:
59 	BString(string)
60 {
61 }
62 
63 
64 TrackerString::TrackerString(const char* string, int32 maxLength)
65 	:
66 	BString(string, maxLength)
67 {
68 }
69 
70 
71 TrackerString::~TrackerString()
72 {
73 }
74 
75 
76 bool
77 TrackerString::Matches(const char* string, bool caseSensitivity,
78 	TrackerStringExpressionType expressionType) const
79 {
80 	switch (expressionType) {
81 		default:
82 		case kNone:
83 			return false;
84 
85 		case kStartsWith:
86 			return StartsWith(string, caseSensitivity);
87 
88 		case kEndsWith:
89 			return EndsWith(string, caseSensitivity);
90 
91 		case kContains:
92 			return Contains(string, caseSensitivity);
93 
94 		case kGlobMatch:
95 			return MatchesGlob(string, caseSensitivity);
96 
97 		case kRegexpMatch:
98 			return MatchesRegExp(string, caseSensitivity);
99 	}
100 }
101 
102 
103 bool
104 TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const
105 {
106 	BString patternString(pattern);
107 	BString textString(String());
108 
109 	if (caseSensitivity == false) {
110 		patternString.ToLower();
111 		textString.ToLower();
112 	}
113 
114 	RegExp expression(patternString);
115 
116 	if (expression.InitCheck() != B_OK)
117 		return false;
118 
119 	return expression.Matches(textString);
120 }
121 
122 
123 bool
124 TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const
125 {
126 	return StringMatchesPattern(String(), string, caseSensitivity);
127 }
128 
129 
130 bool
131 TrackerString::EndsWith(const char* string, bool caseSensitivity) const
132 {
133 	// If "string" is longer than "this",
134 	// we should simply return false
135 	int32 position = Length() - (int32)strlen(string);
136 	if (position < 0)
137 		return false;
138 
139 	if (caseSensitivity)
140 		return FindLast(string) == position;
141 	else
142 		return IFindLast(string) == position;
143 }
144 
145 
146 bool
147 TrackerString::StartsWith(const char* string, bool caseSensitivity) const
148 {
149 	if (caseSensitivity)
150 		return FindFirst(string) == 0;
151 	else
152 		return IFindFirst(string) == 0;
153 }
154 
155 
156 bool
157 TrackerString::Contains(const char* string, bool caseSensitivity) const
158 {
159 	if (caseSensitivity)
160 		return FindFirst(string) > -1;
161 	else
162 		return IFindFirst(string) > -1;
163 }
164 
165 
166 // About the ?Find* functions:
167 // The leading star here has been compliance with BString,
168 // simplicity and performance. Therefore unncessary copying
169 // has been avoided, as unncessary function calls.
170 // The copying has been avoided by implementing the
171 // ?Find*(const char*) functions rather than
172 // the ?Find*(TrackerString &) functions.
173 // The function calls has been avoided by
174 // inserting a check on the first character
175 // before calling the str*cmp functions.
176 
177 
178 int32
179 TrackerString::FindFirst(const BString &string) const
180 {
181 	return FindFirst(string.String(), 0);
182 }
183 
184 
185 int32
186 TrackerString::FindFirst(const char* string) const
187 {
188 	return FindFirst(string, 0);
189 }
190 
191 
192 int32
193 TrackerString::FindFirst(const BString &string, int32 fromOffset) const
194 {
195 	return FindFirst(string.String(), fromOffset);
196 }
197 
198 
199 int32
200 TrackerString::FindFirst(const char* string, int32 fromOffset) const
201 {
202 	if (string == NULL)
203 		return -1;
204 
205 	int32 length = Length();
206 	uint32 stringLength = strlen(string);
207 
208 	// The following two checks are required to be compatible
209 	// with BString:
210 	if (length <= 0)
211 		return -1;
212 
213 	if (stringLength == 0)
214 		return fromOffset;
215 
216 	int32 stop = length - static_cast<int32>(stringLength);
217 	int32 start = MAX(0, MIN(fromOffset, stop));
218 	int32 position = -1;
219 
220 	for (int32 i = start; i <= stop; i++) {
221 		if (string[0] == ByteAt(i)) {
222 			// This check is to avoid mute str*cmp() calls. Performance.
223 			if (strncmp(string, String() + i, stringLength) == 0) {
224 				position = i;
225 				break;
226 			}
227 		}
228 	}
229 
230 	return position;
231 }
232 
233 
234 int32
235 TrackerString::FindFirst(char ch) const
236 {
237 	char string[2] = {ch, '\0'};
238 	return FindFirst(string, 0);
239 }
240 
241 
242 int32
243 TrackerString::FindFirst(char ch, int32 fromOffset) const
244 {
245 	char string[2] = {ch, '\0'};
246 	return FindFirst(string, fromOffset);
247 }
248 
249 
250 int32
251 TrackerString::FindLast(const BString &string) const
252 {
253 	return FindLast(string.String(), Length() - 1);
254 }
255 
256 
257 int32
258 TrackerString::FindLast(const char* string) const
259 {
260 	return FindLast(string, Length() - 1);
261 }
262 
263 
264 int32
265 TrackerString::FindLast(const BString &string, int32 beforeOffset) const
266 {
267 	return FindLast(string.String(), beforeOffset);
268 }
269 
270 
271 int32
272 TrackerString::FindLast(const char* string, int32 beforeOffset) const
273 {
274 	if (string == NULL)
275 		return -1;
276 
277 	int32 length = Length();
278 	uint32 stringLength = strlen(string);
279 
280 	// The following two checks are required to be compatible
281 	// with BString:
282 	if (length <= 0)
283 		return -1;
284 
285 	if (stringLength == 0)
286 		return beforeOffset;
287 
288 	int32 start = MIN(beforeOffset,
289 		length - static_cast<int32>(stringLength));
290 	int32 stop = 0;
291 	int32 position = -1;
292 
293 	for (int32 i = start; i >= stop; i--) {
294 		if (string[0] == ByteAt(i)) {
295 			// This check is to avoid mute str*cmp() calls. Performance.
296 			if (strncmp(string, String() + i, stringLength) == 0) {
297 				position = i;
298 				break;
299 			}
300 		}
301 	}
302 
303 	return position;
304 }
305 
306 
307 int32
308 TrackerString::FindLast(char ch) const
309 {
310 	char string[2] = {ch, '\0'};
311 	return FindLast(string, Length() - 1);
312 }
313 
314 
315 int32
316 TrackerString::FindLast(char ch, int32 beforeOffset) const
317 {
318 	char string[2] = {ch, '\0'};
319 	return FindLast(string, beforeOffset);
320 }
321 
322 
323 int32
324 TrackerString::IFindFirst(const BString &string) const
325 {
326 	return IFindFirst(string.String(), 0);
327 }
328 
329 
330 int32
331 TrackerString::IFindFirst(const char* string) const
332 {
333 	return IFindFirst(string, 0);
334 }
335 
336 
337 int32
338 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const
339 {
340 	return IFindFirst(string.String(), fromOffset);
341 }
342 
343 
344 int32
345 TrackerString::IFindFirst(const char* string, int32 fromOffset) const
346 {
347 	if (string == NULL)
348 		return -1;
349 
350 	int32 length = Length();
351 	uint32 stringLength = strlen(string);
352 
353 	// The following two checks are required to be compatible
354 	// with BString:
355 	if (length <= 0)
356 		return -1;
357 
358 	if (stringLength == 0)
359 		return fromOffset;
360 
361 	int32 stop = length - static_cast<int32>(stringLength);
362 	int32 start = MAX(0, MIN(fromOffset, stop));
363 	int32 position = -1;
364 
365 	for (int32 i = start; i <= stop; i++) {
366 		if (tolower(string[0]) == tolower(ByteAt(i))) {
367 			// This check is to avoid mute str*cmp() calls. Performance.
368 			if (strncasecmp(string, String() + i, stringLength) == 0) {
369 				position = i;
370 				break;
371 			}
372 		}
373 	}
374 
375 	return position;
376 }
377 
378 
379 int32
380 TrackerString::IFindLast(const BString &string) const
381 {
382 	return IFindLast(string.String(), Length() - 1);
383 }
384 
385 
386 int32
387 TrackerString::IFindLast(const char* string) const
388 {
389 	return IFindLast(string, Length() - 1);
390 }
391 
392 
393 int32
394 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const
395 {
396 	return IFindLast(string.String(), beforeOffset);
397 }
398 
399 
400 int32
401 TrackerString::IFindLast(const char* string, int32 beforeOffset) const
402 {
403 	if (string == NULL)
404 		return -1;
405 
406 	int32 length = Length();
407 	uint32 stringLength = strlen(string);
408 
409 	// The following two checks are required to be compatible
410 	// with BString:
411 	if (length <= 0)
412 		return -1;
413 
414 	if (stringLength == 0)
415 		return beforeOffset;
416 
417 	int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength));
418 	int32 stop = 0;
419 	int32 position = -1;
420 
421 	for (int32 i = start; i >= stop; i--) {
422 		if (tolower(string[0]) == tolower(ByteAt(i))) {
423 			// This check is to avoid mute str*cmp() calls. Performance.
424 			if (strncasecmp(string, String() + i, stringLength) == 0) {
425 				position = i;
426 				break;
427 			}
428 		}
429 	}
430 
431 	return position;
432 }
433 
434 
435 // MatchesBracketExpression() assumes 'pattern' to point to the
436 // character following the initial '[' in a bracket expression.
437 // The reason is that an encountered '[' will be taken literally.
438 // (Makes it possible to match a '[' with the expression '[[]').
439 bool
440 TrackerString::MatchesBracketExpression(const char* string,
441 	const char* pattern, bool caseSensitivity) const
442 {
443 	bool GlyphMatch = IsStartOfGlyph(string[0]);
444 
445 	if (IsInsideGlyph(string[0]))
446 		return false;
447 
448 	char testChar = ConditionalToLower(string[0], caseSensitivity);
449 	bool match = false;
450 
451 	bool inverse = *pattern == '^' || *pattern == '!';
452 		// We allow both ^ and ! as a initial inverting character.
453 
454 	if (inverse)
455 		pattern++;
456 
457 	while (!match && *pattern != ']' && *pattern != '\0') {
458 		switch (*pattern) {
459 			case '-':
460 			{
461 				char start = ConditionalToLower(*(pattern - 1),
462 					caseSensitivity);
463 				char stop = ConditionalToLower(*(pattern + 1),
464 					caseSensitivity);
465 
466 				if (IsGlyph(start) || IsGlyph(stop))
467 					return false;
468 						// Not a valid range!
469 
470 				if ((islower(start) && islower(stop))
471 					|| (isupper(start) && isupper(stop))
472 					|| (isdigit(start) && isdigit(stop))) {
473 					// Make sure 'start' and 'stop' are of the same type.
474 					match = start <= testChar && testChar <= stop;
475 				} else {
476 					// If no valid range, we've got a syntax error.
477 					return false;
478 				}
479 
480 				break;
481 			}
482 
483 			default:
484 				if (GlyphMatch)
485 					match = UTF8CharsAreEqual(string, pattern);
486 				else
487 					match = CharsAreEqual(testChar, *pattern, caseSensitivity);
488 				break;
489 		}
490 
491 		if (!match) {
492 			pattern++;
493 			if (IsInsideGlyph(pattern[0]))
494 				pattern = MoveToEndOfGlyph(pattern);
495 		}
496 	}
497 
498 	// Consider an unmatched bracket a failure
499 	// (i.e. when detecting a '\0' instead of a ']'.)
500 	if (*pattern == '\0')
501 		return false;
502 
503 	return (match ^ inverse) != 0;
504 }
505 
506 
507 bool
508 TrackerString::StringMatchesPattern(const char* string, const char* pattern,
509 	bool caseSensitivity) const
510 {
511 	// One could do this dynamically, counting the number of *'s,
512 	// but then you have to free them at every exit of this
513 	// function, which is awkward and ugly.
514 	const int32 kWildCardMaximum = 100;
515 	const char* pStorage[kWildCardMaximum];
516 	const char* sStorage[kWildCardMaximum];
517 
518 	int32 patternLevel = 0;
519 
520 	if (string == NULL || pattern == NULL)
521 		return false;
522 
523 	while (*pattern != '\0') {
524 		switch (*pattern) {
525 			case '?':
526 				pattern++;
527 				string++;
528 				if (IsInsideGlyph(string[0]))
529 					string = MoveToEndOfGlyph(string);
530 				break;
531 
532 			case '*':
533 			{
534 				// Collapse any ** and *? constructions:
535 				while (*pattern == '*' || *pattern == '?') {
536 					pattern++;
537 					if (*pattern == '?' && string != '\0') {
538 						string++;
539 						if (IsInsideGlyph(string[0]))
540 							string = MoveToEndOfGlyph(string);
541 					}
542 				}
543 
544 				if (*pattern == '\0') {
545 					// An ending * matches all strings.
546 					return true;
547 				}
548 
549 				bool match = false;
550 				const char* pBefore = pattern - 1;
551 
552 				if (*pattern == '[') {
553 					pattern++;
554 
555 					while (!match && *string != '\0') {
556 						match = MatchesBracketExpression(string++, pattern,
557 							caseSensitivity);
558 					}
559 
560 					while (*pattern != ']' && *pattern != '\0') {
561 						// Skip the rest of the bracket:
562 						pattern++;
563 					}
564 
565 					if (*pattern == '\0') {
566 						// Failure if no closing bracket;
567 						return false;
568 					}
569 				} else {
570 					// No bracket, just one character:
571 					while (!match && *string != '\0') {
572 						if (IsGlyph(string[0]))
573 							match = UTF8CharsAreEqual(string++, pattern);
574 						else {
575 							match = CharsAreEqual(*string++, *pattern,
576 								caseSensitivity);
577 						}
578 					}
579 				}
580 
581 				if (!match)
582 					return false;
583 				else {
584 					pStorage[patternLevel] = pBefore;
585 					if (IsInsideGlyph(string[0]))
586 						string = MoveToEndOfGlyph(string);
587 
588 					sStorage[patternLevel++] = string;
589 					if (patternLevel > kWildCardMaximum)
590 						return false;
591 
592 					pattern++;
593 					if (IsInsideGlyph(pattern[0]))
594 						pattern = MoveToEndOfGlyph(pattern);
595 				}
596 				break;
597 			}
598 
599 			case '[':
600 				pattern++;
601 
602 				if (!MatchesBracketExpression(string, pattern,
603 						caseSensitivity)) {
604 					if (patternLevel > 0) {
605 						pattern = pStorage[--patternLevel];
606 						string = sStorage[patternLevel];
607 					} else
608 						return false;
609 				} else {
610 					// Skip the rest of the bracket:
611 					while (*pattern != ']' && *pattern != '\0')
612 						pattern++;
613 
614 					// Failure if no closing bracket;
615 					if (*pattern == '\0')
616 						return false;
617 
618 					string++;
619 					if (IsInsideGlyph(string[0]))
620 						string = MoveToEndOfGlyph(string);
621 					pattern++;
622 				}
623 				break;
624 
625 			default:
626 			{
627 				bool equal = false;
628 				if (IsGlyph(string[0]))
629 					equal = UTF8CharsAreEqual(string, pattern);
630 				else
631 					equal = CharsAreEqual(*string, *pattern, caseSensitivity);
632 
633 				if (equal) {
634 					pattern++;
635 					if (IsInsideGlyph(pattern[0]))
636 						pattern = MoveToEndOfGlyph(pattern);
637 					string++;
638 					if (IsInsideGlyph(string[0]))
639 						string = MoveToEndOfGlyph(string);
640 				} else if (patternLevel > 0) {
641 						pattern = pStorage[--patternLevel];
642 						string = sStorage[patternLevel];
643 				} else
644 					return false;
645 				break;
646 			}
647 		}
648 
649 		if (*pattern == '\0' && *string != '\0' && patternLevel > 0) {
650 			pattern = pStorage[--patternLevel];
651 			string = sStorage[patternLevel];
652 		}
653 	}
654 
655 	return *string == '\0' && *pattern == '\0';
656 }
657 
658 
659 bool
660 TrackerString::UTF8CharsAreEqual(const char* string1,
661 	const char* string2) const
662 {
663 	const char* s1 = string1;
664 	const char* s2 = string2;
665 
666 	if (IsStartOfGlyph(*s1) && *s1 == *s2) {
667 		s1++;
668 		s2++;
669 
670 		while (IsInsideGlyph(*s1) && *s1 == *s2) {
671 			s1++;
672 			s2++;
673 		}
674 
675 		return !IsInsideGlyph(*s1)
676 			&& !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1);
677 	} else
678 		return false;
679 }
680 
681 
682 const char*
683 TrackerString::MoveToEndOfGlyph(const char* string) const
684 {
685 	const char* ptr = string;
686 
687 	while (IsInsideGlyph(*ptr))
688 		ptr++;
689 
690 	return ptr;
691 }
692 
693 
694 bool
695 TrackerString::IsGlyph(char ch) const
696 {
697 	return (ch & 0x80) == 0x80;
698 }
699 
700 
701 bool
702 TrackerString::IsInsideGlyph(char ch) const
703 {
704 	return (ch & 0xC0) == 0x80;
705 }
706 
707 
708 bool
709 TrackerString::IsStartOfGlyph(char ch) const
710 {
711 	return (ch & 0xC0) == 0xC0;
712 }
713