xref: /haiku/src/kits/tracker/TrackerString.cpp (revision 22b073d41fb677ebe56994a1587f6caabea84441)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 #include "TrackerString.h"
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 
40 
41 //	#pragma mark - TrackerString
42 
43 
44 TrackerString::TrackerString()
45 {
46 }
47 
48 
49 TrackerString::TrackerString(const char* string)
50 	:
51 	BString(string)
52 {
53 }
54 
55 
56 TrackerString::TrackerString(const TrackerString &string)
57 	:
58 	BString(string)
59 {
60 }
61 
62 
63 TrackerString::TrackerString(const char* string, int32 maxLength)
64 	:
65 	BString(string, maxLength)
66 {
67 }
68 
69 
70 TrackerString::~TrackerString()
71 {
72 }
73 
74 
75 bool
76 TrackerString::Matches(const char* string, bool caseSensitivity,
77 	TrackerStringExpressionType expressionType) const
78 {
79 	switch (expressionType) {
80 		default:
81 		case kNone:
82 			return false;
83 
84 		case kStartsWith:
85 			return StartsWith(string, caseSensitivity);
86 
87 		case kEndsWith:
88 			return EndsWith(string, caseSensitivity);
89 
90 		case kContains:
91 			return Contains(string, caseSensitivity);
92 
93 		case kGlobMatch:
94 			return MatchesGlob(string, caseSensitivity);
95 
96 		case kRegexpMatch:
97 			return MatchesRegExp(string, caseSensitivity);
98 	}
99 }
100 
101 
102 bool
103 TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const
104 {
105 	BString patternString(pattern);
106 	BString textString(String());
107 
108 	if (caseSensitivity == false) {
109 		patternString.ToLower();
110 		textString.ToLower();
111 	}
112 
113 	RegExp expression(patternString);
114 
115 	if (expression.InitCheck() != B_OK)
116 		return false;
117 
118 	return expression.Matches(textString);
119 }
120 
121 
122 bool
123 TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const
124 {
125 	return StringMatchesPattern(String(), string, caseSensitivity);
126 }
127 
128 
129 bool
130 TrackerString::EndsWith(const char* string, bool caseSensitivity) const
131 {
132 	// If "string" is longer than "this",
133 	// we should simply return false
134 	int32 position = Length() - (int32)strlen(string);
135 	if (position < 0)
136 		return false;
137 
138 	if (caseSensitivity)
139 		return FindLast(string) == position;
140 	else
141 		return IFindLast(string) == position;
142 }
143 
144 
145 bool
146 TrackerString::StartsWith(const char* string, bool caseSensitivity) const
147 {
148 	if (caseSensitivity)
149 		return FindFirst(string) == 0;
150 	else
151 		return IFindFirst(string) == 0;
152 }
153 
154 
155 bool
156 TrackerString::Contains(const char* string, bool caseSensitivity) const
157 {
158 	if (caseSensitivity)
159 		return FindFirst(string) > -1;
160 	else
161 		return IFindFirst(string) > -1;
162 }
163 
164 
165 // About the ?Find* functions:
166 // The leading star here has been compliance with BString,
167 // simplicity and performance. Therefore unncessary copying
168 // has been avoided, as unncessary function calls.
169 // The copying has been avoided by implementing the
170 // ?Find*(const char*) functions rather than
171 // the ?Find*(TrackerString &) functions.
172 // The function calls has been avoided by
173 // inserting a check on the first character
174 // before calling the str*cmp functions.
175 
176 
177 int32
178 TrackerString::FindFirst(const BString &string) const
179 {
180 	return FindFirst(string.String(), 0);
181 }
182 
183 
184 int32
185 TrackerString::FindFirst(const char* string) const
186 {
187 	return FindFirst(string, 0);
188 }
189 
190 
191 int32
192 TrackerString::FindFirst(const BString &string, int32 fromOffset) const
193 {
194 	return FindFirst(string.String(), fromOffset);
195 }
196 
197 
198 int32
199 TrackerString::FindFirst(const char* string, int32 fromOffset) const
200 {
201 	if (string == NULL)
202 		return -1;
203 
204 	int32 length = Length();
205 	uint32 stringLength = strlen(string);
206 
207 	// The following two checks are required to be compatible
208 	// with BString:
209 	if (length <= 0)
210 		return -1;
211 
212 	if (stringLength == 0)
213 		return fromOffset;
214 
215 	int32 stop = length - static_cast<int32>(stringLength);
216 	int32 start = MAX(0, MIN(fromOffset, stop));
217 	int32 position = -1;
218 
219 	for (int32 i = start; i <= stop; i++) {
220 		if (string[0] == ByteAt(i)) {
221 			// This check is to avoid mute str*cmp() calls. Performance.
222 			if (strncmp(string, String() + i, stringLength) == 0) {
223 				position = i;
224 				break;
225 			}
226 		}
227 	}
228 
229 	return position;
230 }
231 
232 
233 int32
234 TrackerString::FindFirst(char ch) const
235 {
236 	char string[2] = {ch, '\0'};
237 	return FindFirst(string, 0);
238 }
239 
240 
241 int32
242 TrackerString::FindFirst(char ch, int32 fromOffset) const
243 {
244 	char string[2] = {ch, '\0'};
245 	return FindFirst(string, fromOffset);
246 }
247 
248 
249 int32
250 TrackerString::FindLast(const BString &string) const
251 {
252 	return FindLast(string.String(), Length() - 1);
253 }
254 
255 
256 int32
257 TrackerString::FindLast(const char* string) const
258 {
259 	return FindLast(string, Length() - 1);
260 }
261 
262 
263 int32
264 TrackerString::FindLast(const BString &string, int32 beforeOffset) const
265 {
266 	return FindLast(string.String(), beforeOffset);
267 }
268 
269 
270 int32
271 TrackerString::FindLast(const char* string, int32 beforeOffset) const
272 {
273 	if (string == NULL)
274 		return -1;
275 
276 	int32 length = Length();
277 	uint32 stringLength = strlen(string);
278 
279 	// The following two checks are required to be compatible
280 	// with BString:
281 	if (length <= 0)
282 		return -1;
283 
284 	if (stringLength == 0)
285 		return beforeOffset;
286 
287 	int32 start = MIN(beforeOffset,
288 		length - static_cast<int32>(stringLength));
289 	int32 stop = 0;
290 	int32 position = -1;
291 
292 	for (int32 i = start; i >= stop; i--) {
293 		if (string[0] == ByteAt(i)) {
294 			// This check is to avoid mute str*cmp() calls. Performance.
295 			if (strncmp(string, String() + i, stringLength) == 0) {
296 				position = i;
297 				break;
298 			}
299 		}
300 	}
301 
302 	return position;
303 }
304 
305 
306 int32
307 TrackerString::FindLast(char ch) const
308 {
309 	char string[2] = {ch, '\0'};
310 	return FindLast(string, Length() - 1);
311 }
312 
313 
314 int32
315 TrackerString::FindLast(char ch, int32 beforeOffset) const
316 {
317 	char string[2] = {ch, '\0'};
318 	return FindLast(string, beforeOffset);
319 }
320 
321 
322 int32
323 TrackerString::IFindFirst(const BString &string) const
324 {
325 	return IFindFirst(string.String(), 0);
326 }
327 
328 
329 int32
330 TrackerString::IFindFirst(const char* string) const
331 {
332 	return IFindFirst(string, 0);
333 }
334 
335 
336 int32
337 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const
338 {
339 	return IFindFirst(string.String(), fromOffset);
340 }
341 
342 
343 int32
344 TrackerString::IFindFirst(const char* string, int32 fromOffset) const
345 {
346 	if (string == NULL)
347 		return -1;
348 
349 	int32 length = Length();
350 	uint32 stringLength = strlen(string);
351 
352 	// The following two checks are required to be compatible
353 	// with BString:
354 	if (length <= 0)
355 		return -1;
356 
357 	if (stringLength == 0)
358 		return fromOffset;
359 
360 	int32 stop = length - static_cast<int32>(stringLength);
361 	int32 start = MAX(0, MIN(fromOffset, stop));
362 	int32 position = -1;
363 
364 	for (int32 i = start; i <= stop; i++) {
365 		if (tolower(string[0]) == tolower(ByteAt(i))) {
366 			// This check is to avoid mute str*cmp() calls. Performance.
367 			if (strncasecmp(string, String() + i, stringLength) == 0) {
368 				position = i;
369 				break;
370 			}
371 		}
372 	}
373 
374 	return position;
375 }
376 
377 
378 int32
379 TrackerString::IFindLast(const BString &string) const
380 {
381 	return IFindLast(string.String(), Length() - 1);
382 }
383 
384 
385 int32
386 TrackerString::IFindLast(const char* string) const
387 {
388 	return IFindLast(string, Length() - 1);
389 }
390 
391 
392 int32
393 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const
394 {
395 	return IFindLast(string.String(), beforeOffset);
396 }
397 
398 
399 int32
400 TrackerString::IFindLast(const char* string, int32 beforeOffset) const
401 {
402 	if (string == NULL)
403 		return -1;
404 
405 	int32 length = Length();
406 	uint32 stringLength = strlen(string);
407 
408 	// The following two checks are required to be compatible
409 	// with BString:
410 	if (length <= 0)
411 		return -1;
412 
413 	if (stringLength == 0)
414 		return beforeOffset;
415 
416 	int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength));
417 	int32 stop = 0;
418 	int32 position = -1;
419 
420 	for (int32 i = start; i >= stop; i--) {
421 		if (tolower(string[0]) == tolower(ByteAt(i))) {
422 			// This check is to avoid mute str*cmp() calls. Performance.
423 			if (strncasecmp(string, String() + i, stringLength) == 0) {
424 				position = i;
425 				break;
426 			}
427 		}
428 	}
429 
430 	return position;
431 }
432 
433 
434 // MatchesBracketExpression() assumes 'pattern' to point to the
435 // character following the initial '[' in a bracket expression.
436 // The reason is that an encountered '[' will be taken literally.
437 // (Makes it possible to match a '[' with the expression '[[]').
438 bool
439 TrackerString::MatchesBracketExpression(const char* string,
440 	const char* pattern, bool caseSensitivity) const
441 {
442 	bool GlyphMatch = IsStartOfGlyph(string[0]);
443 
444 	if (IsInsideGlyph(string[0]))
445 		return false;
446 
447 	char testChar = ConditionalToLower(string[0], caseSensitivity);
448 	bool match = false;
449 
450 	bool inverse = *pattern == '^' || *pattern == '!';
451 		// We allow both ^ and ! as a initial inverting character.
452 
453 	if (inverse)
454 		pattern++;
455 
456 	while (!match && *pattern != ']' && *pattern != '\0') {
457 		switch (*pattern) {
458 			case '-':
459 			{
460 				char start = ConditionalToLower(*(pattern - 1),
461 					caseSensitivity);
462 				char stop = ConditionalToLower(*(pattern + 1),
463 					caseSensitivity);
464 
465 				if (IsGlyph(start) || IsGlyph(stop))
466 					return false;
467 						// Not a valid range!
468 
469 				if ((islower(start) && islower(stop))
470 					|| (isupper(start) && isupper(stop))
471 					|| (isdigit(start) && isdigit(stop))) {
472 					// Make sure 'start' and 'stop' are of the same type.
473 					match = start <= testChar && testChar <= stop;
474 				} else {
475 					// If no valid range, we've got a syntax error.
476 					return false;
477 				}
478 
479 				break;
480 			}
481 
482 			default:
483 				if (GlyphMatch)
484 					match = UTF8CharsAreEqual(string, pattern);
485 				else
486 					match = CharsAreEqual(testChar, *pattern, caseSensitivity);
487 				break;
488 		}
489 
490 		if (!match) {
491 			pattern++;
492 			if (IsInsideGlyph(pattern[0]))
493 				pattern = MoveToEndOfGlyph(pattern);
494 		}
495 	}
496 
497 	// Consider an unmatched bracket a failure
498 	// (i.e. when detecting a '\0' instead of a ']'.)
499 	if (*pattern == '\0')
500 		return false;
501 
502 	return (match ^ inverse) != 0;
503 }
504 
505 
506 bool
507 TrackerString::StringMatchesPattern(const char* string, const char* pattern,
508 	bool caseSensitivity) const
509 {
510 	// One could do this dynamically, counting the number of *'s,
511 	// but then you have to free them at every exit of this
512 	// function, which is awkward and ugly.
513 	const int32 kWildCardMaximum = 100;
514 	const char* pStorage[kWildCardMaximum];
515 	const char* sStorage[kWildCardMaximum];
516 
517 	int32 patternLevel = 0;
518 
519 	if (string == NULL || pattern == NULL)
520 		return false;
521 
522 	while (*pattern != '\0') {
523 		switch (*pattern) {
524 			case '?':
525 				pattern++;
526 				string++;
527 				if (IsInsideGlyph(string[0]))
528 					string = MoveToEndOfGlyph(string);
529 				break;
530 
531 			case '*':
532 			{
533 				// Collapse any ** and *? constructions:
534 				while (*pattern == '*' || *pattern == '?') {
535 					pattern++;
536 					if (*pattern == '?' && string != '\0') {
537 						string++;
538 						if (IsInsideGlyph(string[0]))
539 							string = MoveToEndOfGlyph(string);
540 					}
541 				}
542 
543 				if (*pattern == '\0') {
544 					// An ending * matches all strings.
545 					return true;
546 				}
547 
548 				bool match = false;
549 				const char* pBefore = pattern - 1;
550 
551 				if (*pattern == '[') {
552 					pattern++;
553 
554 					while (!match && *string != '\0') {
555 						match = MatchesBracketExpression(string++, pattern,
556 							caseSensitivity);
557 					}
558 
559 					while (*pattern != ']' && *pattern != '\0') {
560 						// Skip the rest of the bracket:
561 						pattern++;
562 					}
563 
564 					if (*pattern == '\0') {
565 						// Failure if no closing bracket;
566 						return false;
567 					}
568 				} else {
569 					// No bracket, just one character:
570 					while (!match && *string != '\0') {
571 						if (IsGlyph(string[0]))
572 							match = UTF8CharsAreEqual(string++, pattern);
573 						else {
574 							match = CharsAreEqual(*string++, *pattern,
575 								caseSensitivity);
576 						}
577 					}
578 				}
579 
580 				if (!match)
581 					return false;
582 				else {
583 					pStorage[patternLevel] = pBefore;
584 					if (IsInsideGlyph(string[0]))
585 						string = MoveToEndOfGlyph(string);
586 
587 					sStorage[patternLevel++] = string;
588 					if (patternLevel > kWildCardMaximum)
589 						return false;
590 
591 					pattern++;
592 					if (IsInsideGlyph(pattern[0]))
593 						pattern = MoveToEndOfGlyph(pattern);
594 				}
595 				break;
596 			}
597 
598 			case '[':
599 				pattern++;
600 
601 				if (!MatchesBracketExpression(string, pattern,
602 						caseSensitivity)) {
603 					if (patternLevel > 0) {
604 						pattern = pStorage[--patternLevel];
605 						string = sStorage[patternLevel];
606 					} else
607 						return false;
608 				} else {
609 					// Skip the rest of the bracket:
610 					while (*pattern != ']' && *pattern != '\0')
611 						pattern++;
612 
613 					// Failure if no closing bracket;
614 					if (*pattern == '\0')
615 						return false;
616 
617 					string++;
618 					if (IsInsideGlyph(string[0]))
619 						string = MoveToEndOfGlyph(string);
620 					pattern++;
621 				}
622 				break;
623 
624 			default:
625 			{
626 				bool equal = false;
627 				if (IsGlyph(string[0]))
628 					equal = UTF8CharsAreEqual(string, pattern);
629 				else
630 					equal = CharsAreEqual(*string, *pattern, caseSensitivity);
631 
632 				if (equal) {
633 					pattern++;
634 					if (IsInsideGlyph(pattern[0]))
635 						pattern = MoveToEndOfGlyph(pattern);
636 					string++;
637 					if (IsInsideGlyph(string[0]))
638 						string = MoveToEndOfGlyph(string);
639 				} else if (patternLevel > 0) {
640 						pattern = pStorage[--patternLevel];
641 						string = sStorage[patternLevel];
642 				} else
643 					return false;
644 				break;
645 			}
646 		}
647 
648 		if (*pattern == '\0' && *string != '\0' && patternLevel > 0) {
649 			pattern = pStorage[--patternLevel];
650 			string = sStorage[patternLevel];
651 		}
652 	}
653 
654 	return *string == '\0' && *pattern == '\0';
655 }
656 
657 
658 bool
659 TrackerString::UTF8CharsAreEqual(const char* string1,
660 	const char* string2) const
661 {
662 	const char* s1 = string1;
663 	const char* s2 = string2;
664 
665 	if (IsStartOfGlyph(*s1) && *s1 == *s2) {
666 		s1++;
667 		s2++;
668 
669 		while (IsInsideGlyph(*s1) && *s1 == *s2) {
670 			s1++;
671 			s2++;
672 		}
673 
674 		return !IsInsideGlyph(*s1)
675 			&& !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1);
676 	} else
677 		return false;
678 }
679 
680 
681 const char*
682 TrackerString::MoveToEndOfGlyph(const char* string) const
683 {
684 	const char* ptr = string;
685 
686 	while (IsInsideGlyph(*ptr))
687 		ptr++;
688 
689 	return ptr;
690 }
691 
692 
693 bool
694 TrackerString::IsGlyph(char ch) const
695 {
696 	return (ch & 0x80) == 0x80;
697 }
698 
699 
700 bool
701 TrackerString::IsInsideGlyph(char ch) const
702 {
703 	return (ch & 0xC0) == 0x80;
704 }
705 
706 
707 bool
708 TrackerString::IsStartOfGlyph(char ch) const
709 {
710 	return (ch & 0xC0) == 0xC0;
711 }
712