xref: /haiku/src/kits/tracker/TrackerString.cpp (revision 2f470aec1c92ce6917b8a903e343795dc77af41f)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 #include "TrackerString.h"
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 
40 TrackerString::TrackerString()
41 {
42 }
43 
44 
45 TrackerString::TrackerString(const char *string)
46 	:	BString(string)
47 {
48 }
49 
50 
51 TrackerString::TrackerString(const TrackerString &string)
52 	:	BString(string)
53 {
54 }
55 
56 
57 TrackerString::TrackerString(const char *string, int32 maxLength)
58 	:	BString(string, maxLength)
59 {
60 }
61 
62 
63 TrackerString::~TrackerString()
64 {
65 }
66 
67 
68 bool
69 TrackerString::Matches(const char *string, bool caseSensitivity,
70 	TrackerStringExpressionType expressionType) const
71 {
72 	switch (expressionType) {
73 		default:
74 		case kNone:
75 			return false;
76 
77 		case kStartsWith:
78 			return StartsWith(string, caseSensitivity);
79 
80 		case kEndsWith:
81 			return EndsWith(string, caseSensitivity);
82 
83 		case kContains:
84 			return Contains(string, caseSensitivity);
85 
86 		case kGlobMatch:
87 			return MatchesGlob(string, caseSensitivity);
88 
89 		case kRegexpMatch:
90 			return MatchesRegExp(string, caseSensitivity);
91 	}
92 }
93 
94 
95 bool
96 TrackerString::MatchesRegExp(const char *pattern, bool caseSensitivity) const
97 {
98 	BString patternString(pattern);
99 	BString textString(String());
100 
101 	if (caseSensitivity == false) {
102 		patternString.ToLower();
103 		textString.ToLower();
104 	}
105 
106 	RegExp expression(patternString);
107 
108 	if (expression.InitCheck() != B_OK)
109 		return false;
110 
111 	return expression.Matches(textString);
112 }
113 
114 
115 bool
116 TrackerString::MatchesGlob(const char *string, bool caseSensitivity) const
117 {
118 	return StringMatchesPattern(String(), string, caseSensitivity);
119 }
120 
121 
122 bool
123 TrackerString::EndsWith(const char *string, bool caseSensitivity) const
124 {
125 	// If "string" is longer than "this",
126 	// we should simply return false
127 	int32 position = Length() - (int32)strlen(string);
128 	if (position < 0)
129 		return false;
130 
131 	if (caseSensitivity)
132 		return FindLast(string) == position;
133 	else
134 		return IFindLast(string) == position;
135 }
136 
137 
138 bool
139 TrackerString::StartsWith(const char *string, bool caseSensitivity) const
140 {
141 	if (caseSensitivity)
142 		return FindFirst(string) == 0;
143 	else
144 		return IFindFirst(string) == 0;
145 }
146 
147 
148 bool
149 TrackerString::Contains(const char *string, bool caseSensitivity) const
150 {
151 	if (caseSensitivity)
152 		return FindFirst(string) > -1;
153 	else
154 		return IFindFirst(string) > -1;
155 }
156 
157 
158 // About the ?Find* functions:
159 // The leading star here has been compliance with BString,
160 // simplicity and performance. Therefore unncessary copying
161 // has been avoided, as unncessary function calls.
162 // The copying has been avoided by implementing the
163 // ?Find*(const char*) functions rather than
164 // the ?Find*(TrackerString &) functions.
165 // The function calls has been avoided by
166 // inserting a check on the first character
167 // before calling the str*cmp functions.
168 
169 
170 int32
171 TrackerString::FindFirst(const BString &string) const
172 {
173 	return FindFirst(string.String(), 0);
174 }
175 
176 
177 int32
178 TrackerString::FindFirst(const char *string) const
179 {
180 	return FindFirst(string, 0);
181 }
182 
183 
184 int32
185 TrackerString::FindFirst(const BString &string, int32 fromOffset) const
186 {
187 	return FindFirst(string.String(), fromOffset);
188 }
189 
190 
191 int32
192 TrackerString::FindFirst(const char *string, int32 fromOffset) const
193 {
194 	if (!string)
195 		return -1;
196 
197 	int32 length = Length();
198 	uint32 stringLength = strlen(string);
199 
200 	// The following two checks are required to be compatible
201 	// with BString:
202 	if (length <= 0)
203 		return -1;
204 
205 	if (stringLength == 0)
206 		return fromOffset;
207 
208 	int32 stop = length - static_cast<int32>(stringLength);
209 	int32 start = MAX(0, MIN(fromOffset, stop));
210 	int32 position = -1;
211 
212 
213 	for (int32 i = start; i <= stop; i++)
214 		if (string[0] == ByteAt(i))
215 			// This check is to avoid mute str*cmp() calls. Performance.
216 			if (strncmp(string, String() + i, stringLength) == 0) {
217 				position = i;
218 				break;
219 			}
220 
221 	return position;
222 }
223 
224 
225 int32
226 TrackerString::FindFirst(char ch) const
227 {
228 	char string[2] = {ch, '\0'};
229 	return FindFirst(string, 0);
230 }
231 
232 
233 int32
234 TrackerString::FindFirst(char ch, int32 fromOffset) const
235 {
236 	char string[2] = {ch, '\0'};
237 	return FindFirst(string, fromOffset);
238 }
239 
240 
241 int32
242 TrackerString::FindLast(const BString &string) const
243 {
244 	return FindLast(string.String(), Length() - 1);
245 }
246 
247 
248 int32
249 TrackerString::FindLast(const char *string) const
250 {
251 	return FindLast(string, Length() - 1);
252 }
253 
254 
255 int32
256 TrackerString::FindLast(const BString &string, int32 beforeOffset) const
257 {
258 	return FindLast(string.String(), beforeOffset);
259 }
260 
261 
262 int32
263 TrackerString::FindLast(const char *string, int32 beforeOffset) const
264 {
265 	if (!string)
266 		return -1;
267 
268 	int32 length = Length();
269 	uint32 stringLength = strlen(string);
270 
271 	// The following two checks are required to be compatible
272 	// with BString:
273 	if (length <= 0)
274 		return -1;
275 
276 	if (stringLength == 0)
277 		return beforeOffset;
278 
279 	int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength));
280 	int32 stop = 0;
281 	int32 position = -1;
282 
283 	for (int32 i = start; i >= stop; i--)
284 		if (string[0] == ByteAt(i))
285 			// This check is to avoid mute str*cmp() calls. Performance.
286 			if (strncmp(string, String() + i, stringLength) == 0) {
287 				position = i;
288 				break;
289 			}
290 
291 	return position;
292 }
293 
294 
295 int32
296 TrackerString::FindLast(char ch) const
297 {
298 	char string[2] = {ch, '\0'};
299 	return FindLast(string, Length() - 1);
300 }
301 
302 
303 int32
304 TrackerString::FindLast(char ch, int32 beforeOffset) const
305 {
306 	char string[2] = {ch, '\0'};
307 	return FindLast(string, beforeOffset);
308 }
309 
310 
311 int32
312 TrackerString::IFindFirst(const BString &string) const
313 {
314 	return IFindFirst(string.String(), 0);
315 }
316 
317 
318 int32
319 TrackerString::IFindFirst(const char *string) const
320 {
321 	return IFindFirst(string, 0);
322 }
323 
324 
325 int32
326 TrackerString::IFindFirst(const BString &string, int32 fromOffset) const
327 {
328 	return IFindFirst(string.String(), fromOffset);
329 }
330 
331 
332 int32
333 TrackerString::IFindFirst(const char *string, int32 fromOffset) const
334 {
335 	if (!string)
336 		return -1;
337 
338 	int32 length = Length();
339 	uint32 stringLength = strlen(string);
340 
341 	// The following two checks are required to be compatible
342 	// with BString:
343 	if (length <= 0)
344 		return -1;
345 
346 	if (stringLength == 0)
347 		return fromOffset;
348 
349 	int32 stop = length - static_cast<int32>(stringLength);
350 	int32 start = MAX(0, MIN(fromOffset, stop));
351 	int32 position = -1;
352 
353 	for (int32 i = start; i <= stop; i++)
354 		if (tolower(string[0]) == tolower(ByteAt(i)))
355 			// This check is to avoid mute str*cmp() calls. Performance.
356 			if (strncasecmp(string, String() + i, stringLength) == 0) {
357 				position = i;
358 				break;
359 			}
360 
361 	return position;
362 }
363 
364 
365 int32
366 TrackerString::IFindLast(const BString &string) const
367 {
368 	return IFindLast(string.String(), Length() - 1);
369 }
370 
371 
372 int32
373 TrackerString::IFindLast(const char *string) const
374 {
375 	return IFindLast(string, Length() - 1);
376 }
377 
378 
379 int32
380 TrackerString::IFindLast(const BString &string, int32 beforeOffset) const
381 {
382 	return IFindLast(string.String(), beforeOffset);
383 }
384 
385 
386 int32
387 TrackerString::IFindLast(const char *string, int32 beforeOffset) const
388 {
389 	if (!string)
390 		return -1;
391 
392 	int32 length = Length();
393 	uint32 stringLength = strlen(string);
394 
395 	// The following two checks are required to be compatible
396 	// with BString:
397 	if (length <= 0)
398 		return -1;
399 
400 	if (stringLength == 0)
401 		return beforeOffset;
402 
403 	int32 start = MIN(beforeOffset, length - static_cast<int32>(stringLength));
404 	int32 stop = 0;
405 	int32 position = -1;
406 
407 	for (int32 i = start; i >= stop; i--)
408 		if (tolower(string[0]) == tolower(ByteAt(i)))
409 			// This check is to avoid mute str*cmp() calls. Performance.
410 			if (strncasecmp(string, String() + i, stringLength) == 0) {
411 				position = i;
412 				break;
413 			}
414 
415 	return position;
416 }
417 
418 
419 // MatchesBracketExpression() assumes 'pattern' to point to the
420 // character following the initial '[' in a bracket expression.
421 // The reason is that an encountered '[' will be taken literally.
422 // (Makes it possible to match a '[' with the expression '[[]').
423 bool
424 TrackerString::MatchesBracketExpression(const char *string, const char *pattern,
425 	bool caseSensitivity) const
426 {
427 	bool GlyphMatch = IsStartOfGlyph(string[0]);
428 
429 	if (IsInsideGlyph(string[0]))
430 		return false;
431 
432 	char testChar = ConditionalToLower(string[0], caseSensitivity);
433 	bool match = false;
434 
435 	bool inverse = *pattern == '^' || *pattern == '!';
436 		// We allow both ^ and ! as a initial inverting character.
437 
438 	if (inverse)
439 		pattern++;
440 
441 	while (!match && *pattern != ']' && *pattern != '\0') {
442 		switch (*pattern) {
443 			case '-':
444 				{
445 					char start = ConditionalToLower(*(pattern - 1), caseSensitivity),
446 						stop = ConditionalToLower(*(pattern + 1), caseSensitivity);
447 
448 					if (IsGlyph(start) || IsGlyph(stop))
449 						return false;
450 							// Not a valid range!
451 
452 					if (islower(start) && islower(stop)
453 						|| isupper(start) && isupper(stop)
454 						|| isdigit(start) && isdigit(stop))
455 							// Make sure 'start' and 'stop' are of the same type.
456 						match = start <= testChar && testChar <= stop;
457 					else
458 						return false;
459 							// If no valid range, we've got a syntax error.
460 				}
461 				break;
462 
463 			default:
464 				if (GlyphMatch)
465 					match = UTF8CharsAreEqual(string, pattern);
466 				else
467 					match = CharsAreEqual(testChar, *pattern, caseSensitivity);
468 				break;
469 		}
470 
471 		if (!match) {
472 			pattern++;
473 			if (IsInsideGlyph(pattern[0]))
474 				pattern = MoveToEndOfGlyph(pattern);
475 		}
476 	}
477 	// Consider an unmatched bracket a failure
478 	// (i.e. when detecting a '\0' instead of a ']'.)
479 	if (*pattern == '\0')
480 		return false;
481 
482 	return (match ^ inverse) != 0;
483 }
484 
485 
486 bool
487 TrackerString::StringMatchesPattern(const char *string, const char *pattern,
488 	bool caseSensitivity) const
489 {
490 	// One could do this dynamically, counting the number of *'s,
491 	// but then you have to free them at every exit of this
492 	// function, which is awkward and ugly.
493 	const int32 kWildCardMaximum = 100;
494 	const char *pStorage[kWildCardMaximum];
495 	const char *sStorage[kWildCardMaximum];
496 
497 	int32 patternLevel = 0;
498 
499 	if (string == NULL || pattern == NULL)
500 		return false;
501 
502 	while (*pattern != '\0') {
503 
504 		switch (*pattern) {
505 
506 			case '?':
507 				pattern++;
508 				string++;
509 				if (IsInsideGlyph(string[0]))
510 					string = MoveToEndOfGlyph(string);
511 				break;
512 
513 			case '*':
514 				{
515 					// Collapse any ** and *? constructions:
516 					while (*pattern == '*' || *pattern == '?') {
517 						pattern++;
518 						if (*pattern == '?' && string != '\0') {
519 							string++;
520 							if (IsInsideGlyph(string[0]))
521 								string = MoveToEndOfGlyph(string);
522 						}
523 					}
524 
525 					if (*pattern == '\0')
526 						// An ending * matches all strings.
527 						return true;
528 
529 					bool match = false;
530 					const char *pBefore = pattern - 1;
531 
532 					if (*pattern == '[') {
533 						pattern++;
534 
535 						while (!match && *string != '\0')
536 							match = MatchesBracketExpression(string++, pattern, caseSensitivity);
537 
538 						// Skip the rest of the bracket:
539 						while (*pattern != ']' && *pattern != '\0')
540 							pattern++;
541 
542 						// Failure if no closing bracket;
543 						if (*pattern == '\0')
544 							return false;
545 
546 					}
547 					else {
548 						// No bracket, just one character:
549 						while (!match && *string != '\0') {
550 							if (IsGlyph(string[0]))
551 								match = UTF8CharsAreEqual(string++, pattern);
552 							else
553 								match = CharsAreEqual(*string++, *pattern, caseSensitivity);
554 						}
555 					}
556 					if (!match)
557 							return false;
558 					else {
559 						pStorage[patternLevel] = pBefore;
560 						if (IsInsideGlyph(string[0]))
561 							string = MoveToEndOfGlyph(string);
562 						sStorage[patternLevel++] = string;
563 						if (patternLevel > kWildCardMaximum)
564 							return false;
565 						pattern++;
566 						if (IsInsideGlyph(pattern[0]))
567 							pattern = MoveToEndOfGlyph(pattern);
568 					}
569 				}
570 				break;
571 
572 			case '[':
573 				pattern++;
574 
575 				if (!MatchesBracketExpression(string, pattern, caseSensitivity))
576 					if (patternLevel > 0) {
577 						pattern = pStorage[--patternLevel];
578 						string = sStorage[patternLevel];
579 					} else
580 						return false;
581 				else {
582 
583 					// Skip the rest of the bracket:
584 					while (*pattern != ']' && *pattern != '\0')
585 						pattern++;
586 
587 					// Failure if no closing bracket;
588 					if (*pattern == '\0')
589 						return false;
590 
591 					string++;
592 					if (IsInsideGlyph(string[0]))
593 						string = MoveToEndOfGlyph(string);
594 					pattern++;
595 				}
596 				break;
597 
598 			default:
599 				{
600 					bool equal = false;
601 					if (IsGlyph(string[0]))
602 						equal = UTF8CharsAreEqual(string, pattern);
603 					else
604 						equal = CharsAreEqual(*string, *pattern, caseSensitivity);
605 
606 					if (equal) {
607 						pattern++;
608 						if (IsInsideGlyph(pattern[0]))
609 							pattern = MoveToEndOfGlyph(pattern);
610 						string++;
611 						if (IsInsideGlyph(string[0]))
612 							string = MoveToEndOfGlyph(string);
613 					} else if (patternLevel > 0) {
614 							pattern = pStorage[--patternLevel];
615 							string = sStorage[patternLevel];
616 					} else
617 						return false;
618 				}
619 				break;
620 		}
621 
622 		if (*pattern == '\0' && *string != '\0' && patternLevel > 0) {
623 			pattern = pStorage[--patternLevel];
624 			string = sStorage[patternLevel];
625 		}
626 	}
627 
628 	return *string == '\0' && *pattern == '\0';
629 }
630 
631 
632 bool
633 TrackerString::UTF8CharsAreEqual(const char *string1, const char *string2) const
634 {
635 	const char *s1 = string1;
636 	const char *s2 = string2;
637 
638 	if (IsStartOfGlyph(*s1) && *s1 == *s2) {
639 		s1++;
640 		s2++;
641 
642 		while (IsInsideGlyph(*s1) && *s1 == *s2) {
643 			s1++;
644 			s2++;
645 		}
646 
647 		return !IsInsideGlyph(*s1) && !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1);
648 
649 	} else
650 		return false;
651 }
652 
653 
654 const char *
655 TrackerString::MoveToEndOfGlyph(const char *string) const
656 {
657 	const char *ptr = string;
658 
659 	while (IsInsideGlyph(*ptr))
660 		ptr++;
661 
662 	return ptr;
663 }
664 
665 
666 bool
667 TrackerString::IsGlyph(char ch) const
668 {
669 	return (ch & 0x80) == 0x80;
670 }
671 
672 
673 bool
674 TrackerString::IsInsideGlyph(char ch) const
675 {
676 	return (ch & 0xC0) == 0x80;
677 }
678 
679 
680 bool
681 TrackerString::IsStartOfGlyph(char ch) const
682 {
683 	return (ch & 0xC0) == 0xC0;
684 }
685