xref: /haiku/src/kits/tracker/TrackerString.cpp (revision e019775491cc7be55c4f2269fe8792b0d204cf09)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 #include "TrackerString.h"
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 
41 
42 //	#pragma mark - TrackerString
43 
44 
TrackerString()45 TrackerString::TrackerString()
46 {
47 }
48 
49 
TrackerString(const char * string)50 TrackerString::TrackerString(const char* string)
51 	:
52 	BString(string)
53 {
54 }
55 
56 
TrackerString(const TrackerString & string)57 TrackerString::TrackerString(const TrackerString &string)
58 	:
59 	BString(string)
60 {
61 }
62 
63 
TrackerString(const char * string,int32 maxLength)64 TrackerString::TrackerString(const char* string, int32 maxLength)
65 	:
66 	BString(string, maxLength)
67 {
68 }
69 
70 
~TrackerString()71 TrackerString::~TrackerString()
72 {
73 }
74 
75 
76 bool
Matches(const char * string,bool caseSensitivity,TrackerStringExpressionType expressionType) const77 TrackerString::Matches(const char* string, bool caseSensitivity,
78 	TrackerStringExpressionType expressionType) const
79 {
80 	switch (expressionType) {
81 		default:
82 		case kNone:
83 			return false;
84 
85 		case kStartsWith:
86 			return StartsWith(string, caseSensitivity);
87 
88 		case kEndsWith:
89 			return EndsWith(string, caseSensitivity);
90 
91 		case kContains:
92 			return Contains(string, caseSensitivity);
93 
94 		case kGlobMatch:
95 			return MatchesGlob(string, caseSensitivity);
96 
97 		case kRegexpMatch:
98 			return MatchesRegExp(string, caseSensitivity);
99 	}
100 }
101 
102 
103 bool
MatchesRegExp(const char * pattern,bool caseSensitivity) const104 TrackerString::MatchesRegExp(const char* pattern, bool caseSensitivity) const
105 {
106 	BString patternString(pattern);
107 	BString textString(String());
108 
109 	if (caseSensitivity == false) {
110 		patternString.ToLower();
111 		textString.ToLower();
112 	}
113 
114 	RegExp expression(patternString);
115 
116 	if (expression.InitCheck() != B_OK)
117 		return false;
118 
119 	return expression.Matches(textString);
120 }
121 
122 
123 bool
MatchesGlob(const char * string,bool caseSensitivity) const124 TrackerString::MatchesGlob(const char* string, bool caseSensitivity) const
125 {
126 	return StringMatchesPattern(String(), string, caseSensitivity);
127 }
128 
129 
130 bool
EndsWith(const char * string,bool caseSensitivity) const131 TrackerString::EndsWith(const char* string, bool caseSensitivity) const
132 {
133 	// If "string" is longer than "this",
134 	// we should simply return false
135 	int32 position = Length() - (int32)strlen(string);
136 	if (position < 0)
137 		return false;
138 
139 	if (caseSensitivity)
140 		return FindLast(string) == position;
141 	else
142 		return IFindLast(string) == position;
143 }
144 
145 
146 bool
StartsWith(const char * string,bool caseSensitivity) const147 TrackerString::StartsWith(const char* string, bool caseSensitivity) const
148 {
149 	if (caseSensitivity)
150 		return FindFirst(string) == 0;
151 	else
152 		return IFindFirst(string) == 0;
153 }
154 
155 
156 bool
Contains(const char * string,bool caseSensitivity) const157 TrackerString::Contains(const char* string, bool caseSensitivity) const
158 {
159 	if (caseSensitivity)
160 		return FindFirst(string) > -1;
161 	else
162 		return IFindFirst(string) > -1;
163 }
164 
165 
166 // MatchesBracketExpression() assumes 'pattern' to point to the
167 // character following the initial '[' in a bracket expression.
168 // The reason is that an encountered '[' will be taken literally.
169 // (Makes it possible to match a '[' with the expression '[[]').
170 bool
MatchesBracketExpression(const char * string,const char * pattern,bool caseSensitivity) const171 TrackerString::MatchesBracketExpression(const char* string,
172 	const char* pattern, bool caseSensitivity) const
173 {
174 	bool GlyphMatch = IsStartOfGlyph(string[0]);
175 
176 	if (IsInsideGlyph(string[0]))
177 		return false;
178 
179 	char testChar = ConditionalToLower(string[0], caseSensitivity);
180 	bool match = false;
181 
182 	bool inverse = *pattern == '^' || *pattern == '!';
183 		// We allow both ^ and ! as a initial inverting character.
184 
185 	if (inverse)
186 		pattern++;
187 
188 	while (!match && *pattern != ']' && *pattern != '\0') {
189 		switch (*pattern) {
190 			case '-':
191 			{
192 				char start = ConditionalToLower(*(pattern - 1),
193 					caseSensitivity);
194 				char stop = ConditionalToLower(*(pattern + 1),
195 					caseSensitivity);
196 
197 				if (IsGlyph(start) || IsGlyph(stop))
198 					return false;
199 						// Not a valid range!
200 
201 				if ((islower(start) && islower(stop))
202 					|| (isupper(start) && isupper(stop))
203 					|| (isdigit(start) && isdigit(stop))) {
204 					// Make sure 'start' and 'stop' are of the same type.
205 					match = start <= testChar && testChar <= stop;
206 				} else {
207 					// If no valid range, we've got a syntax error.
208 					return false;
209 				}
210 
211 				break;
212 			}
213 
214 			default:
215 				if (GlyphMatch)
216 					match = UTF8CharsAreEqual(string, pattern);
217 				else
218 					match = CharsAreEqual(testChar, *pattern, caseSensitivity);
219 				break;
220 		}
221 
222 		if (!match) {
223 			pattern++;
224 			if (IsInsideGlyph(pattern[0]))
225 				pattern = MoveToEndOfGlyph(pattern);
226 		}
227 	}
228 
229 	// Consider an unmatched bracket a failure
230 	// (i.e. when detecting a '\0' instead of a ']'.)
231 	if (*pattern == '\0')
232 		return false;
233 
234 	return (match ^ inverse) != 0;
235 }
236 
237 
238 bool
StringMatchesPattern(const char * string,const char * pattern,bool caseSensitivity) const239 TrackerString::StringMatchesPattern(const char* string, const char* pattern,
240 	bool caseSensitivity) const
241 {
242 	// One could do this dynamically, counting the number of *'s,
243 	// but then you have to free them at every exit of this
244 	// function, which is awkward and ugly.
245 	const int32 kWildCardMaximum = 100;
246 	const char* pStorage[kWildCardMaximum];
247 	const char* sStorage[kWildCardMaximum];
248 
249 	int32 patternLevel = 0;
250 
251 	if (string == NULL || pattern == NULL)
252 		return false;
253 
254 	while (*pattern != '\0') {
255 		switch (*pattern) {
256 			case '?':
257 				pattern++;
258 				string++;
259 				if (IsInsideGlyph(string[0]))
260 					string = MoveToEndOfGlyph(string);
261 				break;
262 
263 			case '*':
264 			{
265 				// Collapse any ** and *? constructions:
266 				while (*pattern == '*' || *pattern == '?') {
267 					pattern++;
268 					if (*pattern == '?' && *string != '\0') {
269 						string++;
270 						if (IsInsideGlyph(string[0]))
271 							string = MoveToEndOfGlyph(string);
272 					}
273 				}
274 
275 				if (*pattern == '\0') {
276 					// An ending * matches all strings.
277 					return true;
278 				}
279 
280 				bool match = false;
281 				const char* pBefore = pattern - 1;
282 
283 				if (*pattern == '[') {
284 					pattern++;
285 
286 					while (!match && *string != '\0') {
287 						match = MatchesBracketExpression(string++, pattern,
288 							caseSensitivity);
289 					}
290 
291 					while (*pattern != ']' && *pattern != '\0') {
292 						// Skip the rest of the bracket:
293 						pattern++;
294 					}
295 
296 					if (*pattern == '\0') {
297 						// Failure if no closing bracket;
298 						return false;
299 					}
300 				} else {
301 					// No bracket, just one character:
302 					while (!match && *string != '\0') {
303 						if (IsGlyph(string[0]))
304 							match = UTF8CharsAreEqual(string++, pattern);
305 						else {
306 							match = CharsAreEqual(*string++, *pattern,
307 								caseSensitivity);
308 						}
309 					}
310 				}
311 
312 				if (!match)
313 					return false;
314 				else {
315 					pStorage[patternLevel] = pBefore;
316 					if (IsInsideGlyph(string[0]))
317 						string = MoveToEndOfGlyph(string);
318 
319 					sStorage[patternLevel++] = string;
320 					if (patternLevel > kWildCardMaximum)
321 						return false;
322 
323 					pattern++;
324 					if (IsInsideGlyph(pattern[0]))
325 						pattern = MoveToEndOfGlyph(pattern);
326 				}
327 				break;
328 			}
329 
330 			case '[':
331 				pattern++;
332 
333 				if (!MatchesBracketExpression(string, pattern,
334 						caseSensitivity)) {
335 					if (patternLevel > 0) {
336 						pattern = pStorage[--patternLevel];
337 						string = sStorage[patternLevel];
338 					} else
339 						return false;
340 				} else {
341 					// Skip the rest of the bracket:
342 					while (*pattern != ']' && *pattern != '\0')
343 						pattern++;
344 
345 					// Failure if no closing bracket;
346 					if (*pattern == '\0')
347 						return false;
348 
349 					string++;
350 					if (IsInsideGlyph(string[0]))
351 						string = MoveToEndOfGlyph(string);
352 					pattern++;
353 				}
354 				break;
355 
356 			default:
357 			{
358 				bool equal = false;
359 				if (IsGlyph(string[0]))
360 					equal = UTF8CharsAreEqual(string, pattern);
361 				else
362 					equal = CharsAreEqual(*string, *pattern, caseSensitivity);
363 
364 				if (equal) {
365 					pattern++;
366 					if (IsInsideGlyph(pattern[0]))
367 						pattern = MoveToEndOfGlyph(pattern);
368 					string++;
369 					if (IsInsideGlyph(string[0]))
370 						string = MoveToEndOfGlyph(string);
371 				} else if (patternLevel > 0) {
372 						pattern = pStorage[--patternLevel];
373 						string = sStorage[patternLevel];
374 				} else
375 					return false;
376 				break;
377 			}
378 		}
379 
380 		if (*pattern == '\0' && *string != '\0' && patternLevel > 0) {
381 			pattern = pStorage[--patternLevel];
382 			string = sStorage[patternLevel];
383 		}
384 	}
385 
386 	return *string == '\0' && *pattern == '\0';
387 }
388 
389 
390 bool
UTF8CharsAreEqual(const char * string1,const char * string2) const391 TrackerString::UTF8CharsAreEqual(const char* string1,
392 	const char* string2) const
393 {
394 	const char* s1 = string1;
395 	const char* s2 = string2;
396 
397 	if (IsStartOfGlyph(*s1) && *s1 == *s2) {
398 		s1++;
399 		s2++;
400 
401 		while (IsInsideGlyph(*s1) && *s1 == *s2) {
402 			s1++;
403 			s2++;
404 		}
405 
406 		return !IsInsideGlyph(*s1)
407 			&& !IsInsideGlyph(*s2) && *(s1 - 1) == *(s2 - 1);
408 	} else
409 		return false;
410 }
411 
412 
413 const char*
MoveToEndOfGlyph(const char * string) const414 TrackerString::MoveToEndOfGlyph(const char* string) const
415 {
416 	const char* ptr = string;
417 
418 	while (IsInsideGlyph(*ptr))
419 		ptr++;
420 
421 	return ptr;
422 }
423 
424 
425 bool
IsGlyph(char ch) const426 TrackerString::IsGlyph(char ch) const
427 {
428 	return (ch & 0x80) == 0x80;
429 }
430 
431 
432 bool
IsInsideGlyph(char ch) const433 TrackerString::IsInsideGlyph(char ch) const
434 {
435 	return (ch & 0xC0) == 0x80;
436 }
437 
438 
439 bool
IsStartOfGlyph(char ch) const440 TrackerString::IsStartOfGlyph(char ch) const
441 {
442 	return (ch & 0xC0) == 0xC0;
443 }
444