1 /*
2 * Copyright 2001-2014, Axel Dörfler, axeld@pinc-software.de.
3 * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de>
4 * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de.
5 * This file may be used under the terms of the MIT License.
6 */
7
8
9 // This needs to be the first include because of the fs shell API wrapper
10 #include <algorithm>
11
12 #include <file_systems/QueryParserUtils.h>
13
14 #ifndef FS_SHELL
15 # include <string.h>
16
17 # include <TypeConstants.h>
18 #endif
19
20
21 namespace QueryParser {
22
23
24 template<typename Key>
25 static inline int
compare_integral(const Key & a,const Key & b)26 compare_integral(const Key& a, const Key& b)
27 {
28 if (a < b)
29 return -1;
30 if (a > b)
31 return 1;
32 return 0;
33 }
34
35
36 // #pragma mark -
37
38
39 void
skipWhitespace(const char ** expr,int32 skip)40 skipWhitespace(const char** expr, int32 skip)
41 {
42 const char* string = (*expr) + skip;
43 while (*string == ' ' || *string == '\t') string++;
44 *expr = string;
45 }
46
47
48 void
skipWhitespaceReverse(const char ** expr,const char * stop)49 skipWhitespaceReverse(const char** expr, const char* stop)
50 {
51 const char* string = *expr;
52 while (string > stop && (*string == ' ' || *string == '\t'))
53 string--;
54 *expr = string;
55 }
56
57
58 int
compareKeys(uint32 type,const void * key1,size_t length1,const void * key2,size_t length2)59 compareKeys(uint32 type, const void* key1, size_t length1, const void* key2,
60 size_t length2)
61 {
62 switch (type) {
63 case B_INT32_TYPE:
64 return compare_integral(*(int32*)key1, *(int32*)key2);
65 case B_UINT32_TYPE:
66 return compare_integral(*(uint32*)key1, *(uint32*)key2);
67 case B_INT64_TYPE:
68 return compare_integral(*(int64*)key1, *(int64*)key2);
69 case B_UINT64_TYPE:
70 return compare_integral(*(uint64*)key1, *(uint64*)key2);
71 case B_FLOAT_TYPE:
72 return compare_integral(*(float*)key1, *(float*)key2);
73 case B_DOUBLE_TYPE:
74 return compare_integral(*(double*)key1, *(double*)key2);
75 case B_STRING_TYPE:
76 case B_MIME_STRING_TYPE:
77 {
78 int result = strncmp((const char*)key1, (const char*)key2,
79 std::min(length1, length2));
80 if (result == 0) {
81 result = compare_integral(strnlen((const char*)key1, length1),
82 strnlen((const char*)key2, length2));
83 }
84 return result;
85 }
86 }
87 return -1;
88 }
89
90
91 // #pragma mark -
92
93
94 uint32
utf8ToUnicode(const char ** string)95 utf8ToUnicode(const char** string)
96 {
97 uint8* bytes = (uint8*)*string;
98 int32 length;
99 uint8 mask = 0x1f;
100
101 switch (bytes[0] & 0xf0) {
102 case 0xc0:
103 case 0xd0:
104 length = 2;
105 break;
106 case 0xe0:
107 length = 3;
108 break;
109 case 0xf0:
110 mask = 0x0f;
111 length = 4;
112 break;
113 default:
114 // valid 1-byte character
115 // and invalid characters
116 (*string)++;
117 return bytes[0];
118 }
119 uint32 c = bytes[0] & mask;
120 int32 i = 1;
121 for (; i < length && (bytes[i] & 0x80) > 0; i++)
122 c = (c << 6) | (bytes[i] & 0x3f);
123
124 if (i < length) {
125 // invalid character
126 (*string)++;
127 return (uint32)bytes[0];
128 }
129 *string += length;
130 return c;
131 }
132
133
134 int32
getFirstPatternSymbol(const char * string)135 getFirstPatternSymbol(const char* string)
136 {
137 char c;
138
139 for (int32 index = 0; (c = *string++); index++) {
140 if (c == '*' || c == '?' || c == '[')
141 return index;
142 }
143 return -1;
144 }
145
146
147 status_t
isValidPattern(const char * pattern)148 isValidPattern(const char* pattern)
149 {
150 while (*pattern) {
151 switch (*pattern++) {
152 case '\\':
153 // the escape character must not be at the end of the pattern
154 if (!*pattern++)
155 return PATTERN_INVALID_ESCAPE;
156 break;
157
158 case '[':
159 if (pattern[0] == ']' || !pattern[0])
160 return PATTERN_INVALID_SET;
161
162 while (*pattern != ']') {
163 if (*pattern == '\\' && !*++pattern)
164 return PATTERN_INVALID_ESCAPE;
165
166 if (!*pattern)
167 return PATTERN_INVALID_SET;
168
169 if (pattern[0] == '-' && pattern[1] == '-')
170 return PATTERN_INVALID_RANGE;
171
172 pattern++;
173 }
174 break;
175 }
176 }
177 return B_OK;
178 }
179
180
181 /*! Matches the string against the given wildcard pattern.
182 Returns either MATCH_OK, or NO_MATCH when everything went fine, or
183 values < 0 (see enum at the top of Query.cpp) if an error occurs.
184
185 The pattern matching is roughly based on code originally written
186 by J. Kercheval, and on code written by Kenneth Almquist, though
187 it shares no code.
188 */
189 status_t
matchString(const char * pattern,const char * string)190 matchString(const char* pattern, const char* string)
191 {
192 while (*pattern) {
193 // end of string == valid end of pattern?
194 if (!string[0]) {
195 while (pattern[0] == '*')
196 pattern++;
197 return !pattern[0] ? MATCH_OK : NO_MATCH;
198 }
199
200 switch (*pattern++) {
201 case '?':
202 {
203 // match exactly one UTF-8 character; we are
204 // not interested in the result
205 utf8ToUnicode(&string);
206 break;
207 }
208
209 case '*':
210 {
211 // compact pattern
212 while (true) {
213 if (pattern[0] == '?') {
214 if (!*++string)
215 return NO_MATCH;
216 } else if (pattern[0] != '*')
217 break;
218
219 pattern++;
220 }
221
222 // if the pattern is done, we have matched the string
223 if (!pattern[0])
224 return MATCH_OK;
225
226 while(true) {
227 // we have removed all occurences of '*' and '?'
228 if (pattern[0] == string[0]
229 || pattern[0] == '['
230 || pattern[0] == '\\') {
231 status_t status = matchString(pattern, string);
232 if (status < B_OK || status == MATCH_OK)
233 return status;
234 }
235
236 // we could be nice here and just jump to the next
237 // UTF-8 character - but we wouldn't gain that much
238 // and it'd be slower (since we're checking for
239 // equality before entering the recursion)
240 if (!*++string)
241 return NO_MATCH;
242 }
243 break;
244 }
245
246 case '[':
247 {
248 bool invert = false;
249 if (pattern[0] == '^' || pattern[0] == '!') {
250 invert = true;
251 pattern++;
252 }
253
254 if (!pattern[0] || pattern[0] == ']')
255 return MATCH_BAD_PATTERN;
256
257 uint32 c = utf8ToUnicode(&string);
258 bool matched = false;
259
260 while (pattern[0] != ']') {
261 if (!pattern[0])
262 return MATCH_BAD_PATTERN;
263
264 if (pattern[0] == '\\')
265 pattern++;
266
267 uint32 first = utf8ToUnicode(&pattern);
268
269 // Does this character match, or is this a range?
270 if (first == c) {
271 matched = true;
272 break;
273 } else if (pattern[0] == '-' && pattern[1] != ']'
274 && pattern[1]) {
275 pattern++;
276
277 if (pattern[0] == '\\') {
278 pattern++;
279 if (!pattern[0])
280 return MATCH_BAD_PATTERN;
281 }
282 uint32 last = utf8ToUnicode(&pattern);
283
284 if (c >= first && c <= last) {
285 matched = true;
286 break;
287 }
288 }
289 }
290
291 if (invert)
292 matched = !matched;
293
294 if (matched) {
295 while (pattern[0] != ']') {
296 if (!pattern[0])
297 return MATCH_BAD_PATTERN;
298 pattern++;
299 }
300 pattern++;
301 break;
302 }
303 return NO_MATCH;
304 }
305
306 case '\\':
307 if (!pattern[0])
308 return MATCH_BAD_PATTERN;
309 // supposed to fall through
310 default:
311 if (pattern[-1] != string[0])
312 return NO_MATCH;
313 string++;
314 break;
315 }
316 }
317
318 if (string[0])
319 return NO_MATCH;
320
321 return MATCH_OK;
322 }
323
324
325 } // namespace QueryParser
326