1 /* 2 * Copyright 2001-2014, Axel Dörfler, axeld@pinc-software.de. 3 * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de> 4 * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de. 5 * This file may be used under the terms of the MIT License. 6 */ 7 8 9 // This needs to be the first include because of the fs shell API wrapper 10 #include <algorithm> 11 12 #include <file_systems/QueryParserUtils.h> 13 14 #ifndef FS_SHELL 15 # include <string.h> 16 17 # include <TypeConstants.h> 18 #endif 19 20 21 namespace QueryParser { 22 23 24 template<typename Key> 25 static inline int 26 compare_integral(const Key& a, const Key& b) 27 { 28 if (a < b) 29 return -1; 30 if (a > b) 31 return 1; 32 return 0; 33 } 34 35 36 // #pragma mark - 37 38 39 void 40 skipWhitespace(const char** expr, int32 skip) 41 { 42 const char* string = (*expr) + skip; 43 while (*string == ' ' || *string == '\t') string++; 44 *expr = string; 45 } 46 47 48 void 49 skipWhitespaceReverse(const char** expr, const char* stop) 50 { 51 const char* string = *expr; 52 while (string > stop && (*string == ' ' || *string == '\t')) 53 string--; 54 *expr = string; 55 } 56 57 58 int 59 compareKeys(uint32 type, const void* key1, size_t length1, const void* key2, 60 size_t length2) 61 { 62 switch (type) { 63 case B_INT32_TYPE: 64 return compare_integral(*(int32*)key1, *(int32*)key2); 65 case B_UINT32_TYPE: 66 return compare_integral(*(uint32*)key1, *(uint32*)key2); 67 case B_INT64_TYPE: 68 return compare_integral(*(int64*)key1, *(int64*)key2); 69 case B_UINT64_TYPE: 70 return compare_integral(*(uint64*)key1, *(uint64*)key2); 71 case B_FLOAT_TYPE: 72 return compare_integral(*(float*)key1, *(float*)key2); 73 case B_DOUBLE_TYPE: 74 return compare_integral(*(double*)key1, *(double*)key2); 75 case B_STRING_TYPE: 76 case B_MIME_STRING_TYPE: 77 { 78 int result = strncmp((const char*)key1, (const char*)key2, 79 std::min(length1, length2)); 80 if (result == 0) { 81 result = compare_integral(strnlen((const char*)key1, length1), 82 strnlen((const char*)key2, length2)); 83 } 84 return result; 85 } 86 } 87 return -1; 88 } 89 90 91 // #pragma mark - 92 93 94 uint32 95 utf8ToUnicode(const char** string) 96 { 97 uint8* bytes = (uint8*)*string; 98 int32 length; 99 uint8 mask = 0x1f; 100 101 switch (bytes[0] & 0xf0) { 102 case 0xc0: 103 case 0xd0: 104 length = 2; 105 break; 106 case 0xe0: 107 length = 3; 108 break; 109 case 0xf0: 110 mask = 0x0f; 111 length = 4; 112 break; 113 default: 114 // valid 1-byte character 115 // and invalid characters 116 (*string)++; 117 return bytes[0]; 118 } 119 uint32 c = bytes[0] & mask; 120 int32 i = 1; 121 for (; i < length && (bytes[i] & 0x80) > 0; i++) 122 c = (c << 6) | (bytes[i] & 0x3f); 123 124 if (i < length) { 125 // invalid character 126 (*string)++; 127 return (uint32)bytes[0]; 128 } 129 *string += length; 130 return c; 131 } 132 133 134 int32 135 getFirstPatternSymbol(const char* string) 136 { 137 char c; 138 139 for (int32 index = 0; (c = *string++); index++) { 140 if (c == '*' || c == '?' || c == '[') 141 return index; 142 } 143 return -1; 144 } 145 146 147 status_t 148 isValidPattern(const char* pattern) 149 { 150 while (*pattern) { 151 switch (*pattern++) { 152 case '\\': 153 // the escape character must not be at the end of the pattern 154 if (!*pattern++) 155 return PATTERN_INVALID_ESCAPE; 156 break; 157 158 case '[': 159 if (pattern[0] == ']' || !pattern[0]) 160 return PATTERN_INVALID_SET; 161 162 while (*pattern != ']') { 163 if (*pattern == '\\' && !*++pattern) 164 return PATTERN_INVALID_ESCAPE; 165 166 if (!*pattern) 167 return PATTERN_INVALID_SET; 168 169 if (pattern[0] == '-' && pattern[1] == '-') 170 return PATTERN_INVALID_RANGE; 171 172 pattern++; 173 } 174 break; 175 } 176 } 177 return B_OK; 178 } 179 180 181 /*! Matches the string against the given wildcard pattern. 182 Returns either MATCH_OK, or NO_MATCH when everything went fine, or 183 values < 0 (see enum at the top of Query.cpp) if an error occurs. 184 185 The pattern matching is roughly based on code originally written 186 by J. Kercheval, and on code written by Kenneth Almquist, though 187 it shares no code. 188 */ 189 status_t 190 matchString(const char* pattern, const char* string) 191 { 192 while (*pattern) { 193 // end of string == valid end of pattern? 194 if (!string[0]) { 195 while (pattern[0] == '*') 196 pattern++; 197 return !pattern[0] ? MATCH_OK : NO_MATCH; 198 } 199 200 switch (*pattern++) { 201 case '?': 202 { 203 // match exactly one UTF-8 character; we are 204 // not interested in the result 205 utf8ToUnicode(&string); 206 break; 207 } 208 209 case '*': 210 { 211 // compact pattern 212 while (true) { 213 if (pattern[0] == '?') { 214 if (!*++string) 215 return NO_MATCH; 216 } else if (pattern[0] != '*') 217 break; 218 219 pattern++; 220 } 221 222 // if the pattern is done, we have matched the string 223 if (!pattern[0]) 224 return MATCH_OK; 225 226 while(true) { 227 // we have removed all occurences of '*' and '?' 228 if (pattern[0] == string[0] 229 || pattern[0] == '[' 230 || pattern[0] == '\\') { 231 status_t status = matchString(pattern, string); 232 if (status < B_OK || status == MATCH_OK) 233 return status; 234 } 235 236 // we could be nice here and just jump to the next 237 // UTF-8 character - but we wouldn't gain that much 238 // and it'd be slower (since we're checking for 239 // equality before entering the recursion) 240 if (!*++string) 241 return NO_MATCH; 242 } 243 break; 244 } 245 246 case '[': 247 { 248 bool invert = false; 249 if (pattern[0] == '^' || pattern[0] == '!') { 250 invert = true; 251 pattern++; 252 } 253 254 if (!pattern[0] || pattern[0] == ']') 255 return MATCH_BAD_PATTERN; 256 257 uint32 c = utf8ToUnicode(&string); 258 bool matched = false; 259 260 while (pattern[0] != ']') { 261 if (!pattern[0]) 262 return MATCH_BAD_PATTERN; 263 264 if (pattern[0] == '\\') 265 pattern++; 266 267 uint32 first = utf8ToUnicode(&pattern); 268 269 // Does this character match, or is this a range? 270 if (first == c) { 271 matched = true; 272 break; 273 } else if (pattern[0] == '-' && pattern[1] != ']' 274 && pattern[1]) { 275 pattern++; 276 277 if (pattern[0] == '\\') { 278 pattern++; 279 if (!pattern[0]) 280 return MATCH_BAD_PATTERN; 281 } 282 uint32 last = utf8ToUnicode(&pattern); 283 284 if (c >= first && c <= last) { 285 matched = true; 286 break; 287 } 288 } 289 } 290 291 if (invert) 292 matched = !matched; 293 294 if (matched) { 295 while (pattern[0] != ']') { 296 if (!pattern[0]) 297 return MATCH_BAD_PATTERN; 298 pattern++; 299 } 300 pattern++; 301 break; 302 } 303 return NO_MATCH; 304 } 305 306 case '\\': 307 if (!pattern[0]) 308 return MATCH_BAD_PATTERN; 309 // supposed to fall through 310 default: 311 if (pattern[-1] != string[0]) 312 return NO_MATCH; 313 string++; 314 break; 315 } 316 } 317 318 if (string[0]) 319 return NO_MATCH; 320 321 return MATCH_OK; 322 } 323 324 325 } // namespace QueryParser 326