1 /* 2 * Copyright 2001-2014, Axel Dörfler, axeld@pinc-software.de. 3 * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de> 4 * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de. 5 * This file may be used under the terms of the MIT License. 6 */ 7 8 9 // This needs to be the first include because of the fs shell API wrapper 10 #include <algorithm> 11 12 #include <file_systems/QueryParserUtils.h> 13 14 #ifndef FS_SHELL 15 # include <string.h> 16 17 # include <TypeConstants.h> 18 #endif 19 20 21 namespace QueryParser { 22 23 24 template<typename Key> 25 static inline int 26 compare_integral(const Key& a, const Key& b) 27 { 28 if (a < b) 29 return -1; 30 if (a > b) 31 return 1; 32 return 0; 33 } 34 35 36 // #pragma mark - 37 38 39 void 40 skipWhitespace(char** expr, int32 skip) 41 { 42 char* string = (*expr) + skip; 43 while (*string == ' ' || *string == '\t') string++; 44 *expr = string; 45 } 46 47 48 void 49 skipWhitespaceReverse(char** expr, char* stop) 50 { 51 char* string = *expr; 52 while (string > stop && (*string == ' ' || *string == '\t')) 53 string--; 54 *expr = string; 55 } 56 57 58 int 59 compareKeys(uint32 type, const void* key1, size_t length1, const void* key2, 60 size_t length2) 61 { 62 switch (type) { 63 case B_INT32_TYPE: 64 return compare_integral(*(int32*)key1, *(int32*)key2); 65 case B_UINT32_TYPE: 66 return compare_integral(*(uint32*)key1, *(uint32*)key2); 67 case B_INT64_TYPE: 68 return compare_integral(*(int64*)key1, *(int64*)key2); 69 case B_UINT64_TYPE: 70 return compare_integral(*(uint64*)key1, *(uint64*)key2); 71 case B_FLOAT_TYPE: 72 return compare_integral(*(float*)key1, *(float*)key2); 73 case B_DOUBLE_TYPE: 74 return compare_integral(*(double*)key1, *(double*)key2); 75 case B_STRING_TYPE: 76 case B_MIME_STRING_TYPE: 77 { 78 int result = strncmp((const char*)key1, (const char*)key2, 79 std::min(length1, length2)); 80 if (result == 0) { 81 result = compare_integral(strnlen((const char*)key1, length1), 82 strnlen((const char*)key2, length2)); 83 } 84 return result; 85 } 86 } 87 return -1; 88 } 89 90 91 // #pragma mark - 92 93 94 uint32 95 utf8ToUnicode(char** string) 96 { 97 uint8* bytes = (uint8*)*string; 98 int32 length; 99 uint8 mask = 0x1f; 100 101 switch (bytes[0] & 0xf0) { 102 case 0xc0: 103 case 0xd0: 104 length = 2; 105 break; 106 case 0xe0: 107 length = 3; 108 break; 109 case 0xf0: 110 mask = 0x0f; 111 length = 4; 112 break; 113 default: 114 // valid 1-byte character 115 // and invalid characters 116 (*string)++; 117 return bytes[0]; 118 } 119 uint32 c = bytes[0] & mask; 120 int32 i = 1; 121 for (; i < length && (bytes[i] & 0x80) > 0; i++) 122 c = (c << 6) | (bytes[i] & 0x3f); 123 124 if (i < length) { 125 // invalid character 126 (*string)++; 127 return (uint32)bytes[0]; 128 } 129 *string += length; 130 return c; 131 } 132 133 134 int32 135 getFirstPatternSymbol(char* string) 136 { 137 char c; 138 139 for (int32 index = 0; (c = *string++); index++) { 140 if (c == '*' || c == '?' || c == '[') 141 return index; 142 } 143 return -1; 144 } 145 146 147 status_t 148 isValidPattern(char* pattern) 149 { 150 while (*pattern) { 151 switch (*pattern++) { 152 case '\\': 153 // the escape character must not be at the end of the pattern 154 if (!*pattern++) 155 return PATTERN_INVALID_ESCAPE; 156 break; 157 158 case '[': 159 if (pattern[0] == ']' || !pattern[0]) 160 return PATTERN_INVALID_SET; 161 162 while (*pattern != ']') { 163 if (*pattern == '\\' && !*++pattern) 164 return PATTERN_INVALID_ESCAPE; 165 166 if (!*pattern) 167 return PATTERN_INVALID_SET; 168 169 if (pattern[0] == '-' && pattern[1] == '-') 170 return PATTERN_INVALID_RANGE; 171 172 pattern++; 173 } 174 break; 175 } 176 } 177 return B_OK; 178 } 179 180 181 /*! Matches the string against the given wildcard pattern. 182 Returns either MATCH_OK, or NO_MATCH when everything went fine, or 183 values < 0 (see enum at the top of Query.cpp) if an error occurs. 184 */ 185 status_t 186 matchString(char* pattern, char* string) 187 { 188 while (*pattern) { 189 // end of string == valid end of pattern? 190 if (!string[0]) { 191 while (pattern[0] == '*') 192 pattern++; 193 return !pattern[0] ? MATCH_OK : NO_MATCH; 194 } 195 196 switch (*pattern++) { 197 case '?': 198 { 199 // match exactly one UTF-8 character; we are 200 // not interested in the result 201 utf8ToUnicode(&string); 202 break; 203 } 204 205 case '*': 206 { 207 // compact pattern 208 while (true) { 209 if (pattern[0] == '?') { 210 if (!*++string) 211 return NO_MATCH; 212 } else if (pattern[0] != '*') 213 break; 214 215 pattern++; 216 } 217 218 // if the pattern is done, we have matched the string 219 if (!pattern[0]) 220 return MATCH_OK; 221 222 while(true) { 223 // we have removed all occurences of '*' and '?' 224 if (pattern[0] == string[0] 225 || pattern[0] == '[' 226 || pattern[0] == '\\') { 227 status_t status = matchString(pattern, string); 228 if (status < B_OK || status == MATCH_OK) 229 return status; 230 } 231 232 // we could be nice here and just jump to the next 233 // UTF-8 character - but we wouldn't gain that much 234 // and it'd be slower (since we're checking for 235 // equality before entering the recursion) 236 if (!*++string) 237 return NO_MATCH; 238 } 239 break; 240 } 241 242 case '[': 243 { 244 bool invert = false; 245 if (pattern[0] == '^' || pattern[0] == '!') { 246 invert = true; 247 pattern++; 248 } 249 250 if (!pattern[0] || pattern[0] == ']') 251 return MATCH_BAD_PATTERN; 252 253 uint32 c = utf8ToUnicode(&string); 254 bool matched = false; 255 256 while (pattern[0] != ']') { 257 if (!pattern[0]) 258 return MATCH_BAD_PATTERN; 259 260 if (pattern[0] == '\\') 261 pattern++; 262 263 uint32 first = utf8ToUnicode(&pattern); 264 265 // Does this character match, or is this a range? 266 if (first == c) { 267 matched = true; 268 break; 269 } else if (pattern[0] == '-' && pattern[1] != ']' 270 && pattern[1]) { 271 pattern++; 272 273 if (pattern[0] == '\\') { 274 pattern++; 275 if (!pattern[0]) 276 return MATCH_BAD_PATTERN; 277 } 278 uint32 last = utf8ToUnicode(&pattern); 279 280 if (c >= first && c <= last) { 281 matched = true; 282 break; 283 } 284 } 285 } 286 287 if (invert) 288 matched = !matched; 289 290 if (matched) { 291 while (pattern[0] != ']') { 292 if (!pattern[0]) 293 return MATCH_BAD_PATTERN; 294 pattern++; 295 } 296 pattern++; 297 break; 298 } 299 return NO_MATCH; 300 } 301 302 case '\\': 303 if (!pattern[0]) 304 return MATCH_BAD_PATTERN; 305 // supposed to fall through 306 default: 307 if (pattern[-1] != string[0]) 308 return NO_MATCH; 309 string++; 310 break; 311 } 312 } 313 314 if (string[0]) 315 return NO_MATCH; 316 317 return MATCH_OK; 318 } 319 320 321 } // namespace QueryParser 322