1 /* 2 * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de. 3 * Copyright 2010, Clemens Zeidler <haiku@clemens-zeidler.de> 4 * Copyright 2011, Ingo Weinhold, ingo_weinhold@gmx.de. 5 * This file may be used under the terms of the MIT License. 6 */ 7 8 9 #include <file_systems/QueryParserUtils.h> 10 11 #include <string.h> 12 13 #include <algorithm> 14 15 #include <TypeConstants.h> 16 17 18 namespace QueryParser { 19 20 21 template<typename Key> 22 static inline int 23 compare_integral(const Key& a, const Key& b) 24 { 25 if (a < b) 26 return -1; 27 else if (a > b) 28 return 1; 29 return 0; 30 } 31 32 33 // #pragma mark - 34 35 36 void 37 skipWhitespace(char** expr, int32 skip) 38 { 39 char* string = (*expr) + skip; 40 while (*string == ' ' || *string == '\t') string++; 41 *expr = string; 42 } 43 44 45 void 46 skipWhitespaceReverse(char** expr, char* stop) 47 { 48 char* string = *expr; 49 while (string > stop && (*string == ' ' || *string == '\t')) 50 string--; 51 *expr = string; 52 } 53 54 55 int 56 compareKeys(uint32 type, const void* key1, size_t length1, const void* key2, 57 size_t length2) 58 { 59 switch (type) { 60 case B_INT32_TYPE: 61 return compare_integral(*(int32*)key1, *(int32*)key2); 62 case B_UINT32_TYPE: 63 return compare_integral(*(uint32*)key1, *(uint32*)key2); 64 case B_INT64_TYPE: 65 return compare_integral(*(int64*)key1, *(int64*)key2); 66 case B_UINT64_TYPE: 67 return compare_integral(*(uint64*)key1, *(uint64*)key2); 68 case B_FLOAT_TYPE: 69 return compare_integral(*(float*)key1, *(float*)key2); 70 case B_DOUBLE_TYPE: 71 return compare_integral(*(double*)key1, *(double*)key2); 72 case B_STRING_TYPE: 73 case B_MIME_STRING_TYPE: 74 { 75 int result = strncmp((const char*)key1, (const char*)key2, 76 std::min(length1, length2)); 77 if (result == 0) { 78 result = compare_integral(strnlen((const char*)key1, length1), 79 strnlen((const char*)key2, length2)); 80 } 81 return result; 82 } 83 } 84 return -1; 85 } 86 87 88 // #pragma mark - 89 90 91 uint32 92 utf8ToUnicode(char** string) 93 { 94 uint8* bytes = (uint8*)*string; 95 int32 length; 96 uint8 mask = 0x1f; 97 98 switch (bytes[0] & 0xf0) { 99 case 0xc0: 100 case 0xd0: 101 length = 2; 102 break; 103 case 0xe0: 104 length = 3; 105 break; 106 case 0xf0: 107 mask = 0x0f; 108 length = 4; 109 break; 110 default: 111 // valid 1-byte character 112 // and invalid characters 113 (*string)++; 114 return bytes[0]; 115 } 116 uint32 c = bytes[0] & mask; 117 int32 i = 1; 118 for (; i < length && (bytes[i] & 0x80) > 0; i++) 119 c = (c << 6) | (bytes[i] & 0x3f); 120 121 if (i < length) { 122 // invalid character 123 (*string)++; 124 return (uint32)bytes[0]; 125 } 126 *string += length; 127 return c; 128 } 129 130 131 int32 132 getFirstPatternSymbol(char* string) 133 { 134 char c; 135 136 for (int32 index = 0; (c = *string++); index++) { 137 if (c == '*' || c == '?' || c == '[') 138 return index; 139 } 140 return -1; 141 } 142 143 144 status_t 145 isValidPattern(char* pattern) 146 { 147 while (*pattern) { 148 switch (*pattern++) { 149 case '\\': 150 // the escape character must not be at the end of the pattern 151 if (!*pattern++) 152 return PATTERN_INVALID_ESCAPE; 153 break; 154 155 case '[': 156 if (pattern[0] == ']' || !pattern[0]) 157 return PATTERN_INVALID_SET; 158 159 while (*pattern != ']') { 160 if (*pattern == '\\' && !*++pattern) 161 return PATTERN_INVALID_ESCAPE; 162 163 if (!*pattern) 164 return PATTERN_INVALID_SET; 165 166 if (pattern[0] == '-' && pattern[1] == '-') 167 return PATTERN_INVALID_RANGE; 168 169 pattern++; 170 } 171 break; 172 } 173 } 174 return B_OK; 175 } 176 177 178 /*! Matches the string against the given wildcard pattern. 179 Returns either MATCH_OK, or NO_MATCH when everything went fine, or 180 values < 0 (see enum at the top of Query.cpp) if an error occurs. 181 */ 182 status_t 183 matchString(char* pattern, char* string) 184 { 185 while (*pattern) { 186 // end of string == valid end of pattern? 187 if (!string[0]) { 188 while (pattern[0] == '*') 189 pattern++; 190 return !pattern[0] ? MATCH_OK : NO_MATCH; 191 } 192 193 switch (*pattern++) { 194 case '?': 195 { 196 // match exactly one UTF-8 character; we are 197 // not interested in the result 198 utf8ToUnicode(&string); 199 break; 200 } 201 202 case '*': 203 { 204 // compact pattern 205 while (true) { 206 if (pattern[0] == '?') { 207 if (!*++string) 208 return NO_MATCH; 209 } else if (pattern[0] != '*') 210 break; 211 212 pattern++; 213 } 214 215 // if the pattern is done, we have matched the string 216 if (!pattern[0]) 217 return MATCH_OK; 218 219 while(true) { 220 // we have removed all occurences of '*' and '?' 221 if (pattern[0] == string[0] 222 || pattern[0] == '[' 223 || pattern[0] == '\\') { 224 status_t status = matchString(pattern, string); 225 if (status < B_OK || status == MATCH_OK) 226 return status; 227 } 228 229 // we could be nice here and just jump to the next 230 // UTF-8 character - but we wouldn't gain that much 231 // and it'd be slower (since we're checking for 232 // equality before entering the recursion) 233 if (!*++string) 234 return NO_MATCH; 235 } 236 break; 237 } 238 239 case '[': 240 { 241 bool invert = false; 242 if (pattern[0] == '^' || pattern[0] == '!') { 243 invert = true; 244 pattern++; 245 } 246 247 if (!pattern[0] || pattern[0] == ']') 248 return MATCH_BAD_PATTERN; 249 250 uint32 c = utf8ToUnicode(&string); 251 bool matched = false; 252 253 while (pattern[0] != ']') { 254 if (!pattern[0]) 255 return MATCH_BAD_PATTERN; 256 257 if (pattern[0] == '\\') 258 pattern++; 259 260 uint32 first = utf8ToUnicode(&pattern); 261 262 // Does this character match, or is this a range? 263 if (first == c) { 264 matched = true; 265 break; 266 } else if (pattern[0] == '-' && pattern[1] != ']' 267 && pattern[1]) { 268 pattern++; 269 270 if (pattern[0] == '\\') { 271 pattern++; 272 if (!pattern[0]) 273 return MATCH_BAD_PATTERN; 274 } 275 uint32 last = utf8ToUnicode(&pattern); 276 277 if (c >= first && c <= last) { 278 matched = true; 279 break; 280 } 281 } 282 } 283 284 if (invert) 285 matched = !matched; 286 287 if (matched) { 288 while (pattern[0] != ']') { 289 if (!pattern[0]) 290 return MATCH_BAD_PATTERN; 291 pattern++; 292 } 293 pattern++; 294 break; 295 } 296 return NO_MATCH; 297 } 298 299 case '\\': 300 if (!pattern[0]) 301 return MATCH_BAD_PATTERN; 302 // supposed to fall through 303 default: 304 if (pattern[-1] != string[0]) 305 return NO_MATCH; 306 string++; 307 break; 308 } 309 } 310 311 if (string[0]) 312 return NO_MATCH; 313 314 return MATCH_OK; 315 } 316 317 318 } // namespace QueryParser 319