1 /* 2 * Copyright 2002-2006, Haiku Inc. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Tyler Dauwalder 7 * Ingo Weinhold, bonefish@users.sf.net 8 */ 9 10 /*! 11 \file SnifferRules.cpp 12 SnifferRules class implementation 13 */ 14 15 #include <mime/SnifferRules.h> 16 17 #include <stdio.h> 18 #include <sys/stat.h> 19 20 #include <Directory.h> 21 #include <Entry.h> 22 #include <File.h> 23 #include <MimeType.h> 24 #include <mime/database_support.h> 25 #include <mime/DatabaseDirectory.h> 26 #include <mime/DatabaseLocation.h> 27 #include <mime/MimeSniffer.h> 28 #include <sniffer/Parser.h> 29 #include <sniffer/Rule.h> 30 #include <StorageDefs.h> 31 #include <storage_support.h> 32 #include <String.h> 33 34 35 #define DBG(x) x 36 //#define DBG(x) 37 #define OUT printf 38 39 namespace BPrivate { 40 namespace Storage { 41 namespace Mime { 42 43 using namespace BPrivate::Storage; 44 45 /*! 46 \struct SnifferRules::sniffer_rule 47 \brief A parsed sniffer rule and its corresponding mime type and rule string 48 49 The parse sniffer rule is stored in the \c rule member, which is a pointer 50 to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule 51 objects (as opposed to \c sniffer_rule pointers) to be used with STL objects 52 without unnecessary copying. As a consequence of this decision, the 53 \c SnifferRules object managing the rule list is responsible for actually 54 deleting each \c sniffer_rule's \c Sniffer::Rule object. 55 */ 56 57 // sniffer_rule Constructor 58 //! Creates a new \c sniffer_rule object 59 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule) 60 : rule(rule) 61 { 62 } 63 64 // sniffer_rule Destructor 65 //! Destroys the \c sniffer_rule object. 66 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule 67 object's \c rule member is *NOT* deleted by this function. 68 */ 69 SnifferRules::sniffer_rule::~sniffer_rule() 70 { 71 } 72 73 // private functions 74 /*! \brief Returns true if \a left's priority is greater than \a right's 75 76 This may seem slightly backwards, but since sort() using 77 operator<() sorts in ascending order, we say "left < right" 78 if "left.priority > right.priority" to get them sorted in 79 ascending order. Super, no? 80 81 Also, sniffer_rule objects with \c NULL \c rule members are 82 treated as having minimal priority (and thus are placed at 83 the end of the list of rules). 84 85 Finally, sniffer_rule objects that are otherwise equal are 86 sorted in reverse alphabetic order (thus placing sniffer 87 rules for supertypes *after* sniffer rules for subtypes 88 of said supertype when both rules have identical priorities). 89 */ 90 bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right) 91 { 92 if (left.rule && right.rule) { 93 double leftPriority = left.rule->Priority(); 94 double rightPriority = right.rule->Priority(); 95 if (leftPriority > rightPriority) { 96 return true; // left < right 97 } else if (rightPriority > leftPriority) { 98 return false; // right < left 99 } else { 100 return left.type > right.type; 101 } 102 } else if (left.rule) { 103 return true; // left < right 104 } else { 105 return false; // right < left 106 } 107 } 108 109 /*! 110 \class SnifferRules 111 \brief Manages the sniffer rules for the entire database 112 */ 113 114 // Constructor 115 //! Constructs a new SnifferRules object 116 SnifferRules::SnifferRules(DatabaseLocation* databaseLocation, 117 MimeSniffer* mimeSniffer) 118 : 119 fDatabaseLocation(databaseLocation), 120 fMimeSniffer(mimeSniffer), 121 fMaxBytesNeeded(0), 122 fHaveDoneFullBuild(false) 123 { 124 } 125 126 // Destructor 127 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated 128 \c Sniffer::Rule objects scattered throughout the rule list in 129 \c sniffer_rule::rule members. 130 */ 131 SnifferRules::~SnifferRules() 132 { 133 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 134 i != fRuleList.end(); i++) { 135 delete i->rule; 136 i->rule = NULL; 137 } 138 } 139 140 // GuessMimeType 141 /*! \brief Guesses a MIME type for the supplied entry_ref. 142 143 Only the data in the given entry is considered, not the filename or 144 its extension. Please see GuessMimeType(BFile*, const void *, int32, 145 BString*) for more details. 146 147 \param ref The entry to sniff 148 \param type Pointer to a pre-allocated BString which is set to the 149 resulting MIME type. 150 \return 151 - \c B_OK: success 152 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 153 - error code: failure 154 */ 155 status_t 156 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type) 157 { 158 status_t err = ref && type ? B_OK : B_BAD_VALUE; 159 ssize_t bytes = 0; 160 char *buffer = NULL; 161 BFile file; 162 163 // First find out the max number of bytes we need to read 164 // from the file to fully accomodate all of our currently 165 // installed sniffer rules 166 if (!err) { 167 bytes = MaxBytesNeeded(); 168 if (bytes < 0) 169 err = bytes; 170 } 171 172 // Next read that many bytes (or fewer, if the file isn't 173 // that long) into a buffer 174 if (!err) { 175 buffer = new(std::nothrow) char[bytes]; 176 if (!buffer) 177 err = B_NO_MEMORY; 178 } 179 180 if (!err) 181 err = file.SetTo(ref, B_READ_ONLY); 182 if (!err) { 183 bytes = file.Read(buffer, bytes); 184 if (bytes < 0) 185 err = bytes; 186 } 187 188 // Now sniff the buffer 189 if (!err) 190 err = GuessMimeType(&file, buffer, bytes, type); 191 192 delete[] buffer; 193 194 return err; 195 } 196 197 // GuessMimeType 198 /*! \brief Guesses a MIME type for the given chunk of data. 199 200 Please see GuessMimeType(BFile*, const void *, int32, BString*) for more 201 details. 202 203 \param buffer Pointer to a data buffer to sniff 204 \param length The length of the data buffer pointed to by \a buffer 205 \param type Pointer to a pre-allocated BString which is set to the 206 resulting MIME type. 207 \return 208 - \c B_OK: success 209 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 210 - error code: failure 211 */ 212 status_t 213 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type) 214 { 215 return GuessMimeType(NULL, buffer, length, type); 216 } 217 218 // SetSnifferRule 219 /*! Updates the sniffer rule for the given type 220 221 If the a rule currently exists in the rule list for the given type, 222 it is first removed before the new rule is inserted. 223 224 The new rule is inserted in its proper, sorted position in the list. 225 226 \param type The type of interest 227 \param rule The new sniffer rule 228 \return 229 - \c B_OK: success 230 - other error code: failure 231 */ 232 status_t 233 SnifferRules::SetSnifferRule(const char *type, const char *rule) 234 { 235 status_t err = type && rule ? B_OK : B_BAD_VALUE; 236 if (!err && !fHaveDoneFullBuild) 237 return B_OK; 238 239 sniffer_rule item(new Sniffer::Rule()); 240 BString parseError; 241 242 // Check the mem alloc 243 if (!err) 244 err = item.rule ? B_OK : B_NO_MEMORY; 245 // Prepare the sniffer_rule 246 if (!err) { 247 item.type = type; 248 item.rule_string = rule; 249 err = Sniffer::parse(rule, item.rule, &parseError); 250 if (err) 251 DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n", 252 parseError.String())); 253 } 254 // Remove any previous rule for this type 255 if (!err) 256 err = DeleteSnifferRule(type); 257 // Insert the new rule at the proper position in 258 // the sorted rule list (remembering that our list 259 // is sorted in ascending order using 260 // operator<(sniffer_rule&, sniffer_rule&)) 261 if (!err) { 262 std::list<sniffer_rule>::iterator i; 263 for (i = fRuleList.begin(); i != fRuleList.end(); i++) { 264 if (item < (*i)) { 265 fRuleList.insert(i, item); 266 break; 267 } 268 } 269 if (i == fRuleList.end()) 270 fRuleList.push_back(item); 271 } 272 273 return err; 274 } 275 276 // DeleteSnifferRule 277 /*! \brief Removes the sniffer rule for the given type from the rule list 278 \param type The type of interest 279 \return 280 - \c B_OK: success (even if no rule existed for the given type) 281 - other error code: failure 282 */ 283 status_t 284 SnifferRules::DeleteSnifferRule(const char *type) 285 { 286 status_t err = type ? B_OK : B_BAD_VALUE; 287 if (!err && !fHaveDoneFullBuild) 288 return B_OK; 289 290 // Find the rule in the list and remove it 291 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 292 i != fRuleList.end(); i++) { 293 if (i->type == type) { 294 fRuleList.erase(i); 295 break; 296 } 297 } 298 299 return err; 300 } 301 302 // PrintToStream 303 //! Dumps the list of sniffer rules in sorted order to standard output 304 void 305 SnifferRules::PrintToStream() const 306 { 307 printf("\n"); 308 printf("--------------\n"); 309 printf("Sniffer Rules:\n"); 310 printf("--------------\n"); 311 312 if (fHaveDoneFullBuild) { 313 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 314 i != fRuleList.end(); i++) { 315 printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str()); 316 } 317 } else { 318 printf("You haven't built your rule list yet, chump. ;-)\n"); 319 } 320 } 321 322 // BuildRuleList 323 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds 324 each parsed rule to the rule list, and sorts the list by priority, largest first. 325 326 Initial MaxBytesNeeded() info is compiled by this function as well. 327 */ 328 status_t 329 SnifferRules::BuildRuleList() 330 { 331 fRuleList.clear(); 332 333 ssize_t maxBytesNeeded = 0; 334 ssize_t bytesNeeded = 0; 335 DatabaseDirectory root; 336 337 status_t err = root.Init(fDatabaseLocation); 338 if (!err) { 339 root.Rewind(); 340 while (true) { 341 BEntry entry; 342 err = root.GetNextEntry(&entry); 343 if (err) { 344 // If we've come to the end of list, it's not an error 345 if (err == B_ENTRY_NOT_FOUND) 346 err = B_OK; 347 break; 348 } else { 349 // Check that this entry is both a directory and a valid MIME string 350 char supertype[B_PATH_NAME_LENGTH]; 351 if (entry.IsDirectory() 352 && entry.GetName(supertype) == B_OK 353 && BMimeType::IsValid(supertype)) { 354 // Make sure the supertype string is all lowercase 355 BPrivate::Storage::to_lower(supertype); 356 357 // First, iterate through this supertype directory and process 358 // all of its subtypes 359 DatabaseDirectory dir; 360 if (dir.Init(fDatabaseLocation, supertype) == B_OK) { 361 dir.Rewind(); 362 while (true) { 363 BEntry subEntry; 364 err = dir.GetNextEntry(&subEntry); 365 if (err) { 366 // If we've come to the end of list, it's not an error 367 if (err == B_ENTRY_NOT_FOUND) 368 err = B_OK; 369 break; 370 } else { 371 // Get the subtype's name 372 char subtype[B_PATH_NAME_LENGTH]; 373 if (subEntry.GetName(subtype) == B_OK) { 374 BPrivate::Storage::to_lower(subtype); 375 376 char fulltype[B_PATH_NAME_LENGTH]; 377 snprintf(fulltype, B_PATH_NAME_LENGTH, "%s/%s", 378 supertype, subtype); 379 380 // Process the subtype 381 ProcessType(fulltype, &bytesNeeded); 382 if (bytesNeeded > maxBytesNeeded) 383 maxBytesNeeded = bytesNeeded; 384 } 385 } 386 } 387 } else { 388 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 389 "Failed opening supertype directory '%s'\n", 390 supertype)); 391 } 392 393 // Second, process the supertype 394 ProcessType(supertype, &bytesNeeded); 395 if (bytesNeeded > maxBytesNeeded) 396 maxBytesNeeded = bytesNeeded; 397 } 398 } 399 } 400 } else { 401 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 402 "Failed opening mime database directory.\n")); 403 } 404 405 if (!err) { 406 fRuleList.sort(); 407 fMaxBytesNeeded = maxBytesNeeded; 408 fHaveDoneFullBuild = true; 409 // PrintToStream(); 410 } else { 411 DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%" 412 B_PRIx32 "\n", err)); 413 } 414 return err; 415 } 416 417 // GuessMimeType 418 /*! \brief Guesses a MIME type for the supplied chunk of data. 419 420 This is accomplished by searching through the currently installed 421 list of sniffer rules for a rule that matches on the given data buffer. 422 Rules are searched in order of priority (higher priority first). Rules 423 of equal priority are searched in reverse-alphabetical order (that way 424 "supertype/subtype" form rules are checked before "supertype-only" form 425 rules if their priorities happen to be identical). 426 427 \param file The file to sniff. May be \c NULL. \a buffer is always given. 428 \param buffer Pointer to a data buffer to sniff 429 \param length The length of the data buffer pointed to by \a buffer 430 \param type Pointer to a pre-allocated BString which is set to the 431 resulting MIME type. 432 \return 433 - \c B_OK: success 434 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 435 - error code: failure 436 */ 437 status_t 438 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length, 439 BString *type) 440 { 441 status_t err = buffer && type ? B_OK : B_BAD_VALUE; 442 if (err) 443 return err; 444 445 // wrap the buffer by a BMemoryIO 446 BMemoryIO data(buffer, length); 447 448 if (!fHaveDoneFullBuild) 449 err = BuildRuleList(); 450 451 // first ask the MIME sniffer for a suitable type 452 float addonPriority = -1; 453 BMimeType mimeType; 454 if (!err && fMimeSniffer != NULL) { 455 addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length, 456 &mimeType); 457 } 458 459 if (!err) { 460 // Run through our rule list, which is sorted in order of 461 // descreasing priority, and see if one of the rules sniffs 462 // out a match 463 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 464 i != fRuleList.end(); i++) { 465 if (i->rule) { 466 // If an add-on identified the type with a priority at least 467 // as great as the remaining rules, we can stop further 468 // processing and return the type found by the add-on. 469 if (i->rule->Priority() <= addonPriority) { 470 *type = mimeType.Type(); 471 return B_OK; 472 } 473 474 if (i->rule->Sniff(&data)) { 475 type->SetTo(i->type.c_str()); 476 return B_OK; 477 } 478 } else { 479 DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): " 480 "NULL sniffer_rule::rule member found in rule list for type == '%s', " 481 "rule_string == '%s'\n", 482 i->type.c_str(), i->rule_string.c_str())); 483 } 484 } 485 486 // The sniffer add-on manager might have returned a low priority 487 // (lower than any of a rule). 488 if (addonPriority >= 0) { 489 *type = mimeType.Type(); 490 return B_OK; 491 } 492 493 // If we get here, we didn't find a damn thing 494 err = kMimeGuessFailureError; 495 } 496 return err; 497 } 498 499 // MaxBytesNeeded 500 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for 501 all the currently installed rules to be able to perform a complete sniff, 502 or an error code if something goes wrong. 503 504 If the internal rule list has not yet been built (this includes parsing 505 all the installed rules), it will be. 506 507 \return: If the return value is non-negative, it represents the max number 508 of bytes needed to do a complete sniff. Otherwise, the number returned is 509 an error code. 510 */ 511 ssize_t 512 SnifferRules::MaxBytesNeeded() 513 { 514 ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList(); 515 if (!err) { 516 err = fMaxBytesNeeded; 517 518 if (fMimeSniffer != NULL) { 519 fMaxBytesNeeded = max_c(fMaxBytesNeeded, 520 (ssize_t)fMimeSniffer->MinimalBufferSize()); 521 } 522 } 523 return err; 524 } 525 526 // ProcessType 527 /*! \brief Handles a portion of the initial rule list construction for 528 the given mime type. 529 530 \note To be called by BuildRuleList() *ONLY*. :-) 531 532 \param type The mime type of interest. The mime string is expected to be valid 533 and lowercase. Both "supertype" and "supertype/subtype" mime types 534 are allowed. 535 \param bytesNeeded Returns the minimum number of bytes needed for this rule to 536 perform a complete sniff. May not be NULL because I'm lazy 537 and this function is for internal use only anyway. 538 \return 539 The return value is essentially ignored (as this function prints out the 540 debug warning if a parse fails), but that being said: 541 - \c B_OK: success 542 - \c other error code: failure 543 */ 544 status_t 545 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded) 546 { 547 status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE; 548 if (!err) 549 *bytesNeeded = 0; 550 551 BString str; 552 BString errorMsg; 553 sniffer_rule rule(new Sniffer::Rule()); 554 555 // Check the mem alloc 556 if (!err) 557 err = rule.rule ? B_OK : B_NO_MEMORY; 558 // Read the attr 559 if (!err) { 560 err = fDatabaseLocation->ReadStringAttribute(type, kSnifferRuleAttr, 561 str); 562 } 563 // Parse the rule 564 if (!err) { 565 err = Sniffer::parse(str.String(), rule.rule, &errorMsg); 566 if (err) 567 DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String())); 568 } 569 if (!err) { 570 // Note the bytes needed 571 *bytesNeeded = rule.rule->BytesNeeded(); 572 573 // Add the rule to the list 574 rule.type = type; 575 rule.rule_string = str.String(); 576 fRuleList.push_back(rule); 577 } 578 return err; 579 } 580 581 } // namespace Mime 582 } // namespace Storage 583 } // namespace BPrivate 584 585