/*
 * Copyright 2002-2006, Haiku Inc.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		Tyler Dauwalder
 *		Ingo Weinhold, bonefish@users.sf.net
 */

/*!
	\file SnifferRules.cpp
	SnifferRules class implementation
*/

#include <mime/SnifferRules.h>

#include <stdio.h>
#include <sys/stat.h>

#include <Directory.h>
#include <Entry.h>
#include <File.h>
#include <MimeType.h>
#include <mime/database_support.h>
#include <mime/DatabaseDirectory.h>
#include <mime/DatabaseLocation.h>
#include <mime/MimeSniffer.h>
#include <sniffer/Parser.h>
#include <sniffer/Rule.h>
#include <StorageDefs.h>
#include <storage_support.h>
#include <String.h>


// Debug output helpers: DBG(x) currently expands to its argument, so every
// DBG(OUT(...)) statement in this file prints via printf(). Swap the two
// DBG definitions below to compile the debug output out.
#define DBG(x) x
//#define DBG(x)
#define OUT printf

namespace BPrivate {
namespace Storage {
namespace Mime {

using namespace BPrivate::Storage;

/*!
	\struct SnifferRules::sniffer_rule
	\brief A parsed sniffer rule and its corresponding mime type and rule string

	The parsed sniffer rule is stored in the \c rule member, which is a pointer
	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
	objects (as opposed to \c sniffer_rule pointers) to be used with STL objects
	without unnecessary copying. As a consequence of this decision, the
	\c SnifferRules object managing the rule list is responsible for actually
	deleting each \c sniffer_rule's \c Sniffer::Rule object.
*/

// sniffer_rule Constructor
//! Creates a new \c sniffer_rule object
/*!	Only the pointer is stored; the \c Sniffer::Rule it points to is neither
	copied nor inspected here.

	\param rule The parsed rule this entry refers to.
*/
SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
	: rule(rule)
{
}

// sniffer_rule Destructor
//! Destroys the \c sniffer_rule object.
/*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
	object's \c rule member is *NOT* deleted by this function.
*/
SnifferRules::sniffer_rule::~sniffer_rule()
{
}

// private functions
/*!
\brief Returns true if \a left's priority is greater than \a right's 75 76 This may seem slightly backwards, but since sort() using 77 operator<() sorts in ascending order, we say "left < right" 78 if "left.priority > right.priority" to get them sorted in 79 ascending order. Super, no? 80 81 Also, sniffer_rule objects with \c NULL \c rule members are 82 treated as having minimal priority (and thus are placed at 83 the end of the list of rules). 84 85 Finally, sniffer_rule objects that are otherwise equal are 86 sorted in reverse alphabetic order (thus placing sniffer 87 rules for supertypes *after* sniffer rules for subtypes 88 of said supertype when both rules have identical priorities). 89 */ 90 bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right) 91 { 92 if (left.rule && right.rule) { 93 double leftPriority = left.rule->Priority(); 94 double rightPriority = right.rule->Priority(); 95 if (leftPriority > rightPriority) { 96 return true; // left < right 97 } else if (rightPriority > leftPriority) { 98 return false; // right < left 99 } else { 100 return left.type > right.type; 101 } 102 } else if (left.rule) { 103 return true; // left < right 104 } else { 105 return false; // right < left 106 } 107 } 108 109 /*! 110 \class SnifferRules 111 \brief Manages the sniffer rules for the entire database 112 */ 113 114 // Constructor 115 //! Constructs a new SnifferRules object 116 SnifferRules::SnifferRules(DatabaseLocation* databaseLocation, 117 MimeSniffer* mimeSniffer) 118 : 119 fDatabaseLocation(databaseLocation), 120 fMimeSniffer(mimeSniffer), 121 fMaxBytesNeeded(0), 122 fHaveDoneFullBuild(false) 123 { 124 } 125 126 // Destructor 127 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated 128 \c Sniffer::Rule objects scattered throughout the rule list in 129 \c sniffer_rule::rule members. 
130 */ 131 SnifferRules::~SnifferRules() 132 { 133 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 134 i != fRuleList.end(); 135 i++) 136 { 137 delete i->rule; 138 i->rule = NULL; 139 } 140 } 141 142 // GuessMimeType 143 /*! \brief Guesses a MIME type for the supplied entry_ref. 144 145 Only the data in the given entry is considered, not the filename or 146 its extension. Please see GuessMimeType(BFile*, const void *, int32, 147 BString*) for more details. 148 149 \param ref The entry to sniff 150 \param type Pointer to a pre-allocated BString which is set to the 151 resulting MIME type. 152 \return 153 - \c B_OK: success 154 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 155 - error code: failure 156 */ 157 status_t 158 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type) 159 { 160 status_t err = ref && type ? B_OK : B_BAD_VALUE; 161 ssize_t bytes = 0; 162 char *buffer = NULL; 163 BFile file; 164 165 // First find out the max number of bytes we need to read 166 // from the file to fully accomodate all of our currently 167 // installed sniffer rules 168 if (!err) { 169 bytes = MaxBytesNeeded(); 170 if (bytes < 0) 171 err = bytes; 172 } 173 174 // Next read that many bytes (or fewer, if the file isn't 175 // that long) into a buffer 176 if (!err) { 177 buffer = new(std::nothrow) char[bytes]; 178 if (!buffer) 179 err = B_NO_MEMORY; 180 } 181 182 if (!err) 183 err = file.SetTo(ref, B_READ_ONLY); 184 if (!err) { 185 bytes = file.Read(buffer, bytes); 186 if (bytes < 0) 187 err = bytes; 188 } 189 190 // Now sniff the buffer 191 if (!err) 192 err = GuessMimeType(&file, buffer, bytes, type); 193 194 delete[] buffer; 195 196 return err; 197 } 198 199 // GuessMimeType 200 /*! \brief Guesses a MIME type for the given chunk of data. 201 202 Please see GuessMimeType(BFile*, const void *, int32, BString*) for more 203 details. 
204 205 \param buffer Pointer to a data buffer to sniff 206 \param length The length of the data buffer pointed to by \a buffer 207 \param type Pointer to a pre-allocated BString which is set to the 208 resulting MIME type. 209 \return 210 - \c B_OK: success 211 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 212 - error code: failure 213 */ 214 status_t 215 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type) 216 { 217 return GuessMimeType(NULL, buffer, length, type); 218 } 219 220 // SetSnifferRule 221 /*! Updates the sniffer rule for the given type 222 223 If the a rule currently exists in the rule list for the given type, 224 it is first removed before the new rule is inserted. 225 226 The new rule is inserted in its proper, sorted position in the list. 227 228 \param type The type of interest 229 \param rule The new sniffer rule 230 \return 231 - \c B_OK: success 232 - other error code: failure 233 */ 234 status_t 235 SnifferRules::SetSnifferRule(const char *type, const char *rule) 236 { 237 status_t err = type && rule ? B_OK : B_BAD_VALUE; 238 if (!err && !fHaveDoneFullBuild) 239 return B_OK; 240 241 sniffer_rule item(new Sniffer::Rule()); 242 BString parseError; 243 244 // Check the mem alloc 245 if (!err) 246 err = item.rule ? 
B_OK : B_NO_MEMORY; 247 // Prepare the sniffer_rule 248 if (!err) { 249 item.type = type; 250 item.rule_string = rule; 251 err = Sniffer::parse(rule, item.rule, &parseError); 252 if (err) 253 DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n", 254 parseError.String())); 255 } 256 // Remove any previous rule for this type 257 if (!err) 258 err = DeleteSnifferRule(type); 259 // Insert the new rule at the proper position in 260 // the sorted rule list (remembering that our list 261 // is sorted in ascending order using 262 // operator<(sniffer_rule&, sniffer_rule&)) 263 if (!err) { 264 std::list<sniffer_rule>::iterator i; 265 for (i = fRuleList.begin(); i != fRuleList.end(); i++) 266 { 267 if (item < (*i)) { 268 fRuleList.insert(i, item); 269 break; 270 } 271 } 272 if (i == fRuleList.end()) 273 fRuleList.push_back(item); 274 } 275 276 return err; 277 } 278 279 // DeleteSnifferRule 280 /*! \brief Removes the sniffer rule for the given type from the rule list 281 \param type The type of interest 282 \return 283 - \c B_OK: success (even if no rule existed for the given type) 284 - other error code: failure 285 */ 286 status_t 287 SnifferRules::DeleteSnifferRule(const char *type) 288 { 289 status_t err = type ? B_OK : B_BAD_VALUE; 290 if (!err && !fHaveDoneFullBuild) 291 return B_OK; 292 293 // Find the rule in the list and remove it 294 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 295 i != fRuleList.end(); 296 i++) 297 { 298 if (i->type == type) { 299 fRuleList.erase(i); 300 break; 301 } 302 } 303 304 return err; 305 } 306 307 // PrintToStream 308 //! 
Dumps the list of sniffer rules in sorted order to standard output 309 void 310 SnifferRules::PrintToStream() const 311 { 312 printf("\n"); 313 printf("--------------\n"); 314 printf("Sniffer Rules:\n"); 315 printf("--------------\n"); 316 317 if (fHaveDoneFullBuild) { 318 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 319 i != fRuleList.end(); 320 i++) 321 { 322 printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str()); 323 } 324 } else { 325 printf("You haven't built your rule list yet, chump. ;-)\n"); 326 } 327 } 328 329 // BuildRuleList 330 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds 331 each parsed rule to the rule list, and sorts the list by priority, largest first. 332 333 Initial MaxBytesNeeded() info is compiled by this function as well. 334 */ 335 status_t 336 SnifferRules::BuildRuleList() 337 { 338 fRuleList.clear(); 339 340 ssize_t maxBytesNeeded = 0; 341 ssize_t bytesNeeded = 0; 342 DatabaseDirectory root; 343 344 status_t err = root.Init(fDatabaseLocation); 345 if (!err) { 346 root.Rewind(); 347 while (true) { 348 BEntry entry; 349 err = root.GetNextEntry(&entry); 350 if (err) { 351 // If we've come to the end of list, it's not an error 352 if (err == B_ENTRY_NOT_FOUND) 353 err = B_OK; 354 break; 355 } else { 356 // Check that this entry is both a directory and a valid MIME string 357 char supertype[B_PATH_NAME_LENGTH]; 358 if (entry.IsDirectory() 359 && entry.GetName(supertype) == B_OK 360 && BMimeType::IsValid(supertype)) 361 { 362 // Make sure the supertype string is all lowercase 363 BPrivate::Storage::to_lower(supertype); 364 365 // First, iterate through this supertype directory and process 366 // all of its subtypes 367 DatabaseDirectory dir; 368 if (dir.Init(fDatabaseLocation, supertype) == B_OK) { 369 dir.Rewind(); 370 while (true) { 371 BEntry subEntry; 372 err = dir.GetNextEntry(&subEntry); 373 if (err) { 374 // If we've come to the end of list, it's not an error 375 if (err == 
B_ENTRY_NOT_FOUND) 376 err = B_OK; 377 break; 378 } else { 379 // Get the subtype's name 380 char subtype[B_PATH_NAME_LENGTH]; 381 if (subEntry.GetName(subtype) == B_OK) { 382 BPrivate::Storage::to_lower(subtype); 383 384 char fulltype[B_PATH_NAME_LENGTH]; 385 sprintf(fulltype, "%s/%s", supertype, subtype); 386 387 // Process the subtype 388 ProcessType(fulltype, &bytesNeeded); 389 if (bytesNeeded > maxBytesNeeded) 390 maxBytesNeeded = bytesNeeded; 391 } 392 } 393 } 394 } else { 395 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 396 "Failed opening supertype directory '%s'\n", 397 supertype)); 398 } 399 400 // Second, process the supertype 401 ProcessType(supertype, &bytesNeeded); 402 if (bytesNeeded > maxBytesNeeded) 403 maxBytesNeeded = bytesNeeded; 404 } 405 } 406 } 407 } else { 408 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 409 "Failed opening mime database directory.\n")); 410 } 411 412 if (!err) { 413 fRuleList.sort(); 414 fMaxBytesNeeded = maxBytesNeeded; 415 fHaveDoneFullBuild = true; 416 // PrintToStream(); 417 } else { 418 DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%" 419 B_PRIx32 "\n", err)); 420 } 421 return err; 422 } 423 424 // GuessMimeType 425 /*! \brief Guesses a MIME type for the supplied chunk of data. 426 427 This is accomplished by searching through the currently installed 428 list of sniffer rules for a rule that matches on the given data buffer. 429 Rules are searched in order of priority (higher priority first). Rules 430 of equal priority are searched in reverse-alphabetical order (that way 431 "supertype/subtype" form rules are checked before "supertype-only" form 432 rules if their priorities happen to be identical). 433 434 \param file The file to sniff. May be \c NULL. \a buffer is always given. 
435 \param buffer Pointer to a data buffer to sniff 436 \param length The length of the data buffer pointed to by \a buffer 437 \param type Pointer to a pre-allocated BString which is set to the 438 resulting MIME type. 439 \return 440 - \c B_OK: success 441 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 442 - error code: failure 443 */ 444 status_t 445 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length, 446 BString *type) 447 { 448 status_t err = buffer && type ? B_OK : B_BAD_VALUE; 449 if (err) 450 return err; 451 452 // wrap the buffer by a BMemoryIO 453 BMemoryIO data(buffer, length); 454 455 if (!fHaveDoneFullBuild) 456 err = BuildRuleList(); 457 458 // first ask the MIME sniffer for a suitable type 459 float addonPriority = -1; 460 BMimeType mimeType; 461 if (!err && fMimeSniffer != NULL) { 462 addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length, 463 &mimeType); 464 } 465 466 if (!err) { 467 // Run through our rule list, which is sorted in order of 468 // descreasing priority, and see if one of the rules sniffs 469 // out a match 470 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 471 i != fRuleList.end(); 472 i++) 473 { 474 if (i->rule) { 475 // If an add-on identified the type with a priority at least 476 // as great as the remaining rules, we can stop further 477 // processing and return the type found by the add-on. 478 if (i->rule->Priority() <= addonPriority) { 479 *type = mimeType.Type(); 480 return B_OK; 481 } 482 483 if (i->rule->Sniff(&data)) { 484 type->SetTo(i->type.c_str()); 485 return B_OK; 486 } 487 } else { 488 DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): " 489 "NULL sniffer_rule::rule member found in rule list for type == '%s', " 490 "rule_string == '%s'\n", 491 i->type.c_str(), i->rule_string.c_str())); 492 } 493 } 494 495 // The sniffer add-on manager might have returned a low priority 496 // (lower than any of a rule). 
497 if (addonPriority >= 0) { 498 *type = mimeType.Type(); 499 return B_OK; 500 } 501 502 // If we get here, we didn't find a damn thing 503 err = kMimeGuessFailureError; 504 } 505 return err; 506 } 507 508 // MaxBytesNeeded 509 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for 510 all the currently installed rules to be able to perform a complete sniff, 511 or an error code if something goes wrong. 512 513 If the internal rule list has not yet been built (this includes parsing 514 all the installed rules), it will be. 515 516 \return: If the return value is non-negative, it represents the max number 517 of bytes needed to do a complete sniff. Otherwise, the number returned is 518 an error code. 519 */ 520 ssize_t 521 SnifferRules::MaxBytesNeeded() 522 { 523 ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList(); 524 if (!err) { 525 err = fMaxBytesNeeded; 526 527 if (fMimeSniffer != NULL) { 528 fMaxBytesNeeded = max_c(fMaxBytesNeeded, 529 (ssize_t)fMimeSniffer->MinimalBufferSize()); 530 } 531 } 532 return err; 533 } 534 535 // ProcessType 536 /*! \brief Handles a portion of the initial rule list construction for 537 the given mime type. 538 539 \note To be called by BuildRuleList() *ONLY*. :-) 540 541 \param type The mime type of interest. The mime string is expected to be valid 542 and lowercase. Both "supertype" and "supertype/subtype" mime types 543 are allowed. 544 \param bytesNeeded Returns the minimum number of bytes needed for this rule to 545 perform a complete sniff. May not be NULL because I'm lazy 546 and this function is for internal use only anyway. 547 \return 548 The return value is essentially ignored (as this function prints out the 549 debug warning if a parse fails), but that being said: 550 - \c B_OK: success 551 - \c other error code: failure 552 */ 553 status_t 554 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded) 555 { 556 status_t err = type && bytesNeeded ? 
B_OK : B_BAD_VALUE; 557 if (!err) 558 *bytesNeeded = 0; 559 560 BString str; 561 BString errorMsg; 562 sniffer_rule rule(new Sniffer::Rule()); 563 564 // Check the mem alloc 565 if (!err) 566 err = rule.rule ? B_OK : B_NO_MEMORY; 567 // Read the attr 568 if (!err) { 569 err = fDatabaseLocation->ReadStringAttribute(type, kSnifferRuleAttr, 570 str); 571 } 572 // Parse the rule 573 if (!err) { 574 err = Sniffer::parse(str.String(), rule.rule, &errorMsg); 575 if (err) 576 DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String())); 577 } 578 if (!err) { 579 // Note the bytes needed 580 *bytesNeeded = rule.rule->BytesNeeded(); 581 582 // Add the rule to the list 583 rule.type = type; 584 rule.rule_string = str.String(); 585 fRuleList.push_back(rule); 586 } 587 return err; 588 } 589 590 } // namespace Mime 591 } // namespace Storage 592 } // namespace BPrivate 593 594