1 /* 2 * Copyright 2002-2006, Haiku Inc. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Tyler Dauwalder 7 * Ingo Weinhold, bonefish@users.sf.net 8 */ 9 10 /*! 11 \file SnifferRules.cpp 12 SnifferRules class implementation 13 */ 14 15 #include <mime/SnifferRules.h> 16 17 #include <stdio.h> 18 #include <sys/stat.h> 19 20 #include <Directory.h> 21 #include <Entry.h> 22 #include <File.h> 23 #include <MimeType.h> 24 #include <mime/database_support.h> 25 #include <mime/DatabaseDirectory.h> 26 #include <mime/DatabaseLocation.h> 27 #include <mime/MimeSniffer.h> 28 #include <sniffer/Parser.h> 29 #include <sniffer/Rule.h> 30 #include <StorageDefs.h> 31 #include <storage_support.h> 32 #include <String.h> 33 34 35 #define DBG(x) x 36 //#define DBG(x) 37 #define OUT printf 38 39 namespace BPrivate { 40 namespace Storage { 41 namespace Mime { 42 43 using namespace BPrivate::Storage; 44 45 /*! 46 \struct SnifferRules::sniffer_rule 47 \brief A parsed sniffer rule and its corresponding mime type and rule string 48 49 The parse sniffer rule is stored in the \c rule member, which is a pointer 50 to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule 51 objects (as opposed to \c sniffer_rule pointers) to be used with STL objects 52 without unnecessary copying. As a consequence of this decision, the 53 \c SnifferRules object managing the rule list is responsible for actually 54 deleting each \c sniffer_rule's \c Sniffer::Rule object. 55 */ 56 57 // sniffer_rule Constructor 58 //! Creates a new \c sniffer_rule object 59 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule) 60 : rule(rule) 61 { 62 } 63 64 // sniffer_rule Destructor 65 //! Destroys the \c sniffer_rule object. 66 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule 67 object's \c rule member is *NOT* deleted by this function. 68 */ 69 SnifferRules::sniffer_rule::~sniffer_rule() 70 { 71 } 72 73 // private functions 74 /*! \brief Returns true if \a left's priority is greater than \a right's 75 76 This may seem slightly backwards, but since sort() using 77 operator<() sorts in ascending order, we say "left < right" 78 if "left.priority > right.priority" to get them sorted in 79 ascending order. Super, no? 80 81 Also, sniffer_rule objects with \c NULL \c rule members are 82 treated as having minimal priority (and thus are placed at 83 the end of the list of rules). 84 85 Finally, sniffer_rule objects that are otherwise equal are 86 sorted in reverse alphabetic order (thus placing sniffer 87 rules for supertypes *after* sniffer rules for subtypes 88 of said supertype when both rules have identical priorities). 89 */ 90 bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right) 91 { 92 if (left.rule && right.rule) { 93 double leftPriority = left.rule->Priority(); 94 double rightPriority = right.rule->Priority(); 95 if (leftPriority > rightPriority) { 96 return true; // left < right 97 } else if (rightPriority > leftPriority) { 98 return false; // right < left 99 } else { 100 return left.type > right.type; 101 } 102 } else if (left.rule) { 103 return true; // left < right 104 } else { 105 return false; // right < left 106 } 107 } 108 109 /*! 110 \class SnifferRules 111 \brief Manages the sniffer rules for the entire database 112 */ 113 114 // Constructor 115 //! Constructs a new SnifferRules object 116 SnifferRules::SnifferRules(DatabaseLocation* databaseLocation, 117 MimeSniffer* mimeSniffer) 118 : 119 fDatabaseLocation(databaseLocation), 120 fMimeSniffer(mimeSniffer), 121 fMaxBytesNeeded(0), 122 fHaveDoneFullBuild(false) 123 { 124 } 125 126 // Destructor 127 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated 128 \c Sniffer::Rule objects scattered throughout the rule list in 129 \c sniffer_rule::rule members. 130 */ 131 SnifferRules::~SnifferRules() 132 { 133 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 134 i != fRuleList.end(); i++) { 135 delete i->rule; 136 i->rule = NULL; 137 } 138 } 139 140 // GuessMimeType 141 /*! \brief Guesses a MIME type for the supplied entry_ref. 142 143 Only the data in the given entry is considered, not the filename or 144 its extension. Please see GuessMimeType(BFile*, const void *, int32, 145 BString*) for more details. 146 147 \param ref The entry to sniff 148 \param type Pointer to a pre-allocated BString which is set to the 149 resulting MIME type. 150 \return 151 - \c B_OK: success 152 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 153 - error code: failure 154 */ 155 status_t 156 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type) 157 { 158 status_t err = ref && type ? B_OK : B_BAD_VALUE; 159 ssize_t bytes = 0; 160 char *buffer = NULL; 161 BFile file; 162 163 // First find out the max number of bytes we need to read 164 // from the file to fully accomodate all of our currently 165 // installed sniffer rules 166 if (!err) { 167 bytes = MaxBytesNeeded(); 168 if (bytes < 0) 169 err = bytes; 170 } 171 172 // Next read that many bytes (or fewer, if the file isn't 173 // that long) into a buffer 174 if (!err) { 175 buffer = new(std::nothrow) char[bytes]; 176 if (!buffer) 177 err = B_NO_MEMORY; 178 } 179 180 if (!err) 181 err = file.SetTo(ref, B_READ_ONLY); 182 if (!err) { 183 bytes = file.Read(buffer, bytes); 184 if (bytes < 0) 185 err = bytes; 186 } 187 188 // Now sniff the buffer 189 if (!err) 190 err = GuessMimeType(&file, buffer, bytes, type); 191 192 delete[] buffer; 193 194 return err; 195 } 196 197 // GuessMimeType 198 /*! \brief Guesses a MIME type for the given chunk of data. 199 200 Please see GuessMimeType(BFile*, const void *, int32, BString*) for more 201 details. 202 203 \param buffer Pointer to a data buffer to sniff 204 \param length The length of the data buffer pointed to by \a buffer 205 \param type Pointer to a pre-allocated BString which is set to the 206 resulting MIME type. 207 \return 208 - \c B_OK: success 209 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 210 - error code: failure 211 */ 212 status_t 213 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type) 214 { 215 return GuessMimeType(NULL, buffer, length, type); 216 } 217 218 // SetSnifferRule 219 /*! Updates the sniffer rule for the given type 220 221 If the a rule currently exists in the rule list for the given type, 222 it is first removed before the new rule is inserted. 223 224 The new rule is inserted in its proper, sorted position in the list. 225 226 \param type The type of interest 227 \param rule The new sniffer rule 228 \return 229 - \c B_OK: success 230 - other error code: failure 231 */ 232 status_t 233 SnifferRules::SetSnifferRule(const char *type, const char *rule) 234 { 235 status_t err = type && rule ? B_OK : B_BAD_VALUE; 236 if (!err && !fHaveDoneFullBuild) 237 return B_OK; 238 239 sniffer_rule item(new Sniffer::Rule()); 240 BString parseError; 241 242 // Check the mem alloc 243 if (!err) 244 err = item.rule ? B_OK : B_NO_MEMORY; 245 // Prepare the sniffer_rule 246 if (!err) { 247 item.type = type; 248 item.rule_string = rule; 249 err = Sniffer::parse(rule, item.rule, &parseError); 250 if (err) 251 DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n", 252 parseError.String())); 253 } 254 // Remove any previous rule for this type 255 if (!err) 256 err = DeleteSnifferRule(type); 257 // Insert the new rule at the proper position in 258 // the sorted rule list (remembering that our list 259 // is sorted in ascending order using 260 // operator<(sniffer_rule&, sniffer_rule&)) 261 if (!err) { 262 std::list<sniffer_rule>::iterator i; 263 for (i = fRuleList.begin(); i != fRuleList.end(); i++) { 264 if (item < (*i)) { 265 fRuleList.insert(i, item); 266 break; 267 } 268 } 269 if (i == fRuleList.end()) 270 fRuleList.push_back(item); 271 } 272 273 return err; 274 } 275 276 // DeleteSnifferRule 277 /*! \brief Removes the sniffer rule for the given type from the rule list 278 \param type The type of interest 279 \return 280 - \c B_OK: success (even if no rule existed for the given type) 281 - other error code: failure 282 */ 283 status_t 284 SnifferRules::DeleteSnifferRule(const char *type) 285 { 286 status_t err = type ? B_OK : B_BAD_VALUE; 287 if (!err && !fHaveDoneFullBuild) 288 return B_OK; 289 290 // Find the rule in the list and remove it 291 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 292 i != fRuleList.end(); i++) { 293 if (i->type == type) { 294 fRuleList.erase(i); 295 break; 296 } 297 } 298 299 return err; 300 } 301 302 // PrintToStream 303 //! Dumps the list of sniffer rules in sorted order to standard output 304 void 305 SnifferRules::PrintToStream() const 306 { 307 printf("\n"); 308 printf("--------------\n"); 309 printf("Sniffer Rules:\n"); 310 printf("--------------\n"); 311 312 if (fHaveDoneFullBuild) { 313 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 314 i != fRuleList.end(); i++) { 315 printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str()); 316 } 317 } else { 318 printf("You haven't built your rule list yet, chump. ;-)\n"); 319 } 320 } 321 322 // BuildRuleList 323 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds 324 each parsed rule to the rule list, and sorts the list by priority, largest first. 325 326 Initial MaxBytesNeeded() info is compiled by this function as well. 327 */ 328 status_t 329 SnifferRules::BuildRuleList() 330 { 331 fRuleList.clear(); 332 333 ssize_t maxBytesNeeded = 0; 334 ssize_t bytesNeeded = 0; 335 DatabaseDirectory root; 336 337 status_t err = root.Init(fDatabaseLocation); 338 if (!err) { 339 root.Rewind(); 340 while (true) { 341 BEntry entry; 342 err = root.GetNextEntry(&entry); 343 if (err) { 344 // If we've come to the end of list, it's not an error 345 if (err == B_ENTRY_NOT_FOUND) 346 err = B_OK; 347 break; 348 } else { 349 // Check that this entry is both a directory and a valid MIME string 350 char supertype[B_PATH_NAME_LENGTH]; 351 if (entry.IsDirectory() 352 && entry.GetName(supertype) == B_OK 353 && BMimeType::IsValid(supertype)) { 354 // Make sure the supertype string is all lowercase 355 BPrivate::Storage::to_lower(supertype); 356 357 // First, iterate through this supertype directory and process 358 // all of its subtypes 359 DatabaseDirectory dir; 360 if (dir.Init(fDatabaseLocation, supertype) == B_OK) { 361 dir.Rewind(); 362 while (true) { 363 BEntry subEntry; 364 err = dir.GetNextEntry(&subEntry); 365 if (err) { 366 // If we've come to the end of list, it's not an error 367 if (err == B_ENTRY_NOT_FOUND) 368 err = B_OK; 369 break; 370 } else { 371 // Get the subtype's name 372 char subtype[B_PATH_NAME_LENGTH]; 373 if (subEntry.GetName(subtype) == B_OK) { 374 BPrivate::Storage::to_lower(subtype); 375 376 char fulltype[B_PATH_NAME_LENGTH]; 377 sprintf(fulltype, "%s/%s", supertype, subtype); 378 379 // Process the subtype 380 ProcessType(fulltype, &bytesNeeded); 381 if (bytesNeeded > maxBytesNeeded) 382 maxBytesNeeded = bytesNeeded; 383 } 384 } 385 } 386 } else { 387 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 388 "Failed opening supertype directory '%s'\n", 389 supertype)); 390 } 391 392 // Second, process the supertype 393 ProcessType(supertype, &bytesNeeded); 394 if (bytesNeeded > maxBytesNeeded) 395 maxBytesNeeded = bytesNeeded; 396 } 397 } 398 } 399 } else { 400 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 401 "Failed opening mime database directory.\n")); 402 } 403 404 if (!err) { 405 fRuleList.sort(); 406 fMaxBytesNeeded = maxBytesNeeded; 407 fHaveDoneFullBuild = true; 408 // PrintToStream(); 409 } else { 410 DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%" 411 B_PRIx32 "\n", err)); 412 } 413 return err; 414 } 415 416 // GuessMimeType 417 /*! \brief Guesses a MIME type for the supplied chunk of data. 418 419 This is accomplished by searching through the currently installed 420 list of sniffer rules for a rule that matches on the given data buffer. 421 Rules are searched in order of priority (higher priority first). Rules 422 of equal priority are searched in reverse-alphabetical order (that way 423 "supertype/subtype" form rules are checked before "supertype-only" form 424 rules if their priorities happen to be identical). 425 426 \param file The file to sniff. May be \c NULL. \a buffer is always given. 427 \param buffer Pointer to a data buffer to sniff 428 \param length The length of the data buffer pointed to by \a buffer 429 \param type Pointer to a pre-allocated BString which is set to the 430 resulting MIME type. 431 \return 432 - \c B_OK: success 433 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 434 - error code: failure 435 */ 436 status_t 437 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length, 438 BString *type) 439 { 440 status_t err = buffer && type ? B_OK : B_BAD_VALUE; 441 if (err) 442 return err; 443 444 // wrap the buffer by a BMemoryIO 445 BMemoryIO data(buffer, length); 446 447 if (!fHaveDoneFullBuild) 448 err = BuildRuleList(); 449 450 // first ask the MIME sniffer for a suitable type 451 float addonPriority = -1; 452 BMimeType mimeType; 453 if (!err && fMimeSniffer != NULL) { 454 addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length, 455 &mimeType); 456 } 457 458 if (!err) { 459 // Run through our rule list, which is sorted in order of 460 // descreasing priority, and see if one of the rules sniffs 461 // out a match 462 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 463 i != fRuleList.end(); i++) { 464 if (i->rule) { 465 // If an add-on identified the type with a priority at least 466 // as great as the remaining rules, we can stop further 467 // processing and return the type found by the add-on. 468 if (i->rule->Priority() <= addonPriority) { 469 *type = mimeType.Type(); 470 return B_OK; 471 } 472 473 if (i->rule->Sniff(&data)) { 474 type->SetTo(i->type.c_str()); 475 return B_OK; 476 } 477 } else { 478 DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): " 479 "NULL sniffer_rule::rule member found in rule list for type == '%s', " 480 "rule_string == '%s'\n", 481 i->type.c_str(), i->rule_string.c_str())); 482 } 483 } 484 485 // The sniffer add-on manager might have returned a low priority 486 // (lower than any of a rule). 487 if (addonPriority >= 0) { 488 *type = mimeType.Type(); 489 return B_OK; 490 } 491 492 // If we get here, we didn't find a damn thing 493 err = kMimeGuessFailureError; 494 } 495 return err; 496 } 497 498 // MaxBytesNeeded 499 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for 500 all the currently installed rules to be able to perform a complete sniff, 501 or an error code if something goes wrong. 502 503 If the internal rule list has not yet been built (this includes parsing 504 all the installed rules), it will be. 505 506 \return: If the return value is non-negative, it represents the max number 507 of bytes needed to do a complete sniff. Otherwise, the number returned is 508 an error code. 509 */ 510 ssize_t 511 SnifferRules::MaxBytesNeeded() 512 { 513 ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList(); 514 if (!err) { 515 err = fMaxBytesNeeded; 516 517 if (fMimeSniffer != NULL) { 518 fMaxBytesNeeded = max_c(fMaxBytesNeeded, 519 (ssize_t)fMimeSniffer->MinimalBufferSize()); 520 } 521 } 522 return err; 523 } 524 525 // ProcessType 526 /*! \brief Handles a portion of the initial rule list construction for 527 the given mime type. 528 529 \note To be called by BuildRuleList() *ONLY*. :-) 530 531 \param type The mime type of interest. The mime string is expected to be valid 532 and lowercase. Both "supertype" and "supertype/subtype" mime types 533 are allowed. 534 \param bytesNeeded Returns the minimum number of bytes needed for this rule to 535 perform a complete sniff. May not be NULL because I'm lazy 536 and this function is for internal use only anyway. 537 \return 538 The return value is essentially ignored (as this function prints out the 539 debug warning if a parse fails), but that being said: 540 - \c B_OK: success 541 - \c other error code: failure 542 */ 543 status_t 544 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded) 545 { 546 status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE; 547 if (!err) 548 *bytesNeeded = 0; 549 550 BString str; 551 BString errorMsg; 552 sniffer_rule rule(new Sniffer::Rule()); 553 554 // Check the mem alloc 555 if (!err) 556 err = rule.rule ? B_OK : B_NO_MEMORY; 557 // Read the attr 558 if (!err) { 559 err = fDatabaseLocation->ReadStringAttribute(type, kSnifferRuleAttr, 560 str); 561 } 562 // Parse the rule 563 if (!err) { 564 err = Sniffer::parse(str.String(), rule.rule, &errorMsg); 565 if (err) 566 DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String())); 567 } 568 if (!err) { 569 // Note the bytes needed 570 *bytesNeeded = rule.rule->BytesNeeded(); 571 572 // Add the rule to the list 573 rule.type = type; 574 rule.rule_string = str.String(); 575 fRuleList.push_back(rule); 576 } 577 return err; 578 } 579 580 } // namespace Mime 581 } // namespace Storage 582 } // namespace BPrivate 583 584