1 /* 2 * Copyright 2002-2006, Haiku Inc. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Tyler Dauwalder 7 * Ingo Weinhold, bonefish@users.sf.net 8 */ 9 10 /*! 11 \file SnifferRules.cpp 12 SnifferRules class implementation 13 */ 14 15 #include "mime/SnifferRules.h" 16 17 #include <Directory.h> 18 #include <Entry.h> 19 #include <File.h> 20 #include <MimeType.h> 21 #include <mime/database_support.h> 22 #include <mime/MimeSnifferAddonManager.h> 23 #include <sniffer/Parser.h> 24 #include <sniffer/Rule.h> 25 #include <StorageDefs.h> 26 #include <storage_support.h> 27 #include <String.h> 28 29 #include <stdio.h> 30 #include <sys/stat.h> 31 32 #define DBG(x) x 33 //#define DBG(x) 34 #define OUT printf 35 36 namespace BPrivate { 37 namespace Storage { 38 namespace Mime { 39 40 using namespace BPrivate::Storage; 41 42 /*! 43 \struct SnifferRules::sniffer_rule 44 \brief A parsed sniffer rule and its corresponding mime type and rule string 45 46 The parse sniffer rule is stored in the \c rule member, which is a pointer 47 to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule 48 objects (as opposed to \c sniffer_rule pointers) to be used with STL objects 49 without unnecessary copying. As a consequence of this decision, the 50 \c SnifferRules object managing the rule list is responsible for actually 51 deleting each \c sniffer_rule's \c Sniffer::Rule object. 52 */ 53 54 // sniffer_rule Constructor 55 //! Creates a new \c sniffer_rule object 56 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule) 57 : rule(rule) 58 { 59 } 60 61 // sniffer_rule Destructor 62 //! Destroys the \c sniffer_rule object. 63 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule 64 object's \c rule member is *NOT* deleted by this function. 65 */ 66 SnifferRules::sniffer_rule::~sniffer_rule() 67 { 68 } 69 70 // private functions 71 /*! \brief Returns true if \a left's priority is greater than \a right's 72 73 This may seem slightly backwards, but since sort() using 74 operator<() sorts in ascending order, we say "left < right" 75 if "left.priority > right.priority" to get them sorted in 76 ascending order. Super, no? 77 78 Also, sniffer_rule objects with \c NULL \c rule members are 79 treated as having minimal priority (and thus are placed at 80 the end of the list of rules). 81 82 Finally, sniffer_rule objects that are otherwise equal are 83 sorted in reverse alphabetic order (thus placing sniffer 84 rules for supertypes *after* sniffer rules for subtypes 85 of said supertype when both rules have identical priorities). 86 */ 87 bool operator<(SnifferRules::sniffer_rule &left, SnifferRules::sniffer_rule &right) 88 { 89 if (left.rule && right.rule) { 90 double leftPriority = left.rule->Priority(); 91 double rightPriority = right.rule->Priority(); 92 if (leftPriority > rightPriority) { 93 return true; // left < right 94 } else if (rightPriority > leftPriority) { 95 return false; // right < left 96 } else { 97 return left.type > right.type; 98 } 99 } else if (left.rule) { 100 return true; // left < right 101 } else { 102 return false; // right < left 103 } 104 } 105 106 /*! 107 \class SnifferRules 108 \brief Manages the sniffer rules for the entire database 109 */ 110 111 // Constructor 112 //! Constructs a new SnifferRules object 113 SnifferRules::SnifferRules() 114 : fHaveDoneFullBuild(false) 115 { 116 } 117 118 // Destructor 119 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated 120 \c Sniffer::Rule objects scattered throughout the rule list in 121 \c sniffer_rule::rule members. 122 */ 123 SnifferRules::~SnifferRules() 124 { 125 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 126 i != fRuleList.end(); 127 i++) 128 { 129 delete i->rule; 130 i->rule = NULL; 131 } 132 } 133 134 // GuessMimeType 135 /*! \brief Guesses a MIME type for the supplied entry_ref. 136 137 Only the data in the given entry is considered, not the filename or 138 its extension. Please see GuessMimeType(BFile*, const void *, int32, 139 BString*) for more details. 140 141 \param ref The entry to sniff 142 \param type Pointer to a pre-allocated BString which is set to the 143 resulting MIME type. 144 \return 145 - \c B_OK: success 146 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 147 - error code: failure 148 */ 149 status_t 150 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type) 151 { 152 status_t err = ref && type ? B_OK : B_BAD_VALUE; 153 ssize_t bytes = 0; 154 char *buffer = NULL; 155 BFile file; 156 157 // First find out the max number of bytes we need to read 158 // from the file to fully accomodate all of our currently 159 // installed sniffer rules 160 if (!err) { 161 bytes = MaxBytesNeeded(); 162 if (bytes < 0) 163 err = bytes; 164 } 165 166 // Next read that many bytes (or fewer, if the file isn't 167 // that long) into a buffer 168 if (!err) { 169 buffer = new(std::nothrow) char[bytes]; 170 if (!buffer) 171 err = B_NO_MEMORY; 172 } 173 if (!err) 174 err = file.SetTo(ref, B_READ_ONLY); 175 if (!err) { 176 bytes = file.Read(buffer, bytes); 177 if (bytes < 0) 178 err = bytes; 179 } 180 181 // Now sniff the buffer 182 if (!err) 183 err = GuessMimeType(&file, buffer, bytes, type); 184 185 return err; 186 } 187 188 // GuessMimeType 189 /*! \brief Guesses a MIME type for the given chunk of data. 190 191 Please see GuessMimeType(BFile*, const void *, int32, BString*) for more 192 details. 193 194 \param buffer Pointer to a data buffer to sniff 195 \param length The length of the data buffer pointed to by \a buffer 196 \param type Pointer to a pre-allocated BString which is set to the 197 resulting MIME type. 198 \return 199 - \c B_OK: success 200 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 201 - error code: failure 202 */ 203 status_t 204 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type) 205 { 206 return GuessMimeType(NULL, buffer, length, type); 207 } 208 209 // SetSnifferRule 210 /*! Updates the sniffer rule for the given type 211 212 If the a rule currently exists in the rule list for the given type, 213 it is first removed before the new rule is inserted. 214 215 The new rule is inserted in its proper, sorted position in the list. 216 217 \param type The type of interest 218 \param rule The new sniffer rule 219 \return 220 - \c B_OK: success 221 - other error code: failure 222 */ 223 status_t 224 SnifferRules::SetSnifferRule(const char *type, const char *rule) 225 { 226 status_t err = type && rule ? B_OK : B_BAD_VALUE; 227 if (!err && !fHaveDoneFullBuild) 228 return B_OK; 229 230 sniffer_rule item(new Sniffer::Rule()); 231 BString parseError; 232 233 // Check the mem alloc 234 if (!err) 235 err = item.rule ? B_OK : B_NO_MEMORY; 236 // Prepare the sniffer_rule 237 if (!err) { 238 item.type = type; 239 item.rule_string = rule; 240 err = Sniffer::parse(rule, item.rule, &parseError); 241 if (err) 242 DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n", 243 parseError.String())); 244 } 245 // Remove any previous rule for this type 246 if (!err) 247 err = DeleteSnifferRule(type); 248 // Insert the new rule at the proper position in 249 // the sorted rule list (remembering that our list 250 // is sorted in ascending order using 251 // operator<(sniffer_rule&, sniffer_rule&)) 252 if (!err) { 253 std::list<sniffer_rule>::iterator i; 254 for (i = fRuleList.begin(); i != fRuleList.end(); i++) 255 { 256 if (item < (*i)) { 257 fRuleList.insert(i, item); 258 break; 259 } 260 } 261 if (i == fRuleList.end()) 262 fRuleList.push_back(item); 263 } 264 265 return err; 266 } 267 268 // DeleteSnifferRule 269 /*! \brief Removes the sniffer rule for the given type from the rule list 270 \param type The type of interest 271 \return 272 - \c B_OK: success (even if no rule existed for the given type) 273 - other error code: failure 274 */ 275 status_t 276 SnifferRules::DeleteSnifferRule(const char *type) 277 { 278 status_t err = type ? B_OK : B_BAD_VALUE; 279 if (!err && !fHaveDoneFullBuild) 280 return B_OK; 281 282 // Find the rule in the list and remove it 283 for (std::list<sniffer_rule>::iterator i = fRuleList.begin(); 284 i != fRuleList.end(); 285 i++) 286 { 287 if (i->type == type) { 288 fRuleList.erase(i); 289 break; 290 } 291 } 292 293 return err; 294 } 295 296 // PrintToStream 297 //! Dumps the list of sniffer rules in sorted order to standard output 298 void 299 SnifferRules::PrintToStream() const 300 { 301 printf("\n"); 302 printf("--------------\n"); 303 printf("Sniffer Rules:\n"); 304 printf("--------------\n"); 305 306 if (fHaveDoneFullBuild) { 307 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 308 i != fRuleList.end(); 309 i++) 310 { 311 printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str()); 312 } 313 } else { 314 printf("You haven't built your rule list yet, chump. ;-)\n"); 315 } 316 } 317 318 // BuildRuleList 319 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds 320 each parsed rule to the rule list, and sorts the list by priority, largest first. 321 322 Initial MaxBytesNeeded() info is compiled by this function as well. 323 */ 324 status_t 325 SnifferRules::BuildRuleList() 326 { 327 fRuleList.clear(); 328 329 ssize_t maxBytesNeeded = 0; 330 ssize_t bytesNeeded = 0; 331 BDirectory root; 332 333 status_t err = root.SetTo(kDatabaseDir.c_str()); 334 if (!err) { 335 root.Rewind(); 336 while (true) { 337 BEntry entry; 338 err = root.GetNextEntry(&entry); 339 if (err) { 340 // If we've come to the end of list, it's not an error 341 if (err == B_ENTRY_NOT_FOUND) 342 err = B_OK; 343 break; 344 } else { 345 // Check that this entry is both a directory and a valid MIME string 346 char supertype[B_PATH_NAME_LENGTH]; 347 if (entry.IsDirectory() 348 && entry.GetName(supertype) == B_OK 349 && BMimeType::IsValid(supertype)) 350 { 351 // Make sure the supertype string is all lowercase 352 BPrivate::Storage::to_lower(supertype); 353 354 // First, iterate through this supertype directory and process 355 // all of its subtypes 356 BDirectory dir; 357 if (dir.SetTo(&entry) == B_OK) { 358 dir.Rewind(); 359 while (true) { 360 BEntry subEntry; 361 err = dir.GetNextEntry(&subEntry); 362 if (err) { 363 // If we've come to the end of list, it's not an error 364 if (err == B_ENTRY_NOT_FOUND) 365 err = B_OK; 366 break; 367 } else { 368 // Get the subtype's name 369 char subtype[B_PATH_NAME_LENGTH]; 370 if (subEntry.GetName(subtype) == B_OK) { 371 BPrivate::Storage::to_lower(subtype); 372 373 char fulltype[B_PATH_NAME_LENGTH]; 374 sprintf(fulltype, "%s/%s", supertype, subtype); 375 376 // Process the subtype 377 ProcessType(fulltype, &bytesNeeded); 378 if (bytesNeeded > maxBytesNeeded) 379 maxBytesNeeded = bytesNeeded; 380 } 381 } 382 } 383 } else { 384 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 385 "Failed opening supertype directory '%s'\n", 386 supertype)); 387 } 388 389 // Second, process the supertype 390 ProcessType(supertype, &bytesNeeded); 391 if (bytesNeeded > maxBytesNeeded) 392 maxBytesNeeded = bytesNeeded; 393 } 394 } 395 } 396 } else { 397 DBG(OUT("Mime::SnifferRules::BuildRuleList(): " 398 "Failed opening mime database directory '%s'\n", 399 kDatabaseDir.c_str())); 400 } 401 402 if (!err) { 403 fRuleList.sort(); 404 fMaxBytesNeeded = maxBytesNeeded; 405 fHaveDoneFullBuild = true; 406 // PrintToStream(); 407 } else 408 DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%lx\n", err)); 409 return err; 410 } 411 412 // GuessMimeType 413 /*! \brief Guesses a MIME type for the supplied chunk of data. 414 415 This is accomplished by searching through the currently installed 416 list of sniffer rules for a rule that matches on the given data buffer. 417 Rules are searched in order of priority (higher priority first). Rules 418 of equal priority are searched in reverse-alphabetical order (that way 419 "supertype/subtype" form rules are checked before "supertype-only" form 420 rules if their priorities happen to be identical). 421 422 \param file The file to sniff. May be \c NULL. \a buffer is always given. 423 \param buffer Pointer to a data buffer to sniff 424 \param length The length of the data buffer pointed to by \a buffer 425 \param type Pointer to a pre-allocated BString which is set to the 426 resulting MIME type. 427 \return 428 - \c B_OK: success 429 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified) 430 - error code: failure 431 */ 432 status_t 433 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length, 434 BString *type) 435 { 436 status_t err = buffer && type ? B_OK : B_BAD_VALUE; 437 if (err) 438 return err; 439 440 // wrap the buffer by a BMemoryIO 441 BMemoryIO data(buffer, length); 442 443 if (!err && !fHaveDoneFullBuild) 444 err = BuildRuleList(); 445 446 // first ask the MimeSnifferAddonManager for a suitable type 447 float addonPriority = -1; 448 BMimeType mimeType; 449 if (!err) { 450 MimeSnifferAddonManager* manager = MimeSnifferAddonManager::Default(); 451 if (manager) { 452 addonPriority = manager->GuessMimeType(file, buffer, length, 453 &mimeType); 454 } 455 } 456 457 if (!err) { 458 // Run through our rule list, which is sorted in order of 459 // descreasing priority, and see if one of the rules sniffs 460 // out a match 461 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin(); 462 i != fRuleList.end(); 463 i++) 464 { 465 if (i->rule) { 466 // If an add-on identified the type with a priority at least 467 // as great as the remaining rules, we can stop further 468 // processing and return the type found by the add-on. 469 if (i->rule->Priority() <= addonPriority) { 470 *type = mimeType.Type(); 471 return B_OK; 472 } 473 474 if (i->rule->Sniff(&data)) { 475 type->SetTo(i->type.c_str()); 476 return B_OK; 477 } 478 } else { 479 DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): " 480 "NULL sniffer_rule::rule member found in rule list for type == '%s', " 481 "rule_string == '%s'\n", 482 i->type.c_str(), i->rule_string.c_str())); 483 } 484 } 485 486 // The sniffer add-on manager might have returned a low priority 487 // (lower than any of a rule). 488 if (addonPriority >= 0) { 489 *type = mimeType.Type(); 490 return B_OK; 491 } 492 493 // If we get here, we didn't find a damn thing 494 err = kMimeGuessFailureError; 495 } 496 return err; 497 } 498 499 // MaxBytesNeeded 500 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for 501 all the currently installed rules to be able to perform a complete sniff, 502 or an error code if something goes wrong. 503 504 If the internal rule list has not yet been built (this includes parsing 505 all the installed rules), it will be. 506 507 \return: If the return value is non-negative, it represents the max number 508 of bytes needed to do a complete sniff. Otherwise, the number returned is 509 an error code. 510 */ 511 ssize_t 512 SnifferRules::MaxBytesNeeded() 513 { 514 ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList(); 515 if (!err) { 516 err = fMaxBytesNeeded; 517 MimeSnifferAddonManager* manager = MimeSnifferAddonManager::Default(); 518 if (manager) { 519 fMaxBytesNeeded = max_c(fMaxBytesNeeded, 520 (ssize_t)manager->MinimalBufferSize()); 521 } 522 } 523 return err; 524 } 525 526 // ProcessType 527 /*! \brief Handles a portion of the initial rule list construction for 528 the given mime type. 529 530 \note To be called by BuildRuleList() *ONLY*. :-) 531 532 \param type The mime type of interest. The mime string is expected to be valid 533 and lowercase. Both "supertype" and "supertype/subtype" mime types 534 are allowed. 535 \param bytesNeeded Returns the minimum number of bytes needed for this rule to 536 perform a complete sniff. May not be NULL because I'm lazy 537 and this function is for internal use only anyway. 538 \return 539 The return value is essentially ignored (as this function prints out the 540 debug warning if a parse fails), but that being said: 541 - \c B_OK: success 542 - \c other error code: failure 543 */ 544 status_t 545 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded) 546 { 547 status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE; 548 if (!err) 549 *bytesNeeded = 0; 550 551 BString str; 552 BString errorMsg; 553 sniffer_rule rule(new Sniffer::Rule()); 554 555 // Check the mem alloc 556 if (!err) 557 err = rule.rule ? B_OK : B_NO_MEMORY; 558 // Read the attr 559 if (!err) 560 err = read_mime_attr_string(type, kSnifferRuleAttr, &str); 561 // Parse the rule 562 if (!err) { 563 err = Sniffer::parse(str.String(), rule.rule, &errorMsg); 564 if (err) 565 DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String())); 566 } 567 if (!err) { 568 // Note the bytes needed 569 *bytesNeeded = rule.rule->BytesNeeded(); 570 571 // Add the rule to the list 572 rule.type = type; 573 rule.rule_string = str.String(); 574 fRuleList.push_back(rule); 575 } 576 return err; 577 } 578 579 } // namespace Mime 580 } // namespace Storage 581 } // namespace BPrivate 582 583