1 /*
2 * Copyright 2002-2006, Haiku Inc.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 * Tyler Dauwalder
7 * Ingo Weinhold, bonefish@users.sf.net
8 */
9
10 /*!
11 \file SnifferRules.cpp
12 SnifferRules class implementation
13 */
14
15 #include <mime/SnifferRules.h>
16
17 #include <stdio.h>
18 #include <sys/stat.h>
19
20 #include <Directory.h>
21 #include <Entry.h>
22 #include <File.h>
23 #include <MimeType.h>
24 #include <mime/database_support.h>
25 #include <mime/DatabaseDirectory.h>
26 #include <mime/DatabaseLocation.h>
27 #include <mime/MimeSniffer.h>
28 #include <sniffer/Parser.h>
29 #include <sniffer/Rule.h>
30 #include <StorageDefs.h>
31 #include <storage_support.h>
32 #include <String.h>
33
34
35 #define DBG(x) x
36 //#define DBG(x)
37 #define OUT printf
38
39 namespace BPrivate {
40 namespace Storage {
41 namespace Mime {
42
43 using namespace BPrivate::Storage;
44
/*!
	\struct SnifferRules::sniffer_rule
	\brief A parsed sniffer rule and its corresponding mime type and rule string

	The parsed sniffer rule is stored in the \c rule member, which is a pointer
	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
	objects (as opposed to \c sniffer_rule pointers) to be used with STL objects
	without unnecessary copying. As a consequence of this decision, the
	\c SnifferRules object managing the rule list is responsible for actually
	deleting each \c sniffer_rule's \c Sniffer::Rule object.
*/
56
57 // sniffer_rule Constructor
58 //! Creates a new \c sniffer_rule object
sniffer_rule(Sniffer::Rule * rule)59 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
60 : rule(rule)
61 {
62 }
63
64 // sniffer_rule Destructor
65 //! Destroys the \c sniffer_rule object.
66 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
67 object's \c rule member is *NOT* deleted by this function.
68 */
~sniffer_rule()69 SnifferRules::sniffer_rule::~sniffer_rule()
70 {
71 }
72
73 // private functions
74 /*! \brief Returns true if \a left's priority is greater than \a right's
75
76 This may seem slightly backwards, but since sort() using
77 operator<() sorts in ascending order, we say "left < right"
78 if "left.priority > right.priority" to get them sorted in
79 ascending order. Super, no?
80
81 Also, sniffer_rule objects with \c NULL \c rule members are
82 treated as having minimal priority (and thus are placed at
83 the end of the list of rules).
84
85 Finally, sniffer_rule objects that are otherwise equal are
86 sorted in reverse alphabetic order (thus placing sniffer
87 rules for supertypes *after* sniffer rules for subtypes
88 of said supertype when both rules have identical priorities).
89 */
operator <(const SnifferRules::sniffer_rule & left,const SnifferRules::sniffer_rule & right)90 bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right)
91 {
92 if (left.rule && right.rule) {
93 double leftPriority = left.rule->Priority();
94 double rightPriority = right.rule->Priority();
95 if (leftPriority > rightPriority) {
96 return true; // left < right
97 } else if (rightPriority > leftPriority) {
98 return false; // right < left
99 } else {
100 return left.type > right.type;
101 }
102 } else if (left.rule) {
103 return true; // left < right
104 } else {
105 return false; // right < left
106 }
107 }
108
109 /*!
110 \class SnifferRules
111 \brief Manages the sniffer rules for the entire database
112 */
113
114 // Constructor
115 //! Constructs a new SnifferRules object
SnifferRules(DatabaseLocation * databaseLocation,MimeSniffer * mimeSniffer)116 SnifferRules::SnifferRules(DatabaseLocation* databaseLocation,
117 MimeSniffer* mimeSniffer)
118 :
119 fDatabaseLocation(databaseLocation),
120 fMimeSniffer(mimeSniffer),
121 fMaxBytesNeeded(0),
122 fHaveDoneFullBuild(false)
123 {
124 }
125
126 // Destructor
127 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated
128 \c Sniffer::Rule objects scattered throughout the rule list in
129 \c sniffer_rule::rule members.
130 */
~SnifferRules()131 SnifferRules::~SnifferRules()
132 {
133 for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
134 i != fRuleList.end(); i++) {
135 delete i->rule;
136 i->rule = NULL;
137 }
138 }
139
140 // GuessMimeType
141 /*! \brief Guesses a MIME type for the supplied entry_ref.
142
143 Only the data in the given entry is considered, not the filename or
144 its extension. Please see GuessMimeType(BFile*, const void *, int32,
145 BString*) for more details.
146
147 \param ref The entry to sniff
148 \param type Pointer to a pre-allocated BString which is set to the
149 resulting MIME type.
150 \return
151 - \c B_OK: success
152 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
153 - error code: failure
154 */
155 status_t
GuessMimeType(const entry_ref * ref,BString * type)156 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
157 {
158 status_t err = ref && type ? B_OK : B_BAD_VALUE;
159 ssize_t bytes = 0;
160 char *buffer = NULL;
161 BFile file;
162
163 // First find out the max number of bytes we need to read
164 // from the file to fully accomodate all of our currently
165 // installed sniffer rules
166 if (!err) {
167 bytes = MaxBytesNeeded();
168 if (bytes < 0)
169 err = bytes;
170 }
171
172 // Next read that many bytes (or fewer, if the file isn't
173 // that long) into a buffer
174 if (!err) {
175 buffer = new(std::nothrow) char[bytes];
176 if (!buffer)
177 err = B_NO_MEMORY;
178 }
179
180 if (!err)
181 err = file.SetTo(ref, B_READ_ONLY);
182 if (!err) {
183 bytes = file.Read(buffer, bytes);
184 if (bytes < 0)
185 err = bytes;
186 }
187
188 // Now sniff the buffer
189 if (!err)
190 err = GuessMimeType(&file, buffer, bytes, type);
191
192 delete[] buffer;
193
194 return err;
195 }
196
197 // GuessMimeType
198 /*! \brief Guesses a MIME type for the given chunk of data.
199
200 Please see GuessMimeType(BFile*, const void *, int32, BString*) for more
201 details.
202
203 \param buffer Pointer to a data buffer to sniff
204 \param length The length of the data buffer pointed to by \a buffer
205 \param type Pointer to a pre-allocated BString which is set to the
206 resulting MIME type.
207 \return
208 - \c B_OK: success
209 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
210 - error code: failure
211 */
212 status_t
GuessMimeType(const void * buffer,int32 length,BString * type)213 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
214 {
215 return GuessMimeType(NULL, buffer, length, type);
216 }
217
218 // SetSnifferRule
219 /*! Updates the sniffer rule for the given type
220
221 If the a rule currently exists in the rule list for the given type,
222 it is first removed before the new rule is inserted.
223
224 The new rule is inserted in its proper, sorted position in the list.
225
226 \param type The type of interest
227 \param rule The new sniffer rule
228 \return
229 - \c B_OK: success
230 - other error code: failure
231 */
232 status_t
SetSnifferRule(const char * type,const char * rule)233 SnifferRules::SetSnifferRule(const char *type, const char *rule)
234 {
235 status_t err = type && rule ? B_OK : B_BAD_VALUE;
236 if (!err && !fHaveDoneFullBuild)
237 return B_OK;
238
239 sniffer_rule item(new Sniffer::Rule());
240 BString parseError;
241
242 // Check the mem alloc
243 if (!err)
244 err = item.rule ? B_OK : B_NO_MEMORY;
245 // Prepare the sniffer_rule
246 if (!err) {
247 item.type = type;
248 item.rule_string = rule;
249 err = Sniffer::parse(rule, item.rule, &parseError);
250 if (err)
251 DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
252 parseError.String()));
253 }
254 // Remove any previous rule for this type
255 if (!err)
256 err = DeleteSnifferRule(type);
257 // Insert the new rule at the proper position in
258 // the sorted rule list (remembering that our list
259 // is sorted in ascending order using
260 // operator<(sniffer_rule&, sniffer_rule&))
261 if (!err) {
262 std::list<sniffer_rule>::iterator i;
263 for (i = fRuleList.begin(); i != fRuleList.end(); i++) {
264 if (item < (*i)) {
265 fRuleList.insert(i, item);
266 break;
267 }
268 }
269 if (i == fRuleList.end())
270 fRuleList.push_back(item);
271 }
272
273 return err;
274 }
275
276 // DeleteSnifferRule
277 /*! \brief Removes the sniffer rule for the given type from the rule list
278 \param type The type of interest
279 \return
280 - \c B_OK: success (even if no rule existed for the given type)
281 - other error code: failure
282 */
283 status_t
DeleteSnifferRule(const char * type)284 SnifferRules::DeleteSnifferRule(const char *type)
285 {
286 status_t err = type ? B_OK : B_BAD_VALUE;
287 if (!err && !fHaveDoneFullBuild)
288 return B_OK;
289
290 // Find the rule in the list and remove it
291 for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
292 i != fRuleList.end(); i++) {
293 if (i->type == type) {
294 fRuleList.erase(i);
295 break;
296 }
297 }
298
299 return err;
300 }
301
302 // PrintToStream
303 //! Dumps the list of sniffer rules in sorted order to standard output
304 void
PrintToStream() const305 SnifferRules::PrintToStream() const
306 {
307 printf("\n");
308 printf("--------------\n");
309 printf("Sniffer Rules:\n");
310 printf("--------------\n");
311
312 if (fHaveDoneFullBuild) {
313 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
314 i != fRuleList.end(); i++) {
315 printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
316 }
317 } else {
318 printf("You haven't built your rule list yet, chump. ;-)\n");
319 }
320 }
321
322 // BuildRuleList
323 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds
324 each parsed rule to the rule list, and sorts the list by priority, largest first.
325
326 Initial MaxBytesNeeded() info is compiled by this function as well.
327 */
328 status_t
BuildRuleList()329 SnifferRules::BuildRuleList()
330 {
331 fRuleList.clear();
332
333 ssize_t maxBytesNeeded = 0;
334 ssize_t bytesNeeded = 0;
335 DatabaseDirectory root;
336
337 status_t err = root.Init(fDatabaseLocation);
338 if (!err) {
339 root.Rewind();
340 while (true) {
341 BEntry entry;
342 err = root.GetNextEntry(&entry);
343 if (err) {
344 // If we've come to the end of list, it's not an error
345 if (err == B_ENTRY_NOT_FOUND)
346 err = B_OK;
347 break;
348 } else {
349 // Check that this entry is both a directory and a valid MIME string
350 char supertype[B_PATH_NAME_LENGTH];
351 if (entry.IsDirectory()
352 && entry.GetName(supertype) == B_OK
353 && BMimeType::IsValid(supertype)) {
354 // Make sure the supertype string is all lowercase
355 BPrivate::Storage::to_lower(supertype);
356
357 // First, iterate through this supertype directory and process
358 // all of its subtypes
359 DatabaseDirectory dir;
360 if (dir.Init(fDatabaseLocation, supertype) == B_OK) {
361 dir.Rewind();
362 while (true) {
363 BEntry subEntry;
364 err = dir.GetNextEntry(&subEntry);
365 if (err) {
366 // If we've come to the end of list, it's not an error
367 if (err == B_ENTRY_NOT_FOUND)
368 err = B_OK;
369 break;
370 } else {
371 // Get the subtype's name
372 char subtype[B_PATH_NAME_LENGTH];
373 if (subEntry.GetName(subtype) == B_OK) {
374 BPrivate::Storage::to_lower(subtype);
375
376 char fulltype[B_PATH_NAME_LENGTH];
377 snprintf(fulltype, B_PATH_NAME_LENGTH, "%s/%s",
378 supertype, subtype);
379
380 // Process the subtype
381 ProcessType(fulltype, &bytesNeeded);
382 if (bytesNeeded > maxBytesNeeded)
383 maxBytesNeeded = bytesNeeded;
384 }
385 }
386 }
387 } else {
388 DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
389 "Failed opening supertype directory '%s'\n",
390 supertype));
391 }
392
393 // Second, process the supertype
394 ProcessType(supertype, &bytesNeeded);
395 if (bytesNeeded > maxBytesNeeded)
396 maxBytesNeeded = bytesNeeded;
397 }
398 }
399 }
400 } else {
401 DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
402 "Failed opening mime database directory.\n"));
403 }
404
405 if (!err) {
406 fRuleList.sort();
407 fMaxBytesNeeded = maxBytesNeeded;
408 fHaveDoneFullBuild = true;
409 // PrintToStream();
410 } else {
411 DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%"
412 B_PRIx32 "\n", err));
413 }
414 return err;
415 }
416
417 // GuessMimeType
418 /*! \brief Guesses a MIME type for the supplied chunk of data.
419
420 This is accomplished by searching through the currently installed
421 list of sniffer rules for a rule that matches on the given data buffer.
422 Rules are searched in order of priority (higher priority first). Rules
423 of equal priority are searched in reverse-alphabetical order (that way
424 "supertype/subtype" form rules are checked before "supertype-only" form
425 rules if their priorities happen to be identical).
426
427 \param file The file to sniff. May be \c NULL. \a buffer is always given.
428 \param buffer Pointer to a data buffer to sniff
429 \param length The length of the data buffer pointed to by \a buffer
430 \param type Pointer to a pre-allocated BString which is set to the
431 resulting MIME type.
432 \return
433 - \c B_OK: success
434 - \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
435 - error code: failure
436 */
437 status_t
GuessMimeType(BFile * file,const void * buffer,int32 length,BString * type)438 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length,
439 BString *type)
440 {
441 status_t err = buffer && type ? B_OK : B_BAD_VALUE;
442 if (err)
443 return err;
444
445 // wrap the buffer by a BMemoryIO
446 BMemoryIO data(buffer, length);
447
448 if (!fHaveDoneFullBuild)
449 err = BuildRuleList();
450
451 // first ask the MIME sniffer for a suitable type
452 float addonPriority = -1;
453 BMimeType mimeType;
454 if (!err && fMimeSniffer != NULL) {
455 addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length,
456 &mimeType);
457 }
458
459 if (!err) {
460 // Run through our rule list, which is sorted in order of
461 // descreasing priority, and see if one of the rules sniffs
462 // out a match
463 for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
464 i != fRuleList.end(); i++) {
465 if (i->rule) {
466 // If an add-on identified the type with a priority at least
467 // as great as the remaining rules, we can stop further
468 // processing and return the type found by the add-on.
469 if (i->rule->Priority() <= addonPriority) {
470 *type = mimeType.Type();
471 return B_OK;
472 }
473
474 if (i->rule->Sniff(&data)) {
475 type->SetTo(i->type.c_str());
476 return B_OK;
477 }
478 } else {
479 DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
480 "NULL sniffer_rule::rule member found in rule list for type == '%s', "
481 "rule_string == '%s'\n",
482 i->type.c_str(), i->rule_string.c_str()));
483 }
484 }
485
486 // The sniffer add-on manager might have returned a low priority
487 // (lower than any of a rule).
488 if (addonPriority >= 0) {
489 *type = mimeType.Type();
490 return B_OK;
491 }
492
493 // If we get here, we didn't find a damn thing
494 err = kMimeGuessFailureError;
495 }
496 return err;
497 }
498
499 // MaxBytesNeeded
500 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for
501 all the currently installed rules to be able to perform a complete sniff,
502 or an error code if something goes wrong.
503
504 If the internal rule list has not yet been built (this includes parsing
505 all the installed rules), it will be.
506
507 \return: If the return value is non-negative, it represents the max number
508 of bytes needed to do a complete sniff. Otherwise, the number returned is
509 an error code.
510 */
511 ssize_t
MaxBytesNeeded()512 SnifferRules::MaxBytesNeeded()
513 {
514 ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList();
515 if (!err) {
516 err = fMaxBytesNeeded;
517
518 if (fMimeSniffer != NULL) {
519 fMaxBytesNeeded = max_c(fMaxBytesNeeded,
520 (ssize_t)fMimeSniffer->MinimalBufferSize());
521 }
522 }
523 return err;
524 }
525
526 // ProcessType
527 /*! \brief Handles a portion of the initial rule list construction for
528 the given mime type.
529
530 \note To be called by BuildRuleList() *ONLY*. :-)
531
532 \param type The mime type of interest. The mime string is expected to be valid
533 and lowercase. Both "supertype" and "supertype/subtype" mime types
534 are allowed.
535 \param bytesNeeded Returns the minimum number of bytes needed for this rule to
536 perform a complete sniff. May not be NULL because I'm lazy
537 and this function is for internal use only anyway.
538 \return
539 The return value is essentially ignored (as this function prints out the
540 debug warning if a parse fails), but that being said:
541 - \c B_OK: success
542 - \c other error code: failure
543 */
544 status_t
ProcessType(const char * type,ssize_t * bytesNeeded)545 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
546 {
547 status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE;
548 if (!err)
549 *bytesNeeded = 0;
550
551 BString str;
552 BString errorMsg;
553 sniffer_rule rule(new Sniffer::Rule());
554
555 // Check the mem alloc
556 if (!err)
557 err = rule.rule ? B_OK : B_NO_MEMORY;
558 // Read the attr
559 if (!err) {
560 err = fDatabaseLocation->ReadStringAttribute(type, kSnifferRuleAttr,
561 str);
562 }
563 // Parse the rule
564 if (!err) {
565 err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
566 if (err)
567 DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
568 }
569 if (!err) {
570 // Note the bytes needed
571 *bytesNeeded = rule.rule->BytesNeeded();
572
573 // Add the rule to the list
574 rule.type = type;
575 rule.rule_string = str.String();
576 fRuleList.push_back(rule);
577 }
578 return err;
579 }
580
581 } // namespace Mime
582 } // namespace Storage
583 } // namespace BPrivate
584
585