xref: /haiku/src/kits/storage/mime/SnifferRules.cpp (revision 3cb015b1ee509d69c643506e8ff573808c86dcfc)
1 /*
2  * Copyright 2002-2006, Haiku Inc.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Tyler Dauwalder
7  *		Ingo Weinhold, bonefish@users.sf.net
8  */
9 
10 /*!
11 	\file SnifferRules.cpp
12 	SnifferRules class implementation
13 */
14 
15 #include "mime/SnifferRules.h"
16 
17 #include <Directory.h>
18 #include <Entry.h>
19 #include <File.h>
20 #include <MimeType.h>
21 #include <mime/database_support.h>
22 #include <mime/MimeSnifferAddonManager.h>
23 #include <sniffer/Parser.h>
24 #include <sniffer/Rule.h>
25 #include <StorageDefs.h>
26 #include <storage_support.h>
27 #include <String.h>
28 
29 #include <stdio.h>
30 #include <sys/stat.h>
31 
32 #define DBG(x) x
33 //#define DBG(x)
34 #define OUT printf
35 
36 namespace BPrivate {
37 namespace Storage {
38 namespace Mime {
39 
40 using namespace BPrivate::Storage;
41 
/*!
	\struct SnifferRules::sniffer_rule
	\brief A parsed sniffer rule and its corresponding mime type and rule string

	The parsed sniffer rule is stored in the \c rule member, which is a pointer
	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
	objects (as opposed to \c sniffer_rule pointers) to be used with STL objects
	without unnecessary copying. As a consequence of this decision, the
	\c SnifferRules object managing the rule list is responsible for actually
	deleting each \c sniffer_rule's \c Sniffer::Rule object.
*/
53 
54 // sniffer_rule Constructor
55 //! Creates a new \c sniffer_rule object
56 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
57 	: rule(rule)
58 {
59 }
60 
61 // sniffer_rule Destructor
62 //! Destroys the \c sniffer_rule object.
63 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
64 	object's \c rule member is *NOT* deleted by this function.
65 */
66 SnifferRules::sniffer_rule::~sniffer_rule()
67 {
68 }
69 
70 // private functions
71 /*! \brief Returns true if \a left's priority is greater than \a right's
72 
73 	This may seem slightly backwards, but since sort() using
74 	operator<() sorts in ascending order, we say "left < right"
75 	if "left.priority > right.priority" to get them sorted in
76 	ascending order. Super, no?
77 
78 	Also, sniffer_rule objects with \c NULL \c rule members are
79 	treated as having minimal priority (and thus are placed at
80 	the end of the list of rules).
81 
82 	Finally, sniffer_rule objects that are otherwise equal are
83 	sorted in reverse alphabetic order (thus placing sniffer
84 	rules for supertypes *after* sniffer rules for subtypes
85 	of said supertype when both rules have identical priorities).
86 */
87 bool operator<(SnifferRules::sniffer_rule &left, SnifferRules::sniffer_rule &right)
88 {
89 	if (left.rule && right.rule) {
90 		double leftPriority = left.rule->Priority();
91 		double rightPriority = right.rule->Priority();
92 		if (leftPriority > rightPriority) {
93 			return true;	// left < right
94 		} else if (rightPriority > leftPriority) {
95 			return false;	// right < left
96 		} else {
97 			return left.type > right.type;
98 		}
99 	} else if (left.rule) {
100 		return true; 	// left < right
101 	} else {
102 		return false;	// right < left
103 	}
104 }
105 
106 /*!
107 	\class SnifferRules
108 	\brief Manages the sniffer rules for the entire database
109 */
110 
111 // Constructor
112 //! Constructs a new SnifferRules object
113 SnifferRules::SnifferRules()
114 	: fHaveDoneFullBuild(false)
115 {
116 }
117 
118 // Destructor
119 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated
120 	\c Sniffer::Rule objects scattered throughout the rule list in
121 	\c sniffer_rule::rule members.
122 */
123 SnifferRules::~SnifferRules()
124 {
125 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
126 		   i != fRuleList.end();
127 		     i++)
128 	{
129 		delete i->rule;
130 		i->rule = NULL;
131 	}
132 }
133 
134 // GuessMimeType
135 /*!	\brief Guesses a MIME type for the supplied entry_ref.
136 
137 	Only the data in the given entry is considered, not the filename or
138 	its extension. Please see GuessMimeType(BFile*, const void *, int32,
139 	BString*) for more details.
140 
141 	\param ref The entry to sniff
142 	\param type Pointer to a pre-allocated BString which is set to the
143 		   resulting MIME type.
144 	\return
145 	- \c B_OK: success
146 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
147 	- error code: failure
148 */
149 status_t
150 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
151 {
152 	status_t err = ref && type ? B_OK : B_BAD_VALUE;
153 	ssize_t bytes = 0;
154 	char *buffer = NULL;
155 	BFile file;
156 
157 	// First find out the max number of bytes we need to read
158 	// from the file to fully accomodate all of our currently
159 	// installed sniffer rules
160 	if (!err) {
161 		bytes = MaxBytesNeeded();
162 		if (bytes < 0)
163 			err = bytes;
164 	}
165 
166 	// Next read that many bytes (or fewer, if the file isn't
167 	// that long) into a buffer
168 	if (!err) {
169 		buffer = new(std::nothrow) char[bytes];
170 		if (!buffer)
171 			err = B_NO_MEMORY;
172 	}
173 	if (!err)
174 		err = file.SetTo(ref, B_READ_ONLY);
175 	if (!err) {
176 		bytes = file.Read(buffer, bytes);
177 		if (bytes < 0)
178 			err = bytes;
179 	}
180 
181 	// Now sniff the buffer
182 	if (!err)
183 		err = GuessMimeType(&file, buffer, bytes, type);
184 
185 	return err;
186 }
187 
188 // GuessMimeType
189 /*!	\brief Guesses a MIME type for the given chunk of data.
190 
191 	Please see GuessMimeType(BFile*, const void *, int32, BString*) for more
192 	details.
193 
194 	\param buffer Pointer to a data buffer to sniff
195 	\param length The length of the data buffer pointed to by \a buffer
196 	\param type Pointer to a pre-allocated BString which is set to the
197 		   resulting MIME type.
198 	\return
199 	- \c B_OK: success
200 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
201 	- error code: failure
202 */
203 status_t
204 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
205 {
206 	return GuessMimeType(NULL, buffer, length, type);
207 }
208 
209 // SetSnifferRule
210 /*! Updates the sniffer rule for the given type
211 
212 	If the a rule currently exists in the rule list for the given type,
213 	it is first removed before the new rule is inserted.
214 
215 	The new rule is inserted in its proper, sorted position in the list.
216 
217 	\param type The type of interest
218 	\param rule The new sniffer rule
219 	\return
220 	- \c B_OK: success
221 	- other error code: failure
222 */
223 status_t
224 SnifferRules::SetSnifferRule(const char *type, const char *rule)
225 {
226 	status_t err = type && rule ? B_OK : B_BAD_VALUE;
227 	if (!err && !fHaveDoneFullBuild)
228 		return B_OK;
229 
230 	sniffer_rule item(new Sniffer::Rule());
231 	BString parseError;
232 
233 	// Check the mem alloc
234 	if (!err)
235 		err = item.rule ? B_OK : B_NO_MEMORY;
236 	// Prepare the sniffer_rule
237 	if (!err) {
238 		item.type = type;
239 		item.rule_string = rule;
240 		err = Sniffer::parse(rule, item.rule, &parseError);
241 		if (err)
242 			DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
243 				parseError.String()));
244 	}
245 	// Remove any previous rule for this type
246 	if (!err)
247 		err = DeleteSnifferRule(type);
248 	// Insert the new rule at the proper position in
249 	// the sorted rule list (remembering that our list
250 	// is sorted in ascending order using
251 	// operator<(sniffer_rule&, sniffer_rule&))
252 	if (!err) {
253 		std::list<sniffer_rule>::iterator i;
254 		for (i = fRuleList.begin(); i != fRuleList.end(); i++)
255 		{
256 			 if (item < (*i)) {
257 			 	fRuleList.insert(i, item);
258 			 	break;
259 			 }
260 		}
261 		if (i == fRuleList.end())
262 			fRuleList.push_back(item);
263 	}
264 
265 	return err;
266 }
267 
268 // DeleteSnifferRule
269 /*! \brief Removes the sniffer rule for the given type from the rule list
270 	\param type The type of interest
271 	\return
272 	- \c B_OK: success (even if no rule existed for the given type)
273 	- other error code: failure
274 */
275 status_t
276 SnifferRules::DeleteSnifferRule(const char *type)
277 {
278 	status_t err = type ? B_OK : B_BAD_VALUE;
279 	if (!err && !fHaveDoneFullBuild)
280 		return B_OK;
281 
282 	// Find the rule in the list and remove it
283 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
284 		   i != fRuleList.end();
285 		     i++)
286 	{
287 		if (i->type == type) {
288 			fRuleList.erase(i);
289 			break;
290 		}
291 	}
292 
293 	return err;
294 }
295 
296 // PrintToStream
297 //! Dumps the list of sniffer rules in sorted order to standard output
298 void
299 SnifferRules::PrintToStream() const
300 {
301 	printf("\n");
302 	printf("--------------\n");
303 	printf("Sniffer Rules:\n");
304 	printf("--------------\n");
305 
306 	if (fHaveDoneFullBuild) {
307 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
308 			   i != fRuleList.end();
309 			     i++)
310 		{
311 			printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
312 		}
313 	} else {
314 		printf("You haven't built your rule list yet, chump. ;-)\n");
315 	}
316 }
317 
318 // BuildRuleList
319 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds
320 	each parsed rule to the rule list, and sorts the list by priority, largest first.
321 
322 	Initial MaxBytesNeeded() info is compiled by this function as well.
323 */
324 status_t
325 SnifferRules::BuildRuleList()
326 {
327 	fRuleList.clear();
328 
329 	ssize_t maxBytesNeeded = 0;
330 	ssize_t bytesNeeded = 0;
331 	BDirectory root;
332 
333 	status_t err = root.SetTo(kDatabaseDir.c_str());
334 	if (!err) {
335 		root.Rewind();
336 		while (true) {
337 			BEntry entry;
338 			err = root.GetNextEntry(&entry);
339 			if (err) {
340 				// If we've come to the end of list, it's not an error
341 				if (err == B_ENTRY_NOT_FOUND)
342 					err = B_OK;
343 				break;
344 			} else {
345 				// Check that this entry is both a directory and a valid MIME string
346 				char supertype[B_PATH_NAME_LENGTH];
347 				if (entry.IsDirectory()
348 				      && entry.GetName(supertype) == B_OK
349 				         && BMimeType::IsValid(supertype))
350 				{
351 					// Make sure the supertype string is all lowercase
352 					BPrivate::Storage::to_lower(supertype);
353 
354 					// First, iterate through this supertype directory and process
355 					// all of its subtypes
356 					BDirectory dir;
357 					if (dir.SetTo(&entry) == B_OK) {
358 						dir.Rewind();
359 						while (true) {
360 							BEntry subEntry;
361 							err = dir.GetNextEntry(&subEntry);
362 							if (err) {
363 								// If we've come to the end of list, it's not an error
364 								if (err == B_ENTRY_NOT_FOUND)
365 									err = B_OK;
366 								break;
367 							} else {
368 								// Get the subtype's name
369 								char subtype[B_PATH_NAME_LENGTH];
370 								if (subEntry.GetName(subtype) == B_OK) {
371 									BPrivate::Storage::to_lower(subtype);
372 
373 									char fulltype[B_PATH_NAME_LENGTH];
374 									sprintf(fulltype, "%s/%s", supertype, subtype);
375 
376 									// Process the subtype
377 									ProcessType(fulltype, &bytesNeeded);
378 									if (bytesNeeded > maxBytesNeeded)
379 										maxBytesNeeded = bytesNeeded;
380 								}
381 							}
382 						}
383 					} else {
384 						DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
385 						          "Failed opening supertype directory '%s'\n",
386 						            supertype));
387 					}
388 
389 					// Second, process the supertype
390 					ProcessType(supertype, &bytesNeeded);
391 					if (bytesNeeded > maxBytesNeeded)
392 						maxBytesNeeded = bytesNeeded;
393 				}
394 			}
395 		}
396 	} else {
397 		DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
398 		          "Failed opening mime database directory '%s'\n",
399 		            kDatabaseDir.c_str()));
400 	}
401 
402 	if (!err) {
403 		fRuleList.sort();
404 		fMaxBytesNeeded = maxBytesNeeded;
405 		fHaveDoneFullBuild = true;
406 //		PrintToStream();
407 	} else
408 		DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%lx\n", err));
409 	return err;
410 }
411 
412 // GuessMimeType
413 /*!	\brief Guesses a MIME type for the supplied chunk of data.
414 
415 	This is accomplished by searching through the currently installed
416 	list of sniffer rules for a rule that matches on the given data buffer.
417 	Rules are searched in order of priority (higher priority first). Rules
418 	of equal priority are searched in reverse-alphabetical order (that way
419 	"supertype/subtype" form rules are checked before "supertype-only" form
420 	rules if their priorities happen to be identical).
421 
422 	\param file The file to sniff. May be \c NULL. \a buffer is always given.
423 	\param buffer Pointer to a data buffer to sniff
424 	\param length The length of the data buffer pointed to by \a buffer
425 	\param type Pointer to a pre-allocated BString which is set to the
426 		   resulting MIME type.
427 	\return
428 	- \c B_OK: success
429 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
430 	- error code: failure
431 */
432 status_t
433 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length,
434 	BString *type)
435 {
436 	status_t err = buffer && type ? B_OK : B_BAD_VALUE;
437 	if (err)
438 		return err;
439 
440 	// wrap the buffer by a BMemoryIO
441 	BMemoryIO data(buffer, length);
442 
443 	if (!err && !fHaveDoneFullBuild)
444 		err = BuildRuleList();
445 
446 	// first ask the MimeSnifferAddonManager for a suitable type
447 	float addonPriority = -1;
448 	BMimeType mimeType;
449 	if (!err) {
450 		MimeSnifferAddonManager* manager = MimeSnifferAddonManager::Default();
451 		if (manager) {
452 			addonPriority = manager->GuessMimeType(file, buffer, length,
453 				&mimeType);
454 		}
455 	}
456 
457 	if (!err) {
458 		// Run through our rule list, which is sorted in order of
459 		// descreasing priority, and see if one of the rules sniffs
460 		// out a match
461 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
462 			   i != fRuleList.end();
463 			     i++)
464 		{
465 			if (i->rule) {
466 				// If an add-on identified the type with a priority at least
467 				// as great as the remaining rules, we can stop further
468 				// processing and return the type found by the add-on.
469 				if (i->rule->Priority() <= addonPriority) {
470 					*type = mimeType.Type();
471 					return B_OK;
472 				}
473 
474 				if (i->rule->Sniff(&data)) {
475 					type->SetTo(i->type.c_str());
476 					return B_OK;
477 				}
478 			} else {
479 				DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
480 					"NULL sniffer_rule::rule member found in rule list for type == '%s', "
481 					"rule_string == '%s'\n",
482 					i->type.c_str(), i->rule_string.c_str()));
483 			}
484 		}
485 
486 		// The sniffer add-on manager might have returned a low priority
487 		// (lower than any of a rule).
488 		if (addonPriority >= 0) {
489 			*type = mimeType.Type();
490 			return B_OK;
491 		}
492 
493 		// If we get here, we didn't find a damn thing
494 		err = kMimeGuessFailureError;
495 	}
496 	return err;
497 }
498 
499 // MaxBytesNeeded
500 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for
501 	all the currently installed rules to be able to perform a complete sniff,
502 	or an error code if something goes wrong.
503 
504 	If the internal rule list has not yet been built (this includes parsing
505 	all the installed rules), it will be.
506 
507 	\return: If the return value is non-negative, it represents	the max number
508 	of bytes needed to do a complete sniff. Otherwise, the number returned is
509 	an error code.
510 */
511 ssize_t
512 SnifferRules::MaxBytesNeeded()
513 {
514 	ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList();
515 	if (!err) {
516 		err = fMaxBytesNeeded;
517 		MimeSnifferAddonManager* manager = MimeSnifferAddonManager::Default();
518 		if (manager) {
519 			fMaxBytesNeeded = max_c(fMaxBytesNeeded,
520 				(ssize_t)manager->MinimalBufferSize());
521 		}
522 	}
523 	return err;
524 }
525 
526 // ProcessType
527 /*! \brief Handles a portion of the initial rule list construction for
528 	the given mime type.
529 
530 	\note To be called by BuildRuleList() *ONLY*. :-)
531 
532 	\param type The mime type of interest. The mime string is expected to be valid
533 	            and lowercase. Both "supertype" and "supertype/subtype" mime types
534 	            are allowed.
535 	\param bytesNeeded Returns the minimum number of bytes needed for this rule to
536 	                   perform a complete sniff. May not be NULL because I'm lazy
537 	                   and this function is for internal use only anyway.
538 	\return
539 	The return value is essentially ignored (as this function prints out the
540 	debug warning if a parse fails), but that being said:
541 	- \c B_OK: success
542 	- \c other error code: failure
543 */
544 status_t
545 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
546 {
547 	status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE;
548 	if (!err)
549 		*bytesNeeded = 0;
550 
551 	BString str;
552 	BString errorMsg;
553 	sniffer_rule rule(new Sniffer::Rule());
554 
555 	// Check the mem alloc
556 	if (!err)
557 		err = rule.rule ? B_OK : B_NO_MEMORY;
558 	// Read the attr
559 	if (!err)
560 		err = read_mime_attr_string(type, kSnifferRuleAttr, &str);
561 	// Parse the rule
562 	if (!err) {
563 		err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
564 		if (err)
565 			DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
566 	}
567 	if (!err) {
568 		// Note the bytes needed
569 		*bytesNeeded = rule.rule->BytesNeeded();
570 
571 		// Add the rule to the list
572 		rule.type = type;
573 		rule.rule_string = str.String();
574 		fRuleList.push_back(rule);
575 	}
576 	return err;
577 }
578 
579 } // namespace Mime
580 } // namespace Storage
581 } // namespace BPrivate
582 
583