xref: /haiku/src/kits/storage/mime/SnifferRules.cpp (revision 0e50eab75e25d0d82090e22dbff766dfaa6f5e86)
1 //----------------------------------------------------------------------
2 //  This software is part of the OpenBeOS distribution and is covered
3 //  by the OpenBeOS license.
4 //----------------------------------------------------------------------
5 /*!
6 	\file SnifferRules.cpp
7 	SnifferRules class implementation
8 */
9 
10 #include "mime/SnifferRules.h"
11 
12 #include <Directory.h>
13 #include <Entry.h>
14 #include <File.h>
15 #include <MimeType.h>
16 #include <mime/database_support.h>
17 #include <mime/MimeSnifferAddonManager.h>
18 #include <sniffer/Parser.h>
19 #include <sniffer/Rule.h>
20 #include <StorageDefs.h>
21 #include <storage_support.h>
22 #include <String.h>
23 
24 #include <stdio.h>
25 #include <sys/stat.h>
26 
// Debug output helpers. DBG(x) currently expands to its argument, so the
// debug statements wrapped in it are compiled in; switching to the empty
// definition below compiles them out. OUT names the printf-style function
// used to emit the output.
#define DBG(x) x
//#define DBG(x)
#define OUT printf
30 
31 namespace BPrivate {
32 namespace Storage {
33 namespace Mime {
34 
35 using namespace BPrivate::Storage;
36 
/*!
	\struct SnifferRules::sniffer_rule
	\brief A parsed sniffer rule and its corresponding mime type and rule string

	The parsed sniffer rule is stored in the \c rule member, which is a pointer
	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
	objects (as opposed to \c sniffer_rule pointers) to be used with STL objects
	without unnecessary copying. As a consequence of this decision, the
	\c SnifferRules object managing the rule list is responsible for actually
	deleting each \c sniffer_rule's \c Sniffer::Rule object.
*/
48 
49 // sniffer_rule Constructor
50 //! Creates a new \c sniffer_rule object
51 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
52 	: rule(rule)
53 {
54 }
55 
56 // sniffer_rule Destructor
57 //! Destroys the \c sniffer_rule object.
58 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
59 	object's \c rule member is *NOT* deleted by this function.
60 */
61 SnifferRules::sniffer_rule::~sniffer_rule()
62 {
63 }
64 
65 // private functions
66 /*! \brief Returns true if \a left's priority is greater than \a right's
67 
68 	This may seem slightly backwards, but since sort() using
69 	operator<() sorts in ascending order, we say "left < right"
70 	if "left.priority > right.priority" to get them sorted in
71 	ascending order. Super, no?
72 
73 	Also, sniffer_rule objects with \c NULL \c rule members are
74 	treated as having minimal priority (and thus are placed at
75 	the end of the list of rules).
76 
77 	Finally, sniffer_rule objects that are otherwise equal are
78 	sorted in reverse alphabetic order (thus placing sniffer
79 	rules for supertypes *after* sniffer rules for subtypes
80 	of said supertype when both rules have identical priorities).
81 */
82 bool operator<(SnifferRules::sniffer_rule &left, SnifferRules::sniffer_rule &right)
83 {
84 	if (left.rule && right.rule) {
85 		double leftPriority = left.rule->Priority();
86 		double rightPriority = right.rule->Priority();
87 		if (leftPriority > rightPriority) {
88 			return true;	// left < right
89 		} else if (rightPriority > leftPriority) {
90 			return false;	// right < left
91 		} else {
92 			return left.type > right.type;
93 		}
94 	} else if (left.rule) {
95 		return true; 	// left < right
96 	} else {
97 		return false;	// right < left
98 	}
99 }
100 
101 /*!
102 	\class SnifferRules
103 	\brief Manages the sniffer rules for the entire database
104 */
105 
106 // Constructor
107 //! Constructs a new SnifferRules object
108 SnifferRules::SnifferRules()
109 	: fHaveDoneFullBuild(false)
110 {
111 }
112 
113 // Destructor
114 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated
115 	\c Sniffer::Rule objects scattered throughout the rule list in
116 	\c sniffer_rule::rule members.
117 */
118 SnifferRules::~SnifferRules()
119 {
120 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
121 		   i != fRuleList.end();
122 		     i++)
123 	{
124 		delete i->rule;
125 		i->rule = NULL;
126 	}
127 }
128 
129 // GuessMimeType
130 /*!	\brief Guesses a MIME type for the supplied entry_ref.
131 
132 	Only the data in the given entry is considered, not the filename or
133 	its extension. Please see GuessMimeType(BFile*, const void *, int32,
134 	BString*) for more details.
135 
136 	\param ref The entry to sniff
137 	\param type Pointer to a pre-allocated BString which is set to the
138 		   resulting MIME type.
139 	\return
140 	- \c B_OK: success
141 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
142 	- error code: failure
143 */
144 status_t
145 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
146 {
147 	status_t err = ref && type ? B_OK : B_BAD_VALUE;
148 	ssize_t bytes = 0;
149 	char *buffer = NULL;
150 	BFile file;
151 
152 	// First find out the max number of bytes we need to read
153 	// from the file to fully accomodate all of our currently
154 	// installed sniffer rules
155 	if (!err) {
156 		bytes = MaxBytesNeeded();
157 		if (bytes < 0)
158 			err = bytes;
159 	}
160 
161 	// Next read that many bytes (or fewer, if the file isn't
162 	// that long) into a buffer
163 	if (!err) {
164 		buffer = new(std::nothrow) char[bytes];
165 		if (!buffer)
166 			err = B_NO_MEMORY;
167 	}
168 	if (!err)
169 		err = file.SetTo(ref, B_READ_ONLY);
170 	if (!err) {
171 		bytes = file.Read(buffer, bytes);
172 		if (bytes < 0)
173 			err = bytes;
174 	}
175 
176 	// Now sniff the buffer
177 	if (!err)
178 		err = GuessMimeType(&file, buffer, bytes, type);
179 
180 	return err;
181 }
182 
183 // GuessMimeType
184 /*!	\brief Guesses a MIME type for the given chunk of data.
185 
186 	Please see GuessMimeType(BFile*, const void *, int32, BString*) for more
187 	details.
188 
189 	\param buffer Pointer to a data buffer to sniff
190 	\param length The length of the data buffer pointed to by \a buffer
191 	\param type Pointer to a pre-allocated BString which is set to the
192 		   resulting MIME type.
193 	\return
194 	- \c B_OK: success
195 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
196 	- error code: failure
197 */
198 status_t
199 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
200 {
201 	return GuessMimeType(NULL, buffer, length, type);
202 }
203 
204 // SetSnifferRule
205 /*! Updates the sniffer rule for the given type
206 
207 	If the a rule currently exists in the rule list for the given type,
208 	it is first removed before the new rule is inserted.
209 
210 	The new rule is inserted in its proper, sorted position in the list.
211 
212 	\param type The type of interest
213 	\param rule The new sniffer rule
214 	\return
215 	- \c B_OK: success
216 	- other error code: failure
217 */
218 status_t
219 SnifferRules::SetSnifferRule(const char *type, const char *rule)
220 {
221 	status_t err = type && rule ? B_OK : B_BAD_VALUE;
222 	if (!err && !fHaveDoneFullBuild)
223 		return B_OK;
224 
225 	sniffer_rule item(new Sniffer::Rule());
226 	BString parseError;
227 
228 	// Check the mem alloc
229 	if (!err)
230 		err = item.rule ? B_OK : B_NO_MEMORY;
231 	// Prepare the sniffer_rule
232 	if (!err) {
233 		item.type = type;
234 		item.rule_string = rule;
235 		err = Sniffer::parse(rule, item.rule, &parseError);
236 		if (err)
237 			DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
238 				parseError.String()));
239 	}
240 	// Remove any previous rule for this type
241 	if (!err)
242 		err = DeleteSnifferRule(type);
243 	// Insert the new rule at the proper position in
244 	// the sorted rule list (remembering that our list
245 	// is sorted in ascending order using
246 	// operator<(sniffer_rule&, sniffer_rule&))
247 	if (!err) {
248 		std::list<sniffer_rule>::iterator i;
249 		for (i = fRuleList.begin(); i != fRuleList.end(); i++)
250 		{
251 			 if (item < (*i)) {
252 			 	fRuleList.insert(i, item);
253 			 	break;
254 			 }
255 		}
256 		if (i == fRuleList.end())
257 			fRuleList.push_back(item);
258 	}
259 
260 	return err;
261 }
262 
263 // DeleteSnifferRule
264 /*! \brief Removes the sniffer rule for the given type from the rule list
265 	\param type The type of interest
266 	\return
267 	- \c B_OK: success (even if no rule existed for the given type)
268 	- other error code: failure
269 */
270 status_t
271 SnifferRules::DeleteSnifferRule(const char *type)
272 {
273 	status_t err = type ? B_OK : B_BAD_VALUE;
274 	if (!err && !fHaveDoneFullBuild)
275 		return B_OK;
276 
277 	// Find the rule in the list and remove it
278 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
279 		   i != fRuleList.end();
280 		     i++)
281 	{
282 		if (i->type == type) {
283 			fRuleList.erase(i);
284 			break;
285 		}
286 	}
287 
288 	return err;
289 }
290 
291 // PrintToStream
292 //! Dumps the list of sniffer rules in sorted order to standard output
293 void
294 SnifferRules::PrintToStream() const
295 {
296 	printf("\n");
297 	printf("--------------\n");
298 	printf("Sniffer Rules:\n");
299 	printf("--------------\n");
300 
301 	if (fHaveDoneFullBuild) {
302 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
303 			   i != fRuleList.end();
304 			     i++)
305 		{
306 			printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
307 		}
308 	} else {
309 		printf("You haven't built your rule list yet, chump. ;-)\n");
310 	}
311 }
312 
313 // BuildRuleList
314 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds
315 	each parsed rule to the rule list, and sorts the list by priority, largest first.
316 
317 	Initial MaxBytesNeeded() info is compiled by this function as well.
318 */
319 status_t
320 SnifferRules::BuildRuleList()
321 {
322 	fRuleList.clear();
323 
324 	ssize_t maxBytesNeeded = 0;
325 	ssize_t bytesNeeded = 0;
326 	BDirectory root;
327 
328 	status_t err = root.SetTo(kDatabaseDir.c_str());
329 	if (!err) {
330 		root.Rewind();
331 		while (true) {
332 			BEntry entry;
333 			err = root.GetNextEntry(&entry);
334 			if (err) {
335 				// If we've come to the end of list, it's not an error
336 				if (err == B_ENTRY_NOT_FOUND)
337 					err = B_OK;
338 				break;
339 			} else {
340 				// Check that this entry is both a directory and a valid MIME string
341 				char supertype[B_PATH_NAME_LENGTH];
342 				if (entry.IsDirectory()
343 				      && entry.GetName(supertype) == B_OK
344 				         && BMimeType::IsValid(supertype))
345 				{
346 					// Make sure the supertype string is all lowercase
347 					BPrivate::Storage::to_lower(supertype);
348 
349 					// First, iterate through this supertype directory and process
350 					// all of its subtypes
351 					BDirectory dir;
352 					if (dir.SetTo(&entry) == B_OK) {
353 						dir.Rewind();
354 						while (true) {
355 							BEntry subEntry;
356 							err = dir.GetNextEntry(&subEntry);
357 							if (err) {
358 								// If we've come to the end of list, it's not an error
359 								if (err == B_ENTRY_NOT_FOUND)
360 									err = B_OK;
361 								break;
362 							} else {
363 								// Get the subtype's name
364 								char subtype[B_PATH_NAME_LENGTH];
365 								if (subEntry.GetName(subtype) == B_OK) {
366 									BPrivate::Storage::to_lower(subtype);
367 
368 									char fulltype[B_PATH_NAME_LENGTH];
369 									sprintf(fulltype, "%s/%s", supertype, subtype);
370 
371 									// Process the subtype
372 									ProcessType(fulltype, &bytesNeeded);
373 									if (bytesNeeded > maxBytesNeeded)
374 										maxBytesNeeded = bytesNeeded;
375 								}
376 							}
377 						}
378 					} else {
379 						DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
380 						          "Failed opening supertype directory '%s'\n",
381 						            supertype));
382 					}
383 
384 					// Second, process the supertype
385 					ProcessType(supertype, &bytesNeeded);
386 					if (bytesNeeded > maxBytesNeeded)
387 						maxBytesNeeded = bytesNeeded;
388 				}
389 			}
390 		}
391 	} else {
392 		DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
393 		          "Failed opening mime database directory '%s'\n",
394 		            kDatabaseDir.c_str()));
395 	}
396 
397 	if (!err) {
398 		fRuleList.sort();
399 		fMaxBytesNeeded = maxBytesNeeded;
400 		fHaveDoneFullBuild = true;
401 //		PrintToStream();
402 	} else
403 		DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%lx\n", err));
404 	return err;
405 }
406 
407 // GuessMimeType
408 /*!	\brief Guesses a MIME type for the supplied chunk of data.
409 
410 	This is accomplished by searching through the currently installed
411 	list of sniffer rules for a rule that matches on the given data buffer.
412 	Rules are searched in order of priority (higher priority first). Rules
413 	of equal priority are searched in reverse-alphabetical order (that way
414 	"supertype/subtype" form rules are checked before "supertype-only" form
415 	rules if their priorities happen to be identical).
416 
417 	\param file The file to sniff. May be \c NULL. \a buffer is always given.
418 	\param buffer Pointer to a data buffer to sniff
419 	\param length The length of the data buffer pointed to by \a buffer
420 	\param type Pointer to a pre-allocated BString which is set to the
421 		   resulting MIME type.
422 	\return
423 	- \c B_OK: success
424 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
425 	- error code: failure
426 */
427 status_t
428 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length,
429 	BString *type)
430 {
431 	status_t err = buffer && type ? B_OK : B_BAD_VALUE;
432 	if (err)
433 		return err;
434 
435 	// wrap the buffer by a BMemoryIO
436 	BMemoryIO data(buffer, length);
437 
438 	if (!err && !fHaveDoneFullBuild)
439 		err = BuildRuleList();
440 
441 	// first ask the MimeSnifferAddonManager for a suitable type
442 	float addonPriority = -1;
443 	BMimeType mimeType;
444 	if (!err) {
445 		MimeSnifferAddonManager* manager = MimeSnifferAddonManager::Default();
446 		if (manager) {
447 			addonPriority = manager->GuessMimeType(file, buffer, length,
448 				&mimeType);
449 		}
450 	}
451 
452 	if (!err) {
453 		// Run through our rule list, which is sorted in order of
454 		// descreasing priority, and see if one of the rules sniffs
455 		// out a match
456 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
457 			   i != fRuleList.end();
458 			     i++)
459 		{
460 			if (i->rule) {
461 				// If an add-on identified the type with a priority at least
462 				// as great as the remaining rules, we can stop further
463 				// processing and return the type found by the add-on.
464 				if (i->rule->Priority() <= addonPriority) {
465 					*type = mimeType.Type();
466 					return B_OK;
467 				}
468 
469 				if (i->rule->Sniff(&data)) {
470 					type->SetTo(i->type.c_str());
471 					return B_OK;
472 				}
473 			} else {
474 				DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
475 					"NULL sniffer_rule::rule member found in rule list for type == '%s', "
476 					"rule_string == '%s'\n",
477 					i->type.c_str(), i->rule_string.c_str()));
478 			}
479 		}
480 
481 		// The sniffer add-on manager might have returned a low priority
482 		// (lower than any of a rule).
483 		if (addonPriority >= 0) {
484 			*type = mimeType.Type();
485 			return B_OK;
486 		}
487 
488 		// If we get here, we didn't find a damn thing
489 		err = kMimeGuessFailureError;
490 	}
491 	return err;
492 }
493 
494 // MaxBytesNeeded
495 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for
496 	all the currently installed rules to be able to perform a complete sniff,
497 	or an error code if something goes wrong.
498 
499 	If the internal rule list has not yet been built (this includes parsing
500 	all the installed rules), it will be.
501 
502 	\return: If the return value is non-negative, it represents	the max number
503 	of bytes needed to do a complete sniff. Otherwise, the number returned is
504 	an error code.
505 */
506 ssize_t
507 SnifferRules::MaxBytesNeeded()
508 {
509 	ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList();
510 	if (!err) {
511 		err = fMaxBytesNeeded;
512 		MimeSnifferAddonManager* manager = MimeSnifferAddonManager::Default();
513 		if (manager) {
514 			fMaxBytesNeeded = max(fMaxBytesNeeded,
515 				(ssize_t)manager->MinimalBufferSize());
516 		}
517 	}
518 	return err;
519 }
520 
521 // ProcessType
522 /*! \brief Handles a portion of the initial rule list construction for
523 	the given mime type.
524 
525 	\note To be called by BuildRuleList() *ONLY*. :-)
526 
527 	\param type The mime type of interest. The mime string is expected to be valid
528 	            and lowercase. Both "supertype" and "supertype/subtype" mime types
529 	            are allowed.
530 	\param bytesNeeded Returns the minimum number of bytes needed for this rule to
531 	                   perform a complete sniff. May not be NULL because I'm lazy
532 	                   and this function is for internal use only anyway.
533 	\return
534 	The return value is essentially ignored (as this function prints out the
535 	debug warning if a parse fails), but that being said:
536 	- \c B_OK: success
537 	- \c other error code: failure
538 */
539 status_t
540 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
541 {
542 	status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE;
543 	if (!err)
544 		*bytesNeeded = 0;
545 
546 	BString str;
547 	BString errorMsg;
548 	sniffer_rule rule(new Sniffer::Rule());
549 
550 	// Check the mem alloc
551 	if (!err)
552 		err = rule.rule ? B_OK : B_NO_MEMORY;
553 	// Read the attr
554 	if (!err)
555 		err = read_mime_attr_string(type, kSnifferRuleAttr, &str);
556 	// Parse the rule
557 	if (!err) {
558 		err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
559 		if (err)
560 			DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
561 	}
562 	if (!err) {
563 		// Note the bytes needed
564 		*bytesNeeded = rule.rule->BytesNeeded();
565 
566 		// Add the rule to the list
567 		rule.type = type;
568 		rule.rule_string = str.String();
569 		fRuleList.push_back(rule);
570 	}
571 	return err;
572 }
573 
574 } // namespace Mime
575 } // namespace Storage
576 } // namespace BPrivate
577 
578