xref: /haiku/src/kits/storage/mime/SnifferRules.cpp (revision 97dfeb96704e5dbc5bec32ad7b21379d0125e031)
1 /*
2  * Copyright 2002-2006, Haiku Inc.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Tyler Dauwalder
7  *		Ingo Weinhold, bonefish@users.sf.net
8  */
9 
10 /*!
11 	\file SnifferRules.cpp
12 	SnifferRules class implementation
13 */
14 
15 #include <mime/SnifferRules.h>
16 
17 #include <stdio.h>
18 #include <sys/stat.h>
19 
20 #include <Directory.h>
21 #include <Entry.h>
22 #include <File.h>
23 #include <MimeType.h>
24 #include <mime/database_support.h>
25 #include <mime/DatabaseDirectory.h>
26 #include <mime/DatabaseLocation.h>
27 #include <mime/MimeSniffer.h>
28 #include <sniffer/Parser.h>
29 #include <sniffer/Rule.h>
30 #include <StorageDefs.h>
31 #include <storage_support.h>
32 #include <String.h>
33 
34 
35 #define DBG(x) x
36 //#define DBG(x)
37 #define OUT printf
38 
39 namespace BPrivate {
40 namespace Storage {
41 namespace Mime {
42 
43 using namespace BPrivate::Storage;
44 
/*!
	\struct SnifferRules::sniffer_rule
	\brief A parsed sniffer rule and its corresponding mime type and rule string

	The parsed sniffer rule is stored in the \c rule member, which is a pointer
	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
	objects (as opposed to \c sniffer_rule pointers) to be used with STL
	containers without unnecessary copying. As a consequence of this decision,
	the \c SnifferRules object managing the rule list is responsible for
	actually deleting each \c sniffer_rule's \c Sniffer::Rule object.
*/
56 
57 // sniffer_rule Constructor
58 //! Creates a new \c sniffer_rule object
59 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
60 	: rule(rule)
61 {
62 }
63 
64 // sniffer_rule Destructor
65 //! Destroys the \c sniffer_rule object.
66 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
67 	object's \c rule member is *NOT* deleted by this function.
68 */
69 SnifferRules::sniffer_rule::~sniffer_rule()
70 {
71 }
72 
73 // private functions
74 /*! \brief Returns true if \a left's priority is greater than \a right's
75 
76 	This may seem slightly backwards, but since sort() using
77 	operator<() sorts in ascending order, we say "left < right"
78 	if "left.priority > right.priority" to get them sorted in
79 	ascending order. Super, no?
80 
81 	Also, sniffer_rule objects with \c NULL \c rule members are
82 	treated as having minimal priority (and thus are placed at
83 	the end of the list of rules).
84 
85 	Finally, sniffer_rule objects that are otherwise equal are
86 	sorted in reverse alphabetic order (thus placing sniffer
87 	rules for supertypes *after* sniffer rules for subtypes
88 	of said supertype when both rules have identical priorities).
89 */
90 bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right)
91 {
92 	if (left.rule && right.rule) {
93 		double leftPriority = left.rule->Priority();
94 		double rightPriority = right.rule->Priority();
95 		if (leftPriority > rightPriority) {
96 			return true;	// left < right
97 		} else if (rightPriority > leftPriority) {
98 			return false;	// right < left
99 		} else {
100 			return left.type > right.type;
101 		}
102 	} else if (left.rule) {
103 		return true; 	// left < right
104 	} else {
105 		return false;	// right < left
106 	}
107 }
108 
109 /*!
110 	\class SnifferRules
111 	\brief Manages the sniffer rules for the entire database
112 */
113 
114 // Constructor
115 //! Constructs a new SnifferRules object
116 SnifferRules::SnifferRules(DatabaseLocation* databaseLocation,
117 	MimeSniffer* mimeSniffer)
118 	:
119 	fDatabaseLocation(databaseLocation),
120 	fMimeSniffer(mimeSniffer),
121 	fMaxBytesNeeded(0),
122 	fHaveDoneFullBuild(false)
123 {
124 }
125 
126 // Destructor
127 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated
128 	\c Sniffer::Rule objects scattered throughout the rule list in
129 	\c sniffer_rule::rule members.
130 */
131 SnifferRules::~SnifferRules()
132 {
133 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
134 		   i != fRuleList.end();
135 		     i++)
136 	{
137 		delete i->rule;
138 		i->rule = NULL;
139 	}
140 }
141 
142 // GuessMimeType
143 /*!	\brief Guesses a MIME type for the supplied entry_ref.
144 
145 	Only the data in the given entry is considered, not the filename or
146 	its extension. Please see GuessMimeType(BFile*, const void *, int32,
147 	BString*) for more details.
148 
149 	\param ref The entry to sniff
150 	\param type Pointer to a pre-allocated BString which is set to the
151 		   resulting MIME type.
152 	\return
153 	- \c B_OK: success
154 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
155 	- error code: failure
156 */
157 status_t
158 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
159 {
160 	status_t err = ref && type ? B_OK : B_BAD_VALUE;
161 	ssize_t bytes = 0;
162 	char *buffer = NULL;
163 	BFile file;
164 
165 	// First find out the max number of bytes we need to read
166 	// from the file to fully accomodate all of our currently
167 	// installed sniffer rules
168 	if (!err) {
169 		bytes = MaxBytesNeeded();
170 		if (bytes < 0)
171 			err = bytes;
172 	}
173 
174 	// Next read that many bytes (or fewer, if the file isn't
175 	// that long) into a buffer
176 	if (!err) {
177 		buffer = new(std::nothrow) char[bytes];
178 		if (!buffer)
179 			err = B_NO_MEMORY;
180 	}
181 
182 	if (!err)
183 		err = file.SetTo(ref, B_READ_ONLY);
184 	if (!err) {
185 		bytes = file.Read(buffer, bytes);
186 		if (bytes < 0)
187 			err = bytes;
188 	}
189 
190 	// Now sniff the buffer
191 	if (!err)
192 		err = GuessMimeType(&file, buffer, bytes, type);
193 
194 	delete[] buffer;
195 
196 	return err;
197 }
198 
199 // GuessMimeType
200 /*!	\brief Guesses a MIME type for the given chunk of data.
201 
202 	Please see GuessMimeType(BFile*, const void *, int32, BString*) for more
203 	details.
204 
205 	\param buffer Pointer to a data buffer to sniff
206 	\param length The length of the data buffer pointed to by \a buffer
207 	\param type Pointer to a pre-allocated BString which is set to the
208 		   resulting MIME type.
209 	\return
210 	- \c B_OK: success
211 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
212 	- error code: failure
213 */
214 status_t
215 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
216 {
217 	return GuessMimeType(NULL, buffer, length, type);
218 }
219 
220 // SetSnifferRule
221 /*! Updates the sniffer rule for the given type
222 
223 	If the a rule currently exists in the rule list for the given type,
224 	it is first removed before the new rule is inserted.
225 
226 	The new rule is inserted in its proper, sorted position in the list.
227 
228 	\param type The type of interest
229 	\param rule The new sniffer rule
230 	\return
231 	- \c B_OK: success
232 	- other error code: failure
233 */
234 status_t
235 SnifferRules::SetSnifferRule(const char *type, const char *rule)
236 {
237 	status_t err = type && rule ? B_OK : B_BAD_VALUE;
238 	if (!err && !fHaveDoneFullBuild)
239 		return B_OK;
240 
241 	sniffer_rule item(new Sniffer::Rule());
242 	BString parseError;
243 
244 	// Check the mem alloc
245 	if (!err)
246 		err = item.rule ? B_OK : B_NO_MEMORY;
247 	// Prepare the sniffer_rule
248 	if (!err) {
249 		item.type = type;
250 		item.rule_string = rule;
251 		err = Sniffer::parse(rule, item.rule, &parseError);
252 		if (err)
253 			DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
254 				parseError.String()));
255 	}
256 	// Remove any previous rule for this type
257 	if (!err)
258 		err = DeleteSnifferRule(type);
259 	// Insert the new rule at the proper position in
260 	// the sorted rule list (remembering that our list
261 	// is sorted in ascending order using
262 	// operator<(sniffer_rule&, sniffer_rule&))
263 	if (!err) {
264 		std::list<sniffer_rule>::iterator i;
265 		for (i = fRuleList.begin(); i != fRuleList.end(); i++)
266 		{
267 			 if (item < (*i)) {
268 			 	fRuleList.insert(i, item);
269 			 	break;
270 			 }
271 		}
272 		if (i == fRuleList.end())
273 			fRuleList.push_back(item);
274 	}
275 
276 	return err;
277 }
278 
279 // DeleteSnifferRule
280 /*! \brief Removes the sniffer rule for the given type from the rule list
281 	\param type The type of interest
282 	\return
283 	- \c B_OK: success (even if no rule existed for the given type)
284 	- other error code: failure
285 */
286 status_t
287 SnifferRules::DeleteSnifferRule(const char *type)
288 {
289 	status_t err = type ? B_OK : B_BAD_VALUE;
290 	if (!err && !fHaveDoneFullBuild)
291 		return B_OK;
292 
293 	// Find the rule in the list and remove it
294 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
295 		   i != fRuleList.end();
296 		     i++)
297 	{
298 		if (i->type == type) {
299 			fRuleList.erase(i);
300 			break;
301 		}
302 	}
303 
304 	return err;
305 }
306 
307 // PrintToStream
308 //! Dumps the list of sniffer rules in sorted order to standard output
309 void
310 SnifferRules::PrintToStream() const
311 {
312 	printf("\n");
313 	printf("--------------\n");
314 	printf("Sniffer Rules:\n");
315 	printf("--------------\n");
316 
317 	if (fHaveDoneFullBuild) {
318 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
319 			   i != fRuleList.end();
320 			     i++)
321 		{
322 			printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
323 		}
324 	} else {
325 		printf("You haven't built your rule list yet, chump. ;-)\n");
326 	}
327 }
328 
329 // BuildRuleList
330 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds
331 	each parsed rule to the rule list, and sorts the list by priority, largest first.
332 
333 	Initial MaxBytesNeeded() info is compiled by this function as well.
334 */
335 status_t
336 SnifferRules::BuildRuleList()
337 {
338 	fRuleList.clear();
339 
340 	ssize_t maxBytesNeeded = 0;
341 	ssize_t bytesNeeded = 0;
342 	DatabaseDirectory root;
343 
344 	status_t err = root.Init(fDatabaseLocation);
345 	if (!err) {
346 		root.Rewind();
347 		while (true) {
348 			BEntry entry;
349 			err = root.GetNextEntry(&entry);
350 			if (err) {
351 				// If we've come to the end of list, it's not an error
352 				if (err == B_ENTRY_NOT_FOUND)
353 					err = B_OK;
354 				break;
355 			} else {
356 				// Check that this entry is both a directory and a valid MIME string
357 				char supertype[B_PATH_NAME_LENGTH];
358 				if (entry.IsDirectory()
359 				      && entry.GetName(supertype) == B_OK
360 				         && BMimeType::IsValid(supertype))
361 				{
362 					// Make sure the supertype string is all lowercase
363 					BPrivate::Storage::to_lower(supertype);
364 
365 					// First, iterate through this supertype directory and process
366 					// all of its subtypes
367 					DatabaseDirectory dir;
368 					if (dir.Init(fDatabaseLocation, supertype) == B_OK) {
369 						dir.Rewind();
370 						while (true) {
371 							BEntry subEntry;
372 							err = dir.GetNextEntry(&subEntry);
373 							if (err) {
374 								// If we've come to the end of list, it's not an error
375 								if (err == B_ENTRY_NOT_FOUND)
376 									err = B_OK;
377 								break;
378 							} else {
379 								// Get the subtype's name
380 								char subtype[B_PATH_NAME_LENGTH];
381 								if (subEntry.GetName(subtype) == B_OK) {
382 									BPrivate::Storage::to_lower(subtype);
383 
384 									char fulltype[B_PATH_NAME_LENGTH];
385 									sprintf(fulltype, "%s/%s", supertype, subtype);
386 
387 									// Process the subtype
388 									ProcessType(fulltype, &bytesNeeded);
389 									if (bytesNeeded > maxBytesNeeded)
390 										maxBytesNeeded = bytesNeeded;
391 								}
392 							}
393 						}
394 					} else {
395 						DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
396 						          "Failed opening supertype directory '%s'\n",
397 						            supertype));
398 					}
399 
400 					// Second, process the supertype
401 					ProcessType(supertype, &bytesNeeded);
402 					if (bytesNeeded > maxBytesNeeded)
403 						maxBytesNeeded = bytesNeeded;
404 				}
405 			}
406 		}
407 	} else {
408 		DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
409 		          "Failed opening mime database directory.\n"));
410 	}
411 
412 	if (!err) {
413 		fRuleList.sort();
414 		fMaxBytesNeeded = maxBytesNeeded;
415 		fHaveDoneFullBuild = true;
416 //		PrintToStream();
417 	} else {
418 		DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%"
419 			B_PRIx32 "\n", err));
420 	}
421 	return err;
422 }
423 
424 // GuessMimeType
425 /*!	\brief Guesses a MIME type for the supplied chunk of data.
426 
427 	This is accomplished by searching through the currently installed
428 	list of sniffer rules for a rule that matches on the given data buffer.
429 	Rules are searched in order of priority (higher priority first). Rules
430 	of equal priority are searched in reverse-alphabetical order (that way
431 	"supertype/subtype" form rules are checked before "supertype-only" form
432 	rules if their priorities happen to be identical).
433 
434 	\param file The file to sniff. May be \c NULL. \a buffer is always given.
435 	\param buffer Pointer to a data buffer to sniff
436 	\param length The length of the data buffer pointed to by \a buffer
437 	\param type Pointer to a pre-allocated BString which is set to the
438 		   resulting MIME type.
439 	\return
440 	- \c B_OK: success
441 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
442 	- error code: failure
443 */
444 status_t
445 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length,
446 	BString *type)
447 {
448 	status_t err = buffer && type ? B_OK : B_BAD_VALUE;
449 	if (err)
450 		return err;
451 
452 	// wrap the buffer by a BMemoryIO
453 	BMemoryIO data(buffer, length);
454 
455 	if (!fHaveDoneFullBuild)
456 		err = BuildRuleList();
457 
458 	// first ask the MIME sniffer for a suitable type
459 	float addonPriority = -1;
460 	BMimeType mimeType;
461 	if (!err && fMimeSniffer != NULL) {
462 		addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length,
463 			&mimeType);
464 	}
465 
466 	if (!err) {
467 		// Run through our rule list, which is sorted in order of
468 		// descreasing priority, and see if one of the rules sniffs
469 		// out a match
470 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
471 			   i != fRuleList.end();
472 			     i++)
473 		{
474 			if (i->rule) {
475 				// If an add-on identified the type with a priority at least
476 				// as great as the remaining rules, we can stop further
477 				// processing and return the type found by the add-on.
478 				if (i->rule->Priority() <= addonPriority) {
479 					*type = mimeType.Type();
480 					return B_OK;
481 				}
482 
483 				if (i->rule->Sniff(&data)) {
484 					type->SetTo(i->type.c_str());
485 					return B_OK;
486 				}
487 			} else {
488 				DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
489 					"NULL sniffer_rule::rule member found in rule list for type == '%s', "
490 					"rule_string == '%s'\n",
491 					i->type.c_str(), i->rule_string.c_str()));
492 			}
493 		}
494 
495 		// The sniffer add-on manager might have returned a low priority
496 		// (lower than any of a rule).
497 		if (addonPriority >= 0) {
498 			*type = mimeType.Type();
499 			return B_OK;
500 		}
501 
502 		// If we get here, we didn't find a damn thing
503 		err = kMimeGuessFailureError;
504 	}
505 	return err;
506 }
507 
508 // MaxBytesNeeded
509 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for
510 	all the currently installed rules to be able to perform a complete sniff,
511 	or an error code if something goes wrong.
512 
513 	If the internal rule list has not yet been built (this includes parsing
514 	all the installed rules), it will be.
515 
516 	\return: If the return value is non-negative, it represents	the max number
517 	of bytes needed to do a complete sniff. Otherwise, the number returned is
518 	an error code.
519 */
520 ssize_t
521 SnifferRules::MaxBytesNeeded()
522 {
523 	ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList();
524 	if (!err) {
525 		err = fMaxBytesNeeded;
526 
527 		if (fMimeSniffer != NULL) {
528 			fMaxBytesNeeded = max_c(fMaxBytesNeeded,
529 				(ssize_t)fMimeSniffer->MinimalBufferSize());
530 		}
531 	}
532 	return err;
533 }
534 
535 // ProcessType
536 /*! \brief Handles a portion of the initial rule list construction for
537 	the given mime type.
538 
539 	\note To be called by BuildRuleList() *ONLY*. :-)
540 
541 	\param type The mime type of interest. The mime string is expected to be valid
542 	            and lowercase. Both "supertype" and "supertype/subtype" mime types
543 	            are allowed.
544 	\param bytesNeeded Returns the minimum number of bytes needed for this rule to
545 	                   perform a complete sniff. May not be NULL because I'm lazy
546 	                   and this function is for internal use only anyway.
547 	\return
548 	The return value is essentially ignored (as this function prints out the
549 	debug warning if a parse fails), but that being said:
550 	- \c B_OK: success
551 	- \c other error code: failure
552 */
553 status_t
554 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
555 {
556 	status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE;
557 	if (!err)
558 		*bytesNeeded = 0;
559 
560 	BString str;
561 	BString errorMsg;
562 	sniffer_rule rule(new Sniffer::Rule());
563 
564 	// Check the mem alloc
565 	if (!err)
566 		err = rule.rule ? B_OK : B_NO_MEMORY;
567 	// Read the attr
568 	if (!err) {
569 		err = fDatabaseLocation->ReadStringAttribute(type, kSnifferRuleAttr,
570 			str);
571 	}
572 	// Parse the rule
573 	if (!err) {
574 		err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
575 		if (err)
576 			DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
577 	}
578 	if (!err) {
579 		// Note the bytes needed
580 		*bytesNeeded = rule.rule->BytesNeeded();
581 
582 		// Add the rule to the list
583 		rule.type = type;
584 		rule.rule_string = str.String();
585 		fRuleList.push_back(rule);
586 	}
587 	return err;
588 }
589 
590 } // namespace Mime
591 } // namespace Storage
592 } // namespace BPrivate
593 
594