xref: /haiku/src/kits/storage/mime/SnifferRules.cpp (revision 21258e2674226d6aa732321b6f8494841895af5f)
1 /*
2  * Copyright 2002-2006, Haiku Inc.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Tyler Dauwalder
7  *		Ingo Weinhold, bonefish@users.sf.net
8  */
9 
10 /*!
11 	\file SnifferRules.cpp
12 	SnifferRules class implementation
13 */
14 
15 #include <mime/SnifferRules.h>
16 
17 #include <stdio.h>
18 #include <sys/stat.h>
19 
20 #include <Directory.h>
21 #include <Entry.h>
22 #include <File.h>
23 #include <MimeType.h>
24 #include <mime/database_support.h>
25 #include <mime/DatabaseDirectory.h>
26 #include <mime/DatabaseLocation.h>
27 #include <mime/MimeSniffer.h>
28 #include <sniffer/Parser.h>
29 #include <sniffer/Rule.h>
30 #include <StorageDefs.h>
31 #include <storage_support.h>
32 #include <String.h>
33 
34 
35 #define DBG(x) x
36 //#define DBG(x)
37 #define OUT printf
38 
39 namespace BPrivate {
40 namespace Storage {
41 namespace Mime {
42 
43 using namespace BPrivate::Storage;
44 
45 /*!
46 	\struct SnifferRules::sniffer_rule
47 	\brief A parsed sniffer rule and its corresponding mime type and rule string
48 
49 	The parsed sniffer rule is stored in the \c rule member, which is a pointer
50 	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
51 	objects	(as opposed to \c sniffer_rule pointers) to be used with STL objects
52 	without unnecessary copying. As a consequence of this decision, the
53 	\c SnifferRules object managing the rule list is responsible for actually
54 	deleting each \c sniffer_rule's \c Sniffer::Rule object.
55 */
56 
57 // sniffer_rule Constructor
58 //! Creates a new \c sniffer_rule object
59 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
60 	: rule(rule)
61 {
62 }
63 
64 // sniffer_rule Destructor
65 //! Destroys the \c sniffer_rule object.
66 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
67 	object's \c rule member is *NOT* deleted by this function.
68 */
69 SnifferRules::sniffer_rule::~sniffer_rule()
70 {
71 }
72 
73 // private functions
74 /*! \brief Returns true if \a left's priority is greater than \a right's
75 
76 	This may seem slightly backwards, but since sort() using
77 	operator<() sorts in ascending order, we say "left < right"
78 	if "left.priority > right.priority" to get them sorted in
79 	descending order of priority. Super, no?
80 
81 	Also, sniffer_rule objects with \c NULL \c rule members are
82 	treated as having minimal priority (and thus are placed at
83 	the end of the list of rules).
84 
85 	Finally, sniffer_rule objects that are otherwise equal are
86 	sorted in reverse alphabetic order (thus placing sniffer
87 	rules for supertypes *after* sniffer rules for subtypes
88 	of said supertype when both rules have identical priorities).
89 */
90 bool operator<(const SnifferRules::sniffer_rule &left, const SnifferRules::sniffer_rule &right)
91 {
92 	if (left.rule && right.rule) {
93 		double leftPriority = left.rule->Priority();
94 		double rightPriority = right.rule->Priority();
95 		if (leftPriority > rightPriority) {
96 			return true;	// left < right
97 		} else if (rightPriority > leftPriority) {
98 			return false;	// right < left
99 		} else {
100 			return left.type > right.type;
101 		}
102 	} else if (left.rule) {
103 		return true; 	// left < right
104 	} else {
105 		return false;	// right < left
106 	}
107 }
108 
109 /*!
110 	\class SnifferRules
111 	\brief Manages the sniffer rules for the entire database
112 */
113 
114 // Constructor
115 //! Constructs a new SnifferRules object
116 SnifferRules::SnifferRules(DatabaseLocation* databaseLocation,
117 	MimeSniffer* mimeSniffer)
118 	:
119 	fDatabaseLocation(databaseLocation),
120 	fMimeSniffer(mimeSniffer),
121 	fMaxBytesNeeded(0),
122 	fHaveDoneFullBuild(false)
123 {
124 }
125 
126 // Destructor
127 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated
128 	\c Sniffer::Rule objects scattered throughout the rule list in
129 	\c sniffer_rule::rule members.
130 */
131 SnifferRules::~SnifferRules()
132 {
133 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
134 		   i != fRuleList.end(); i++) {
135 		delete i->rule;
136 		i->rule = NULL;
137 	}
138 }
139 
140 // GuessMimeType
141 /*!	\brief Guesses a MIME type for the supplied entry_ref.
142 
143 	Only the data in the given entry is considered, not the filename or
144 	its extension. Please see GuessMimeType(BFile*, const void *, int32,
145 	BString*) for more details.
146 
147 	\param ref The entry to sniff
148 	\param type Pointer to a pre-allocated BString which is set to the
149 		   resulting MIME type.
150 	\return
151 	- \c B_OK: success
152 	- \c Mime::kMimeGuessFailureError: no match found (\a type is left unmodified)
153 	- error code: failure
154 */
155 status_t
156 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
157 {
158 	status_t err = ref && type ? B_OK : B_BAD_VALUE;
159 	ssize_t bytes = 0;
160 	char *buffer = NULL;
161 	BFile file;
162 
163 	// First find out the max number of bytes we need to read
164 	// from the file to fully accomodate all of our currently
165 	// installed sniffer rules
166 	if (!err) {
167 		bytes = MaxBytesNeeded();
168 		if (bytes < 0)
169 			err = bytes;
170 	}
171 
172 	// Next read that many bytes (or fewer, if the file isn't
173 	// that long) into a buffer
174 	if (!err) {
175 		buffer = new(std::nothrow) char[bytes];
176 		if (!buffer)
177 			err = B_NO_MEMORY;
178 	}
179 
180 	if (!err)
181 		err = file.SetTo(ref, B_READ_ONLY);
182 	if (!err) {
183 		bytes = file.Read(buffer, bytes);
184 		if (bytes < 0)
185 			err = bytes;
186 	}
187 
188 	// Now sniff the buffer
189 	if (!err)
190 		err = GuessMimeType(&file, buffer, bytes, type);
191 
192 	delete[] buffer;
193 
194 	return err;
195 }
196 
197 // GuessMimeType
198 /*!	\brief Guesses a MIME type for the given chunk of data.
199 
200 	Please see GuessMimeType(BFile*, const void *, int32, BString*) for more
201 	details.
202 
203 	\param buffer Pointer to a data buffer to sniff
204 	\param length The length of the data buffer pointed to by \a buffer
205 	\param type Pointer to a pre-allocated BString which is set to the
206 		   resulting MIME type.
207 	\return
208 	- \c B_OK: success
209 	- \c Mime::kMimeGuessFailureError: no match found (\a type is left unmodified)
210 	- error code: failure
211 */
212 status_t
213 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
214 {
215 	return GuessMimeType(NULL, buffer, length, type);
216 }
217 
218 // SetSnifferRule
219 /*! Updates the sniffer rule for the given type
220 
221 	If a rule currently exists in the rule list for the given type,
222 	it is first removed before the new rule is inserted.
223 
224 	The new rule is inserted in its proper, sorted position in the list.
225 
226 	\param type The type of interest
227 	\param rule The new sniffer rule
228 	\return
229 	- \c B_OK: success
230 	- other error code: failure
231 */
232 status_t
233 SnifferRules::SetSnifferRule(const char *type, const char *rule)
234 {
235 	status_t err = type && rule ? B_OK : B_BAD_VALUE;
236 	if (!err && !fHaveDoneFullBuild)
237 		return B_OK;
238 
239 	sniffer_rule item(new Sniffer::Rule());
240 	BString parseError;
241 
242 	// Check the mem alloc
243 	if (!err)
244 		err = item.rule ? B_OK : B_NO_MEMORY;
245 	// Prepare the sniffer_rule
246 	if (!err) {
247 		item.type = type;
248 		item.rule_string = rule;
249 		err = Sniffer::parse(rule, item.rule, &parseError);
250 		if (err)
251 			DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
252 				parseError.String()));
253 	}
254 	// Remove any previous rule for this type
255 	if (!err)
256 		err = DeleteSnifferRule(type);
257 	// Insert the new rule at the proper position in
258 	// the sorted rule list (remembering that our list
259 	// is sorted in ascending order using
260 	// operator<(sniffer_rule&, sniffer_rule&))
261 	if (!err) {
262 		std::list<sniffer_rule>::iterator i;
263 		for (i = fRuleList.begin(); i != fRuleList.end(); i++) {
264 			 if (item < (*i)) {
265 			 	fRuleList.insert(i, item);
266 			 	break;
267 			 }
268 		}
269 		if (i == fRuleList.end())
270 			fRuleList.push_back(item);
271 	}
272 
273 	return err;
274 }
275 
276 // DeleteSnifferRule
277 /*! \brief Removes the sniffer rule for the given type from the rule list
278 	\param type The type of interest
279 	\return
280 	- \c B_OK: success (even if no rule existed for the given type)
281 	- other error code: failure
282 */
283 status_t
284 SnifferRules::DeleteSnifferRule(const char *type)
285 {
286 	status_t err = type ? B_OK : B_BAD_VALUE;
287 	if (!err && !fHaveDoneFullBuild)
288 		return B_OK;
289 
290 	// Find the rule in the list and remove it
291 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
292 		   i != fRuleList.end(); i++) {
293 		if (i->type == type) {
294 			fRuleList.erase(i);
295 			break;
296 		}
297 	}
298 
299 	return err;
300 }
301 
302 // PrintToStream
303 //! Dumps the list of sniffer rules in sorted order to standard output
304 void
305 SnifferRules::PrintToStream() const
306 {
307 	printf("\n");
308 	printf("--------------\n");
309 	printf("Sniffer Rules:\n");
310 	printf("--------------\n");
311 
312 	if (fHaveDoneFullBuild) {
313 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
314 			   i != fRuleList.end(); i++) {
315 			printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
316 		}
317 	} else {
318 		printf("You haven't built your rule list yet, chump. ;-)\n");
319 	}
320 }
321 
322 // BuildRuleList
323 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds
324 	each parsed rule to the rule list, and sorts the list by priority, largest first.
325 
326 	Initial MaxBytesNeeded() info is compiled by this function as well.
327 */
328 status_t
329 SnifferRules::BuildRuleList()
330 {
331 	fRuleList.clear();
332 
333 	ssize_t maxBytesNeeded = 0;
334 	ssize_t bytesNeeded = 0;
335 	DatabaseDirectory root;
336 
337 	status_t err = root.Init(fDatabaseLocation);
338 	if (!err) {
339 		root.Rewind();
340 		while (true) {
341 			BEntry entry;
342 			err = root.GetNextEntry(&entry);
343 			if (err) {
344 				// If we've come to the end of list, it's not an error
345 				if (err == B_ENTRY_NOT_FOUND)
346 					err = B_OK;
347 				break;
348 			} else {
349 				// Check that this entry is both a directory and a valid MIME string
350 				char supertype[B_PATH_NAME_LENGTH];
351 				if (entry.IsDirectory()
352 				      && entry.GetName(supertype) == B_OK
353 				         && BMimeType::IsValid(supertype)) {
354 					// Make sure the supertype string is all lowercase
355 					BPrivate::Storage::to_lower(supertype);
356 
357 					// First, iterate through this supertype directory and process
358 					// all of its subtypes
359 					DatabaseDirectory dir;
360 					if (dir.Init(fDatabaseLocation, supertype) == B_OK) {
361 						dir.Rewind();
362 						while (true) {
363 							BEntry subEntry;
364 							err = dir.GetNextEntry(&subEntry);
365 							if (err) {
366 								// If we've come to the end of list, it's not an error
367 								if (err == B_ENTRY_NOT_FOUND)
368 									err = B_OK;
369 								break;
370 							} else {
371 								// Get the subtype's name
372 								char subtype[B_PATH_NAME_LENGTH];
373 								if (subEntry.GetName(subtype) == B_OK) {
374 									BPrivate::Storage::to_lower(subtype);
375 
376 									char fulltype[B_PATH_NAME_LENGTH];
377 									snprintf(fulltype, B_PATH_NAME_LENGTH, "%s/%s",
378 										supertype, subtype);
379 
380 									// Process the subtype
381 									ProcessType(fulltype, &bytesNeeded);
382 									if (bytesNeeded > maxBytesNeeded)
383 										maxBytesNeeded = bytesNeeded;
384 								}
385 							}
386 						}
387 					} else {
388 						DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
389 						          "Failed opening supertype directory '%s'\n",
390 						            supertype));
391 					}
392 
393 					// Second, process the supertype
394 					ProcessType(supertype, &bytesNeeded);
395 					if (bytesNeeded > maxBytesNeeded)
396 						maxBytesNeeded = bytesNeeded;
397 				}
398 			}
399 		}
400 	} else {
401 		DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
402 		          "Failed opening mime database directory.\n"));
403 	}
404 
405 	if (!err) {
406 		fRuleList.sort();
407 		fMaxBytesNeeded = maxBytesNeeded;
408 		fHaveDoneFullBuild = true;
409 //		PrintToStream();
410 	} else {
411 		DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%"
412 			B_PRIx32 "\n", err));
413 	}
414 	return err;
415 }
416 
417 // GuessMimeType
418 /*!	\brief Guesses a MIME type for the supplied chunk of data.
419 
420 	This is accomplished by searching through the currently installed
421 	list of sniffer rules for a rule that matches on the given data buffer.
422 	Rules are searched in order of priority (higher priority first). Rules
423 	of equal priority are searched in reverse-alphabetical order (that way
424 	"supertype/subtype" form rules are checked before "supertype-only" form
425 	rules if their priorities happen to be identical).
426 
427 	\param file The file to sniff. May be \c NULL. \a buffer is always given.
428 	\param buffer Pointer to a data buffer to sniff
429 	\param length The length of the data buffer pointed to by \a buffer
430 	\param type Pointer to a pre-allocated BString which is set to the
431 		   resulting MIME type.
432 	\return
433 	- \c B_OK: success
434 	- \c Mime::kMimeGuessFailureError: no match found (\a type is left unmodified)
435 	- error code: failure
436 */
437 status_t
438 SnifferRules::GuessMimeType(BFile* file, const void *buffer, int32 length,
439 	BString *type)
440 {
441 	status_t err = buffer && type ? B_OK : B_BAD_VALUE;
442 	if (err)
443 		return err;
444 
445 	// wrap the buffer by a BMemoryIO
446 	BMemoryIO data(buffer, length);
447 
448 	if (!fHaveDoneFullBuild)
449 		err = BuildRuleList();
450 
451 	// first ask the MIME sniffer for a suitable type
452 	float addonPriority = -1;
453 	BMimeType mimeType;
454 	if (!err && fMimeSniffer != NULL) {
455 		addonPriority = fMimeSniffer->GuessMimeType(file, buffer, length,
456 			&mimeType);
457 	}
458 
459 	if (!err) {
460 		// Run through our rule list, which is sorted in order of
461 		// descreasing priority, and see if one of the rules sniffs
462 		// out a match
463 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
464 			   i != fRuleList.end(); i++) {
465 			if (i->rule) {
466 				// If an add-on identified the type with a priority at least
467 				// as great as the remaining rules, we can stop further
468 				// processing and return the type found by the add-on.
469 				if (i->rule->Priority() <= addonPriority) {
470 					*type = mimeType.Type();
471 					return B_OK;
472 				}
473 
474 				if (i->rule->Sniff(&data)) {
475 					type->SetTo(i->type.c_str());
476 					return B_OK;
477 				}
478 			} else {
479 				DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
480 					"NULL sniffer_rule::rule member found in rule list for type == '%s', "
481 					"rule_string == '%s'\n",
482 					i->type.c_str(), i->rule_string.c_str()));
483 			}
484 		}
485 
486 		// The sniffer add-on manager might have returned a low priority
487 		// (lower than any of a rule).
488 		if (addonPriority >= 0) {
489 			*type = mimeType.Type();
490 			return B_OK;
491 		}
492 
493 		// If we get here, we didn't find a damn thing
494 		err = kMimeGuessFailureError;
495 	}
496 	return err;
497 }
498 
499 // MaxBytesNeeded
500 /*! \brief Returns the maximum number of bytes needed in a data buffer for
501 	all the currently installed rules to be able to perform a complete sniff,
502 	or an error code if something goes wrong.
503 
504 	If the internal rule list has not yet been built (this includes parsing
505 	all the installed rules), it will be.
506 
507 	\return: If the return value is non-negative, it represents	the max number
508 	of bytes needed to do a complete sniff. Otherwise, the number returned is
509 	an error code.
510 */
511 ssize_t
512 SnifferRules::MaxBytesNeeded()
513 {
514 	ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList();
515 	if (!err) {
516 		err = fMaxBytesNeeded;
517 
518 		if (fMimeSniffer != NULL) {
519 			fMaxBytesNeeded = max_c(fMaxBytesNeeded,
520 				(ssize_t)fMimeSniffer->MinimalBufferSize());
521 		}
522 	}
523 	return err;
524 }
525 
526 // ProcessType
527 /*! \brief Handles a portion of the initial rule list construction for
528 	the given mime type.
529 
530 	\note To be called by BuildRuleList() *ONLY*. :-)
531 
532 	\param type The mime type of interest. The mime string is expected to be valid
533 	            and lowercase. Both "supertype" and "supertype/subtype" mime types
534 	            are allowed.
535 	\param bytesNeeded Returns the minimum number of bytes needed for this rule to
536 	                   perform a complete sniff. May not be NULL because I'm lazy
537 	                   and this function is for internal use only anyway.
538 	\return
539 	The return value is essentially ignored (as this function prints out the
540 	debug warning if a parse fails), but that being said:
541 	- \c B_OK: success
542 	- \c other error code: failure
543 */
544 status_t
545 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
546 {
547 	status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE;
548 	if (!err)
549 		*bytesNeeded = 0;
550 
551 	BString str;
552 	BString errorMsg;
553 	sniffer_rule rule(new Sniffer::Rule());
554 
555 	// Check the mem alloc
556 	if (!err)
557 		err = rule.rule ? B_OK : B_NO_MEMORY;
558 	// Read the attr
559 	if (!err) {
560 		err = fDatabaseLocation->ReadStringAttribute(type, kSnifferRuleAttr,
561 			str);
562 	}
563 	// Parse the rule
564 	if (!err) {
565 		err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
566 		if (err)
567 			DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
568 	}
569 	if (!err) {
570 		// Note the bytes needed
571 		*bytesNeeded = rule.rule->BytesNeeded();
572 
573 		// Add the rule to the list
574 		rule.type = type;
575 		rule.rule_string = str.String();
576 		fRuleList.push_back(rule);
577 	}
578 	return err;
579 }
580 
581 } // namespace Mime
582 } // namespace Storage
583 } // namespace BPrivate
584 
585