xref: /haiku/src/kits/storage/mime/SnifferRules.cpp (revision d7e489f80a82a0dc5974df1e780d7a908129bab4)
1 //----------------------------------------------------------------------
2 //  This software is part of the OpenBeOS distribution and is covered
3 //  by the OpenBeOS license.
4 //----------------------------------------------------------------------
5 /*!
6 	\file SnifferRules.cpp
7 	SnifferRules class implementation
8 */
9 
10 #include "mime/SnifferRules.h"
11 
12 #include <Directory.h>
13 #include <Entry.h>
14 #include <File.h>
15 #include <MimeType.h>
16 #include <mime/database_support.h>
17 #include <sniffer/Parser.h>
18 #include <sniffer/Rule.h>
19 #include <StorageDefs.h>
20 #include <storage_support.h>
21 #include <String.h>
22 
23 #include <stdio.h>
24 #include <sys/stat.h>
25 
26 #define DBG(x) x
27 //#define DBG(x)
28 #define OUT printf
29 
30 namespace BPrivate {
31 namespace Storage {
32 namespace Mime {
33 
34 using namespace BPrivate::Storage;
35 
36 /*!
37 	\struct SnifferRules::sniffer_rule
38 	\brief A parsed sniffer rule and its corresponding mime type and rule string
39 
40 	The parsed sniffer rule is stored in the \c rule member, which is a pointer
41 	to a \c Sniffer::Rule object. This design was chosen to allow \c sniffer_rule
42 	objects	(as opposed to \c sniffer_rule pointers) to be used with STL objects
43 	without unnecessary copying. As a consequence of this decision, the
44 	\c SnifferRules object managing the rule list is responsible for actually
45 	deleting each \c sniffer_rule's \c Sniffer::Rule object.
46 */
47 
48 // sniffer_rule Constructor
49 //! Creates a new \c sniffer_rule object
50 SnifferRules::sniffer_rule::sniffer_rule(Sniffer::Rule *rule)
51 	: rule(rule)
52 {
53 }
54 
55 // sniffer_rule Destructor
56 //! Destroys the \c sniffer_rule object.
57 /*! \note The \c Sniffer::Rule object pointed to by the \c sniffer_rule
58 	object's \c rule member is *NOT* deleted by this function.
59 */
60 SnifferRules::sniffer_rule::~sniffer_rule()
61 {
62 }
63 
64 // private functions
65 /*! \brief Returns true if \a left's priority is greater than \a right's
66 
67 	This may seem slightly backwards, but since sort() using
68 	operator<() sorts in ascending order, we say "left < right"
69 	if "left.priority > right.priority" to get them sorted in
70 	ascending order. Super, no?
71 
72 	Also, sniffer_rule objects with \c NULL \c rule members are
73 	treated as having minimal priority (and thus are placed at
74 	the end of the list of rules).
75 
76 	Finally, sniffer_rule objects that are otherwise equal are
77 	sorted in reverse alphabetic order (thus placing sniffer
78 	rules for supertypes *after* sniffer rules for subtypes
79 	of said supertype when both rules have identical priorities).
80 */
81 bool operator<(SnifferRules::sniffer_rule &left, SnifferRules::sniffer_rule &right)
82 {
83 	if (left.rule && right.rule) {
84 		double leftPriority = left.rule->Priority();
85 		double rightPriority = right.rule->Priority();
86 		if (leftPriority > rightPriority) {
87 			return true;	// left < right
88 		} else if (rightPriority > leftPriority) {
89 			return false;	// right < left
90 		} else {
91 			return left.type > right.type;
92 		}
93 	} else if (left.rule) {
94 		return true; 	// left < right
95 	} else {
96 		return false;	// right < left
97 	}
98 }
99 
100 /*!
101 	\class SnifferRules
102 	\brief Manages the sniffer rules for the entire database
103 */
104 
105 // Constructor
106 //! Constructs a new SnifferRules object
107 SnifferRules::SnifferRules()
108 	: fHaveDoneFullBuild(false)
109 {
110 }
111 
112 // Destructor
113 /*! \brief Destroys the \c SnifferRules object and all dynamically allocated
114 	\c Sniffer::Rule objects scattered throughout the rule list in
115 	\c sniffer_rule::rule members.
116 */
117 SnifferRules::~SnifferRules()
118 {
119 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
120 		   i != fRuleList.end();
121 		     i++)
122 	{
123 		delete i->rule;
124 		i->rule = NULL;
125 	}
126 }
127 
128 // GuessMimeType
129 /*!	\brief Guesses a MIME type for the supplied entry_ref.
130 
131 	Only the data in the given entry is considered, not the filename or
132 	its extension. Please see GuessMimeType(BPositionIO*, BString*) for
133 	more details.
134 
135 	\param ref The entry to sniff
136 	\param type Pointer to a pre-allocated BString which is set to the
137 		   resulting MIME type.
138 	\return
139 	- \c B_OK: success
140 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
141 	- error code: failure
142 */
143 status_t
144 SnifferRules::GuessMimeType(const entry_ref *ref, BString *type)
145 {
146 	status_t err = ref && type ? B_OK : B_BAD_VALUE;
147 	ssize_t bytes = 0;
148 	char *buffer = NULL;
149 	BFile file;
150 
151 	// First find out the max number of bytes we need to read
152 	// from the file to fully accomodate all of our currently
153 	// installed sniffer rules
154 	if (!err) {
155 		bytes = MaxBytesNeeded();
156 		if (bytes < 0)
157 			err = bytes;
158 	}
159 
160 	// Next read that many bytes (or fewer, if the file isn't
161 	// that long) into a buffer
162 	if (!err) {
163 		buffer = new(nothrow) char[bytes];
164 		if (!buffer)
165 			err = B_NO_MEMORY;
166 	}
167 	if (!err)
168 		err = file.SetTo(ref, B_READ_ONLY);
169 	if (!err) {
170 		bytes = file.Read(buffer, bytes);
171 		if (bytes < 0)
172 			err = bytes;
173 	}
174 
175 	// Now sniff the buffer
176 	if (!err) {
177 		BMemoryIO data(buffer, bytes);
178 		err = GuessMimeType(&data, type);
179 	}
180 
181 	return err;
182 }
183 
184 // GuessMimeType
185 /*!	\brief Guesses a MIME type for the given chunk of data.
186 
187 	Please see GuessMimeType(BPositionIO*, BString*) for more details.
188 
189 	\param buffer Pointer to a data buffer to sniff
190 	\param length The length of the data buffer pointed to by \a buffer
191 	\param type Pointer to a pre-allocated BString which is set to the
192 		   resulting MIME type.
193 	\return
194 	- \c B_OK: success
195 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
196 	- error code: failure
197 */
198 status_t
199 SnifferRules::GuessMimeType(const void *buffer, int32 length, BString *type)
200 {
201 	status_t err = buffer && type ? B_OK : B_BAD_VALUE;
202 	// Wrap a BMemoryIO around the buffer and call our private
203 	// GuessMimeType(BPositionIO*, BString*) function to do the
204 	// dirty work
205 	if (!err) {
206 		BMemoryIO data(buffer, length);
207 		err = GuessMimeType(&data, type);
208 	}
209 	return err;
210 }
211 
212 // SetSnifferRule
213 /*! Updates the sniffer rule for the given type
214 
215 	If the a rule currently exists in the rule list for the given type,
216 	it is first removed before the new rule is inserted.
217 
218 	The new rule is inserted in its proper, sorted position in the list.
219 
220 	\param type The type of interest
221 	\param rule The new sniffer rule
222 	\return
223 	- \c B_OK: success
224 	- other error code: failure
225 */
226 status_t
227 SnifferRules::SetSnifferRule(const char *type, const char *rule)
228 {
229 	status_t err = type && rule ? B_OK : B_BAD_VALUE;
230 	if (!err && !fHaveDoneFullBuild)
231 		return B_OK;
232 
233 	sniffer_rule item(new Sniffer::Rule());
234 	BString parseError;
235 
236 	// Check the mem alloc
237 	if (!err)
238 		err = item.rule ? B_OK : B_NO_MEMORY;
239 	// Prepare the sniffer_rule
240 	if (!err) {
241 		item.type = type;
242 		item.rule_string = rule;
243 		err = Sniffer::parse(rule, item.rule, &parseError);
244 		if (err)
245 			DBG(OUT("ERROR: SnifferRules::SetSnifferRule(): rule parsing error:\n%s\n",
246 				parseError.String()));
247 	}
248 	// Remove any previous rule for this type
249 	if (!err)
250 		err = DeleteSnifferRule(type);
251 	// Insert the new rule at the proper position in
252 	// the sorted rule list (remembering that our list
253 	// is sorted in ascending order using
254 	// operator<(sniffer_rule&, sniffer_rule&))
255 	if (!err) {
256 		std::list<sniffer_rule>::iterator i;
257 		for (i = fRuleList.begin(); i != fRuleList.end(); i++)
258 		{
259 			 if (item < (*i)) {
260 			 	fRuleList.insert(i, item);
261 			 	break;
262 			 }
263 		}
264 		if (i == fRuleList.end())
265 			fRuleList.push_back(item);
266 	}
267 
268 	return err;
269 }
270 
271 // DeleteSnifferRule
272 /*! \brief Removes the sniffer rule for the given type from the rule list
273 	\param type The type of interest
274 	\return
275 	- \c B_OK: success (even if no rule existed for the given type)
276 	- other error code: failure
277 */
278 status_t
279 SnifferRules::DeleteSnifferRule(const char *type)
280 {
281 	status_t err = type ? B_OK : B_BAD_VALUE;
282 	if (!err && !fHaveDoneFullBuild)
283 		return B_OK;
284 
285 	// Find the rule in the list and remove it
286 	for (std::list<sniffer_rule>::iterator i = fRuleList.begin();
287 		   i != fRuleList.end();
288 		     i++)
289 	{
290 		if (i->type == type) {
291 			fRuleList.erase(i);
292 			break;
293 		}
294 	}
295 
296 	return err;
297 }
298 
299 // PrintToStream
300 //! Dumps the list of sniffer rules in sorted order to standard output
301 void
302 SnifferRules::PrintToStream() const
303 {
304 	printf("\n");
305 	printf("--------------\n");
306 	printf("Sniffer Rules:\n");
307 	printf("--------------\n");
308 
309 	if (fHaveDoneFullBuild) {
310 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
311 			   i != fRuleList.end();
312 			     i++)
313 		{
314 			printf("%s: '%s'\n", i->type.c_str(), i->rule_string.c_str());
315 		}
316 	} else {
317 		printf("You haven't built your rule list yet, chump. ;-)\n");
318 	}
319 }
320 
321 // BuildRuleList
322 /*! \brief Crawls through the database, parses each sniffer rule it finds, adds
323 	each parsed rule to the rule list, and sorts the list by priority, largest first.
324 
325 	Initial MaxBytesNeeded() info is compiled by this function as well.
326 */
327 status_t
328 SnifferRules::BuildRuleList()
329 {
330 	fRuleList.clear();
331 
332 	ssize_t maxBytesNeeded = 0;
333 	ssize_t bytesNeeded = 0;
334 	BDirectory root;
335 
336 	status_t err = root.SetTo(kDatabaseDir.c_str());
337 	if (!err) {
338 		root.Rewind();
339 		while (true) {
340 			BEntry entry;
341 			err = root.GetNextEntry(&entry);
342 			if (err) {
343 				// If we've come to the end of list, it's not an error
344 				if (err == B_ENTRY_NOT_FOUND)
345 					err = B_OK;
346 				break;
347 			} else {
348 				// Check that this entry is both a directory and a valid MIME string
349 				char supertype[B_PATH_NAME_LENGTH];
350 				if (entry.IsDirectory()
351 				      && entry.GetName(supertype) == B_OK
352 				         && BMimeType::IsValid(supertype))
353 				{
354 					// Make sure the supertype string is all lowercase
355 					BPrivate::Storage::to_lower(supertype);
356 
357 					// First, iterate through this supertype directory and process
358 					// all of its subtypes
359 					BDirectory dir;
360 					if (dir.SetTo(&entry) == B_OK) {
361 						dir.Rewind();
362 						while (true) {
363 							BEntry subEntry;
364 							err = dir.GetNextEntry(&subEntry);
365 							if (err) {
366 								// If we've come to the end of list, it's not an error
367 								if (err == B_ENTRY_NOT_FOUND)
368 									err = B_OK;
369 								break;
370 							} else {
371 								// Get the subtype's name
372 								char subtype[B_PATH_NAME_LENGTH];
373 								if (subEntry.GetName(subtype) == B_OK) {
374 									BPrivate::Storage::to_lower(subtype);
375 
376 									char fulltype[B_PATH_NAME_LENGTH];
377 									sprintf(fulltype, "%s/%s", supertype, subtype);
378 
379 									// Process the subtype
380 									ProcessType(fulltype, &bytesNeeded);
381 									if (bytesNeeded > maxBytesNeeded)
382 										maxBytesNeeded = bytesNeeded;
383 								}
384 							}
385 						}
386 					} else {
387 						DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
388 						          "Failed opening supertype directory '%s'\n",
389 						            supertype));
390 					}
391 
392 					// Second, process the supertype
393 					ProcessType(supertype, &bytesNeeded);
394 					if (bytesNeeded > maxBytesNeeded)
395 						maxBytesNeeded = bytesNeeded;
396 				}
397 			}
398 		}
399 	} else {
400 		DBG(OUT("Mime::SnifferRules::BuildRuleList(): "
401 		          "Failed opening mime database directory '%s'\n",
402 		            kDatabaseDir.c_str()));
403 	}
404 
405 	if (!err) {
406 		fRuleList.sort();
407 		fMaxBytesNeeded = maxBytesNeeded;
408 		fHaveDoneFullBuild = true;
409 //		PrintToStream();
410 	} else
411 		DBG(OUT("Mime::SnifferRules::BuildRuleList() failed, error code == 0x%lx\n", err));
412 	return err;
413 }
414 
415 // GuessMimeType
416 /*!	\brief Guesses a MIME type for the supplied chunk of data.
417 
418 	This is accomplished by searching through the currently installed
419 	list of sniffer rules for a rule that matches on the given data buffer.
420 	Rules are searched in order of priority (higher priority first). Rules
421 	of equal priority are searched in reverse-alphabetical order (that way
422 	"supertype/subtype" form rules are checked before "supertype-only" form
423 	rules if their priorities happen to be identical).
424 
425 	\param data The data to sniff
426 	\param type Pointer to a pre-allocated BString which is set to the
427 		   resulting MIME type.
428 	\return
429 	- \c B_OK: success
430 	- \c Mime::kMimeGuessFailure: no match found (\a type is left unmodified)
431 	- error code: failure
432 */
433 status_t
434 SnifferRules::GuessMimeType(BPositionIO *data, BString *type)
435 {
436 	status_t err = data && type ? B_OK : B_BAD_VALUE;
437 	if (!err && !fHaveDoneFullBuild)
438 		err = BuildRuleList();
439 	if (!err) {
440 		// Run through our rule list, which is sorted in order of
441 		// descreasing priority, and see if one of the rules sniffs
442 		// out a match
443 		for (std::list<sniffer_rule>::const_iterator i = fRuleList.begin();
444 			   i != fRuleList.end();
445 			     i++)
446 		{
447 			if (i->rule) {
448 				if (i->rule->Sniff(data)) {
449 					type->SetTo(i->type.c_str());
450 					return B_OK;
451 				}
452 			} else {
453 				DBG(OUT("WARNING: Mime::SnifferRules::GuessMimeType(BPositionIO*,BString*): "
454 					"NULL sniffer_rule::rule member found in rule list for type == '%s', "
455 					"rule_string == '%s'\n",
456 					i->type.c_str(), i->rule_string.c_str()));
457 			}
458 		}
459 
460 		// If we get here, we didn't find a damn thing
461 		err = kMimeGuessFailureError;
462 	}
463 	return err;
464 }
465 
466 // MaxBytesNeeded
467 /*! \brief Returns the maxmimum number of bytes needed in a data buffer for
468 	all the currently installed rules to be able to perform a complete sniff,
469 	or an error code if something goes wrong.
470 
471 	If the internal rule list has not yet been built (this includes parsing
472 	all the installed rules), it will be.
473 
474 	\return: If the return value is non-negative, it represents	the max number
475 	of bytes needed to do a complete sniff. Otherwise, the number returned is
476 	an error code.
477 */
478 ssize_t
479 SnifferRules::MaxBytesNeeded()
480 {
481 	ssize_t err = fHaveDoneFullBuild ? B_OK : BuildRuleList();
482 	if (!err)
483 		err = fMaxBytesNeeded;
484 	return err;
485 }
486 
487 // ProcessType
488 /*! \brief Handles a portion of the initial rule list construction for
489 	the given mime type.
490 
491 	\note To be called by BuildRuleList() *ONLY*. :-)
492 
493 	\param type The mime type of interest. The mime string is expected to be valid
494 	            and lowercase. Both "supertype" and "supertype/subtype" mime types
495 	            are allowed.
496 	\param bytesNeeded Returns the minimum number of bytes needed for this rule to
497 	                   perform a complete sniff. May not be NULL because I'm lazy
498 	                   and this function is for internal use only anyway.
499 	\return
500 	The return value is essentially ignored (as this function prints out the
501 	debug warning if a parse fails), but that being said:
502 	- \c B_OK: success
503 	- \c other error code: failure
504 */
505 status_t
506 SnifferRules::ProcessType(const char *type, ssize_t *bytesNeeded)
507 {
508 	status_t err = type && bytesNeeded ? B_OK : B_BAD_VALUE;
509 	if (!err)
510 		*bytesNeeded = 0;
511 
512 	BString str;
513 	BString errorMsg;
514 	sniffer_rule rule(new Sniffer::Rule());
515 
516 	// Check the mem alloc
517 	if (!err)
518 		err = rule.rule ? B_OK : B_NO_MEMORY;
519 	// Read the attr
520 	if (!err)
521 		err = read_mime_attr_string(type, kSnifferRuleAttr, &str);
522 	// Parse the rule
523 	if (!err) {
524 		err = Sniffer::parse(str.String(), rule.rule, &errorMsg);
525 		if (err)
526 			DBG(OUT("WARNING: SnifferRules::ProcessType(): Parse failure:\n%s\n", errorMsg.String()));
527 	}
528 	if (!err) {
529 		// Note the bytes needed
530 		*bytesNeeded = rule.rule->BytesNeeded();
531 
532 		// Add the rule to the list
533 		rule.type = type;
534 		rule.rule_string = str.String();
535 		fRuleList.push_back(rule);
536 	}
537 	return err;
538 }
539 
540 } // namespace Mime
541 } // namespace Storage
542 } // namespace BPrivate
543 
544