xref: /haiku/src/kits/storage/sniffer/Pattern.cpp (revision 268f99dd7dc4bd7474a8bd2742d3f1ec1de6752a)
174cd43ecSTyler Dauwalder //----------------------------------------------------------------------
2*2ca13760SColdfirex //  This software is part of the Haiku distribution and is covered
3b6f76ebeSAugustin Cavalier //  by the MIT License.
474cd43ecSTyler Dauwalder //---------------------------------------------------------------------
574cd43ecSTyler Dauwalder /*!
674cd43ecSTyler Dauwalder 	\file Pattern.cpp
774cd43ecSTyler Dauwalder 	MIME sniffer pattern implementation
874cd43ecSTyler Dauwalder */
974cd43ecSTyler Dauwalder 
1074cd43ecSTyler Dauwalder #include <sniffer/Err.h>
1174cd43ecSTyler Dauwalder #include <sniffer/Pattern.h>
1274cd43ecSTyler Dauwalder #include <DataIO.h>
1374cd43ecSTyler Dauwalder #include <stdio.h>	// for SEEK_* defines
14e9e56038STyler Dauwalder #include <new>
1574cd43ecSTyler Dauwalder 
16b4080297SIngo Weinhold #include <AutoDeleter.h>
17b4080297SIngo Weinhold 
1809d84e61STyler Dauwalder using namespace BPrivate::Storage::Sniffer;
1974cd43ecSTyler Dauwalder 
Pattern(const std::string & string,const std::string & mask)20fe70cd16STyler Dauwalder Pattern::Pattern(const std::string &string, const std::string &mask)
2174cd43ecSTyler Dauwalder 	: fCStatus(B_NO_INIT)
2274cd43ecSTyler Dauwalder 	, fErrorMessage(NULL)
2374cd43ecSTyler Dauwalder {
2474cd43ecSTyler Dauwalder 	SetTo(string, mask);
2574cd43ecSTyler Dauwalder }
2674cd43ecSTyler Dauwalder 
Pattern(const std::string & string)27fe70cd16STyler Dauwalder Pattern::Pattern(const std::string &string)
28fe70cd16STyler Dauwalder 	: fCStatus(B_NO_INIT)
29fe70cd16STyler Dauwalder 	, fErrorMessage(NULL)
30fe70cd16STyler Dauwalder {
31fe70cd16STyler Dauwalder 	// Build a mask with all bits turned on of the
32fe70cd16STyler Dauwalder 	// appropriate length
33fe70cd16STyler Dauwalder 	std::string mask = "";
340a87d01cSTyler Dauwalder 	for (uint i = 0; i < string.length(); i++)
35fe70cd16STyler Dauwalder 		mask += (char)0xFF;
36fe70cd16STyler Dauwalder 	SetTo(string, mask);
37fe70cd16STyler Dauwalder }
38fe70cd16STyler Dauwalder 
~Pattern()3974cd43ecSTyler Dauwalder Pattern::~Pattern() {
4074cd43ecSTyler Dauwalder 	delete fErrorMessage;
4174cd43ecSTyler Dauwalder }
4274cd43ecSTyler Dauwalder 
4374cd43ecSTyler Dauwalder status_t
InitCheck() const4474cd43ecSTyler Dauwalder Pattern::InitCheck() const {
4574cd43ecSTyler Dauwalder 	return fCStatus;
4674cd43ecSTyler Dauwalder }
4774cd43ecSTyler Dauwalder 
4874cd43ecSTyler Dauwalder Err*
GetErr() const4974cd43ecSTyler Dauwalder Pattern::GetErr() const {
5074cd43ecSTyler Dauwalder 	if (fCStatus == B_OK)
5174cd43ecSTyler Dauwalder 		return NULL;
5274cd43ecSTyler Dauwalder 	else
5317e6de7aSshadow303 		return new(std::nothrow) Err(*fErrorMessage);
5474cd43ecSTyler Dauwalder }
5574cd43ecSTyler Dauwalder 
/*! \brief Debugging helper: prints the bytes of \a string to stdout as
	space separated hex values, followed by a newline.

	\param string The bytes to print.
	\param label Optional prefix; if not \c NULL, it is printed first,
		followed by ": ".
*/
void dumpStr(const std::string &string, const char *label = NULL)
{
	if (label)
		printf("%s: ", label);
	for (std::string::size_type i = 0; i < string.length(); i++) {
		// Go through unsigned char so that bytes >= 0x80 are printed as two
		// hex digits instead of a sign-extended value such as ffffffff.
		printf("%x ", (unsigned int)(unsigned char)string[i]);
	}
	printf("\n");
}
63fe70cd16STyler Dauwalder 
6474cd43ecSTyler Dauwalder status_t
SetTo(const std::string & string,const std::string & mask)65fe70cd16STyler Dauwalder Pattern::SetTo(const std::string &string, const std::string &mask) {
6674cd43ecSTyler Dauwalder 	fString = string;
6774cd43ecSTyler Dauwalder 	if (fString.length() == 0) {
6874cd43ecSTyler Dauwalder 		SetStatus(B_BAD_VALUE, "Sniffer pattern error: illegal empty pattern");
6974cd43ecSTyler Dauwalder 	} else {
7074cd43ecSTyler Dauwalder 		fMask = mask;
71fe70cd16STyler Dauwalder //		dumpStr(string, "data");
72fe70cd16STyler Dauwalder //		dumpStr(mask, "mask");
7374cd43ecSTyler Dauwalder 		if (fString.length() != fMask.length()) {
7474cd43ecSTyler Dauwalder 			SetStatus(B_BAD_VALUE, "Sniffer pattern error: pattern and mask lengths do not match");
7574cd43ecSTyler Dauwalder 		} else {
7674cd43ecSTyler Dauwalder 			SetStatus(B_OK);
7774cd43ecSTyler Dauwalder 		}
7874cd43ecSTyler Dauwalder 	}
79201e236eSIngo Weinhold 	return fCStatus;
8074cd43ecSTyler Dauwalder }
8174cd43ecSTyler Dauwalder 
8210db8711STyler Dauwalder /*! \brief Looks for a pattern match in the given data stream, starting from
8310db8711STyler Dauwalder 	each offset withing the given range. Returns true is a match is found,
8410db8711STyler Dauwalder 	false if not.
8510db8711STyler Dauwalder */
8674cd43ecSTyler Dauwalder bool
Sniff(Range range,BPositionIO * data,bool caseInsensitive) const8793d145bbSTyler Dauwalder Pattern::Sniff(Range range, BPositionIO *data, bool caseInsensitive) const {
8874cd43ecSTyler Dauwalder 	int32 start = range.Start();
8974cd43ecSTyler Dauwalder 	int32 end = range.End();
9074cd43ecSTyler Dauwalder 	off_t size = data->Seek(0, SEEK_END);
9174cd43ecSTyler Dauwalder 	if (end >= size)
9293d145bbSTyler Dauwalder 		end = size-1;	// Don't bother searching beyond the end of the stream
9374cd43ecSTyler Dauwalder 	for (int i = start; i <= end; i++) {
9493d145bbSTyler Dauwalder 		if (Sniff(i, size, data, caseInsensitive))
9574cd43ecSTyler Dauwalder 			return true;
9674cd43ecSTyler Dauwalder 	}
97fe70cd16STyler Dauwalder 	return false;
9874cd43ecSTyler Dauwalder }
9974cd43ecSTyler Dauwalder 
1002150894bSTyler Dauwalder // BytesNeeded
1012150894bSTyler Dauwalder /*! \brief Returns the number of bytes needed to perform a complete sniff, or an error
1022150894bSTyler Dauwalder 	code if something goes wrong.
1032150894bSTyler Dauwalder */
1042150894bSTyler Dauwalder ssize_t
BytesNeeded() const1052150894bSTyler Dauwalder Pattern::BytesNeeded() const
1062150894bSTyler Dauwalder {
1072150894bSTyler Dauwalder 	ssize_t result = InitCheck();
1082150894bSTyler Dauwalder 	if (result == B_OK)
1092150894bSTyler Dauwalder 		result = fString.length();
1102150894bSTyler Dauwalder 	return result;
1112150894bSTyler Dauwalder }
1122150894bSTyler Dauwalder 
1132150894bSTyler Dauwalder //#define OPTIMIZATION_IS_FOR_CHUMPS
1142150894bSTyler Dauwalder #if OPTIMIZATION_IS_FOR_CHUMPS
11574cd43ecSTyler Dauwalder bool
Sniff(off_t start,off_t size,BPositionIO * data,bool caseInsensitive) const11693d145bbSTyler Dauwalder Pattern::Sniff(off_t start, off_t size, BPositionIO *data, bool caseInsensitive) const {
11774cd43ecSTyler Dauwalder 	off_t len = fString.length();
11874cd43ecSTyler Dauwalder 	char *buffer = new(nothrow) char[len+1];
11974cd43ecSTyler Dauwalder 	if (buffer) {
120b4080297SIngo Weinhold 		ArrayDeleter<char> _(buffer);
12174cd43ecSTyler Dauwalder 		ssize_t bytesRead = data->ReadAt(start, buffer, len);
12293d145bbSTyler Dauwalder 		// \todo If there are fewer bytes left in the data stream
12374cd43ecSTyler Dauwalder 		// from the given position than the length of our data
12474cd43ecSTyler Dauwalder 		// string, should we just return false (which is what we're
12574cd43ecSTyler Dauwalder 		// doing now), or should we compare as many bytes as we
12674cd43ecSTyler Dauwalder 		// can and return true if those match?
12774cd43ecSTyler Dauwalder 		if (bytesRead < len)
12874cd43ecSTyler Dauwalder 			return false;
12974cd43ecSTyler Dauwalder 		else {
13074cd43ecSTyler Dauwalder 			bool result = true;
13193d145bbSTyler Dauwalder 			if (caseInsensitive) {
13293d145bbSTyler Dauwalder 				for (int i = 0; i < len; i++) {
13393d145bbSTyler Dauwalder 					char secondChar;
13493d145bbSTyler Dauwalder 					if ('A' <= fString[i] && fString[i] <= 'Z')
13593d145bbSTyler Dauwalder 						secondChar = 'a' + (fString[i] - 'A');	// Also check lowercase
13693d145bbSTyler Dauwalder 					else if ('a' <= fString[i] && fString[i] <= 'z')
13793d145bbSTyler Dauwalder 						secondChar = 'A' + (fString[i] - 'a');	// Also check uppercase
13893d145bbSTyler Dauwalder 					else
13993d145bbSTyler Dauwalder 						secondChar = fString[i]; // Check the same char twice as punishment for doing a case insensitive search ;-)
14093d145bbSTyler Dauwalder 					if (((fString[i] & fMask[i]) != (buffer[i] & fMask[i]))
14193d145bbSTyler Dauwalder 					     && ((secondChar & fMask[i]) != (buffer[i] & fMask[i])))
14293d145bbSTyler Dauwalder 					{
14393d145bbSTyler Dauwalder 						result = false;
14493d145bbSTyler Dauwalder 						break;
14593d145bbSTyler Dauwalder 					}
14693d145bbSTyler Dauwalder 				}
14793d145bbSTyler Dauwalder 			} else {
14874cd43ecSTyler Dauwalder 				for (int i = 0; i < len; i++) {
14974cd43ecSTyler Dauwalder 					if ((fString[i] & fMask[i]) != (buffer[i] & fMask[i])) {
15074cd43ecSTyler Dauwalder 						result = false;
15174cd43ecSTyler Dauwalder 						break;
15274cd43ecSTyler Dauwalder 					}
15374cd43ecSTyler Dauwalder 				}
15493d145bbSTyler Dauwalder 			}
15574cd43ecSTyler Dauwalder 			return result;
15674cd43ecSTyler Dauwalder 		}
15774cd43ecSTyler Dauwalder 	} else
15874cd43ecSTyler Dauwalder 		return false;
15974cd43ecSTyler Dauwalder }
1602150894bSTyler Dauwalder #else
1612150894bSTyler Dauwalder bool
Sniff(off_t start,off_t size,BPositionIO * data,bool caseInsensitive) const1622150894bSTyler Dauwalder Pattern::Sniff(off_t start, off_t size, BPositionIO *data, bool caseInsensitive) const {
1632150894bSTyler Dauwalder 	off_t len = fString.length();
16417e6de7aSshadow303 	char *buffer = new(std::nothrow) char[len+1];
1652150894bSTyler Dauwalder 	if (buffer) {
166b4080297SIngo Weinhold 		ArrayDeleter<char> _(buffer);
1672150894bSTyler Dauwalder 		ssize_t bytesRead = data->ReadAt(start, buffer, len);
1682150894bSTyler Dauwalder 		// \todo If there are fewer bytes left in the data stream
1692150894bSTyler Dauwalder 		// from the given position than the length of our data
1702150894bSTyler Dauwalder 		// string, should we just return false (which is what we're
1712150894bSTyler Dauwalder 		// doing now), or should we compare as many bytes as we
1722150894bSTyler Dauwalder 		// can and return true if those match?
1732150894bSTyler Dauwalder 		if (bytesRead < len)
1742150894bSTyler Dauwalder 			return false;
1752150894bSTyler Dauwalder 		else {
1762150894bSTyler Dauwalder 			bool result = true;
1772150894bSTyler Dauwalder 			if (caseInsensitive) {
1782150894bSTyler Dauwalder 				for (int i = 0; i < len; i++) {
1792150894bSTyler Dauwalder 					char secondChar;
1802150894bSTyler Dauwalder 					if ('A' <= fString[i] && fString[i] <= 'Z')
1812150894bSTyler Dauwalder 						secondChar = 'a' + (fString[i] - 'A');	// Also check lowercase
1822150894bSTyler Dauwalder 					else if ('a' <= fString[i] && fString[i] <= 'z')
1832150894bSTyler Dauwalder 						secondChar = 'A' + (fString[i] - 'a');	// Also check uppercase
1842150894bSTyler Dauwalder 					else
1852150894bSTyler Dauwalder 						secondChar = fString[i]; // Check the same char twice as punishment for doing a case insensitive search ;-)
1862150894bSTyler Dauwalder 					if (((fString[i] & fMask[i]) != (buffer[i] & fMask[i]))
1872150894bSTyler Dauwalder 					     && ((secondChar & fMask[i]) != (buffer[i] & fMask[i])))
1882150894bSTyler Dauwalder 					{
1892150894bSTyler Dauwalder 						result = false;
1902150894bSTyler Dauwalder 						break;
1912150894bSTyler Dauwalder 					}
1922150894bSTyler Dauwalder 				}
1932150894bSTyler Dauwalder 			} else {
1942150894bSTyler Dauwalder 				for (int i = 0; i < len; i++) {
1952150894bSTyler Dauwalder 					if ((fString[i] & fMask[i]) != (buffer[i] & fMask[i])) {
1962150894bSTyler Dauwalder 						result = false;
1972150894bSTyler Dauwalder 						break;
1982150894bSTyler Dauwalder 					}
1992150894bSTyler Dauwalder 				}
2002150894bSTyler Dauwalder 			}
2012150894bSTyler Dauwalder 			return result;
2022150894bSTyler Dauwalder 		}
2032150894bSTyler Dauwalder 	} else
2042150894bSTyler Dauwalder 		return false;
2052150894bSTyler Dauwalder }
2062150894bSTyler Dauwalder #endif
20774cd43ecSTyler Dauwalder 
20874cd43ecSTyler Dauwalder void
SetStatus(status_t status,const char * msg)20974cd43ecSTyler Dauwalder Pattern::SetStatus(status_t status, const char *msg) {
21074cd43ecSTyler Dauwalder 	fCStatus = status;
21174cd43ecSTyler Dauwalder 	if (status == B_OK)
21274cd43ecSTyler Dauwalder 		SetErrorMessage(NULL);
21374cd43ecSTyler Dauwalder 	else {
21474cd43ecSTyler Dauwalder 		if (msg)
21574cd43ecSTyler Dauwalder 			SetErrorMessage(msg);
21674cd43ecSTyler Dauwalder 		else {
21774cd43ecSTyler Dauwalder 			SetErrorMessage("Sniffer parser error: Pattern::SetStatus() -- NULL msg with non-B_OK status.\n"
21874cd43ecSTyler Dauwalder 				"(This is officially the most helpful error message you will ever receive ;-)");
21974cd43ecSTyler Dauwalder 		}
22074cd43ecSTyler Dauwalder 	}
22174cd43ecSTyler Dauwalder }
22274cd43ecSTyler Dauwalder 
22374cd43ecSTyler Dauwalder void
SetErrorMessage(const char * msg)22474cd43ecSTyler Dauwalder Pattern::SetErrorMessage(const char *msg) {
22574cd43ecSTyler Dauwalder 	delete fErrorMessage;
22617e6de7aSshadow303 	fErrorMessage = (msg) ? (new(std::nothrow) Err(msg, -1)) : (NULL);
22774cd43ecSTyler Dauwalder }
22809d84e61STyler Dauwalder 
22909d84e61STyler Dauwalder 
23009d84e61STyler Dauwalder 
231