xref: /haiku/src/kits/tracker/RegExp.h (revision 8bc62a0a1247f4697b3be07d51a2670ad23af21b)
102be5353SAxel Dörfler /*
202be5353SAxel Dörfler Open Tracker License
302be5353SAxel Dörfler 
402be5353SAxel Dörfler Terms and Conditions
502be5353SAxel Dörfler 
602be5353SAxel Dörfler Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
702be5353SAxel Dörfler 
802be5353SAxel Dörfler Permission is hereby granted, free of charge, to any person obtaining a copy of
902be5353SAxel Dörfler this software and associated documentation files (the "Software"), to deal in
1002be5353SAxel Dörfler the Software without restriction, including without limitation the rights to
1102be5353SAxel Dörfler use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
1202be5353SAxel Dörfler of the Software, and to permit persons to whom the Software is furnished to do
1302be5353SAxel Dörfler so, subject to the following conditions:
1402be5353SAxel Dörfler 
1502be5353SAxel Dörfler The above copyright notice and this permission notice applies to all licensees
1602be5353SAxel Dörfler and shall be included in all copies or substantial portions of the Software.
1702be5353SAxel Dörfler 
1802be5353SAxel Dörfler THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1902be5353SAxel Dörfler IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
2002be5353SAxel Dörfler FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2102be5353SAxel Dörfler BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
2202be5353SAxel Dörfler AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
2302be5353SAxel Dörfler WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2402be5353SAxel Dörfler 
2502be5353SAxel Dörfler Except as contained in this notice, the name of Be Incorporated shall not be
2602be5353SAxel Dörfler used in advertising or otherwise to promote the sale, use or other dealings in
2702be5353SAxel Dörfler this Software without prior written authorization from Be Incorporated.
2802be5353SAxel Dörfler 
2902be5353SAxel Dörfler Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
3002be5353SAxel Dörfler of Be Incorporated in the United States and other countries. Other brand product
3102be5353SAxel Dörfler names are registered trademarks or trademarks of their respective holders.
3202be5353SAxel Dörfler All rights reserved.
3302be5353SAxel Dörfler */
34b05aa8b5SJohn Scipione #ifndef _REG_EXP_H
35b05aa8b5SJohn Scipione #define _REG_EXP_H
3602be5353SAxel Dörfler 
3702be5353SAxel Dörfler 
3802be5353SAxel Dörfler // This code is based on regexp.c, v.1.3 by Henry Spencer:
3902be5353SAxel Dörfler 
4002be5353SAxel Dörfler // @(#)regexp.c	1.3 of 18 April 87
4102be5353SAxel Dörfler //
4202be5353SAxel Dörfler //	Copyright (c) 1986 by University of Toronto.
4302be5353SAxel Dörfler //	Written by Henry Spencer.  Not derived from licensed software.
4402be5353SAxel Dörfler //
4502be5353SAxel Dörfler //	Permission is granted to anyone to use this software for any
4602be5353SAxel Dörfler //	purpose on any computer system, and to redistribute it freely,
4702be5353SAxel Dörfler //	subject to the following restrictions:
4802be5353SAxel Dörfler //
4902be5353SAxel Dörfler //	1. The author is not responsible for the consequences of use of
5002be5353SAxel Dörfler //		this software, no matter how awful, even if they arise
5102be5353SAxel Dörfler //		from defects in it.
5202be5353SAxel Dörfler //
5302be5353SAxel Dörfler //	2. The origin of this software must not be misrepresented, either
5402be5353SAxel Dörfler //		by explicit claim or by omission.
5502be5353SAxel Dörfler //
5602be5353SAxel Dörfler //	3. Altered versions must be plainly marked as such, and must not
5702be5353SAxel Dörfler //		be misrepresented as being the original software.
5802be5353SAxel Dörfler //
5902be5353SAxel Dörfler // Beware that some of this code is subtly aware of the way operator
6002be5353SAxel Dörfler // precedence is structured in regular expressions.  Serious changes in
6102be5353SAxel Dörfler // regular-expression syntax might require a total rethink.
6202be5353SAxel Dörfler //
6302be5353SAxel Dörfler 
6402be5353SAxel Dörfler // ALTERED VERSION: Adapted to ANSI C and C++ for the OpenTracker
6502be5353SAxel Dörfler // project (www.opentracker.org), Jul 11, 2000.
6602be5353SAxel Dörfler 
6702be5353SAxel Dörfler 
6802be5353SAxel Dörfler #include <String.h>
6902be5353SAxel Dörfler 
70b05aa8b5SJohn Scipione 
7102be5353SAxel Dörfler namespace BPrivate {
7202be5353SAxel Dörfler 
73b05aa8b5SJohn Scipione 
7402be5353SAxel Dörfler enum {
7502be5353SAxel Dörfler 	REGEXP_UNMATCHED_PARENTHESIS = B_ERRORS_END,
7602be5353SAxel Dörfler 	REGEXP_TOO_BIG,
7702be5353SAxel Dörfler 	REGEXP_TOO_MANY_PARENTHESIS,
7802be5353SAxel Dörfler 	REGEXP_JUNK_ON_END,
7902be5353SAxel Dörfler 	REGEXP_STAR_PLUS_OPERAND_EMPTY,
8002be5353SAxel Dörfler 	REGEXP_NESTED_STAR_QUESTION_PLUS,
8102be5353SAxel Dörfler 	REGEXP_INVALID_BRACKET_RANGE,
8202be5353SAxel Dörfler 	REGEXP_UNMATCHED_BRACKET,
8302be5353SAxel Dörfler 	REGEXP_INTERNAL_ERROR,
8402be5353SAxel Dörfler 	REGEXP_QUESTION_PLUS_STAR_FOLLOWS_NOTHING,
8502be5353SAxel Dörfler 	REGEXP_TRAILING_BACKSLASH,
8602be5353SAxel Dörfler 	REGEXP_CORRUPTED_PROGRAM,
8702be5353SAxel Dörfler 	REGEXP_MEMORY_CORRUPTION,
8802be5353SAxel Dörfler 	REGEXP_CORRUPTED_POINTERS,
8902be5353SAxel Dörfler 	REGEXP_CORRUPTED_OPCODE
9002be5353SAxel Dörfler };
9102be5353SAxel Dörfler 
92*96d9dde0SJohn Scipione 
9302be5353SAxel Dörfler const int32 kSubExpressionMax = 10;
9402be5353SAxel Dörfler 
95*96d9dde0SJohn Scipione 
9602be5353SAxel Dörfler struct regexp {
9702be5353SAxel Dörfler 	const char* startp[kSubExpressionMax];
9802be5353SAxel Dörfler 	const char* endp[kSubExpressionMax];
99b05aa8b5SJohn Scipione 	char regstart;		// Internal use only. See RegExp.cpp for details.
100b05aa8b5SJohn Scipione 	char reganch;		// Internal use only.
101b05aa8b5SJohn Scipione 	const char* regmust;// Internal use only.
102b05aa8b5SJohn Scipione 	int regmlen;		// Internal use only.
103b05aa8b5SJohn Scipione 	char program[1];	// Unwarranted chumminess with compiler.
10402be5353SAxel Dörfler };
10502be5353SAxel Dörfler 
10602be5353SAxel Dörfler 
107b05aa8b5SJohn Scipione class RegExp {
10802be5353SAxel Dörfler public:
10902be5353SAxel Dörfler 	RegExp();
11002be5353SAxel Dörfler 	RegExp(const char*);
11102be5353SAxel Dörfler 	RegExp(const BString&);
11202be5353SAxel Dörfler 	~RegExp();
11302be5353SAxel Dörfler 
11402be5353SAxel Dörfler 	status_t InitCheck() const;
11502be5353SAxel Dörfler 
11602be5353SAxel Dörfler 	status_t SetTo(const char*);
11702be5353SAxel Dörfler 	status_t SetTo(const BString&);
11802be5353SAxel Dörfler 
11902be5353SAxel Dörfler 	bool Matches(const char* string) const;
12002be5353SAxel Dörfler 	bool Matches(const BString&) const;
12102be5353SAxel Dörfler 
12202be5353SAxel Dörfler 	int32 RunMatcher(regexp*, const char*) const;
12302be5353SAxel Dörfler 	regexp* Compile(const char*);
12402be5353SAxel Dörfler 	regexp* Expression() const;
12502be5353SAxel Dörfler 	const char* ErrorString() const;
12602be5353SAxel Dörfler 
12702be5353SAxel Dörfler #ifdef DEBUG
12802be5353SAxel Dörfler 	void Dump();
12902be5353SAxel Dörfler #endif
13002be5353SAxel Dörfler 
13102be5353SAxel Dörfler private:
13202be5353SAxel Dörfler 	void SetError(status_t error) const;
13302be5353SAxel Dörfler 
13402be5353SAxel Dörfler 	// Working functions for Compile():
13502be5353SAxel Dörfler 	char* Reg(int32, int32*);
13602be5353SAxel Dörfler 	char* Branch(int32*);
13702be5353SAxel Dörfler 	char* Piece(int32*);
13802be5353SAxel Dörfler 	char* Atom(int32*);
13902be5353SAxel Dörfler 	char* Node(char);
14002be5353SAxel Dörfler 	char* Next(char*);
14102be5353SAxel Dörfler 	const char* Next(const char*) const;
14202be5353SAxel Dörfler 	void Char(char);
14302be5353SAxel Dörfler 	void Insert(char, char*);
14402be5353SAxel Dörfler 	void Tail(char*, char*);
14502be5353SAxel Dörfler 	void OpTail(char*, char*);
14602be5353SAxel Dörfler 
14702be5353SAxel Dörfler 	// Working functions for RunMatcher():
14802be5353SAxel Dörfler 	int32 Try(regexp*, const char*) const;
14902be5353SAxel Dörfler 	int32 Match(const char*) const;
15002be5353SAxel Dörfler 	int32 Repeat(const char*) const;
15102be5353SAxel Dörfler 
15202be5353SAxel Dörfler 	// Utility functions:
15302be5353SAxel Dörfler #ifdef DEBUG
15402be5353SAxel Dörfler 	char* Prop(const char*) const;
15502be5353SAxel Dörfler 	void RegExpError(const char*) const;
15602be5353SAxel Dörfler #endif
15702be5353SAxel Dörfler 	inline int32 UCharAt(const char* p) const;
15802be5353SAxel Dörfler 	inline char* Operand(char* p) const;
15902be5353SAxel Dörfler 	inline const char* Operand(const char* p) const;
16002be5353SAxel Dörfler 	inline bool	IsMult(char c) const;
16102be5353SAxel Dörfler 
16202be5353SAxel Dörfler // --------- Variables -------------
16302be5353SAxel Dörfler 
16402be5353SAxel Dörfler 	mutable status_t fError;
16502be5353SAxel Dörfler 	regexp* fRegExp;
16602be5353SAxel Dörfler 
16702be5353SAxel Dörfler 	// Work variables for Compile().
16802be5353SAxel Dörfler 	const char* fInputScanPointer;
16902be5353SAxel Dörfler 	int32 fParenthesisCount;
17002be5353SAxel Dörfler 	char fDummy;
171b05aa8b5SJohn Scipione 	char* fCodeEmitPointer;
172b05aa8b5SJohn Scipione 		// &fDummy = don't.
17302be5353SAxel Dörfler 	long fCodeSize;
17402be5353SAxel Dörfler 
17502be5353SAxel Dörfler 	// Work variables for RunMatcher().
17602be5353SAxel Dörfler 	mutable const char* fStringInputPointer;
177b05aa8b5SJohn Scipione 	mutable const char* fRegBol;
178b05aa8b5SJohn Scipione 		// Beginning of input, for ^ check.
17902be5353SAxel Dörfler 	mutable const char** fStartPArrayPointer;
18002be5353SAxel Dörfler 	mutable const char** fEndPArrayPointer;
18102be5353SAxel Dörfler };
18202be5353SAxel Dörfler 
183b05aa8b5SJohn Scipione 
18402be5353SAxel Dörfler } // namespace BPrivate
18502be5353SAxel Dörfler 
18602be5353SAxel Dörfler using namespace BPrivate;
18702be5353SAxel Dörfler 
188*96d9dde0SJohn Scipione 
189b05aa8b5SJohn Scipione #endif	// _REG_EXP_H
190