xref: /haiku/src/kits/tracker/RegExp.h (revision 02be5353fd41ea40007a382e13f007eaf6b5a3a0)
1*02be5353SAxel Dörfler /*
2*02be5353SAxel Dörfler Open Tracker License
3*02be5353SAxel Dörfler 
4*02be5353SAxel Dörfler Terms and Conditions
5*02be5353SAxel Dörfler 
6*02be5353SAxel Dörfler Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7*02be5353SAxel Dörfler 
8*02be5353SAxel Dörfler Permission is hereby granted, free of charge, to any person obtaining a copy of
9*02be5353SAxel Dörfler this software and associated documentation files (the "Software"), to deal in
10*02be5353SAxel Dörfler the Software without restriction, including without limitation the rights to
11*02be5353SAxel Dörfler use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12*02be5353SAxel Dörfler of the Software, and to permit persons to whom the Software is furnished to do
13*02be5353SAxel Dörfler so, subject to the following conditions:
14*02be5353SAxel Dörfler 
15*02be5353SAxel Dörfler The above copyright notice and this permission notice applies to all licensees
16*02be5353SAxel Dörfler and shall be included in all copies or substantial portions of the Software.
17*02be5353SAxel Dörfler 
18*02be5353SAxel Dörfler THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19*02be5353SAxel Dörfler IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20*02be5353SAxel Dörfler FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21*02be5353SAxel Dörfler BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22*02be5353SAxel Dörfler AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23*02be5353SAxel Dörfler WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24*02be5353SAxel Dörfler 
25*02be5353SAxel Dörfler Except as contained in this notice, the name of Be Incorporated shall not be
26*02be5353SAxel Dörfler used in advertising or otherwise to promote the sale, use or other dealings in
27*02be5353SAxel Dörfler this Software without prior written authorization from Be Incorporated.
28*02be5353SAxel Dörfler 
29*02be5353SAxel Dörfler Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30*02be5353SAxel Dörfler of Be Incorporated in the United States and other countries. Other brand product
31*02be5353SAxel Dörfler names are registered trademarks or trademarks of their respective holders.
32*02be5353SAxel Dörfler All rights reserved.
33*02be5353SAxel Dörfler */
34*02be5353SAxel Dörfler 
35*02be5353SAxel Dörfler 
36*02be5353SAxel Dörfler // This code is based on regexp.c, v.1.3 by Henry Spencer:
37*02be5353SAxel Dörfler 
38*02be5353SAxel Dörfler // @(#)regexp.c	1.3 of 18 April 87
39*02be5353SAxel Dörfler //
40*02be5353SAxel Dörfler //	Copyright (c) 1986 by University of Toronto.
41*02be5353SAxel Dörfler //	Written by Henry Spencer.  Not derived from licensed software.
42*02be5353SAxel Dörfler //
43*02be5353SAxel Dörfler //	Permission is granted to anyone to use this software for any
44*02be5353SAxel Dörfler //	purpose on any computer system, and to redistribute it freely,
45*02be5353SAxel Dörfler //	subject to the following restrictions:
46*02be5353SAxel Dörfler //
47*02be5353SAxel Dörfler //	1. The author is not responsible for the consequences of use of
48*02be5353SAxel Dörfler //		this software, no matter how awful, even if they arise
49*02be5353SAxel Dörfler //		from defects in it.
50*02be5353SAxel Dörfler //
51*02be5353SAxel Dörfler //	2. The origin of this software must not be misrepresented, either
52*02be5353SAxel Dörfler //		by explicit claim or by omission.
53*02be5353SAxel Dörfler //
54*02be5353SAxel Dörfler //	3. Altered versions must be plainly marked as such, and must not
55*02be5353SAxel Dörfler //		be misrepresented as being the original software.
56*02be5353SAxel Dörfler //
57*02be5353SAxel Dörfler // Beware that some of this code is subtly aware of the way operator
58*02be5353SAxel Dörfler // precedence is structured in regular expressions.  Serious changes in
59*02be5353SAxel Dörfler // regular-expression syntax might require a total rethink.
60*02be5353SAxel Dörfler //
61*02be5353SAxel Dörfler 
62*02be5353SAxel Dörfler // ALTERED VERSION: Adapted to ANSI C and C++ for the OpenTracker
63*02be5353SAxel Dörfler // project (www.opentracker.org), Jul 11, 2000.
64*02be5353SAxel Dörfler 
65*02be5353SAxel Dörfler #ifndef _REG_EXP_H
66*02be5353SAxel Dörfler #define _REG_EXP_H
67*02be5353SAxel Dörfler 
68*02be5353SAxel Dörfler #include <String.h>
69*02be5353SAxel Dörfler 
70*02be5353SAxel Dörfler namespace BPrivate {
71*02be5353SAxel Dörfler 
// Regular-expression specific status codes. The first enumerator is pinned
// to B_ERRORS_END and each following one is the previous value plus one, so
// the whole set is numbered from B_ERRORS_END upward.
// NOTE(review): presumably these are the values stored by SetError() and
// reported by RegExp::InitCheck()/ErrorString() -- confirm in RegExp.cpp.
72*02be5353SAxel Dörfler enum {
73*02be5353SAxel Dörfler 	REGEXP_UNMATCHED_PARENTHESIS = B_ERRORS_END,
74*02be5353SAxel Dörfler 	REGEXP_TOO_BIG,
75*02be5353SAxel Dörfler 	REGEXP_TOO_MANY_PARENTHESIS,
76*02be5353SAxel Dörfler 	REGEXP_JUNK_ON_END,
77*02be5353SAxel Dörfler 	REGEXP_STAR_PLUS_OPERAND_EMPTY,
78*02be5353SAxel Dörfler 	REGEXP_NESTED_STAR_QUESTION_PLUS,
79*02be5353SAxel Dörfler 	REGEXP_INVALID_BRACKET_RANGE,
80*02be5353SAxel Dörfler 	REGEXP_UNMATCHED_BRACKET,
81*02be5353SAxel Dörfler 	REGEXP_INTERNAL_ERROR,
82*02be5353SAxel Dörfler 	REGEXP_QUESTION_PLUS_STAR_FOLLOWS_NOTHING,
83*02be5353SAxel Dörfler 	REGEXP_TRAILING_BACKSLASH,
84*02be5353SAxel Dörfler 	REGEXP_CORRUPTED_PROGRAM,
85*02be5353SAxel Dörfler 	REGEXP_MEMORY_CORRUPTION,
86*02be5353SAxel Dörfler 	REGEXP_CORRUPTED_POINTERS,
87*02be5353SAxel Dörfler 	REGEXP_CORRUPTED_OPCODE
88*02be5353SAxel Dörfler };
89*02be5353SAxel Dörfler 
// Number of slots in the startp[] and endp[] arrays of struct regexp.
// NOTE(review): presumably one slot per recorded (sub)expression match --
// confirm how the slots are assigned in RegExp.cpp.
90*02be5353SAxel Dörfler const int32 kSubExpressionMax = 10;
91*02be5353SAxel Dörfler 
// Compiled regular expression record, derived from Henry Spencer's regexp.c
// (see the attribution at the top of this file).
//
// startp[] and endp[] each hold kSubExpressionMax pointers into constant
// character data. The remaining fields are marked as internal; their
// meaning is described in RegExp.cpp.
//
// NOTE(review): program[] is declared with a single element; the
// "chumminess with compiler" remark suggests the structure is allocated
// larger than sizeof(regexp) so that program[] extends past the end --
// confirm against the allocation in RegExp.cpp before copying or embedding
// this struct by value.
92*02be5353SAxel Dörfler struct regexp {
93*02be5353SAxel Dörfler 	const char *startp[kSubExpressionMax];
94*02be5353SAxel Dörfler 	const char *endp[kSubExpressionMax];
95*02be5353SAxel Dörfler 	char regstart;		/* Internal use only. See RegExp.cpp for details. */
96*02be5353SAxel Dörfler 	char reganch;		/* Internal use only. */
97*02be5353SAxel Dörfler 	const char *regmust;/* Internal use only. */
98*02be5353SAxel Dörfler 	int regmlen;		/* Internal use only. */
99*02be5353SAxel Dörfler 	char program[1];	/* Unwarranted chumminess with compiler. */
100*02be5353SAxel Dörfler };
101*02be5353SAxel Dörfler 
// C++ wrapper around the Henry Spencer style regular expression compiler
// and matcher declared in this header. Only declarations are visible here;
// all behavior is implemented in RegExp.cpp.
//
// The object keeps a status code (fError), a pointer to a regexp record
// (fRegExp), and separate groups of scratch variables used while compiling
// and while matching.
// NOTE(review): the matching work variables are mutable and written from
// const member functions, so concurrent matching on a single instance is
// presumably not safe -- confirm before sharing an instance across threads.
102*02be5353SAxel Dörfler class RegExp {
103*02be5353SAxel Dörfler 
104*02be5353SAxel Dörfler public:
	// Constructors: default, from a C string, and from a BString.
	// NOTE(review): the two pattern constructors presumably compile the
	// given pattern immediately -- confirm in RegExp.cpp.
105*02be5353SAxel Dörfler 	RegExp();
106*02be5353SAxel Dörfler 	RegExp(const char *);
107*02be5353SAxel Dörfler 	RegExp(const BString &);
108*02be5353SAxel Dörfler 	~RegExp();
109*02be5353SAxel Dörfler 
	// Returns a status_t describing the state of the object.
	// NOTE(review): presumably the value last stored in fError.
110*02be5353SAxel Dörfler 	status_t InitCheck() const;
111*02be5353SAxel Dörfler 
	// Assigns a new pattern, given as a C string or a BString, and returns
	// a status_t.
112*02be5353SAxel Dörfler 	status_t SetTo(const char*);
113*02be5353SAxel Dörfler 	status_t SetTo(const BString &);
114*02be5353SAxel Dörfler 
	// Tests a string against the expression held by this object.
115*02be5353SAxel Dörfler 	bool Matches(const char *string) const;
116*02be5353SAxel Dörfler 	bool Matches(const BString &) const;
117*02be5353SAxel Dörfler 
	// Lower-level entry points that work directly with regexp records.
	// NOTE(review): who owns the regexp returned by Compile() and
	// Expression() is not visible in this header -- check RegExp.cpp
	// before freeing or retaining the pointer.
118*02be5353SAxel Dörfler 	int32 RunMatcher(regexp *, const char *) const;
119*02be5353SAxel Dörfler 	regexp *Compile(const char *);
120*02be5353SAxel Dörfler 	regexp *Expression() const;
121*02be5353SAxel Dörfler 	const char *ErrorString() const;
122*02be5353SAxel Dörfler 
	// Dump() is only declared when DEBUG is defined.
123*02be5353SAxel Dörfler #ifdef DEBUG
124*02be5353SAxel Dörfler 	void Dump();
125*02be5353SAxel Dörfler #endif
126*02be5353SAxel Dörfler 
127*02be5353SAxel Dörfler private:
128*02be5353SAxel Dörfler 
	// Declared const; fError below is mutable, which is what lets const
	// member functions record an error code.
129*02be5353SAxel Dörfler 	void SetError(status_t error) const;
130*02be5353SAxel Dörfler 
131*02be5353SAxel Dörfler 	// Working functions for Compile():
132*02be5353SAxel Dörfler 	char *Reg(int32, int32 *);
133*02be5353SAxel Dörfler 	char *Branch(int32 *);
134*02be5353SAxel Dörfler 	char *Piece(int32 *);
135*02be5353SAxel Dörfler 	char *Atom(int32 *);
136*02be5353SAxel Dörfler 	char *Node(char);
137*02be5353SAxel Dörfler 	char *Next(char *);
138*02be5353SAxel Dörfler 	const char *Next(const char *) const;
139*02be5353SAxel Dörfler 	void Char(char);
140*02be5353SAxel Dörfler 	void Insert(char, char *);
141*02be5353SAxel Dörfler 	void Tail(char *, char *);
142*02be5353SAxel Dörfler 	void OpTail(char *, char *);
143*02be5353SAxel Dörfler 
144*02be5353SAxel Dörfler 	// Working functions for RunMatcher():
145*02be5353SAxel Dörfler 	int32 Try(regexp *, const char *) const;
146*02be5353SAxel Dörfler 	int32 Match(const char *) const;
147*02be5353SAxel Dörfler 	int32 Repeat(const char *) const;
148*02be5353SAxel Dörfler 
149*02be5353SAxel Dörfler 	// Utility functions:
	// Prop() and RegExpError() are only declared when DEBUG is defined.
150*02be5353SAxel Dörfler #ifdef DEBUG
151*02be5353SAxel Dörfler 	char *Prop(const char *) const;
152*02be5353SAxel Dörfler 	void RegExpError(const char *) const;
153*02be5353SAxel Dörfler #endif
	// Declared inline here without a body in this header.
	// NOTE(review): the definitions are presumably in RegExp.cpp, which
	// would restrict their use to that translation unit -- confirm.
154*02be5353SAxel Dörfler 	inline int32 UCharAt(const char *p) const;
155*02be5353SAxel Dörfler 	inline char *Operand(char* p) const;
156*02be5353SAxel Dörfler 	inline const char *Operand(const char* p) const;
157*02be5353SAxel Dörfler 	inline bool	IsMult(char c) const;
158*02be5353SAxel Dörfler 
159*02be5353SAxel Dörfler // --------- Variables -------------
160*02be5353SAxel Dörfler 
	// Status code (mutable so that const members can update it) and the
	// regexp record associated with this object.
161*02be5353SAxel Dörfler 	mutable status_t fError;
162*02be5353SAxel Dörfler 	regexp *fRegExp;
163*02be5353SAxel Dörfler 
164*02be5353SAxel Dörfler 	// Work variables for Compile().
165*02be5353SAxel Dörfler 
166*02be5353SAxel Dörfler 	const char *fInputScanPointer;
167*02be5353SAxel Dörfler 	int32 fParenthesisCount;
168*02be5353SAxel Dörfler 	char fDummy;
169*02be5353SAxel Dörfler 	char *fCodeEmitPointer;		// &fDummy = don't.
170*02be5353SAxel Dörfler 	long fCodeSize;
171*02be5353SAxel Dörfler 
172*02be5353SAxel Dörfler 	// Work variables for RunMatcher().
	// All of these are mutable because the matching functions above are
	// declared const.
173*02be5353SAxel Dörfler 
174*02be5353SAxel Dörfler 	mutable const char *fStringInputPointer;
175*02be5353SAxel Dörfler 	mutable const char *fRegBol;	// Beginning of input, for ^ check.
176*02be5353SAxel Dörfler 	mutable const char **fStartPArrayPointer;
177*02be5353SAxel Dörfler 	mutable const char **fEndPArrayPointer;
178*02be5353SAxel Dörfler };
179*02be5353SAxel Dörfler 
180*02be5353SAxel Dörfler } // namespace BPrivate
181*02be5353SAxel Dörfler 
182*02be5353SAxel Dörfler using namespace BPrivate;
183*02be5353SAxel Dörfler 
184*02be5353SAxel Dörfler #endif
185