1 /*
2 * Copyright 2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2013, Rene Gollent, rene@gollent.com.
4 * Distributed under the terms of the MIT License.
5 */
6
7
8 #include <RegExp.h>
9
10 #include <new>
11
12 #include <regex.h>
13
14 #include <String.h>
15
16 #include <Referenceable.h>
17
18
19 // #pragma mark - RegExp::Data
20
21
22 struct RegExp::Data : public BReferenceable {
DataRegExp::Data23 Data(const char* pattern, PatternType patternType, bool caseSensitive)
24 :
25 BReferenceable()
26 {
27 // convert the shell pattern to a regular expression
28 BString patternString;
29 if (patternType == PATTERN_TYPE_WILDCARD) {
30 while (*pattern != '\0') {
31 char c = *pattern++;
32 switch (c) {
33 case '?':
34 patternString += '.';
35 continue;
36 case '*':
37 patternString += ".*";
38 continue;
39 case '[':
40 {
41 // find the matching ']' first
42 const char* end = pattern;
43 while (*end != ']') {
44 if (*end++ == '\0') {
45 fError = REG_EBRACK;
46 return;
47 }
48 }
49
50 if (pattern == end) {
51 // Empty bracket expression. It will never match
52 // anything. Strictly speaking this is not
53 // considered an error, but we handle it like one.
54 fError = REG_EBRACK;
55 return;
56 }
57
58 patternString += '[';
59
60 // We need to avoid "[." ... ".]", "[=" ... "=]", and
61 // "[:" ... ":]" sequences, since those have special
62 // meaning in regular expressions. If we encounter
63 // a '[' followed by either of '.', '=', or ':', we
64 // replace the '[' by "[.[.]".
65 while (pattern < end) {
66 c = *pattern++;
67 if (c == '[' && pattern < end) {
68 switch (*pattern) {
69 case '.':
70 case '=':
71 case ':':
72 patternString += "[.[.]";
73 continue;
74 }
75 }
76 patternString += c;
77 }
78
79 pattern++;
80 patternString += ']';
81 break;
82 }
83
84 case '\\':
85 {
86 // Quotes the next character. Works the same way for
87 // regular expressions.
88 if (*pattern == '\0') {
89 fError = REG_EESCAPE;
90 return;
91 }
92
93 patternString += '\\';
94 patternString += *pattern++;
95 break;
96 }
97
98 case '^':
99 case '.':
100 case '$':
101 case '(':
102 case ')':
103 case '|':
104 case '+':
105 case '{':
106 // need to be quoted
107 patternString += '\\';
108 // fall through
109 default:
110 patternString += c;
111 break;
112 }
113 }
114
115 pattern = patternString.String();
116 }
117
118 int flags = REG_EXTENDED;
119 if (!caseSensitive)
120 flags |= REG_ICASE;
121
122 fError = regcomp(&fCompiledExpression, pattern, flags);
123 }
124
~DataRegExp::Data125 ~Data()
126 {
127 if (fError == 0)
128 regfree(&fCompiledExpression);
129 }
130
IsValidRegExp::Data131 bool IsValid() const
132 {
133 return fError == 0;
134 }
135
CompiledExpressionRegExp::Data136 const regex_t* CompiledExpression() const
137 {
138 return &fCompiledExpression;
139 }
140
141 private:
142 int fError;
143 regex_t fCompiledExpression;
144 };
145
146
147 // #pragma mark - RegExp::MatchResultData
148
149
150 struct RegExp::MatchResultData : public BReferenceable {
MatchResultDataRegExp::MatchResultData151 MatchResultData(const regex_t* compiledExpression, const char* string)
152 :
153 BReferenceable(),
154 fMatchCount(0),
155 fMatches(NULL)
156 {
157 // fMatchCount is always set to the number of matching groups in the
158 // expression (or 0 if an error occured). Some of the "matches" in
159 // the array may still point to the (-1,-1) range if they don't
160 // actually match anything.
161 fMatchCount = compiledExpression->re_nsub + 1;
162 fMatches = new regmatch_t[fMatchCount];
163 if (regexec(compiledExpression, string, fMatchCount, fMatches, 0)
164 != 0) {
165 delete[] fMatches;
166 fMatches = NULL;
167 fMatchCount = 0;
168 }
169 }
170
~MatchResultDataRegExp::MatchResultData171 ~MatchResultData()
172 {
173 delete[] fMatches;
174 }
175
MatchCountRegExp::MatchResultData176 size_t MatchCount() const
177 {
178 return fMatchCount;
179 }
180
MatchesRegExp::MatchResultData181 const regmatch_t* Matches() const
182 {
183 return fMatches;
184 }
185
186 private:
187 size_t fMatchCount;
188 regmatch_t* fMatches;
189 };
190
191
192 // #pragma mark - RegExp
193
194
RegExp()195 RegExp::RegExp()
196 :
197 fData(NULL)
198 {
199 }
200
201
RegExp(const char * pattern,PatternType patternType,bool caseSensitive)202 RegExp::RegExp(const char* pattern, PatternType patternType,
203 bool caseSensitive)
204 :
205 fData(NULL)
206 {
207 SetPattern(pattern, patternType, caseSensitive);
208 }
209
210
// Copies share the compiled data of \a other by holding an extra reference.
RegExp::RegExp(const RegExp& other)
	:
	fData(other.fData)
{
	if (fData == NULL)
		return;

	fData->AcquireReference();
}
218
219
~RegExp()220 RegExp::~RegExp()
221 {
222 if (fData != NULL)
223 fData->ReleaseReference();
224 }
225
226
227 bool
SetPattern(const char * pattern,PatternType patternType,bool caseSensitive)228 RegExp::SetPattern(const char* pattern, PatternType patternType,
229 bool caseSensitive)
230 {
231 if (fData != NULL) {
232 fData->ReleaseReference();
233 fData = NULL;
234 }
235
236 Data* newData = new(std::nothrow) Data(pattern, patternType, caseSensitive);
237 if (newData == NULL)
238 return false;
239
240 BReference<Data> dataReference(newData, true);
241 if (!newData->IsValid())
242 return false;
243
244 fData = dataReference.Detach();
245 return true;
246 }
247
248
249 RegExp::MatchResult
Match(const char * string) const250 RegExp::Match(const char* string) const
251 {
252 if (!IsValid())
253 return MatchResult();
254
255 return MatchResult(
256 new(std::nothrow) MatchResultData(fData->CompiledExpression(),
257 string));
258 }
259
260
261 RegExp&
operator =(const RegExp & other)262 RegExp::operator=(const RegExp& other)
263 {
264 if (fData != NULL)
265 fData->ReleaseReference();
266
267 fData = other.fData;
268
269 if (fData != NULL)
270 fData->AcquireReference();
271
272 return *this;
273 }
274
275
276 // #pragma mark - RegExp::MatchResult
277
278
MatchResult()279 RegExp::MatchResult::MatchResult()
280 :
281 fData(NULL)
282 {
283 }
284
285
MatchResult(MatchResultData * data)286 RegExp::MatchResult::MatchResult(MatchResultData* data)
287 :
288 fData(data)
289 {
290 }
291
292
MatchResult(const MatchResult & other)293 RegExp::MatchResult::MatchResult(const MatchResult& other)
294 :
295 fData(other.fData)
296 {
297 if (fData != NULL)
298 fData->AcquireReference();
299 }
300
301
~MatchResult()302 RegExp::MatchResult::~MatchResult()
303 {
304 if (fData != NULL)
305 fData->ReleaseReference();
306 }
307
308
309 bool
HasMatched() const310 RegExp::MatchResult::HasMatched() const
311 {
312 return fData != NULL && fData->MatchCount() > 0;
313 }
314
315
316 size_t
StartOffset() const317 RegExp::MatchResult::StartOffset() const
318 {
319 return fData != NULL && fData->MatchCount() > 0
320 ? fData->Matches()[0].rm_so : 0;
321 }
322
323
324 size_t
EndOffset() const325 RegExp::MatchResult::EndOffset() const
326 {
327 return fData != NULL && fData->MatchCount() > 0
328 ? fData->Matches()[0].rm_eo : 0;
329 }
330
331
332 size_t
GroupCount() const333 RegExp::MatchResult::GroupCount() const
334 {
335 if (fData == NULL)
336 return 0;
337
338 size_t matchCount = fData->MatchCount();
339 return matchCount > 0 ? matchCount - 1 : 0;
340 }
341
342
343 size_t
GroupStartOffsetAt(size_t index) const344 RegExp::MatchResult::GroupStartOffsetAt(size_t index) const
345 {
346 return fData != NULL && fData->MatchCount() > index + 1
347 ? fData->Matches()[index + 1].rm_so : 0;
348 }
349
350
351 size_t
GroupEndOffsetAt(size_t index) const352 RegExp::MatchResult::GroupEndOffsetAt(size_t index) const
353 {
354 return fData != NULL && fData->MatchCount() > index + 1
355 ? fData->Matches()[index + 1].rm_eo : 0;
356 }
357
358
359 RegExp::MatchResult&
operator =(const MatchResult & other)360 RegExp::MatchResult::operator=(const MatchResult& other)
361 {
362 if (fData != NULL)
363 fData->ReleaseReference();
364
365 fData = other.fData;
366
367 if (fData != NULL)
368 fData->AcquireReference();
369
370 return *this;
371 }
372