xref: /haiku/src/apps/text_search/Grepper.cpp (revision 040a81419dda83d1014e9dc94936a4cb3f027303)
1 /*
2  * Copyright (c) 1998-2007 Matthijs Hollemans
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 
24 #include "Grepper.h"
25 
26 #include <new>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include <Catalog.h>
32 #include <Directory.h>
33 #include <List.h>
34 #include <Locale.h>
35 #include <NodeInfo.h>
36 #include <Path.h>
37 #include <UTF8.h>
38 
39 #include "FileIterator.h"
40 #include "Model.h"
41 
42 #undef B_TRANSLATION_CONTEXT
43 #define B_TRANSLATION_CONTEXT "Grepper"
44 
45 
46 using std::nothrow;
47 
// TODO: stippi: Check if this is the best place to maintain a global
// list of files and folders for node monitoring. It should probably monitor
// every file that was grepped, as well as every visited (sub) folder.
// For the moment I don't know the life cycle of the Grepper object.
52 
53 
54 char*
55 strdup_to_utf8(uint32 encode, const char* src, int32 length)
56 {
57 	int32 srcLen = length;
58 	int32 dstLen = 2 * srcLen;
59 	// TODO: stippi: Why the duplicate copy? Why not just return
60 	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
61 	// enough space? Check return value of convert_to_utf8 and keep
62 	// converting if it didn't fit?
63 	char* dst = new (nothrow) char[dstLen + 1];
64 	if (dst == NULL)
65 		return NULL;
66 	int32 cookie = 0;
67 	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
68 	dst[dstLen] = '\0';
69 	char* dup = strdup(dst);
70 	delete[] dst;
71 	if (srcLen != length) {
72 		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
73 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
74 	}
75 	return dup;
76 }
77 
78 
79 char*
80 strdup_from_utf8(uint32 encode, const char* src, int32 length)
81 {
82 	int32 srcLen = length;
83 	int32 dstLen = srcLen;
84 	char* dst = new (nothrow) char[dstLen + 1];
85 	if (dst == NULL)
86 		return NULL;
87 	int32 cookie = 0;
88 	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
89 	// TODO: See above.
90 	dst[dstLen] = '\0';
91 	char* dup = strdup(dst);
92 	delete[] dst;
93 	if (srcLen != length) {
94 		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
95 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
96 	}
97 	return dup;
98 }
99 
100 
101 Grepper::Grepper(const char* pattern, const Model* model,
102 		const BHandler* target, FileIterator* iterator)
103 	: fPattern(NULL),
104 	  fTarget(target),
105 	  fEscapeText(model->fEscapeText),
106 	  fCaseSensitive(model->fCaseSensitive),
107 	  fEncoding(model->fEncoding),
108 
109 	  fIterator(iterator),
110 	  fThreadId(-1),
111 	  fMustQuit(false)
112 {
113 	if (fEncoding > 0) {
114 		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
115 		_SetPattern(src);
116 		free(src);
117 	} else
118 		_SetPattern(pattern);
119 }
120 
121 
122 Grepper::~Grepper()
123 {
124 	Cancel();
125 	free(fPattern);
126 	delete fIterator;
127 }
128 
129 
130 bool
131 Grepper::IsValid() const
132 {
133 	if (fIterator == NULL || !fIterator->IsValid())
134 		return false;
135 	return fPattern != NULL;
136 }
137 
138 
139 void
140 Grepper::Start()
141 {
142 	Cancel();
143 
144 	fMustQuit = false;
145 	fThreadId = spawn_thread(
146 		_SpawnThread, "_GrepperThread", B_NORMAL_PRIORITY, this);
147 
148 	resume_thread(fThreadId);
149 }
150 
151 
152 void
153 Grepper::Cancel()
154 {
155 	if (fThreadId < 0)
156 		return;
157 
158 	fMustQuit = true;
159 	int32 exitValue;
160 	wait_for_thread(fThreadId, &exitValue);
161 	fThreadId = -1;
162 }
163 
164 
165 // #pragma mark - private
166 
167 
168 int32
169 Grepper::_SpawnThread(void* cookie)
170 {
171 	Grepper* self = static_cast<Grepper*>(cookie);
172 	return self->_GrepperThread();
173 }
174 
175 
176 int32
177 Grepper::_GrepperThread()
178 {
179 	BMessage message;
180 
181 	char fileName[B_PATH_NAME_LENGTH];
182 	char tempString[B_PATH_NAME_LENGTH];
183 	char command[B_PATH_NAME_LENGTH + 32];
184 
185 	BPath tempFile;
186 	sprintf(fileName, "/tmp/SearchText%" B_PRId32, fThreadId);
187 	tempFile.SetTo(fileName);
188 
189 	while (!fMustQuit && fIterator->GetNextName(fileName)) {
190 
191 		message.MakeEmpty();
192 		message.what = MSG_REPORT_FILE_NAME;
193 		message.AddString("filename", fileName);
194 		fTarget.SendMessage(&message);
195 
196 		message.MakeEmpty();
197 		message.what = MSG_REPORT_RESULT;
198 		message.AddString("filename", fileName);
199 
200 		BEntry entry(fileName);
201 		entry_ref ref;
202 		entry.GetRef(&ref);
203 		message.AddRef("ref", &ref);
204 
205 		if (!entry.Exists()) {
206 			if (fIterator->NotifyNegatives())
207 				fTarget.SendMessage(&message);
208 			continue;
209 		}
210 
211 		if (!_EscapeSpecialChars(fileName, B_PATH_NAME_LENGTH)) {
212 			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
213 				"the filename."), fileName);
214 
215 			message.MakeEmpty();
216 			message.what = MSG_REPORT_ERROR;
217 			message.AddString("error", tempString);
218 			fTarget.SendMessage(&message);
219 			continue;
220 		}
221 
222 		sprintf(command, "grep -hn %s %s \"%s\" > \"%s\"",
223 			fCaseSensitive ? "" : "-i", fPattern, fileName, tempFile.Path());
224 
225 		int res = system(command);
226 
227 		if (res == 0 || res == 1) {
228 			FILE *results = fopen(tempFile.Path(), "r");
229 
230 			if (results != NULL) {
231 				while (fgets(tempString, B_PATH_NAME_LENGTH, results) != 0) {
232 					if (fEncoding > 0) {
233 						char* tempdup = strdup_to_utf8(fEncoding, tempString,
234 							strlen(tempString));
235 						message.AddString("text", tempdup);
236 						free(tempdup);
237 					} else
238 						message.AddString("text", tempString);
239 				}
240 
241 				if (message.HasString("text") || fIterator->NotifyNegatives())
242 					fTarget.SendMessage(&message);
243 
244 				fclose(results);
245 				continue;
246 			}
247 		}
248 
249 		sprintf(tempString, B_TRANSLATE("%s: There was a problem running grep."), fileName);
250 
251 		message.MakeEmpty();
252 		message.what = MSG_REPORT_ERROR;
253 		message.AddString("error", tempString);
254 		fTarget.SendMessage(&message);
255 	}
256 
257 	// We wait with removing the temporary file until after the
258 	// entire search has finished, to prevent a lot of flickering
259 	// if the Tracker window for /tmp/ might be open.
260 
261 	remove(tempFile.Path());
262 
263 	message.MakeEmpty();
264 	message.what = MSG_SEARCH_FINISHED;
265 	fTarget.SendMessage(&message);
266 
267 	return 0;
268 }
269 
270 
271 void
272 Grepper::_SetPattern(const char* src)
273 {
274 	if (src == NULL)
275 		return;
276 
277 	if (!fEscapeText) {
278 		fPattern = strdup(src);
279 		return;
280 	}
281 
282 	// We will simply guess the size of the memory buffer
283 	// that we need. This should always be large enough.
284 	fPattern = (char*)malloc((strlen(src) + 1) * 3 * sizeof(char));
285 	if (fPattern == NULL)
286 		return;
287 
288 	const char* srcPtr = src;
289 	char* dstPtr = fPattern;
290 
291 	// Put double quotes around the pattern, so separate
292 	// words are considered to be part of a single string.
293 	*dstPtr++ = '"';
294 
295 	while (*srcPtr != '\0') {
296 		char c = *srcPtr++;
297 
298 		// Put a backslash in front of characters
299 		// that should be escaped.
300 		if ((c == '.')  || (c == ',')
301 			||  (c == '[')  || (c == ']')
302 			||  (c == '?')  || (c == '*')
303 			||  (c == '+')  || (c == '-')
304 			||  (c == ':')  || (c == '^')
305 			||  (c == '"')	|| (c == '`')) {
306 			*dstPtr++ = '\\';
307 		} else if ((c == '\\') || (c == '$')) {
308 			// Some characters need to be escaped
309 			// with *three* backslashes in a row.
310 			*dstPtr++ = '\\';
311 			*dstPtr++ = '\\';
312 			*dstPtr++ = '\\';
313 		}
314 
315 		// Note: we do not have to escape the
316 		// { } ( ) < > and | characters.
317 
318 		*dstPtr++ = c;
319 	}
320 
321 	*dstPtr++ = '"';
322 	*dstPtr = '\0';
323 }
324 
325 
326 bool
327 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
328 {
329 	char* copy = strdup(buffer);
330 	char* start = buffer;
331 	uint32 len = strlen(copy);
332 	bool result = true;
333 	for (uint32 count = 0; count < len; ++count) {
334 		if (copy[count] == '"' || copy[count] == '$')
335 			*buffer++ = '\\';
336 		if (buffer - start == bufferSize - 1) {
337 			result = false;
338 			break;
339 		}
340 		*buffer++ = copy[count];
341 	}
342 	*buffer = '\0';
343 	free(copy);
344 	return result;
345 }
346 
347