xref: /haiku/src/apps/text_search/Grepper.cpp (revision 9760dcae2038d47442f4658c2575844c6cf92c40)
1 /*
2  * Copyright (c) 1998-2007 Matthijs Hollemans
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 
24 #include "Grepper.h"
25 
26 #include <new>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include <Directory.h>
32 #include <List.h>
33 #include <NodeInfo.h>
34 #include <Path.h>
35 #include <UTF8.h>
36 
37 #include "FileIterator.h"
38 #include "Model.h"
39 
40 using std::nothrow;
41 
// TODO: stippi: Check if this is the best place to maintain a global
// list of files and folders for node monitoring. It should probably monitor
// every file that was grepped, as well as every visited (sub)folder.
// For the moment I don't know the life cycle of the Grepper object.
46 
47 
48 char*
49 strdup_to_utf8(uint32 encode, const char* src, int32 length)
50 {
51 	int32 srcLen = length;
52 	int32 dstLen = 2 * srcLen;
53 	// TODO: stippi: Why the duplicate copy? Why not just return
54 	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
55 	// enough space? Check return value of convert_to_utf8 and keep
56 	// converting if it didn't fit?
57 	char* dst = new (nothrow) char[dstLen + 1];
58 	if (dst == NULL)
59 		return NULL;
60 	int32 cookie = 0;
61 	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
62 	dst[dstLen] = '\0';
63 	char* dup = strdup(dst);
64 	delete[] dst;
65 	if (srcLen != length) {
66 		fprintf(stderr, "strdup_to_utf8(%ld, %ld) dst allocate smalled(%ld)\n",
67 			encode, length, dstLen);
68 	}
69 	return dup;
70 }
71 
72 
73 char*
74 strdup_from_utf8(uint32 encode, const char* src, int32 length)
75 {
76 	int32 srcLen = length;
77 	int32 dstLen = srcLen;
78 	char* dst = new (nothrow) char[dstLen + 1];
79 	if (dst == NULL)
80 		return NULL;
81 	int32 cookie = 0;
82 	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
83 	// TODO: See above.
84 	dst[dstLen] = '\0';
85 	char* dup = strdup(dst);
86 	delete[] dst;
87 	if (srcLen != length) {
88 		fprintf(stderr, "strdup_from_utf8(%ld, %ld) dst allocate "
89 			"smalled(%ld)\n", encode, length, dstLen);
90 	}
91 	return dup;
92 }
93 
94 
95 Grepper::Grepper(const char* pattern, const Model* model,
96 		const BHandler* target, FileIterator* iterator)
97 	: fPattern(NULL),
98 	  fTarget(target),
99 	  fEscapeText(model->fEscapeText),
100 	  fCaseSensitive(model->fCaseSensitive),
101 	  fEncoding(model->fEncoding),
102 
103 	  fIterator(iterator),
104 	  fThreadId(-1),
105 	  fMustQuit(false)
106 {
107 	if (fEncoding > 0) {
108 		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
109 		_SetPattern(src);
110 		free(src);
111 	} else
112 		_SetPattern(pattern);
113 }
114 
115 
116 Grepper::~Grepper()
117 {
118 	Cancel();
119 	free(fPattern);
120 	delete fIterator;
121 }
122 
123 
124 bool
125 Grepper::IsValid() const
126 {
127 	if (fIterator == NULL || !fIterator->IsValid())
128 		return false;
129 	return fPattern != NULL;
130 }
131 
132 
133 void
134 Grepper::Start()
135 {
136 	Cancel();
137 
138 	fMustQuit = false;
139 	fThreadId = spawn_thread(
140 		_SpawnThread, "_GrepperThread", B_NORMAL_PRIORITY, this);
141 
142 	resume_thread(fThreadId);
143 }
144 
145 
146 void
147 Grepper::Cancel()
148 {
149 	if (fThreadId < 0)
150 		return;
151 
152 	fMustQuit = true;
153 	int32 exitValue;
154 	wait_for_thread(fThreadId, &exitValue);
155 	fThreadId = -1;
156 }
157 
158 
159 // #pragma mark - private
160 
161 
162 int32
163 Grepper::_SpawnThread(void* cookie)
164 {
165 	Grepper* self = static_cast<Grepper*>(cookie);
166 	return self->_GrepperThread();
167 }
168 
169 
170 int32
171 Grepper::_GrepperThread()
172 {
173 	BMessage message;
174 
175 	char fileName[B_PATH_NAME_LENGTH];
176 	char tempString[B_PATH_NAME_LENGTH];
177 	char command[B_PATH_NAME_LENGTH + 32];
178 
179 	BPath tempFile;
180 	sprintf(fileName, "/tmp/SearchText%ld", fThreadId);
181 	tempFile.SetTo(fileName);
182 
183 	while (!fMustQuit && fIterator->GetNextName(fileName)) {
184 
185 		message.MakeEmpty();
186 		message.what = MSG_REPORT_FILE_NAME;
187 		message.AddString("filename", fileName);
188 		fTarget.SendMessage(&message);
189 
190 		message.MakeEmpty();
191 		message.what = MSG_REPORT_RESULT;
192 		message.AddString("filename", fileName);
193 
194 		BEntry entry(fileName);
195 		entry_ref ref;
196 		entry.GetRef(&ref);
197 		message.AddRef("ref", &ref);
198 
199 		if (!entry.Exists()) {
200 			if (fIterator->NotifyNegatives())
201 				fTarget.SendMessage(&message);
202 			continue;
203 		}
204 
205 		if (!_EscapeSpecialChars(fileName, B_PATH_NAME_LENGTH)) {
206 			sprintf(tempString, "%s: Not enough room to escape the filename.",
207 				fileName);
208 
209 			message.MakeEmpty();
210 			message.what = MSG_REPORT_ERROR;
211 			message.AddString("error", tempString);
212 			fTarget.SendMessage(&message);
213 			continue;
214 		}
215 
216 		sprintf(command, "grep -hn %s %s \"%s\" > \"%s\"",
217 			fCaseSensitive ? "" : "-i", fPattern, fileName, tempFile.Path());
218 
219 		int res = system(command);
220 
221 		if (res == 0 || res == 1) {
222 			FILE *results = fopen(tempFile.Path(), "r");
223 
224 			if (results != NULL) {
225 				while (fgets(tempString, B_PATH_NAME_LENGTH, results) != 0) {
226 					if (fEncoding > 0) {
227 						char* tempdup = strdup_to_utf8(fEncoding, tempString,
228 							strlen(tempString));
229 						message.AddString("text", tempdup);
230 						free(tempdup);
231 					} else
232 						message.AddString("text", tempString);
233 				}
234 
235 				if (message.HasString("text") || fIterator->NotifyNegatives())
236 					fTarget.SendMessage(&message);
237 
238 				fclose(results);
239 				continue;
240 			}
241 		}
242 
243 		sprintf(tempString, "%s: There was a problem running grep.", fileName);
244 
245 		message.MakeEmpty();
246 		message.what = MSG_REPORT_ERROR;
247 		message.AddString("error", tempString);
248 		fTarget.SendMessage(&message);
249 	}
250 
251 	// We wait with removing the temporary file until after the
252 	// entire search has finished, to prevent a lot of flickering
253 	// if the Tracker window for /tmp/ might be open.
254 
255 	remove(tempFile.Path());
256 
257 	message.MakeEmpty();
258 	message.what = MSG_SEARCH_FINISHED;
259 	fTarget.SendMessage(&message);
260 
261 	return 0;
262 }
263 
264 
265 void
266 Grepper::_SetPattern(const char* src)
267 {
268 	if (src == NULL)
269 		return;
270 
271 	if (!fEscapeText) {
272 		fPattern = strdup(src);
273 		return;
274 	}
275 
276 	// We will simply guess the size of the memory buffer
277 	// that we need. This should always be large enough.
278 	fPattern = (char*)malloc((strlen(src) + 1) * 3 * sizeof(char));
279 	if (fPattern == NULL)
280 		return;
281 
282 	const char* srcPtr = src;
283 	char* dstPtr = fPattern;
284 
285 	// Put double quotes around the pattern, so separate
286 	// words are considered to be part of a single string.
287 	*dstPtr++ = '"';
288 
289 	while (*srcPtr != '\0') {
290 		char c = *srcPtr++;
291 
292 		// Put a backslash in front of characters
293 		// that should be escaped.
294 		if ((c == '.')  || (c == ',')
295 			||  (c == '[')  || (c == ']')
296 			||  (c == '?')  || (c == '*')
297 			||  (c == '+')  || (c == '-')
298 			||  (c == ':')  || (c == '^')
299 			||  (c == '\'') || (c == '"')) {
300 			*dstPtr++ = '\\';
301 		} else if ((c == '\\') || (c == '$')) {
302 			// Some characters need to be escaped
303 			// with *three* backslashes in a row.
304 			*dstPtr++ = '\\';
305 			*dstPtr++ = '\\';
306 			*dstPtr++ = '\\';
307 		}
308 
309 		// Note: we do not have to escape the
310 		// { } ( ) < > and | characters.
311 
312 		*dstPtr++ = c;
313 	}
314 
315 	*dstPtr++ = '"';
316 	*dstPtr = '\0';
317 }
318 
319 
320 bool
321 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
322 {
323 	char* copy = strdup(buffer);
324 	char* start = buffer;
325 	uint32 len = strlen(copy);
326 	bool result = true;
327 	for (uint32 count = 0; count < len; ++count) {
328 		if (copy[count] == '"' || copy[count] == '$')
329 			*buffer++ = '\\';
330 		if (buffer - start == bufferSize - 1) {
331 			result = false;
332 			break;
333 		}
334 		*buffer++ = copy[count];
335 	}
336 	*buffer = '\0';
337 	free(copy);
338 	return result;
339 }
340 
341