xref: /haiku/src/apps/text_search/Grepper.cpp (revision 746cac055adc6ac3308c7bc2d29040fb95689cc9)
1 /*
2  * Copyright (c) 1998-2007 Matthijs Hollemans
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "Grepper.h"
24 
25 #include <new>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include <Directory.h>
31 #include <List.h>
32 #include <NodeInfo.h>
33 #include <Path.h>
34 #include <UTF8.h>
35 
36 #include "FileIterator.h"
37 #include "Model.h"
38 
39 using std::nothrow;
40 
// TODO: stippi: Check whether this is the best place to maintain a global
// list of files and folders for node monitoring. It should probably monitor
// every file that was grepped, as well as every visited (sub)folder.
// For the moment I don't know the life cycle of the Grepper object.
45 
46 
47 char*
48 strdup_to_utf8(uint32 encode, const char* src, int32 length)
49 {
50 	int32 srcLen = length;
51 	int32 dstLen = 2 * srcLen;
52 	// TODO: stippi: Why the duplicate copy? Why not just return
53 	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
54 	// enough space? Check return value of convert_to_utf8 and keep
55 	// converting if it didn't fit?
56 	char* dst = new (nothrow) char[dstLen + 1];
57 	if (dst == NULL)
58 		return NULL;
59 	int32 cookie = 0;
60 	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
61 	dst[dstLen] = '\0';
62 	char* dup = strdup(dst);
63 	delete[] dst;
64 	if (srcLen != length) {
65 		fprintf(stderr, "strdup_to_utf8(%ld, %ld) dst allocate smalled(%ld)\n",
66 			encode, length, dstLen);
67 	}
68 	return dup;
69 }
70 
71 
72 char*
73 strdup_from_utf8(uint32 encode, const char* src, int32 length)
74 {
75 	int32 srcLen = length;
76 	int32 dstLen = srcLen;
77 	char* dst = new (nothrow) char[dstLen + 1];
78 	if (dst == NULL)
79 		return NULL;
80 	int32 cookie = 0;
81 	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
82 	// TODO: See above.
83 	dst[dstLen] = '\0';
84 	char* dup = strdup(dst);
85 	delete[] dst;
86 	if (srcLen != length) {
87 		fprintf(stderr, "strdup_from_utf8(%ld, %ld) dst allocate "
88 			"smalled(%ld)\n", encode, length, dstLen);
89 	}
90 	return dup;
91 }
92 
93 
94 Grepper::Grepper(const char* pattern, const Model* model,
95 		const BHandler* target, FileIterator* iterator)
96 	: fPattern(NULL),
97 	  fTarget(target),
98 	  fEscapeText(model->fEscapeText),
99 	  fCaseSensitive(model->fCaseSensitive),
100 	  fEncoding(model->fEncoding),
101 
102 	  fIterator(iterator),
103 	  fThreadId(-1),
104 	  fMustQuit(false)
105 {
106 	if (fEncoding > 0) {
107 		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
108 		_SetPattern(src);
109 		free(src);
110 	} else
111 		_SetPattern(pattern);
112 }
113 
114 
115 Grepper::~Grepper()
116 {
117 	Cancel();
118 	free(fPattern);
119 	delete fIterator;
120 }
121 
122 
123 bool
124 Grepper::IsValid() const
125 {
126 	if (fIterator == NULL || !fIterator->IsValid())
127 		return false;
128 	return fPattern != NULL;
129 }
130 
131 
132 void
133 Grepper::Start()
134 {
135 	Cancel();
136 
137 	fMustQuit = false;
138 	fThreadId = spawn_thread(
139 		_SpawnThread, "_GrepperThread", B_NORMAL_PRIORITY, this);
140 
141 	resume_thread(fThreadId);
142 }
143 
144 
145 void
146 Grepper::Cancel()
147 {
148 	if (fThreadId < 0)
149 		return;
150 
151 	fMustQuit = true;
152 	int32 exitValue;
153 	wait_for_thread(fThreadId, &exitValue);
154 	fThreadId = -1;
155 }
156 
157 
158 // #pragma mark - private
159 
160 
161 int32
162 Grepper::_SpawnThread(void* cookie)
163 {
164 	Grepper* self = static_cast<Grepper*>(cookie);
165 	return self->_GrepperThread();
166 }
167 
168 
169 int32
170 Grepper::_GrepperThread()
171 {
172 	BMessage message;
173 
174 	char fileName[B_PATH_NAME_LENGTH];
175 	char tempString[B_PATH_NAME_LENGTH];
176 	char command[B_PATH_NAME_LENGTH + 32];
177 
178 	BPath tempFile;
179 	sprintf(fileName, "/boot/var/tmp/SearchText%ld", fThreadId);
180 	tempFile.SetTo(fileName);
181 
182 	while (!fMustQuit && fIterator->GetNextName(fileName)) {
183 
184 		message.MakeEmpty();
185 		message.what = MSG_REPORT_FILE_NAME;
186 		message.AddString("filename", fileName);
187 		fTarget.SendMessage(&message);
188 
189 		message.MakeEmpty();
190 		message.what = MSG_REPORT_RESULT;
191 		message.AddString("filename", fileName);
192 
193 		BEntry entry(fileName);
194 		entry_ref ref;
195 		entry.GetRef(&ref);
196 		message.AddRef("ref", &ref);
197 
198 		if (!entry.Exists()) {
199 			if (fIterator->NotifyNegatives())
200 				fTarget.SendMessage(&message);
201 			continue;
202 		}
203 
204 		if (!_EscapeSpecialChars(fileName, B_PATH_NAME_LENGTH)) {
205 			sprintf(tempString, "%s: Not enough room to escape the filename.",
206 				fileName);
207 
208 			message.MakeEmpty();
209 			message.what = MSG_REPORT_ERROR;
210 			message.AddString("error", tempString);
211 			fTarget.SendMessage(&message);
212 			continue;
213 		}
214 
215 		sprintf(command, "grep -hn %s %s \"%s\" > \"%s\"",
216 			fCaseSensitive ? "" : "-i", fPattern, fileName, tempFile.Path());
217 
218 		int res = system(command);
219 
220 		if (res == 0 || res == 1) {
221 			FILE *results = fopen(tempFile.Path(), "r");
222 
223 			if (results != NULL) {
224 				while (fgets(tempString, B_PATH_NAME_LENGTH, results) != 0) {
225 					if (fEncoding > 0) {
226 						char* tempdup = strdup_to_utf8(fEncoding, tempString,
227 							strlen(tempString));
228 						message.AddString("text", tempdup);
229 						free(tempdup);
230 					} else
231 						message.AddString("text", tempString);
232 				}
233 
234 				if (message.HasString("text") || fIterator->NotifyNegatives())
235 					fTarget.SendMessage(&message);
236 
237 				fclose(results);
238 				continue;
239 			}
240 		}
241 
242 		sprintf(tempString, "%s: There was a problem running grep.", fileName);
243 
244 		message.MakeEmpty();
245 		message.what = MSG_REPORT_ERROR;
246 		message.AddString("error", tempString);
247 		fTarget.SendMessage(&message);
248 	}
249 
250 	// We wait with removing the temporary file until after the
251 	// entire search has finished, to prevent a lot of flickering
252 	// if the Tracker window for /boot/var/tmp/ might be open.
253 
254 	remove(tempFile.Path());
255 
256 	message.MakeEmpty();
257 	message.what = MSG_SEARCH_FINISHED;
258 	fTarget.SendMessage(&message);
259 
260 	return 0;
261 }
262 
263 
264 void
265 Grepper::_SetPattern(const char* src)
266 {
267 	if (src == NULL)
268 		return;
269 
270 	if (!fEscapeText) {
271 		fPattern = strdup(src);
272 		return;
273 	}
274 
275 	// We will simply guess the size of the memory buffer
276 	// that we need. This should always be large enough.
277 	fPattern = (char*)malloc((strlen(src) + 1) * 3 * sizeof(char));
278 	if (fPattern == NULL)
279 		return;
280 
281 	const char* srcPtr = src;
282 	char* dstPtr = fPattern;
283 
284 	// Put double quotes around the pattern, so separate
285 	// words are considered to be part of a single string.
286 	*dstPtr++ = '"';
287 
288 	while (*srcPtr != '\0') {
289 		char c = *srcPtr++;
290 
291 		// Put a backslash in front of characters
292 		// that should be escaped.
293 		if ((c == '.')  || (c == ',')
294 			||  (c == '[')  || (c == ']')
295 			||  (c == '?')  || (c == '*')
296 			||  (c == '+')  || (c == '-')
297 			||  (c == ':')  || (c == '^')
298 			||  (c == '\'') || (c == '"')) {
299 			*dstPtr++ = '\\';
300 		} else if ((c == '\\') || (c == '$')) {
301 			// Some characters need to be escaped
302 			// with *three* backslashes in a row.
303 			*dstPtr++ = '\\';
304 			*dstPtr++ = '\\';
305 			*dstPtr++ = '\\';
306 		}
307 
308 		// Note: we do not have to escape the
309 		// { } ( ) < > and | characters.
310 
311 		*dstPtr++ = c;
312 	}
313 
314 	*dstPtr++ = '"';
315 	*dstPtr = '\0';
316 }
317 
318 
319 bool
320 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
321 {
322 	char* copy = strdup(buffer);
323 	char* start = buffer;
324 	uint32 len = strlen(copy);
325 	bool result = true;
326 	for (uint32 count = 0; count < len; ++count) {
327 		if (copy[count] == '"' || copy[count] == '$')
328 			*buffer++ = '\\';
329 		if (buffer - start == bufferSize - 1) {
330 			result = false;
331 			break;
332 		}
333 		*buffer++ = copy[count];
334 	}
335 	*buffer = '\0';
336 	free(copy);
337 	return result;
338 }
339 
340