xref: /haiku/src/apps/text_search/Grepper.cpp (revision 56430ad8002b8fd1ac69b590e9cc130de6d9e852)
1 /*
2  * Copyright (c) 1998-2007 Matthijs Hollemans
3  * All rights reserved. Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "Grepper.h"
8 
9 #include <new>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <Catalog.h>
15 #include <Directory.h>
16 #include <List.h>
17 #include <Locale.h>
18 #include <NodeInfo.h>
19 #include <Path.h>
20 #include <UTF8.h>
21 
22 #include "FileIterator.h"
23 #include "Model.h"
24 
25 #undef B_TRANSLATION_CONTEXT
26 #define B_TRANSLATION_CONTEXT "Grepper"
27 
28 
29 using std::nothrow;
30 
// TODO: stippi: Check if this is the best place to maintain a global
// list of files and folders for node monitoring. It should probably monitor
// every file that was grepped, as well as every visited (sub) folder.
// For the moment I don't know the life cycle of the Grepper object.
35 
36 
37 char*
38 strdup_to_utf8(uint32 encode, const char* src, int32 length)
39 {
40 	int32 srcLen = length;
41 	int32 dstLen = 2 * srcLen;
42 	// TODO: stippi: Why the duplicate copy? Why not just return
43 	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
44 	// enough space? Check return value of convert_to_utf8 and keep
45 	// converting if it didn't fit?
46 	char* dst = new (nothrow) char[dstLen + 1];
47 	if (dst == NULL)
48 		return NULL;
49 	int32 cookie = 0;
50 	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
51 	dst[dstLen] = '\0';
52 	char* dup = strdup(dst);
53 	delete[] dst;
54 	if (srcLen != length) {
55 		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
56 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
57 	}
58 	return dup;
59 }
60 
61 
62 char*
63 strdup_from_utf8(uint32 encode, const char* src, int32 length)
64 {
65 	int32 srcLen = length;
66 	int32 dstLen = srcLen;
67 	char* dst = new (nothrow) char[dstLen + 1];
68 	if (dst == NULL)
69 		return NULL;
70 	int32 cookie = 0;
71 	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
72 	// TODO: See above.
73 	dst[dstLen] = '\0';
74 	char* dup = strdup(dst);
75 	delete[] dst;
76 	if (srcLen != length) {
77 		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
78 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
79 	}
80 	return dup;
81 }
82 
83 
84 Grepper::Grepper(const char* pattern, const Model* model,
85 		const BHandler* target, FileIterator* iterator)
86 	: fPattern(NULL),
87 	  fTarget(target),
88 	  fEscapeText(model->fEscapeText),
89 	  fCaseSensitive(model->fCaseSensitive),
90 	  fEncoding(model->fEncoding),
91 
92 	  fIterator(iterator),
93 	  fThreadId(-1),
94 	  fMustQuit(false)
95 {
96 	if (fEncoding > 0) {
97 		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
98 		_SetPattern(src);
99 		free(src);
100 	} else
101 		_SetPattern(pattern);
102 }
103 
104 
105 Grepper::~Grepper()
106 {
107 	Cancel();
108 	free(fPattern);
109 	delete fIterator;
110 }
111 
112 
113 bool
114 Grepper::IsValid() const
115 {
116 	if (fIterator == NULL || !fIterator->IsValid())
117 		return false;
118 	return fPattern != NULL;
119 }
120 
121 
122 void
123 Grepper::Start()
124 {
125 	Cancel();
126 
127 	fMustQuit = false;
128 	fThreadId = spawn_thread(
129 		_SpawnThread, "_GrepperThread", B_NORMAL_PRIORITY, this);
130 
131 	resume_thread(fThreadId);
132 }
133 
134 
135 void
136 Grepper::Cancel()
137 {
138 	if (fThreadId < 0)
139 		return;
140 
141 	fMustQuit = true;
142 	int32 exitValue;
143 	wait_for_thread(fThreadId, &exitValue);
144 	fThreadId = -1;
145 }
146 
147 
148 // #pragma mark - private
149 
150 
151 int32
152 Grepper::_SpawnThread(void* cookie)
153 {
154 	Grepper* self = static_cast<Grepper*>(cookie);
155 	return self->_GrepperThread();
156 }
157 
158 
159 int32
160 Grepper::_GrepperThread()
161 {
162 	BMessage message;
163 
164 	char fileName[B_PATH_NAME_LENGTH];
165 	char tempString[B_PATH_NAME_LENGTH];
166 	char command[B_PATH_NAME_LENGTH + 32];
167 
168 	BPath tempFile;
169 	sprintf(fileName, "/tmp/SearchText%" B_PRId32, fThreadId);
170 	tempFile.SetTo(fileName);
171 
172 	while (!fMustQuit && fIterator->GetNextName(fileName)) {
173 
174 		message.MakeEmpty();
175 		message.what = MSG_REPORT_FILE_NAME;
176 		message.AddString("filename", fileName);
177 		fTarget.SendMessage(&message);
178 
179 		message.MakeEmpty();
180 		message.what = MSG_REPORT_RESULT;
181 		message.AddString("filename", fileName);
182 
183 		BEntry entry(fileName);
184 		entry_ref ref;
185 		entry.GetRef(&ref);
186 		message.AddRef("ref", &ref);
187 
188 		if (!entry.Exists()) {
189 			if (fIterator->NotifyNegatives())
190 				fTarget.SendMessage(&message);
191 			continue;
192 		}
193 
194 		if (!_EscapeSpecialChars(fileName, B_PATH_NAME_LENGTH)) {
195 			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
196 				"the filename."), fileName);
197 
198 			message.MakeEmpty();
199 			message.what = MSG_REPORT_ERROR;
200 			message.AddString("error", tempString);
201 			fTarget.SendMessage(&message);
202 			continue;
203 		}
204 
205 		sprintf(command, "grep -hn %s %s \"%s\" > \"%s\"",
206 			fCaseSensitive ? "" : "-i", fPattern, fileName, tempFile.Path());
207 
208 		int res = system(command);
209 
210 		if (res == 0 || res == 1) {
211 			FILE *results = fopen(tempFile.Path(), "r");
212 
213 			if (results != NULL) {
214 				while (fgets(tempString, B_PATH_NAME_LENGTH, results) != 0) {
215 					if (fEncoding > 0) {
216 						char* tempdup = strdup_to_utf8(fEncoding, tempString,
217 							strlen(tempString));
218 						message.AddString("text", tempdup);
219 						free(tempdup);
220 					} else
221 						message.AddString("text", tempString);
222 				}
223 
224 				if (message.HasString("text") || fIterator->NotifyNegatives())
225 					fTarget.SendMessage(&message);
226 
227 				fclose(results);
228 				continue;
229 			}
230 		}
231 
232 		sprintf(tempString, B_TRANSLATE("%s: There was a problem running grep."), fileName);
233 
234 		message.MakeEmpty();
235 		message.what = MSG_REPORT_ERROR;
236 		message.AddString("error", tempString);
237 		fTarget.SendMessage(&message);
238 	}
239 
240 	// We wait with removing the temporary file until after the
241 	// entire search has finished, to prevent a lot of flickering
242 	// if the Tracker window for /tmp/ might be open.
243 
244 	remove(tempFile.Path());
245 
246 	message.MakeEmpty();
247 	message.what = MSG_SEARCH_FINISHED;
248 	fTarget.SendMessage(&message);
249 
250 	return 0;
251 }
252 
253 
254 void
255 Grepper::_SetPattern(const char* src)
256 {
257 	if (src == NULL)
258 		return;
259 
260 	if (!fEscapeText) {
261 		fPattern = strdup(src);
262 		return;
263 	}
264 
265 	// We will simply guess the size of the memory buffer
266 	// that we need. This should always be large enough.
267 	fPattern = (char*)malloc((strlen(src) + 1) * 3 * sizeof(char));
268 	if (fPattern == NULL)
269 		return;
270 
271 	const char* srcPtr = src;
272 	char* dstPtr = fPattern;
273 
274 	// Put double quotes around the pattern, so separate
275 	// words are considered to be part of a single string.
276 	*dstPtr++ = '"';
277 
278 	while (*srcPtr != '\0') {
279 		char c = *srcPtr++;
280 
281 		// Put a backslash in front of characters
282 		// that should be escaped.
283 		if ((c == '.')  || (c == ',')
284 			||  (c == '[')  || (c == ']')
285 			||  (c == '?')  || (c == '*')
286 			||  (c == '+')  || (c == '-')
287 			||  (c == ':')  || (c == '^')
288 			||  (c == '"')	|| (c == '`')) {
289 			*dstPtr++ = '\\';
290 		} else if ((c == '\\') || (c == '$')) {
291 			// Some characters need to be escaped
292 			// with *three* backslashes in a row.
293 			*dstPtr++ = '\\';
294 			*dstPtr++ = '\\';
295 			*dstPtr++ = '\\';
296 		}
297 
298 		// Note: we do not have to escape the
299 		// { } ( ) < > and | characters.
300 
301 		*dstPtr++ = c;
302 	}
303 
304 	*dstPtr++ = '"';
305 	*dstPtr = '\0';
306 }
307 
308 
309 bool
310 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
311 {
312 	char* copy = strdup(buffer);
313 	char* start = buffer;
314 	uint32 len = strlen(copy);
315 	bool result = true;
316 	for (uint32 count = 0; count < len; ++count) {
317 		if (copy[count] == '"' || copy[count] == '$')
318 			*buffer++ = '\\';
319 		if (buffer - start == bufferSize - 1) {
320 			result = false;
321 			break;
322 		}
323 		*buffer++ = copy[count];
324 	}
325 	*buffer = '\0';
326 	free(copy);
327 	return result;
328 }
329 
330