xref: /haiku/src/apps/text_search/Grepper.cpp (revision 04a0e9c7b68cbe3a43d38e2bca8e860fd80936fb)
1  /*
2   * Copyright (c) 1998-2007 Matthijs Hollemans
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8   * and/or sell copies of the Software, and to permit persons to whom the
9   * Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20   * DEALINGS IN THE SOFTWARE.
21   */
22  
23  
24  #include "Grepper.h"
25  
26  #include <new>
27  #include <stdio.h>
28  #include <stdlib.h>
29  #include <string.h>
30  
31  #include <Catalog.h>
32  #include <Directory.h>
33  #include <List.h>
34  #include <Locale.h>
35  #include <NodeInfo.h>
36  #include <Path.h>
37  #include <UTF8.h>
38  
39  #include "FileIterator.h"
40  #include "Model.h"
41  
42  #undef B_TRANSLATION_CONTEXT
43  #define B_TRANSLATION_CONTEXT "Grepper"
44  
45  
46  using std::nothrow;
47  
// TODO: stippi: Check if this is the best place to maintain a global
// list of files and folders for node monitoring. It should probably monitor
// every file that was grepped, as well as every visited (sub) folder.
// For the moment I don't know the life cycle of the Grepper object.
52  
53  
54  char*
55  strdup_to_utf8(uint32 encode, const char* src, int32 length)
56  {
57  	int32 srcLen = length;
58  	int32 dstLen = 2 * srcLen;
59  	// TODO: stippi: Why the duplicate copy? Why not just return
60  	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
61  	// enough space? Check return value of convert_to_utf8 and keep
62  	// converting if it didn't fit?
63  	char* dst = new (nothrow) char[dstLen + 1];
64  	if (dst == NULL)
65  		return NULL;
66  	int32 cookie = 0;
67  	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
68  	dst[dstLen] = '\0';
69  	char* dup = strdup(dst);
70  	delete[] dst;
71  	if (srcLen != length) {
72  		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
73  			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
74  	}
75  	return dup;
76  }
77  
78  
79  char*
80  strdup_from_utf8(uint32 encode, const char* src, int32 length)
81  {
82  	int32 srcLen = length;
83  	int32 dstLen = srcLen;
84  	char* dst = new (nothrow) char[dstLen + 1];
85  	if (dst == NULL)
86  		return NULL;
87  	int32 cookie = 0;
88  	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
89  	// TODO: See above.
90  	dst[dstLen] = '\0';
91  	char* dup = strdup(dst);
92  	delete[] dst;
93  	if (srcLen != length) {
94  		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
95  			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
96  	}
97  	return dup;
98  }
99  
100  
101  Grepper::Grepper(const char* pattern, const Model* model,
102  		const BHandler* target, FileIterator* iterator)
103  	: fPattern(NULL),
104  	  fTarget(target),
105  	  fEscapeText(model->fEscapeText),
106  	  fCaseSensitive(model->fCaseSensitive),
107  	  fEncoding(model->fEncoding),
108  
109  	  fIterator(iterator),
110  	  fThreadId(-1),
111  	  fMustQuit(false)
112  {
113  	if (fEncoding > 0) {
114  		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
115  		_SetPattern(src);
116  		free(src);
117  	} else
118  		_SetPattern(pattern);
119  }
120  
121  
122  Grepper::~Grepper()
123  {
124  	Cancel();
125  	free(fPattern);
126  	delete fIterator;
127  }
128  
129  
130  bool
131  Grepper::IsValid() const
132  {
133  	if (fIterator == NULL || !fIterator->IsValid())
134  		return false;
135  	return fPattern != NULL;
136  }
137  
138  
139  void
140  Grepper::Start()
141  {
142  	Cancel();
143  
144  	fMustQuit = false;
145  	fThreadId = spawn_thread(
146  		_SpawnThread, "_GrepperThread", B_NORMAL_PRIORITY, this);
147  
148  	resume_thread(fThreadId);
149  }
150  
151  
152  void
153  Grepper::Cancel()
154  {
155  	if (fThreadId < 0)
156  		return;
157  
158  	fMustQuit = true;
159  	int32 exitValue;
160  	wait_for_thread(fThreadId, &exitValue);
161  	fThreadId = -1;
162  }
163  
164  
165  // #pragma mark - private
166  
167  
168  int32
169  Grepper::_SpawnThread(void* cookie)
170  {
171  	Grepper* self = static_cast<Grepper*>(cookie);
172  	return self->_GrepperThread();
173  }
174  
175  
176  int32
177  Grepper::_GrepperThread()
178  {
179  	BMessage message;
180  
181  	char fileName[B_PATH_NAME_LENGTH];
182  	char tempString[B_PATH_NAME_LENGTH];
183  	char command[B_PATH_NAME_LENGTH + 32];
184  
185  	BPath tempFile;
186  	sprintf(fileName, "/tmp/SearchText%" B_PRId32, fThreadId);
187  	tempFile.SetTo(fileName);
188  
189  	while (!fMustQuit && fIterator->GetNextName(fileName)) {
190  
191  		message.MakeEmpty();
192  		message.what = MSG_REPORT_FILE_NAME;
193  		message.AddString("filename", fileName);
194  		fTarget.SendMessage(&message);
195  
196  		message.MakeEmpty();
197  		message.what = MSG_REPORT_RESULT;
198  		message.AddString("filename", fileName);
199  
200  		BEntry entry(fileName);
201  		entry_ref ref;
202  		entry.GetRef(&ref);
203  		message.AddRef("ref", &ref);
204  
205  		if (!entry.Exists()) {
206  			if (fIterator->NotifyNegatives())
207  				fTarget.SendMessage(&message);
208  			continue;
209  		}
210  
211  		if (!_EscapeSpecialChars(fileName, B_PATH_NAME_LENGTH)) {
212  			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
213  				"the filename."), fileName);
214  
215  			message.MakeEmpty();
216  			message.what = MSG_REPORT_ERROR;
217  			message.AddString("error", tempString);
218  			fTarget.SendMessage(&message);
219  			continue;
220  		}
221  
222  		sprintf(command, "grep -hn %s %s \"%s\" > \"%s\"",
223  			fCaseSensitive ? "" : "-i", fPattern, fileName, tempFile.Path());
224  
225  		int res = system(command);
226  
227  		if (res == 0 || res == 1) {
228  			FILE *results = fopen(tempFile.Path(), "r");
229  
230  			if (results != NULL) {
231  				while (fgets(tempString, B_PATH_NAME_LENGTH, results) != 0) {
232  					if (fEncoding > 0) {
233  						char* tempdup = strdup_to_utf8(fEncoding, tempString,
234  							strlen(tempString));
235  						message.AddString("text", tempdup);
236  						free(tempdup);
237  					} else
238  						message.AddString("text", tempString);
239  				}
240  
241  				if (message.HasString("text") || fIterator->NotifyNegatives())
242  					fTarget.SendMessage(&message);
243  
244  				fclose(results);
245  				continue;
246  			}
247  		}
248  
249  		sprintf(tempString, B_TRANSLATE("%s: There was a problem running grep."), fileName);
250  
251  		message.MakeEmpty();
252  		message.what = MSG_REPORT_ERROR;
253  		message.AddString("error", tempString);
254  		fTarget.SendMessage(&message);
255  	}
256  
257  	// We wait with removing the temporary file until after the
258  	// entire search has finished, to prevent a lot of flickering
259  	// if the Tracker window for /tmp/ might be open.
260  
261  	remove(tempFile.Path());
262  
263  	message.MakeEmpty();
264  	message.what = MSG_SEARCH_FINISHED;
265  	fTarget.SendMessage(&message);
266  
267  	return 0;
268  }
269  
270  
271  void
272  Grepper::_SetPattern(const char* src)
273  {
274  	if (src == NULL)
275  		return;
276  
277  	if (!fEscapeText) {
278  		fPattern = strdup(src);
279  		return;
280  	}
281  
282  	// We will simply guess the size of the memory buffer
283  	// that we need. This should always be large enough.
284  	fPattern = (char*)malloc((strlen(src) + 1) * 3 * sizeof(char));
285  	if (fPattern == NULL)
286  		return;
287  
288  	const char* srcPtr = src;
289  	char* dstPtr = fPattern;
290  
291  	// Put double quotes around the pattern, so separate
292  	// words are considered to be part of a single string.
293  	*dstPtr++ = '"';
294  
295  	while (*srcPtr != '\0') {
296  		char c = *srcPtr++;
297  
298  		// Put a backslash in front of characters
299  		// that should be escaped.
300  		if ((c == '.')  || (c == ',')
301  			||  (c == '[')  || (c == ']')
302  			||  (c == '?')  || (c == '*')
303  			||  (c == '+')  || (c == '-')
304  			||  (c == ':')  || (c == '^')
305  			||  (c == '"')	|| (c == '`')) {
306  			*dstPtr++ = '\\';
307  		} else if ((c == '\\') || (c == '$')) {
308  			// Some characters need to be escaped
309  			// with *three* backslashes in a row.
310  			*dstPtr++ = '\\';
311  			*dstPtr++ = '\\';
312  			*dstPtr++ = '\\';
313  		}
314  
315  		// Note: we do not have to escape the
316  		// { } ( ) < > and | characters.
317  
318  		*dstPtr++ = c;
319  	}
320  
321  	*dstPtr++ = '"';
322  	*dstPtr = '\0';
323  }
324  
325  
326  bool
327  Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
328  {
329  	char* copy = strdup(buffer);
330  	char* start = buffer;
331  	uint32 len = strlen(copy);
332  	bool result = true;
333  	for (uint32 count = 0; count < len; ++count) {
334  		if (copy[count] == '"' || copy[count] == '$')
335  			*buffer++ = '\\';
336  		if (buffer - start == bufferSize - 1) {
337  			result = false;
338  			break;
339  		}
340  		*buffer++ = copy[count];
341  	}
342  	*buffer = '\0';
343  	free(copy);
344  	return result;
345  }
346  
347