xref: /haiku/src/apps/text_search/Grepper.cpp (revision df3ac004ba00d875be84ec7853864b739a2292bf)
1 /*
2  * Copyright (c) 1998-2007 Matthijs Hollemans
3  * Copyright (c) 2008-2017, Haiku Inc.
4  * Distributed under the terms of the MIT license.
5  *
6  * Authors:
7  *      Matthijs Hollemans
8  *      Stephan Aßmus <superstippi@gmx.de>
9  *      Philippe Houdoin
10  */
11 
12 #include "Grepper.h"
13 
14 #include <errno.h>
15 #include <new>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <sys/time.h>
21 
22 #include <Catalog.h>
23 #include <Directory.h>
24 #include <image.h>
25 #include <List.h>
26 #include <Locale.h>
27 #include <NodeInfo.h>
28 #include <OS.h>
29 #include <Path.h>
30 #include <UTF8.h>
31 
32 #include "FileIterator.h"
33 #include "Model.h"
34 
35 #undef B_TRANSLATION_CONTEXT
36 #define B_TRANSLATION_CONTEXT "Grepper"
37 
38 
// End-of-input marker for xargs: it is handed to xargs with the -E option and
// written to its stdin as the last line once all paths have been sent.
39 const char* kEOFTag = "//EOF";
40 
41 
42 using std::nothrow;
43 
44 char*
45 strdup_to_utf8(uint32 encode, const char* src, int32 length)
46 {
47 	int32 srcLen = length;
48 	int32 dstLen = 2 * srcLen;
49 	// TODO: stippi: Why the duplicate copy? Why not just return
50 	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
51 	// enough space? Check return value of convert_to_utf8 and keep
52 	// converting if it didn't fit?
53 	char* dst = new (nothrow) char[dstLen + 1];
54 	if (dst == NULL)
55 		return NULL;
56 	int32 cookie = 0;
57 	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
58 	dst[dstLen] = '\0';
59 	char* dup = strdup(dst);
60 	delete[] dst;
61 	if (srcLen != length) {
62 		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
63 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
64 	}
65 	return dup;
66 }
67 
68 
69 char*
70 strdup_from_utf8(uint32 encode, const char* src, int32 length)
71 {
72 	int32 srcLen = length;
73 	int32 dstLen = srcLen;
74 	char* dst = new (nothrow) char[dstLen + 1];
75 	if (dst == NULL)
76 		return NULL;
77 	int32 cookie = 0;
78 	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
79 	// TODO: See above.
80 	dst[dstLen] = '\0';
81 	char* dup = strdup(dst);
82 	delete[] dst;
83 	if (srcLen != length) {
84 		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
85 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
86 	}
87 	return dup;
88 }
89 
90 
91 Grepper::Grepper(const char* pattern, const Model* model,
92 		const BHandler* target, FileIterator* iterator)
93 	: fPattern(NULL),
94 	  fTarget(target),
95 	  fRegularExpression(model->fRegularExpression),
96 	  fCaseSensitive(model->fCaseSensitive),
97 	  fEncoding(model->fEncoding),
98 
99 	  fIterator(iterator),
100 	  fRunnerThreadId(-1),
101 	  fXargsInput(-1),
102 	  fMustQuit(false)
103 {
104 	if (fEncoding > 0) {
105 		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
106 		_SetPattern(src);
107 		free(src);
108 	} else
109 		_SetPattern(pattern);
110 }
111 
112 
113 Grepper::~Grepper()
114 {
115 	Cancel();
116 	free(fPattern);
117 	delete fIterator;
118 }
119 
120 
121 bool
122 Grepper::IsValid() const
123 {
124 	if (fIterator == NULL || !fIterator->IsValid())
125 		return false;
126 	return fPattern != NULL;
127 }
128 
129 
130 void
131 Grepper::Start()
132 {
133 	Cancel();
134 
135 	fMustQuit = false;
136 	fRunnerThreadId = spawn_thread(
137 		_SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this);
138 
139 	resume_thread(fRunnerThreadId);
140 }
141 
142 
143 void
144 Grepper::Cancel()
145 {
146 	if (fRunnerThreadId < 0)
147 		return;
148 
149 	fMustQuit = true;
150 	int32 exitValue;
151 	wait_for_thread(fRunnerThreadId, &exitValue);
152 	fRunnerThreadId = -1;
153 }
154 
155 
156 // #pragma mark - private
157 
158 
159 int32
160 Grepper::_SpawnWriterThread(void* cookie)
161 {
162 	Grepper* self = static_cast<Grepper*>(cookie);
163 	return self->_WriterThread();
164 }
165 
166 
167 int32
168 Grepper::_WriterThread()
169 {
170 	BMessage message;
171 	char fileName[B_PATH_NAME_LENGTH*2];
172 	int count = 0;
173 
174 	printf("paths_writer started.\n");
175 
176 	while (!fMustQuit && fIterator->GetNextName(fileName)) {
177 
178 		message.MakeEmpty();
179 		message.what = MSG_REPORT_FILE_NAME;
180 		message.AddString("filename", fileName);
181 
182 		BEntry entry(fileName);
183 		entry_ref ref;
184 		entry.GetRef(&ref);
185 		if (!entry.Exists()) {
186 			if (fIterator->NotifyNegatives()) {
187 				message.what = MSG_REPORT_RESULT;
188 				message.AddRef("ref", &ref);
189 				fTarget.SendMessage(&message);
190 			}
191 			continue;
192 		}
193 
194 		if (!_EscapeSpecialChars(fileName, sizeof(fileName))) {
195 			char tempString[B_PATH_NAME_LENGTH + 32];
196 			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
197 				"the filename."), fileName);
198 			message.MakeEmpty();
199 			message.what = MSG_REPORT_ERROR;
200 			message.AddString("error", tempString);
201 			fTarget.SendMessage(&message);
202 			continue;
203 		}
204 
205 		// file exists, send it to xargs
206 		write(fXargsInput, fileName, strlen(fileName));
207 		write(fXargsInput, "\n", 1);
208 		// printf(">>>>>> %s\n", fileName);
209 
210 		fTarget.SendMessage(&message);
211 
212 		count++;
213 	}
214 
215 	write(fXargsInput, kEOFTag, strlen(kEOFTag));
216 	write(fXargsInput, "\n", 1);
217 	close(fXargsInput);
218 
219 	printf("paths_writer stopped (%d paths).\n", count);
220 
221 	return 0;
222 }
223 
224 
225 int32
226 Grepper::_SpawnRunnerThread(void* cookie)
227 {
228 	Grepper* self = static_cast<Grepper*>(cookie);
229 	return self->_RunnerThread();
230 }
231 
232 
233 int32
234 Grepper::_RunnerThread()
235 {
236 	BMessage message;
237 	char fileName[B_PATH_NAME_LENGTH];
238 
239 	const char* argv[32];
240 	int argc = 0;
241 	argv[argc++] = "xargs";
242 
243 	// can't use yet the --null mode due to pipe issue
244 	// the xargs stdin input pipe closure is not detected
245 	// by xargs. Instead, we use eof-string mode
246 
247 	// argv[argc++] = "--null";
248 	argv[argc++] = "-E";
249 	argv[argc++] = kEOFTag;
250 
251 	// Enable parallel mode
252 	// Retrieve cpu count for to parallel xargs via -P argument
253 	char cpuCount[8];
254 	system_info sys_info;
255 	get_system_info(&sys_info);
256 	snprintf(cpuCount, sizeof(cpuCount), "%" B_PRIu32, sys_info.cpu_count);
257 	argv[argc++] = "-P";
258 	argv[argc++] = cpuCount;
259 
260 	// grep command driven by xargs dispatcher
261 	argv[argc++] = "grep";
262 	argv[argc++] = "-n"; // need matching line(s) number(s)
263 	argv[argc++] = "-H"; // need filename prefix
264 	if (! fCaseSensitive)
265 		argv[argc++] = "-i";
266 	if (! fRegularExpression)
267 		argv[argc++] = "-F";	 // no a regexp: force fixed string,
268 	argv[argc++] = fPattern;
269 	argv[argc] = NULL;
270 
271 	// prepare xargs to run with stdin, stdout and stderr pipes
272 
273 	int oldStdIn, oldStdOut, oldStdErr;
274 	oldStdIn  = dup(STDIN_FILENO);
275 	oldStdOut = dup(STDOUT_FILENO);
276 	oldStdErr = dup(STDERR_FILENO);
277 
278 	int fds[2];
279 	if (pipe(fds) != 0) {
280 		message.MakeEmpty();
281 		message.what = MSG_REPORT_ERROR;
282 		message.AddString("error",
283 			B_TRANSLATE("Failed to open input pipe!"));
284 		fTarget.SendMessage(&message);
285 		return 0;
286 	}
287 	dup2(fds[0], STDIN_FILENO);
288 	close(fds[0]);
289 	fXargsInput = fds[1];	// write to in, appears on command's stdin
290 
291 	if (pipe(fds) != 0) {
292 		close(fXargsInput);
293 		message.MakeEmpty();
294 		message.what = MSG_REPORT_ERROR;
295 		message.AddString("error",
296 			B_TRANSLATE("Failed to open output pipe!"));
297 		fTarget.SendMessage(&message);
298 		return 0;
299 	}
300 	dup2(fds[1], STDOUT_FILENO);
301 	close(fds[1]);
302 	int out = fds[0]; // read from out, taken from command's stdout
303 
304 	if (pipe(fds) != 0) {
305 		close(fXargsInput);
306 		close(out);
307 		message.MakeEmpty();
308 		message.what = MSG_REPORT_ERROR;
309 		message.AddString("error",
310 			B_TRANSLATE("Failed to open errors pipe!"));
311 		fTarget.SendMessage(&message);
312 		return 0;
313 	}
314 	dup2(fds[1], STDERR_FILENO);
315 	close(fds[1]);
316 	int err = fds[0]; // read from err, taken from command's stderr
317 
318 	// "load" xargs tool
319 	thread_id xargsThread = load_image(argc, argv,
320 		const_cast<const char**>(environ));
321 	// xargsThread is suspended after loading
322 
323 	// restore our previous stdin, stdout and stderr
324 	close(STDIN_FILENO);
325 	dup(oldStdIn);
326 	close(oldStdIn);
327 	close(STDOUT_FILENO);
328 	dup(oldStdOut);
329 	close(oldStdOut);
330 	close(STDERR_FILENO);
331 	dup(oldStdErr);
332 	close(oldStdErr);
333 
334 	if (xargsThread < B_OK) {
335 		close(fXargsInput);
336 		close(out);
337 		close(err);
338 		message.MakeEmpty();
339 		message.what = MSG_REPORT_ERROR;
340 		message.AddString("error",
341 			B_TRANSLATE("Failed to start xargs program!"));
342 		fTarget.SendMessage(&message);
343 		return 0;
344 	}
345 
346 	// Listen on xargs's stdout and stderr via select()
347 	printf("Running: ");
348 	for (int i = 0; i < argc; i++) {
349 		printf("%s ", argv[i]);
350 	}
351 	printf("\n");
352 
353 	int fdl[2] = { out, err };
354 	int maxfd = 0;
355 	for (int i = 0; i < 2; i++) {
356 		if (maxfd < fdl[i])
357 			maxfd = fdl[i];
358 	}
359 
360 	fd_set readSet;
361     struct timeval timeout = { 0, 100000 };
362 	char line[B_PATH_NAME_LENGTH * 2];
363 
364 	FILE* output = fdopen(out, "r");
365 	FILE* errors = fdopen(err, "r");
366 
367 	char currentFileName[B_PATH_NAME_LENGTH];
368 	currentFileName[0] = '\0';
369 	bool canReadOutput, canReadErrors;
370 	canReadOutput = canReadErrors = true;
371 
372 	thread_id writerThread = spawn_thread(_SpawnWriterThread,
373 		"Grep writer", B_LOW_PRIORITY, this);
374 	set_thread_priority(xargsThread, B_LOW_PRIORITY);
375 
376 	// we're ready, let's go!
377 	resume_thread(xargsThread);
378 	resume_thread(writerThread);
379 
380 	while (!fMustQuit && (canReadOutput || canReadErrors)) {
381 		FD_ZERO(&readSet);
382 		if (canReadOutput) {
383 			FD_SET(out, &readSet);
384 		}
385 		if (canReadErrors) {
386 			FD_SET(err, &readSet);
387 		}
388 
389 		int result = select(maxfd + 1, &readSet, NULL, NULL, &timeout);
390 		if (result == -1 && errno == EINTR)
391 			continue;
392 		if (result == 0) {
393 			// timeout, but meanwhile fMustQuit was changed maybe...
394 			continue;
395 		}
396 		if (result < 0) {
397 			perror("select():");
398 			message.MakeEmpty();
399 			message.what = MSG_REPORT_ERROR;
400 			message.AddString("error", strerror(errno));
401 			fTarget.SendMessage(&message);
402 			break;
403 		}
404 
405 		if (canReadOutput && FD_ISSET(out, &readSet)) {
406 			if (fgets(line, sizeof(line), output) != NULL) {
407 				// parse grep output
408 				int lineNumber = -1;
409 				int textPos = -1;
410 				sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos);
411 				// printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName,
412 				//		lineNumber, textPos);
413 				if (textPos > 0) {
414 					if (strcmp(fileName, currentFileName) != 0) {
415 						fTarget.SendMessage(&message);
416 
417 						strncpy(currentFileName, fileName,
418 							sizeof(currentFileName));
419 
420 						message.MakeEmpty();
421 						message.what = MSG_REPORT_RESULT;
422 						message.AddString("filename", fileName);
423 
424 						BEntry entry(fileName);
425 						entry_ref ref;
426 						entry.GetRef(&ref);
427 						message.AddRef("ref", &ref);
428 					}
429 
430 					char* text = &line[strlen(fileName)+1];
431 					// printf("[%s] %s", fileName, text);
432 					if (fEncoding > 0) {
433 						char* tempdup = strdup_to_utf8(fEncoding, text,
434 							strlen(text));
435 						message.AddString("text", tempdup);
436 						free(tempdup);
437 					} else {
438 						message.AddString("text", text);
439 					}
440 					message.AddInt32("line", lineNumber);
441 				}
442 			} else {
443 				canReadOutput = false;
444 			}
445 		}
446 		if (canReadErrors && FD_ISSET(err, &readSet)) {
447 			if (fgets(line, sizeof(line), errors) != NULL) {
448 				// printf("ERROR: %s", line);
449 				if (message.HasString("text"))
450 					fTarget.SendMessage(&message);
451 				currentFileName[0] = '\0';
452 
453 				message.MakeEmpty();
454 				message.what = MSG_REPORT_ERROR;
455 				message.AddString("error", line);
456 				fTarget.SendMessage(&message);
457 			} else {
458 				canReadErrors = false;
459 			}
460 		}
461 	}
462 
463 	// send last pending message, if any
464 	if (message.HasString("text"))
465 		fTarget.SendMessage(&message);
466 
467 	printf("Done.\n");
468 	fclose(output);
469 	fclose(errors);
470 
471 	close(out);
472 	close(err);
473 
474 	fMustQuit = true;
475 	int32 exitValue;
476 	wait_for_thread(xargsThread, &exitValue);
477 	wait_for_thread(writerThread, &exitValue);
478 
479 	message.MakeEmpty();
480 	message.what = MSG_SEARCH_FINISHED;
481 	fTarget.SendMessage(&message);
482 
483 	return 0;
484 }
485 
486 
487 void
488 Grepper::_SetPattern(const char* src)
489 {
490 	if (src == NULL)
491 		return;
492 
493 	fPattern = strdup(src);
494 }
495 
496 
497 bool
498 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
499 {
500 	char* copy = strdup(buffer);
501 	char* start = buffer;
502 	uint32 len = strlen(copy);
503 	bool result = true;
504 	for (uint32 count = 0; count < len; ++count) {
505 		if (copy[count] == '\'' || copy[count] == '\\'
506 			|| copy[count] == ' ' || copy[count] == '\n'
507 			|| copy[count] == '"')
508 			*buffer++ = '\\';
509 		if (buffer - start == bufferSize - 1) {
510 			result = false;
511 			break;
512 		}
513 		*buffer++ = copy[count];
514 	}
515 	*buffer = '\0';
516 	free(copy);
517 	return result;
518 }
519