xref: /haiku/src/apps/text_search/Grepper.cpp (revision adcf5b05a8ca9e17407aa4640675c3873c9f0a6c)
1 /*
2  * Copyright (c) 1998-2007 Matthijs Hollemans
3  * Copyright (c) 2008-2017, Haiku Inc.
4  * Distributed under the terms of the MIT license.
5  *
6  * Authors:
7  *      Matthijs Hollemans
8  *      Stephan Aßmus <superstippi@gmx.de>
9  *      Philippe Houdoin
10  */
11 
12 #include "Grepper.h"
13 
14 #include <errno.h>
15 #include <new>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <sys/time.h>
21 
22 #include <Catalog.h>
23 #include <Directory.h>
24 #include <image.h>
25 #include <List.h>
26 #include <Locale.h>
27 #include <NodeInfo.h>
28 #include <OS.h>
29 #include <Path.h>
30 #include <UTF8.h>
31 
32 #include "FileIterator.h"
33 #include "Model.h"
34 
35 #undef B_TRANSLATION_CONTEXT
36 #define B_TRANSLATION_CONTEXT "Grepper"
37 
38 
// End-of-input marker: handed to xargs through its -E option and written
// as the last line of the file list fed to xargs' stdin.
const char* kEOFTag = "//EOF";


using std::nothrow;
43 
44 char*
45 strdup_to_utf8(uint32 encode, const char* src, int32 length)
46 {
47 	int32 srcLen = length;
48 	int32 dstLen = 2 * srcLen;
49 	// TODO: stippi: Why the duplicate copy? Why not just return
50 	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
51 	// enough space? Check return value of convert_to_utf8 and keep
52 	// converting if it didn't fit?
53 	char* dst = new (nothrow) char[dstLen + 1];
54 	if (dst == NULL)
55 		return NULL;
56 	int32 cookie = 0;
57 	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
58 	dst[dstLen] = '\0';
59 	char* dup = strdup(dst);
60 	delete[] dst;
61 	if (srcLen != length) {
62 		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
63 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
64 	}
65 	return dup;
66 }
67 
68 
69 char*
70 strdup_from_utf8(uint32 encode, const char* src, int32 length)
71 {
72 	int32 srcLen = length;
73 	int32 dstLen = srcLen;
74 	char* dst = new (nothrow) char[dstLen + 1];
75 	if (dst == NULL)
76 		return NULL;
77 	int32 cookie = 0;
78 	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
79 	// TODO: See above.
80 	dst[dstLen] = '\0';
81 	char* dup = strdup(dst);
82 	delete[] dst;
83 	if (srcLen != length) {
84 		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
85 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
86 	}
87 	return dup;
88 }
89 
90 
91 Grepper::Grepper(const char* pattern, const Model* model,
92 		const BHandler* target, FileIterator* iterator)
93 	: fPattern(NULL),
94 	  fTarget(target),
95 	  fRegularExpression(model->fRegularExpression),
96 	  fCaseSensitive(model->fCaseSensitive),
97 	  fEncoding(model->fEncoding),
98 
99 	  fIterator(iterator),
100 	  fRunnerThreadId(-1),
101 	  fXargsInput(-1),
102 	  fMustQuit(false)
103 {
104 	if (fEncoding > 0) {
105 		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
106 		_SetPattern(src);
107 		free(src);
108 	} else
109 		_SetPattern(pattern);
110 }
111 
112 
113 Grepper::~Grepper()
114 {
115 	Cancel();
116 	free(fPattern);
117 	delete fIterator;
118 }
119 
120 
121 bool
122 Grepper::IsValid() const
123 {
124 	if (fIterator == NULL || !fIterator->IsValid())
125 		return false;
126 	return fPattern != NULL;
127 }
128 
129 
130 void
131 Grepper::Start()
132 {
133 	Cancel();
134 
135 	fMustQuit = false;
136 	fRunnerThreadId = spawn_thread(
137 		_SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this);
138 
139 	resume_thread(fRunnerThreadId);
140 }
141 
142 
143 void
144 Grepper::Cancel()
145 {
146 	if (fRunnerThreadId < 0)
147 		return;
148 
149 	fMustQuit = true;
150 	int32 exitValue;
151 	wait_for_thread(fRunnerThreadId, &exitValue);
152 	fRunnerThreadId = -1;
153 }
154 
155 
156 // #pragma mark - private
157 
158 
159 int32
160 Grepper::_SpawnWriterThread(void* cookie)
161 {
162 	Grepper* self = static_cast<Grepper*>(cookie);
163 	return self->_WriterThread();
164 }
165 
166 
167 int32
168 Grepper::_WriterThread()
169 {
170 	BMessage message;
171 	char fileName[B_PATH_NAME_LENGTH*2];
172 	int count = 0;
173 	bigtime_t lastProgressReportTime = 0, now;
174 
175 	printf("paths_writer started.\n");
176 
177 	while (!fMustQuit && fIterator->GetNextName(fileName)) {
178 		BEntry entry(fileName);
179 		entry_ref ref;
180 		entry.GetRef(&ref);
181 		if (!entry.Exists()) {
182 			if (fIterator->NotifyNegatives()) {
183 				message.MakeEmpty();
184 				message.what = MSG_REPORT_RESULT;
185 				message.AddString("filename", fileName);
186 				message.AddRef("ref", &ref);
187 				fTarget.SendMessage(&message);
188 			}
189 			continue;
190 		}
191 
192 		if (!_EscapeSpecialChars(fileName, sizeof(fileName))) {
193 			char tempString[B_PATH_NAME_LENGTH + 32];
194 			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
195 				"the filename."), fileName);
196 			message.MakeEmpty();
197 			message.what = MSG_REPORT_ERROR;
198 			message.AddString("error", tempString);
199 			fTarget.SendMessage(&message);
200 			continue;
201 		}
202 
203 		count++;
204 
205 		// file exists, send it to xargs
206 		write(fXargsInput, fileName, strlen(fileName));
207 		write(fXargsInput, "\n", 1);
208 
209 		now = system_time();
210 		// to avoid message flood,
211 		// report progress no more than 20 times per second
212 		if (now - lastProgressReportTime > 50000) {
213 			message.MakeEmpty();
214 			message.what = MSG_REPORT_FILE_NAME;
215 			message.AddString("filename", fileName);
216 			fTarget.SendMessage(&message);
217 			lastProgressReportTime = now;
218 		}
219 	}
220 
221 	write(fXargsInput, kEOFTag, strlen(kEOFTag));
222 	write(fXargsInput, "\n", 1);
223 	close(fXargsInput);
224 
225 	printf("paths_writer stopped (%d paths).\n", count);
226 
227 	return 0;
228 }
229 
230 
231 int32
232 Grepper::_SpawnRunnerThread(void* cookie)
233 {
234 	Grepper* self = static_cast<Grepper*>(cookie);
235 	return self->_RunnerThread();
236 }
237 
238 
239 int32
240 Grepper::_RunnerThread()
241 {
242 	BMessage message;
243 	char fileName[B_PATH_NAME_LENGTH];
244 
245 	const char* argv[32];
246 	int argc = 0;
247 	argv[argc++] = "xargs";
248 
249 	// can't use yet the --null mode due to pipe issue
250 	// the xargs stdin input pipe closure is not detected
251 	// by xargs. Instead, we use eof-string mode
252 
253 	// argv[argc++] = "--null";
254 	argv[argc++] = "-E";
255 	argv[argc++] = kEOFTag;
256 
257 	// Enable parallel mode
258 	// Retrieve cpu count for to parallel xargs via -P argument
259 	char cpuCount[8];
260 	system_info sys_info;
261 	get_system_info(&sys_info);
262 	snprintf(cpuCount, sizeof(cpuCount), "%" B_PRIu32, sys_info.cpu_count);
263 	argv[argc++] = "-P";
264 	argv[argc++] = cpuCount;
265 
266 	// grep command driven by xargs dispatcher
267 	argv[argc++] = "grep";
268 	argv[argc++] = "-n"; // need matching line(s) number(s)
269 	argv[argc++] = "-H"; // need filename prefix
270 	if (! fCaseSensitive)
271 		argv[argc++] = "-i";
272 	if (! fRegularExpression)
273 		argv[argc++] = "-F";	 // no a regexp: force fixed string,
274 	// Add double dash argument to tell grep
275 	// it's the end of commands options
276 	argv[argc++] = "--";
277 	argv[argc++] = fPattern;
278 	argv[argc] = NULL;
279 
280 	// prepare xargs to run with stdin, stdout and stderr pipes
281 
282 	int oldStdIn, oldStdOut, oldStdErr;
283 	oldStdIn  = dup(STDIN_FILENO);
284 	oldStdOut = dup(STDOUT_FILENO);
285 	oldStdErr = dup(STDERR_FILENO);
286 
287 	int fds[2];
288 	if (pipe(fds) != 0) {
289 		message.MakeEmpty();
290 		message.what = MSG_REPORT_ERROR;
291 		message.AddString("error",
292 			B_TRANSLATE("Failed to open input pipe!"));
293 		fTarget.SendMessage(&message);
294 		return 0;
295 	}
296 	dup2(fds[0], STDIN_FILENO);
297 	close(fds[0]);
298 	fXargsInput = fds[1];	// write to in, appears on command's stdin
299 
300 	if (pipe(fds) != 0) {
301 		close(fXargsInput);
302 		message.MakeEmpty();
303 		message.what = MSG_REPORT_ERROR;
304 		message.AddString("error",
305 			B_TRANSLATE("Failed to open output pipe!"));
306 		fTarget.SendMessage(&message);
307 		return 0;
308 	}
309 	dup2(fds[1], STDOUT_FILENO);
310 	close(fds[1]);
311 	int out = fds[0]; // read from out, taken from command's stdout
312 
313 	if (pipe(fds) != 0) {
314 		close(fXargsInput);
315 		close(out);
316 		message.MakeEmpty();
317 		message.what = MSG_REPORT_ERROR;
318 		message.AddString("error",
319 			B_TRANSLATE("Failed to open errors pipe!"));
320 		fTarget.SendMessage(&message);
321 		return 0;
322 	}
323 	dup2(fds[1], STDERR_FILENO);
324 	close(fds[1]);
325 	int err = fds[0]; // read from err, taken from command's stderr
326 
327 	// "load" xargs tool
328 	thread_id xargsThread = load_image(argc, argv,
329 		const_cast<const char**>(environ));
330 	// xargsThread is suspended after loading
331 
332 	// restore our previous stdin, stdout and stderr
333 	close(STDIN_FILENO);
334 	dup(oldStdIn);
335 	close(oldStdIn);
336 	close(STDOUT_FILENO);
337 	dup(oldStdOut);
338 	close(oldStdOut);
339 	close(STDERR_FILENO);
340 	dup(oldStdErr);
341 	close(oldStdErr);
342 
343 	if (xargsThread < B_OK) {
344 		close(fXargsInput);
345 		close(out);
346 		close(err);
347 		message.MakeEmpty();
348 		message.what = MSG_REPORT_ERROR;
349 		message.AddString("error",
350 			B_TRANSLATE("Failed to start xargs program!"));
351 		fTarget.SendMessage(&message);
352 		return 0;
353 	}
354 
355 	// Listen on xargs's stdout and stderr via select()
356 	printf("Running: ");
357 	for (int i = 0; i < argc; i++) {
358 		printf("%s ", argv[i]);
359 	}
360 	printf("\n");
361 
362 	int fdl[2] = { out, err };
363 	int maxfd = 0;
364 	for (int i = 0; i < 2; i++) {
365 		if (maxfd < fdl[i])
366 			maxfd = fdl[i];
367 	}
368 
369 	fd_set readSet;
370 	struct timeval timeout = { 0, 100000 };
371 	char line[B_PATH_NAME_LENGTH * 2];
372 
373 	FILE* output = fdopen(out, "r");
374 	FILE* errors = fdopen(err, "r");
375 
376 	char currentFileName[B_PATH_NAME_LENGTH];
377 	currentFileName[0] = '\0';
378 	bool canReadOutput, canReadErrors;
379 	canReadOutput = canReadErrors = true;
380 
381 	thread_id writerThread = spawn_thread(_SpawnWriterThread,
382 		"Grep writer", B_LOW_PRIORITY, this);
383 	set_thread_priority(xargsThread, B_LOW_PRIORITY);
384 
385 	// we're ready, let's go!
386 	resume_thread(xargsThread);
387 	resume_thread(writerThread);
388 
389 	while (!fMustQuit && (canReadOutput || canReadErrors)) {
390 		FD_ZERO(&readSet);
391 		if (canReadOutput) {
392 			FD_SET(out, &readSet);
393 		}
394 		if (canReadErrors) {
395 			FD_SET(err, &readSet);
396 		}
397 
398 		int result = select(maxfd + 1, &readSet, NULL, NULL, &timeout);
399 		if (result == -1 && errno == EINTR)
400 			continue;
401 		if (result == 0) {
402 			// timeout, but meanwhile fMustQuit was changed maybe...
403 			continue;
404 		}
405 		if (result < 0) {
406 			perror("select():");
407 			message.MakeEmpty();
408 			message.what = MSG_REPORT_ERROR;
409 			message.AddString("error", strerror(errno));
410 			fTarget.SendMessage(&message);
411 			break;
412 		}
413 
414 		if (canReadOutput && FD_ISSET(out, &readSet)) {
415 			if (fgets(line, sizeof(line), output) != NULL) {
416 				// parse grep output
417 				int lineNumber = -1;
418 				int textPos = -1;
419 				sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos);
420 				// printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName,
421 				//		lineNumber, textPos);
422 				if (textPos > 0) {
423 					if (strcmp(fileName, currentFileName) != 0) {
424 						fTarget.SendMessage(&message);
425 
426 						strncpy(currentFileName, fileName,
427 							sizeof(currentFileName));
428 
429 						message.MakeEmpty();
430 						message.what = MSG_REPORT_RESULT;
431 						message.AddString("filename", fileName);
432 
433 						BEntry entry(fileName);
434 						entry_ref ref;
435 						entry.GetRef(&ref);
436 						message.AddRef("ref", &ref);
437 					}
438 
439 					char* text = &line[strlen(fileName)+1];
440 					// printf("[%s] %s", fileName, text);
441 					if (fEncoding > 0) {
442 						char* tempdup = strdup_to_utf8(fEncoding, text,
443 							strlen(text));
444 						message.AddString("text", tempdup);
445 						free(tempdup);
446 					} else {
447 						message.AddString("text", text);
448 					}
449 					message.AddInt32("line", lineNumber);
450 				}
451 			} else {
452 				canReadOutput = false;
453 			}
454 		}
455 		if (canReadErrors && FD_ISSET(err, &readSet)) {
456 			if (fgets(line, sizeof(line), errors) != NULL) {
457 				// printf("ERROR: %s", line);
458 				if (message.HasString("text"))
459 					fTarget.SendMessage(&message);
460 				currentFileName[0] = '\0';
461 
462 				message.MakeEmpty();
463 				message.what = MSG_REPORT_ERROR;
464 				message.AddString("error", line);
465 				fTarget.SendMessage(&message);
466 			} else {
467 				canReadErrors = false;
468 			}
469 		}
470 	}
471 
472 	// send last pending message, if any
473 	if (message.HasString("text"))
474 		fTarget.SendMessage(&message);
475 
476 	printf("Done.\n");
477 	fclose(output);
478 	fclose(errors);
479 
480 	close(out);
481 	close(err);
482 
483 	fMustQuit = true;
484 	int32 exitValue;
485 	wait_for_thread(xargsThread, &exitValue);
486 	wait_for_thread(writerThread, &exitValue);
487 
488 	message.MakeEmpty();
489 	message.what = MSG_SEARCH_FINISHED;
490 	fTarget.SendMessage(&message);
491 
492 	return 0;
493 }
494 
495 
496 void
497 Grepper::_SetPattern(const char* src)
498 {
499 	if (src == NULL)
500 		return;
501 
502 	fPattern = strdup(src);
503 }
504 
505 
506 bool
507 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
508 {
509 	char* copy = strdup(buffer);
510 	char* start = buffer;
511 	uint32 len = strlen(copy);
512 	bool result = true;
513 	for (uint32 count = 0; count < len; ++count) {
514 		if (copy[count] == '\'' || copy[count] == '\\'
515 			|| copy[count] == ' ' || copy[count] == '\n'
516 			|| copy[count] == '"')
517 			*buffer++ = '\\';
518 		if (buffer - start == bufferSize - 1) {
519 			result = false;
520 			break;
521 		}
522 		*buffer++ = copy[count];
523 	}
524 	*buffer = '\0';
525 	free(copy);
526 	return result;
527 }
528