xref: /haiku/src/apps/text_search/Grepper.cpp (revision 54624bda43f13312a491fe1c91d22834be3374f5)
1 /*
2  * Copyright (c) 1998-2007 Matthijs Hollemans
3  * Copyright (c) 2008-2017, Haiku Inc.
4  * Distributed under the terms of the MIT license.
5  *
6  * Authors:
7  *      Matthijs Holleman
8  *      Stephan Aßmus <superstippi@gmx.de>
9  *      Philippe Houdoin
10  */
11 
12 #include "Grepper.h"
13 
14 #include <errno.h>
15 #include <new>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <sys/time.h>
21 
22 #include <Catalog.h>
23 #include <Directory.h>
24 #include <image.h>
25 #include <List.h>
26 #include <Locale.h>
27 #include <NodeInfo.h>
28 #include <OS.h>
29 #include <Path.h>
30 #include <UTF8.h>
31 
32 #include "FileIterator.h"
33 #include "Model.h"
34 
35 #undef B_TRANSLATION_CONTEXT
36 #define B_TRANSLATION_CONTEXT "Grepper"
37 
38 
// End-of-input marker for xargs: it is passed to xargs via its "-E" option
// and written as the last line of xargs' stdin once all paths were sent.
const char* kEOFTag = "//EOF";
40 
41 
42 using std::nothrow;
43 
44 char*
45 strdup_to_utf8(uint32 encode, const char* src, int32 length)
46 {
47 	int32 srcLen = length;
48 	int32 dstLen = 2 * srcLen;
49 	// TODO: stippi: Why the duplicate copy? Why not just return
50 	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
51 	// enough space? Check return value of convert_to_utf8 and keep
52 	// converting if it didn't fit?
53 	char* dst = new (nothrow) char[dstLen + 1];
54 	if (dst == NULL)
55 		return NULL;
56 	int32 cookie = 0;
57 	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
58 	dst[dstLen] = '\0';
59 	char* dup = strdup(dst);
60 	delete[] dst;
61 	if (srcLen != length) {
62 		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
63 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
64 	}
65 	return dup;
66 }
67 
68 
69 char*
70 strdup_from_utf8(uint32 encode, const char* src, int32 length)
71 {
72 	int32 srcLen = length;
73 	int32 dstLen = srcLen;
74 	char* dst = new (nothrow) char[dstLen + 1];
75 	if (dst == NULL)
76 		return NULL;
77 	int32 cookie = 0;
78 	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
79 	// TODO: See above.
80 	dst[dstLen] = '\0';
81 	char* dup = strdup(dst);
82 	delete[] dst;
83 	if (srcLen != length) {
84 		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
85 			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
86 	}
87 	return dup;
88 }
89 
90 
91 Grepper::Grepper(const char* pattern, const Model* model,
92 		const BHandler* target, FileIterator* iterator)
93 	: fPattern(NULL),
94 	  fTarget(target),
95 	  fRegularExpression(model->fRegularExpression),
96 	  fCaseSensitive(model->fCaseSensitive),
97 	  fEncoding(model->fEncoding),
98 
99 	  fIterator(iterator),
100 	  fRunnerThreadId(-1),
101 	  fXargsInput(-1),
102 	  fMustQuit(false)
103 {
104 	if (fEncoding > 0) {
105 		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
106 		_SetPattern(src);
107 		free(src);
108 	} else
109 		_SetPattern(pattern);
110 }
111 
112 
113 Grepper::~Grepper()
114 {
115 	Cancel();
116 	free(fPattern);
117 	delete fIterator;
118 }
119 
120 
121 bool
122 Grepper::IsValid() const
123 {
124 	if (fIterator == NULL || !fIterator->IsValid())
125 		return false;
126 	return fPattern != NULL;
127 }
128 
129 
130 void
131 Grepper::Start()
132 {
133 	Cancel();
134 
135 	fMustQuit = false;
136 	fRunnerThreadId = spawn_thread(
137 		_SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this);
138 
139 	resume_thread(fRunnerThreadId);
140 }
141 
142 
143 void
144 Grepper::Cancel()
145 {
146 	if (fRunnerThreadId < 0)
147 		return;
148 
149 	fMustQuit = true;
150 	int32 exitValue;
151 	wait_for_thread(fRunnerThreadId, &exitValue);
152 	fRunnerThreadId = -1;
153 }
154 
155 
156 // #pragma mark - private
157 
158 
159 int32
160 Grepper::_SpawnWriterThread(void* cookie)
161 {
162 	Grepper* self = static_cast<Grepper*>(cookie);
163 	return self->_WriterThread();
164 }
165 
166 
167 int32
168 Grepper::_WriterThread()
169 {
170 	BMessage message;
171 	char fileName[B_PATH_NAME_LENGTH*2];
172 	int count = 0;
173 	bigtime_t lastProgressReportTime = 0, now;
174 
175 	printf("paths_writer started.\n");
176 
177 	while (!fMustQuit && fIterator->GetNextName(fileName)) {
178 		BEntry entry(fileName);
179 		entry_ref ref;
180 		entry.GetRef(&ref);
181 		if (!entry.Exists()) {
182 			if (fIterator->NotifyNegatives()) {
183 				message.MakeEmpty();
184 				message.what = MSG_REPORT_RESULT;
185 				message.AddString("filename", fileName);
186 				message.AddRef("ref", &ref);
187 				fTarget.SendMessage(&message);
188 			}
189 			continue;
190 		}
191 
192 		if (!_EscapeSpecialChars(fileName, sizeof(fileName))) {
193 			char tempString[B_PATH_NAME_LENGTH + 32];
194 			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
195 				"the filename."), fileName);
196 			message.MakeEmpty();
197 			message.what = MSG_REPORT_ERROR;
198 			message.AddString("error", tempString);
199 			fTarget.SendMessage(&message);
200 			continue;
201 		}
202 
203 		count++;
204 
205 		// file exists, send it to xargs
206 		write(fXargsInput, fileName, strlen(fileName));
207 		write(fXargsInput, "\n", 1);
208 
209 		now = system_time();
210 		// to avoid message flood,
211 		// report progress no more than 20 times per second
212 		if (now - lastProgressReportTime > 50000) {
213 			message.MakeEmpty();
214 			message.what = MSG_REPORT_FILE_NAME;
215 			message.AddString("filename", fileName);
216 			fTarget.SendMessage(&message);
217 			lastProgressReportTime = now;
218 		}
219 	}
220 
221 	write(fXargsInput, kEOFTag, strlen(kEOFTag));
222 	write(fXargsInput, "\n", 1);
223 	close(fXargsInput);
224 
225 	printf("paths_writer stopped (%d paths).\n", count);
226 
227 	return 0;
228 }
229 
230 
231 int32
232 Grepper::_SpawnRunnerThread(void* cookie)
233 {
234 	Grepper* self = static_cast<Grepper*>(cookie);
235 	return self->_RunnerThread();
236 }
237 
238 
239 int32
240 Grepper::_RunnerThread()
241 {
242 	BMessage message;
243 	char fileName[B_PATH_NAME_LENGTH];
244 
245 	const char* argv[32];
246 	int argc = 0;
247 	argv[argc++] = "xargs";
248 
249 	// can't use yet the --null mode due to pipe issue
250 	// the xargs stdin input pipe closure is not detected
251 	// by xargs. Instead, we use eof-string mode
252 
253 	// argv[argc++] = "--null";
254 	argv[argc++] = "-E";
255 	argv[argc++] = kEOFTag;
256 
257 	// Enable parallel mode
258 	// Retrieve cpu count for to parallel xargs via -P argument
259 	char cpuCount[8];
260 	system_info sys_info;
261 	get_system_info(&sys_info);
262 	snprintf(cpuCount, sizeof(cpuCount), "%" B_PRIu32, sys_info.cpu_count);
263 	argv[argc++] = "-P";
264 	argv[argc++] = cpuCount;
265 
266 	// grep command driven by xargs dispatcher
267 	argv[argc++] = "grep";
268 	argv[argc++] = "-n"; // need matching line(s) number(s)
269 	argv[argc++] = "-H"; // need filename prefix
270 	if (! fCaseSensitive)
271 		argv[argc++] = "-i";
272 	if (! fRegularExpression)
273 		argv[argc++] = "-F";	 // no a regexp: force fixed string,
274 	argv[argc++] = fPattern;
275 	argv[argc] = NULL;
276 
277 	// prepare xargs to run with stdin, stdout and stderr pipes
278 
279 	int oldStdIn, oldStdOut, oldStdErr;
280 	oldStdIn  = dup(STDIN_FILENO);
281 	oldStdOut = dup(STDOUT_FILENO);
282 	oldStdErr = dup(STDERR_FILENO);
283 
284 	int fds[2];
285 	if (pipe(fds) != 0) {
286 		message.MakeEmpty();
287 		message.what = MSG_REPORT_ERROR;
288 		message.AddString("error",
289 			B_TRANSLATE("Failed to open input pipe!"));
290 		fTarget.SendMessage(&message);
291 		return 0;
292 	}
293 	dup2(fds[0], STDIN_FILENO);
294 	close(fds[0]);
295 	fXargsInput = fds[1];	// write to in, appears on command's stdin
296 
297 	if (pipe(fds) != 0) {
298 		close(fXargsInput);
299 		message.MakeEmpty();
300 		message.what = MSG_REPORT_ERROR;
301 		message.AddString("error",
302 			B_TRANSLATE("Failed to open output pipe!"));
303 		fTarget.SendMessage(&message);
304 		return 0;
305 	}
306 	dup2(fds[1], STDOUT_FILENO);
307 	close(fds[1]);
308 	int out = fds[0]; // read from out, taken from command's stdout
309 
310 	if (pipe(fds) != 0) {
311 		close(fXargsInput);
312 		close(out);
313 		message.MakeEmpty();
314 		message.what = MSG_REPORT_ERROR;
315 		message.AddString("error",
316 			B_TRANSLATE("Failed to open errors pipe!"));
317 		fTarget.SendMessage(&message);
318 		return 0;
319 	}
320 	dup2(fds[1], STDERR_FILENO);
321 	close(fds[1]);
322 	int err = fds[0]; // read from err, taken from command's stderr
323 
324 	// "load" xargs tool
325 	thread_id xargsThread = load_image(argc, argv,
326 		const_cast<const char**>(environ));
327 	// xargsThread is suspended after loading
328 
329 	// restore our previous stdin, stdout and stderr
330 	close(STDIN_FILENO);
331 	dup(oldStdIn);
332 	close(oldStdIn);
333 	close(STDOUT_FILENO);
334 	dup(oldStdOut);
335 	close(oldStdOut);
336 	close(STDERR_FILENO);
337 	dup(oldStdErr);
338 	close(oldStdErr);
339 
340 	if (xargsThread < B_OK) {
341 		close(fXargsInput);
342 		close(out);
343 		close(err);
344 		message.MakeEmpty();
345 		message.what = MSG_REPORT_ERROR;
346 		message.AddString("error",
347 			B_TRANSLATE("Failed to start xargs program!"));
348 		fTarget.SendMessage(&message);
349 		return 0;
350 	}
351 
352 	// Listen on xargs's stdout and stderr via select()
353 	printf("Running: ");
354 	for (int i = 0; i < argc; i++) {
355 		printf("%s ", argv[i]);
356 	}
357 	printf("\n");
358 
359 	int fdl[2] = { out, err };
360 	int maxfd = 0;
361 	for (int i = 0; i < 2; i++) {
362 		if (maxfd < fdl[i])
363 			maxfd = fdl[i];
364 	}
365 
366 	fd_set readSet;
367 	struct timeval timeout = { 0, 100000 };
368 	char line[B_PATH_NAME_LENGTH * 2];
369 
370 	FILE* output = fdopen(out, "r");
371 	FILE* errors = fdopen(err, "r");
372 
373 	char currentFileName[B_PATH_NAME_LENGTH];
374 	currentFileName[0] = '\0';
375 	bool canReadOutput, canReadErrors;
376 	canReadOutput = canReadErrors = true;
377 
378 	thread_id writerThread = spawn_thread(_SpawnWriterThread,
379 		"Grep writer", B_LOW_PRIORITY, this);
380 	set_thread_priority(xargsThread, B_LOW_PRIORITY);
381 
382 	// we're ready, let's go!
383 	resume_thread(xargsThread);
384 	resume_thread(writerThread);
385 
386 	while (!fMustQuit && (canReadOutput || canReadErrors)) {
387 		FD_ZERO(&readSet);
388 		if (canReadOutput) {
389 			FD_SET(out, &readSet);
390 		}
391 		if (canReadErrors) {
392 			FD_SET(err, &readSet);
393 		}
394 
395 		int result = select(maxfd + 1, &readSet, NULL, NULL, &timeout);
396 		if (result == -1 && errno == EINTR)
397 			continue;
398 		if (result == 0) {
399 			// timeout, but meanwhile fMustQuit was changed maybe...
400 			continue;
401 		}
402 		if (result < 0) {
403 			perror("select():");
404 			message.MakeEmpty();
405 			message.what = MSG_REPORT_ERROR;
406 			message.AddString("error", strerror(errno));
407 			fTarget.SendMessage(&message);
408 			break;
409 		}
410 
411 		if (canReadOutput && FD_ISSET(out, &readSet)) {
412 			if (fgets(line, sizeof(line), output) != NULL) {
413 				// parse grep output
414 				int lineNumber = -1;
415 				int textPos = -1;
416 				sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos);
417 				// printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName,
418 				//		lineNumber, textPos);
419 				if (textPos > 0) {
420 					if (strcmp(fileName, currentFileName) != 0) {
421 						fTarget.SendMessage(&message);
422 
423 						strncpy(currentFileName, fileName,
424 							sizeof(currentFileName));
425 
426 						message.MakeEmpty();
427 						message.what = MSG_REPORT_RESULT;
428 						message.AddString("filename", fileName);
429 
430 						BEntry entry(fileName);
431 						entry_ref ref;
432 						entry.GetRef(&ref);
433 						message.AddRef("ref", &ref);
434 					}
435 
436 					char* text = &line[strlen(fileName)+1];
437 					// printf("[%s] %s", fileName, text);
438 					if (fEncoding > 0) {
439 						char* tempdup = strdup_to_utf8(fEncoding, text,
440 							strlen(text));
441 						message.AddString("text", tempdup);
442 						free(tempdup);
443 					} else {
444 						message.AddString("text", text);
445 					}
446 					message.AddInt32("line", lineNumber);
447 				}
448 			} else {
449 				canReadOutput = false;
450 			}
451 		}
452 		if (canReadErrors && FD_ISSET(err, &readSet)) {
453 			if (fgets(line, sizeof(line), errors) != NULL) {
454 				// printf("ERROR: %s", line);
455 				if (message.HasString("text"))
456 					fTarget.SendMessage(&message);
457 				currentFileName[0] = '\0';
458 
459 				message.MakeEmpty();
460 				message.what = MSG_REPORT_ERROR;
461 				message.AddString("error", line);
462 				fTarget.SendMessage(&message);
463 			} else {
464 				canReadErrors = false;
465 			}
466 		}
467 	}
468 
469 	// send last pending message, if any
470 	if (message.HasString("text"))
471 		fTarget.SendMessage(&message);
472 
473 	printf("Done.\n");
474 	fclose(output);
475 	fclose(errors);
476 
477 	close(out);
478 	close(err);
479 
480 	fMustQuit = true;
481 	int32 exitValue;
482 	wait_for_thread(xargsThread, &exitValue);
483 	wait_for_thread(writerThread, &exitValue);
484 
485 	message.MakeEmpty();
486 	message.what = MSG_SEARCH_FINISHED;
487 	fTarget.SendMessage(&message);
488 
489 	return 0;
490 }
491 
492 
493 void
494 Grepper::_SetPattern(const char* src)
495 {
496 	if (src == NULL)
497 		return;
498 
499 	fPattern = strdup(src);
500 }
501 
502 
503 bool
504 Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
505 {
506 	char* copy = strdup(buffer);
507 	char* start = buffer;
508 	uint32 len = strlen(copy);
509 	bool result = true;
510 	for (uint32 count = 0; count < len; ++count) {
511 		if (copy[count] == '\'' || copy[count] == '\\'
512 			|| copy[count] == ' ' || copy[count] == '\n'
513 			|| copy[count] == '"')
514 			*buffer++ = '\\';
515 		if (buffer - start == bufferSize - 1) {
516 			result = false;
517 			break;
518 		}
519 		*buffer++ = copy[count];
520 	}
521 	*buffer = '\0';
522 	free(copy);
523 	return result;
524 }
525