xref: /haiku/src/system/runtime_loader/runtime_loader.cpp (revision 2b76973fa2401f7a5edf68e6470f3d3210cbcff3)
1 /*
2  * Copyright 2005-2009, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2002, Manuel J. Petit. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include "runtime_loader_private.h"
11 
12 #include <string.h>
13 #include <stdlib.h>
14 #include <sys/stat.h>
15 
16 #include <algorithm>
17 
18 #include <ByteOrder.h>
19 
20 #include <directories.h>
21 #include <image_defs.h>
22 #include <syscalls.h>
23 #include <user_runtime.h>
24 #include <vm_defs.h>
25 
26 #include "elf_symbol_lookup.h"
27 
28 
29 struct user_space_program_args *gProgramArgs;
30 void *__gCommPageAddress;
31 
32 
33 static const char *
34 search_path_for_type(image_type type)
35 {
36 	const char *path = NULL;
37 
38 	// TODO: The *PATH variables should not include the standard system paths.
39 	// Instead those paths should always be used after the directories specified
40 	// via the variables.
41 	switch (type) {
42 		case B_APP_IMAGE:
43 			path = getenv("PATH");
44 			break;
45 		case B_LIBRARY_IMAGE:
46 			path = getenv("LIBRARY_PATH");
47 			break;
48 		case B_ADD_ON_IMAGE:
49 			path = getenv("ADDON_PATH");
50 			break;
51 
52 		default:
53 			return NULL;
54 	}
55 
56 	if (path != NULL)
57 		return path;
58 
59 	// The environment variables may not have been set yet - in that case,
60 	// we're returning some useful defaults.
61 	// Since the kernel does not set any variables, this is also needed
62 	// to start the root shell.
63 
64 	// TODO: The user specific paths should not be used by default.
65 	switch (type) {
66 		case B_APP_IMAGE:
67 			return kUserNonpackagedBinDirectory
68 				":" kUserBinDirectory
69 						// TODO: Remove!
70 				":" kSystemNonpackagedBinDirectory
71 				":" kGlobalBinDirectory
72 				":" kSystemAppsDirectory
73 				":" kSystemPreferencesDirectory;
74 
75 		case B_LIBRARY_IMAGE:
76 			return kAppLocalLibDirectory
77 				":" kUserNonpackagedLibDirectory
78 				":" kUserLibDirectory
79 					// TODO: Remove!
80 				":" kSystemNonpackagedLibDirectory
81 				":" kSystemLibDirectory;
82 
83 		case B_ADD_ON_IMAGE:
84 			return kAppLocalAddonsDirectory
85 				":" kUserNonpackagedAddonsDirectory
86 				":" kUserAddonsDirectory
87 					// TODO: Remove!
88 				":" kSystemNonpackagedAddonsDirectory
89 				":" kSystemAddonsDirectory;
90 
91 		default:
92 			return NULL;
93 	}
94 }
95 
96 
97 static int
98 try_open_executable(const char *dir, int dirLength, const char *name,
99 	const char *programPath, const char *abiSpecificSubDir, char *path,
100 	size_t pathLength)
101 {
102 	size_t nameLength = strlen(name);
103 	struct stat stat;
104 	status_t status;
105 
106 	// construct the path
107 	if (dirLength > 0) {
108 		char *buffer = path;
109 		size_t subDirLen = 0;
110 
111 		if (programPath == NULL)
112 			programPath = gProgramArgs->program_path;
113 
114 		if (dirLength >= 2 && strncmp(dir, "%A", 2) == 0) {
115 			// Replace %A with current app folder path (of course,
116 			// this must be the first part of the path)
117 			char *lastSlash = strrchr(programPath, '/');
118 			int bytesCopied;
119 
120 			// copy what's left (when the application name is removed)
121 			if (lastSlash != NULL) {
122 				strlcpy(buffer, programPath,
123 					std::min((long)pathLength, lastSlash + 1 - programPath));
124 			} else
125 				strlcpy(buffer, ".", pathLength);
126 
127 			bytesCopied = strlen(buffer);
128 			buffer += bytesCopied;
129 			pathLength -= bytesCopied;
130 			dir += 2;
131 			dirLength -= 2;
132 		} else if (abiSpecificSubDir != NULL) {
133 			// We're looking for a library or an add-on and the executable has
134 			// not been compiled with a compiler using the same ABI as the one
135 			// the OS has been built with. Thus we only look in subdirs
136 			// specific to that ABI.
137 			subDirLen = strlen(abiSpecificSubDir) + 1;
138 		}
139 
140 		if (dirLength + 1 + subDirLen + nameLength >= pathLength)
141 			return B_NAME_TOO_LONG;
142 
143 		memcpy(buffer, dir, dirLength);
144 		buffer[dirLength] = '/';
145 		if (subDirLen > 0) {
146 			memcpy(buffer + dirLength + 1, abiSpecificSubDir, subDirLen - 1);
147 			buffer[dirLength + subDirLen] = '/';
148 		}
149 		strcpy(buffer + dirLength + 1 + subDirLen, name);
150 	} else {
151 		if (nameLength >= pathLength)
152 			return B_NAME_TOO_LONG;
153 
154 		strcpy(path + dirLength + 1, name);
155 	}
156 
157 	TRACE(("runtime_loader: try_open_container(): %s\n", path));
158 
159 	// Test if the target is a symbolic link, and correct the path in this case
160 
161 	status = _kern_read_stat(-1, path, false, &stat, sizeof(struct stat));
162 	if (status < B_OK)
163 		return status;
164 
165 	if (S_ISLNK(stat.st_mode)) {
166 		char buffer[PATH_MAX];
167 		size_t length = PATH_MAX - 1;
168 		char *lastSlash;
169 
170 		// it's a link, indeed
171 		status = _kern_read_link(-1, path, buffer, &length);
172 		if (status < B_OK)
173 			return status;
174 		buffer[length] = '\0';
175 
176 		lastSlash = strrchr(path, '/');
177 		if (buffer[0] != '/' && lastSlash != NULL) {
178 			// relative path
179 			strlcpy(lastSlash + 1, buffer, lastSlash + 1 - path + pathLength);
180 		} else
181 			strlcpy(path, buffer, pathLength);
182 	}
183 
184 	return _kern_open(-1, path, O_RDONLY, 0);
185 }
186 
187 
188 static int
189 search_executable_in_path_list(const char *name, const char *pathList,
190 	int pathListLen, const char *programPath, const char *abiSpecificSubDir,
191 	char *pathBuffer, size_t pathBufferLength)
192 {
193 	const char *pathListEnd = pathList + pathListLen;
194 	status_t status = B_ENTRY_NOT_FOUND;
195 
196 	TRACE(("runtime_loader: search_container_in_path_list() %s in %.*s\n", name,
197 		pathListLen, pathList));
198 
199 	while (pathListLen > 0) {
200 		const char *pathEnd = pathList;
201 		int fd;
202 
203 		// find the next ':' or run till the end of the string
204 		while (pathEnd < pathListEnd && *pathEnd != ':')
205 			pathEnd++;
206 
207 		fd = try_open_executable(pathList, pathEnd - pathList, name,
208 			programPath, abiSpecificSubDir, pathBuffer, pathBufferLength);
209 		if (fd >= 0) {
210 			// see if it's a dir
211 			struct stat stat;
212 			status = _kern_read_stat(fd, NULL, true, &stat, sizeof(struct stat));
213 			if (status == B_OK) {
214 				if (!S_ISDIR(stat.st_mode))
215 					return fd;
216 				status = B_IS_A_DIRECTORY;
217 			}
218 			_kern_close(fd);
219 		}
220 
221 		pathListLen = pathListEnd - pathEnd - 1;
222 		pathList = pathEnd + 1;
223 	}
224 
225 	return status;
226 }
227 
228 
229 int
230 open_executable(char *name, image_type type, const char *rpath,
231 	const char *programPath, const char *abiSpecificSubDir)
232 {
233 	char buffer[PATH_MAX];
234 	int fd = B_ENTRY_NOT_FOUND;
235 
236 	if (strchr(name, '/')) {
237 		// the name already contains a path, we don't have to search for it
238 		fd = _kern_open(-1, name, O_RDONLY, 0);
239 		if (fd >= 0 || type == B_APP_IMAGE)
240 			return fd;
241 
242 		// can't search harder an absolute path add-on name!
243 		if (type == B_ADD_ON_IMAGE && name[0] == '/')
244 			return fd;
245 
246 		// Even though ELF specs don't say this, we give shared libraries
247 		// and relative path based add-ons another chance and look
248 		// them up in the usual search paths - at
249 		// least that seems to be what BeOS does, and since it doesn't hurt...
250 		if (type == B_LIBRARY_IMAGE) {
251 			// For library (but not add-on), strip any path from name.
252 			// Relative path of add-on is kept.
253 			const char* paths = strrchr(name, '/') + 1;
254 			memmove(name, paths, strlen(paths) + 1);
255 		}
256 	}
257 
258 	// try rpath (DT_RPATH)
259 	if (rpath != NULL) {
260 		// It consists of a colon-separated search path list. Optionally a
261 		// second search path list follows, separated from the first by a
262 		// semicolon.
263 		const char *semicolon = strchr(rpath, ';');
264 		const char *firstList = (semicolon ? rpath : NULL);
265 		const char *secondList = (semicolon ? semicolon + 1 : rpath);
266 			// If there is no ';', we set only secondList to simplify things.
267 		if (firstList) {
268 			fd = search_executable_in_path_list(name, firstList,
269 				semicolon - firstList, programPath, NULL, buffer,
270 				sizeof(buffer));
271 		}
272 		if (fd < 0) {
273 			fd = search_executable_in_path_list(name, secondList,
274 				strlen(secondList), programPath, NULL, buffer, sizeof(buffer));
275 		}
276 	}
277 
278 	// If not found yet, let's evaluate the system path variables to find the
279 	// shared object.
280 	if (fd < 0) {
281 		if (const char *paths = search_path_for_type(type)) {
282 			fd = search_executable_in_path_list(name, paths, strlen(paths),
283 				programPath, abiSpecificSubDir, buffer, sizeof(buffer));
284 		}
285 	}
286 
287 	if (fd >= 0) {
288 		// we found it, copy path!
289 		TRACE(("runtime_loader: open_executable(%s): found at %s\n", name, buffer));
290 		strlcpy(name, buffer, PATH_MAX);
291 	}
292 
293 	return fd;
294 }
295 
296 
/*!
	Applies haiku-specific fixes to a shebang line.

	\a invoker is the NUL-terminated text following the "#!" marker. If its
	first whitespace-delimited word is exactly "/usr/bin/env", the leading
	"/usr" is removed in place, turning the command into "/bin/env". Leading
	whitespace and everything after the command are left untouched.
*/
static void
fixup_shebang(char *invoker)
{
	static const char kUsrBinEnv[] = "/usr/bin/env";
	const size_t kUsrBinEnvLength = sizeof(kUsrBinEnv) - 1;

	// skip leading whitespace
	char *current = invoker;
	while (*current == ' ' || *current == '\t')
		++current;

	// find the end of the command word
	char *commandStart = current;
	while (*current != ' ' && *current != '\t' && *current != '\0')
		++current;

	// replace /usr/bin/env with /bin/env
	// The length must match exactly: comparing only as many bytes as the
	// command is long would also accept any prefix of "/usr/bin/env"
	// (including an empty command, for which commandStart + 4 lies beyond the
	// string's terminator).
	if ((size_t)(current - commandStart) == kUsrBinEnvLength
		&& memcmp(commandStart, kUsrBinEnv, kUsrBinEnvLength) == 0) {
		// drop the 4 bytes of "/usr", moving the rest including the NUL
		memmove(commandStart, commandStart + 4, strlen(commandStart + 4) + 1);
	}
}
317 
318 
319 /*!
320 	Tests if there is an executable file at the provided path. It will
321 	also test if the file has a valid ELF header or is a shell script.
322 	Even if the runtime loader does not need to be able to deal with
323 	both types, the caller will give scripts a proper treatment.
324 */
325 status_t
326 test_executable(const char *name, char *invoker)
327 {
328 	char path[B_PATH_NAME_LENGTH];
329 	char buffer[B_FILE_NAME_LENGTH];
330 		// must be large enough to hold the ELF header
331 	status_t status;
332 	ssize_t length;
333 	int fd;
334 
335 	if (name == NULL)
336 		return B_BAD_VALUE;
337 
338 	strlcpy(path, name, sizeof(path));
339 
340 	fd = open_executable(path, B_APP_IMAGE, NULL, NULL, NULL);
341 	if (fd < B_OK)
342 		return fd;
343 
344 	// see if it's executable at all
345 	status = _kern_access(-1, path, X_OK, false);
346 	if (status != B_OK)
347 		goto out;
348 
349 	// read and verify the ELF header
350 
351 	length = _kern_read(fd, 0, buffer, sizeof(buffer));
352 	if (length < 0) {
353 		status = length;
354 		goto out;
355 	}
356 
357 	status = elf_verify_header(buffer, length);
358 	if (status == B_NOT_AN_EXECUTABLE) {
359 		// test for shell scripts
360 		if (!strncmp(buffer, "#!", 2)) {
361 			char *end;
362 			buffer[min_c((size_t)length, sizeof(buffer) - 1)] = '\0';
363 
364 			end = strchr(buffer, '\n');
365 			if (end == NULL) {
366 				status = E2BIG;
367 				goto out;
368 			} else
369 				end[0] = '\0';
370 
371 			if (invoker) {
372 				strcpy(invoker, buffer + 2);
373 				fixup_shebang(invoker);
374 			}
375 
376 			status = B_OK;
377 		}
378 	} else if (status == B_OK) {
379 		elf_ehdr *elfHeader = (elf_ehdr *)buffer;
380 		if (elfHeader->e_entry == 0) {
381 			// we don't like to open shared libraries
382 			status = B_NOT_AN_EXECUTABLE;
383 		} else if (invoker)
384 			invoker[0] = '\0';
385 	}
386 
387 out:
388 	_kern_close(fd);
389 	return status;
390 }
391 
392 
393 static bool
394 determine_x86_abi(int fd, const Elf32_Ehdr& elfHeader, bool& _isGcc2)
395 {
396 	// Unless we're a little-endian CPU, don't bother. We're not x86, so it
397 	// doesn't matter all that much whether we can determine the correct gcc
398 	// ABI. This saves the code below from having to deal with endianess
399 	// conversion.
400 #if B_HOST_IS_LENDIAN
401 
402 	// Since we don't want to load the complete image, we can't use the
403 	// functions that normally determine the Haiku version and ABI. Instead
404 	// we'll load the symbol and string tables and resolve the ABI symbol
405 	// manually.
406 
407 	// map the file into memory
408 	struct stat st;
409 	if (_kern_read_stat(fd, NULL, true, &st, sizeof(st)) != B_OK)
410 		return false;
411 
412 	void* fileBaseAddress;
413 	area_id area = _kern_map_file("mapped file", &fileBaseAddress,
414 		B_ANY_ADDRESS, st.st_size, B_READ_AREA, REGION_NO_PRIVATE_MAP, false,
415 		fd, 0);
416 	if (area < 0)
417 		return false;
418 
419 	struct AreaDeleter {
420 		AreaDeleter(area_id area)
421 			:
422 			fArea(area)
423 		{
424 		}
425 
426 		~AreaDeleter()
427 		{
428 			_kern_delete_area(fArea);
429 		}
430 
431 	private:
432 		area_id	fArea;
433 	} areaDeleter(area);
434 
435 	// get the section headers
436 	if (elfHeader.e_shoff == 0 || elfHeader.e_shentsize < sizeof(Elf32_Shdr))
437 		return false;
438 
439 	size_t sectionHeadersSize = elfHeader.e_shentsize * elfHeader.e_shnum;
440 	if (elfHeader.e_shoff + (off_t)sectionHeadersSize > st.st_size)
441 		return false;
442 
443 	void* sectionHeaders = (uint8*)fileBaseAddress + elfHeader.e_shoff;
444 
445 	// find the sections we need
446 	uint32* symbolHash = NULL;
447 	uint32 symbolHashSize = 0;
448 	uint32 symbolHashChainSize = 0;
449 	Elf32_Sym* symbolTable = NULL;
450 	uint32 symbolTableSize = 0;
451 	const char* stringTable = NULL;
452 	off_t stringTableSize = 0;
453 
454 	for (int32 i = 0; i < elfHeader.e_shnum; i++) {
455 		Elf32_Shdr* sectionHeader
456 			= (Elf32_Shdr*)((uint8*)sectionHeaders + i * elfHeader.e_shentsize);
457 		if ((off_t)sectionHeader->sh_offset + (off_t)sectionHeader->sh_size
458 				> st.st_size) {
459 			continue;
460 		}
461 
462 		void* sectionAddress = (uint8*)fileBaseAddress
463 			+ sectionHeader->sh_offset;
464 
465 		switch (sectionHeader->sh_type) {
466 			case SHT_HASH:
467 				symbolHash = (uint32*)sectionAddress;
468 				if (sectionHeader->sh_size < (off_t)sizeof(symbolHash[0]))
469 					return false;
470 				symbolHashSize = symbolHash[0];
471 				symbolHashChainSize
472 					= sectionHeader->sh_size / sizeof(symbolHash[0]);
473 				if (symbolHashChainSize < symbolHashSize + 2)
474 					return false;
475 				symbolHashChainSize -= symbolHashSize + 2;
476 				break;
477 			case SHT_DYNSYM:
478 				symbolTable = (Elf32_Sym*)sectionAddress;
479 				symbolTableSize = sectionHeader->sh_size;
480 				break;
481 			case SHT_STRTAB:
482 				// .shstrtab has the same type as .dynstr, but it isn't loaded
483 				// into memory.
484 				if (sectionHeader->sh_addr == 0)
485 					continue;
486 				stringTable = (const char*)sectionAddress;
487 				stringTableSize = (off_t)sectionHeader->sh_size;
488 				break;
489 			default:
490 				continue;
491 		}
492 	}
493 
494 	if (symbolHash == NULL || symbolTable == NULL || stringTable == NULL)
495 		return false;
496 	uint32 symbolCount
497 		= std::min(symbolTableSize / (uint32)sizeof(Elf32_Sym),
498 			symbolHashChainSize);
499 	if (symbolCount < symbolHashSize)
500 		return false;
501 
502 	// look up the ABI symbol
503 	const char* name = B_SHARED_OBJECT_HAIKU_ABI_VARIABLE_NAME;
504 	size_t nameLength = strlen(name);
505 	uint32 bucket = elf_hash(name) % symbolHashSize;
506 
507 	for (uint32 i = symbolHash[bucket + 2]; i < symbolCount && i != STN_UNDEF;
508 		i = symbolHash[2 + symbolHashSize + i]) {
509 		Elf32_Sym* symbol = symbolTable + i;
510 		if (symbol->st_shndx != SHN_UNDEF
511 			&& ((symbol->Bind() == STB_GLOBAL) || (symbol->Bind() == STB_WEAK))
512 			&& symbol->Type() == STT_OBJECT
513 			&& (off_t)symbol->st_name + (off_t)nameLength < stringTableSize
514 			&& strcmp(stringTable + symbol->st_name, name) == 0) {
515 			if (symbol->st_value > 0 && symbol->st_size >= sizeof(uint32)
516 				&& symbol->st_shndx < elfHeader.e_shnum) {
517 				Elf32_Shdr* sectionHeader = (Elf32_Shdr*)((uint8*)sectionHeaders
518 					+ symbol->st_shndx * elfHeader.e_shentsize);
519 				if (symbol->st_value >= sectionHeader->sh_addr
520 					&& symbol->st_value
521 						<= sectionHeader->sh_addr + sectionHeader->sh_size) {
522 					off_t fileOffset = symbol->st_value - sectionHeader->sh_addr
523 						+ sectionHeader->sh_offset;
524 					if (fileOffset + (off_t)sizeof(uint32) <= st.st_size) {
525 						uint32 abi
526 							= *(uint32*)((uint8*)fileBaseAddress + fileOffset);
527 						_isGcc2 = (abi & B_HAIKU_ABI_MAJOR)
528 							== B_HAIKU_ABI_GCC_2;
529 						return true;
530 					}
531 				}
532 			}
533 
534 			return false;
535 		}
536 	}
537 
538 	// ABI symbol not found. That means the object pre-dates its introduction
539 	// in Haiku. So this is most likely gcc 2. We don't fall back to reading
540 	// the comment sections to verify.
541 	_isGcc2 = true;
542 	return true;
543 #else	// not little endian
544 	return false;
545 #endif
546 }
547 
548 
549 static status_t
550 get_executable_architecture(int fd, const char** _architecture)
551 {
552 	// Read the ELF header. We read the 32 bit header. Generally the e_machine
553 	// field is the last one that interests us and the 64 bit header is still
554 	// identical at that point.
555 	Elf32_Ehdr elfHeader;
556 	ssize_t bytesRead = _kern_read(fd, 0, &elfHeader, sizeof(elfHeader));
557 	if (bytesRead < 0)
558 		return bytesRead;
559 	if ((size_t)bytesRead != sizeof(elfHeader))
560 		return B_NOT_AN_EXECUTABLE;
561 
562 	// check whether this is indeed an ELF file
563 	if (memcmp(elfHeader.e_ident, ELF_MAGIC, 4) != 0)
564 		return B_NOT_AN_EXECUTABLE;
565 
566 	// check the architecture
567 	uint16 machine = elfHeader.e_machine;
568 	if ((elfHeader.e_ident[EI_DATA] == ELFDATA2LSB) != (B_HOST_IS_LENDIAN != 0))
569 		machine = (machine >> 8) | (machine << 8);
570 
571 	const char* architecture = NULL;
572 	switch (machine) {
573 		case EM_386:
574 		case EM_486:
575 		{
576 			bool isGcc2;
577 			if (determine_x86_abi(fd, elfHeader, isGcc2) && isGcc2)
578 				architecture = "x86_gcc2";
579 			else
580 				architecture = "x86";
581 			break;
582 		}
583 		case EM_68K:
584 			architecture = "m68k";
585 			break;
586 		case EM_PPC:
587 			architecture = "ppc";
588 			break;
589 		case EM_ARM:
590 			architecture = "arm";
591 			break;
592 		case EM_X86_64:
593 			architecture = "x86_64";
594 			break;
595 	}
596 
597 	if (architecture == NULL)
598 		return B_NOT_SUPPORTED;
599 
600 	*_architecture = architecture;
601 	return B_OK;
602 }
603 
604 
605 status_t
606 get_executable_architecture(const char* path, const char** _architecture)
607 {
608 	int fd = _kern_open(-1, path, O_RDONLY, 0);
609 	if (fd < 0)
610 		return fd;
611 
612 	status_t error = get_executable_architecture(fd, _architecture);
613 
614 	_kern_close(fd);
615 	return error;
616 }
617 
618 
619 /*!
620 	This is the main entry point of the runtime loader as
621 	specified by its ld-script.
622 */
623 int
624 runtime_loader(void* _args, void* commpage)
625 {
626 	void *entry = NULL;
627 	int returnCode;
628 
629 	gProgramArgs = (struct user_space_program_args *)_args;
630 	__gCommPageAddress = commpage;
631 
632 	// Relocate the args and env arrays -- they are organized in a contiguous
633 	// buffer which the kernel just copied into user space without adjusting the
634 	// pointers.
635 	{
636 		int32 i;
637 		addr_t relocationOffset = 0;
638 
639 		if (gProgramArgs->arg_count > 0)
640 			relocationOffset = (addr_t)gProgramArgs->args[0];
641 		else if (gProgramArgs->env_count > 0)
642 			relocationOffset = (addr_t)gProgramArgs->env[0];
643 
644 		// That's basically: <new buffer address> - <old buffer address>.
645 		// It looks a little complicated, since we don't have the latter one at
646 		// hand and thus need to reconstruct it (<first string pointer> -
647 		// <arguments + environment array sizes>).
648 		relocationOffset = (addr_t)gProgramArgs->args - relocationOffset
649 			+ (gProgramArgs->arg_count + gProgramArgs->env_count + 2)
650 				* sizeof(char*);
651 
652 		for (i = 0; i < gProgramArgs->arg_count; i++)
653 			gProgramArgs->args[i] += relocationOffset;
654 
655 		for (i = 0; i < gProgramArgs->env_count; i++)
656 			gProgramArgs->env[i] += relocationOffset;
657 	}
658 
659 #if DEBUG_RLD
660 	close(0); open("/dev/console", 0); /* stdin   */
661 	close(1); open("/dev/console", 0); /* stdout  */
662 	close(2); open("/dev/console", 0); /* stderr  */
663 #endif
664 
665 	if (heap_init() < B_OK)
666 		return 1;
667 
668 	rldexport_init();
669 	rldelf_init();
670 
671 	load_program(gProgramArgs->program_path, &entry);
672 
673 	if (entry == NULL)
674 		return -1;
675 
676 	// call the program entry point (usually _start())
677 	returnCode = ((int (*)(int, void *, void *))entry)(gProgramArgs->arg_count,
678 		gProgramArgs->args, gProgramArgs->env);
679 
680 	terminate_program();
681 
682 	return returnCode;
683 }
684