xref: /haiku/src/system/kernel/arch/x86/arch_thread.cpp (revision 3904a8dba0df1065db019e58a491c712cdf9cd83)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <arch/thread.h>
11 
12 #include <string.h>
13 
14 #include <arch/user_debugger.h>
15 #include <arch_cpu.h>
16 #include <cpu.h>
17 #include <debug.h>
18 #include <kernel.h>
19 #include <ksignal.h>
20 #include <int.h>
21 #include <team.h>
22 #include <thread.h>
23 #include <tls.h>
24 #include <tracing.h>
25 #include <vm/vm_types.h>
26 #include <vm/VMAddressSpace.h>
27 
28 #include "x86_paging.h"
29 #include "x86_syscalls.h"
30 #include "X86VMTranslationMap.h"
31 
32 
33 //#define TRACE_ARCH_THREAD
34 #ifdef TRACE_ARCH_THREAD
35 #	define TRACE(x) dprintf x
36 #else
37 #	define TRACE(x) ;
38 #endif
39 
40 
#ifdef SYSCALL_TRACING

namespace SyscallTracing {

// Trace entry recorded whenever a syscall is restarted (see
// x86_restart_syscall()); it carries no payload beyond the event itself.
class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};

}

// Allocates a tracing entry; std::nothrow so tracing can never abort the
// kernel path on allocation failure. Expands to nothing without tracing.
#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING
65 
66 
67 // from arch_interrupts.S
68 extern "C" void i386_stack_init(struct farcall *interrupt_stack_offset);
69 extern "C" void i386_restore_frame_from_syscall(struct iframe frame);
70 
71 // from arch_cpu.c
72 extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
73 extern bool gHasSSE;
74 
// Template arch state copied into every new thread by
// arch_thread_init_thread_struct(); captured once in arch_thread_init().
static struct arch_thread sInitialState _ALIGNED(16);
	// the fpu_state must be aligned on a 16 byte boundary, so that fxsave can use it
77 
78 
status_t
arch_thread_init(struct kernel_args *args)
{
	// save one global valid FPU state; it will be copied in the arch dependent
	// part of each new thread

	// clts: clear CR0.TS so the FPU instructions below don't fault;
	// fninit/fnclex: put the FPU into a known, exception-free default state
	asm volatile ("clts; fninit; fnclex;");
	// capture that clean state with whichever save instruction the CPU has
	if (gHasSSE)
		i386_fxsave(sInitialState.fpu_state);
	else
		i386_fnsave(sInitialState.fpu_state);

	return B_OK;
}
93 
94 
95 static struct iframe *
96 find_previous_iframe(struct thread *thread, addr_t frame)
97 {
98 	// iterate backwards through the stack frames, until we hit an iframe
99 	while (frame >= thread->kernel_stack_base
100 		&& frame < thread->kernel_stack_top) {
101 		addr_t previousFrame = *(addr_t*)frame;
102 		if ((previousFrame & ~IFRAME_TYPE_MASK) == 0) {
103 			if (previousFrame == 0)
104 				return NULL;
105 			return (struct iframe*)frame;
106 		}
107 
108 		frame = previousFrame;
109 	}
110 
111 	return NULL;
112 }
113 
114 
115 static struct iframe*
116 get_previous_iframe(struct iframe* frame)
117 {
118 	if (frame == NULL)
119 		return NULL;
120 
121 	return find_previous_iframe(thread_get_current_thread(), frame->ebp);
122 }
123 
124 
125 /*!
126 	Returns the current iframe structure of the running thread.
127 	This function must only be called in a context where it's actually
128 	sure that such iframe exists; ie. from syscalls, but usually not
129 	from standard kernel threads.
130 */
131 static struct iframe*
132 get_current_iframe(void)
133 {
134 	return find_previous_iframe(thread_get_current_thread(), x86_read_ebp());
135 }
136 
137 
138 /*!
139 	\brief Returns the current thread's topmost (i.e. most recent)
140 	userland->kernel transition iframe (usually the first one, save for
141 	interrupts in signal handlers).
142 	\return The iframe, or \c NULL, if there is no such iframe (e.g. when
143 			the thread is a kernel thread).
144 */
145 struct iframe *
146 i386_get_user_iframe(void)
147 {
148 	struct iframe* frame = get_current_iframe();
149 
150 	while (frame != NULL) {
151 		if (IFRAME_IS_USER(frame))
152 			return frame;
153 		frame = get_previous_iframe(frame);
154 	}
155 
156 	return NULL;
157 }
158 
159 
160 /*!	\brief Like i386_get_user_iframe(), just for the given thread.
161 	The thread must not be running and the threads spinlock must be held.
162 */
163 struct iframe *
164 i386_get_thread_user_iframe(struct thread *thread)
165 {
166 	if (thread->state == B_THREAD_RUNNING)
167 		return NULL;
168 
169 	// read %ebp from the thread's stack stored by a pushad
170 	addr_t ebp = thread->arch_info.current_stack.esp[2];
171 
172 	// find the user iframe
173 	struct iframe *frame = find_previous_iframe(thread, ebp);
174 
175 	while (frame != NULL) {
176 		if (IFRAME_IS_USER(frame))
177 			return frame;
178 		frame = get_previous_iframe(frame);
179 	}
180 
181 	return NULL;
182 }
183 
184 
/*!	Public wrapper around the file-local get_current_iframe(); the same
	caveat applies: only call when the current thread actually has an
	iframe on its kernel stack (e.g. from a syscall).
*/
struct iframe *
i386_get_current_iframe(void)
{
	return get_current_iframe();
}
190 
191 
192 void *
193 x86_next_page_directory(struct thread *from, struct thread *to)
194 {
195 	VMAddressSpace* toAddressSpace = to->team->address_space;
196 	if (from->team->address_space == toAddressSpace) {
197 		// don't change the pgdir, same address space
198 		return NULL;
199 	}
200 
201 	if (toAddressSpace == NULL)
202 		toAddressSpace = VMAddressSpace::Kernel();
203 
204 	return i386_translation_map_get_pgdir(toAddressSpace->TranslationMap());
205 }
206 
207 
// Loads the given selector into %fs; used to activate the per-CPU TLS
// segment set up by set_tls_context().
static inline void
set_fs_register(uint32 segment)
{
	asm("movl %0,%%fs" :: "r" (segment));
}
213 
214 
// Points this CPU's TLS GDT slot at \a thread's user TLS area and reloads
// %fs so the new segment base takes effect. Must run on the CPU the thread
// executes on, since each CPU has its own TLS_BASE_SEGMENT slot.
static void
set_tls_context(struct thread *thread)
{
	int entry = smp_get_current_cpu() + TLS_BASE_SEGMENT;

	set_segment_descriptor_base(&gGDT[entry], thread->user_local_storage);
	// selector = (GDT index << 3) with user privilege level
	set_fs_register((entry << 3) | DPL_USER);
}
223 
224 
// Rewinds the given syscall iframe so the interrupted syscall is executed
// again once the thread returns to userland.
void
x86_restart_syscall(struct iframe* frame)
{
	struct thread* thread = thread_get_current_thread();

	// clear the restart request and remember that we did restart
	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);

	// restore the registers clobbered by the syscall's return value
	frame->eax = frame->orig_eax;
	frame->edx = frame->orig_edx;
	frame->eip -= 2;
		// undoes the "int $99"/"sysenter"/"syscall" instruction
		// (so that it'll be executed again)

	TSYSCALL(RestartSyscall());
}
241 
242 
243 static uint32 *
244 get_signal_stack(struct thread *thread, struct iframe *frame, int signal)
245 {
246 	// use the alternate signal stack if we should and can
247 	if (thread->signal_stack_enabled
248 		&& (thread->sig_action[signal - 1].sa_flags & SA_ONSTACK) != 0
249 		&& (frame->user_esp < thread->signal_stack_base
250 			|| frame->user_esp >= thread->signal_stack_base
251 				+ thread->signal_stack_size)) {
252 		return (uint32 *)(thread->signal_stack_base
253 			+ thread->signal_stack_size);
254 	}
255 
256 	return (uint32 *)frame->user_esp;
257 }
258 
259 
260 //	#pragma mark -
261 
262 
status_t
arch_team_init_team_struct(struct team *p, bool kernel)
{
	// nothing architecture-specific to initialize for a team on x86
	return B_OK;
}
268 
269 
status_t
arch_thread_init_thread_struct(struct thread *thread)
{
	// set up an initial state (stack & fpu)
	// sInitialState holds the clean FPU image saved in arch_thread_init()
	memcpy(&thread->arch_info, &sInitialState, sizeof(struct arch_thread));
	return B_OK;
}
277 
278 
/*!	Prepares a kernel thread's stack so that i386_context_switch() can
	"return" into it: three return addresses (entry_func -> start_func ->
	exit_func, pushed in reverse) followed by eight zeroed register slots
	for the initial popad.
*/
status_t
arch_thread_init_kthread_stack(struct thread *t, int (*start_func)(void),
	void (*entry_func)(void), void (*exit_func)(void))
{
	addr_t *kstack = (addr_t *)t->kernel_stack_base;
	addr_t *kstack_top = (addr_t *)t->kernel_stack_top;
	int i;

	TRACE(("arch_thread_initialize_kthread_stack: kstack 0x%p, start_func 0x%p, entry_func 0x%p\n",
		kstack, start_func, entry_func));

	// clear the kernel stack
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
	// skip the guard pages at the bottom of the stack area
	memset((void *)((addr_t)kstack + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE), 0,
		KERNEL_STACK_SIZE);
#	else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#	endif
#else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#endif

	// set the final return address to be thread_kthread_exit
	kstack_top--;
	*kstack_top = (unsigned int)exit_func;

	// set the return address to be the start of the first function
	kstack_top--;
	*kstack_top = (unsigned int)start_func;

	// set the return address to be the start of the entry (thread setup)
	// function
	kstack_top--;
	*kstack_top = (unsigned int)entry_func;

	// simulate pushfl
//	kstack_top--;
//	*kstack_top = 0x00; // interrupts still disabled after the switch

	// simulate initial popad
	for (i = 0; i < 8; i++) {
		kstack_top--;
		*kstack_top = 0;
	}

	// save the stack position
	t->arch_info.current_stack.esp = kstack_top;
	t->arch_info.current_stack.ss = (addr_t *)KERNEL_DATA_SEG;

	return B_OK;
}
331 
332 
333 /** Initializes the user-space TLS local storage pointer in
334  *	the thread structure, and the reserved TLS slots.
335  *
336  *	Is called from _create_user_thread_kentry().
337  */
338 
339 status_t
340 arch_thread_init_tls(struct thread *thread)
341 {
342 	uint32 tls[TLS_USER_THREAD_SLOT + 1];
343 
344 	thread->user_local_storage = thread->user_stack_base
345 		+ thread->user_stack_size;
346 
347 	// initialize default TLS fields
348 	memset(tls, 0, sizeof(tls));
349 	tls[TLS_BASE_ADDRESS_SLOT] = thread->user_local_storage;
350 	tls[TLS_THREAD_ID_SLOT] = thread->id;
351 	tls[TLS_USER_THREAD_SLOT] = (addr_t)thread->user_thread;
352 
353 	return user_memcpy((void *)thread->user_local_storage, tls, sizeof(tls));
354 }
355 
356 
/*!	Performs the arch part of a context switch: kernel stack registration,
	TLS segment, page directory bookkeeping and FPU state swap. Runs with
	the scheduler's locking in effect; statement order is significant.
*/
void
arch_thread_context_switch(struct thread *from, struct thread *to)
{
	// register the new thread's kernel stack for interrupts and syscalls
	i386_set_tss_and_kstack(to->kernel_stack_top);
	x86_set_syscall_stack(to->kernel_stack_top);

	// set TLS GDT entry to the current thread - since this action is
	// dependent on the current CPU, we have to do it here
	if (to->user_local_storage != 0)
		set_tls_context(to);

	struct cpu_ent* cpuData = to->cpu;
	vm_translation_map_arch_info* activeMap
		= cpuData->arch.active_translation_map;
	VMAddressSpace* toAddressSpace = to->team->address_space;

	// Switch translation maps only when the target has an address space
	// that differs from the one active on this CPU; kernel threads
	// (toAddressSpace == NULL) keep the current map.
	addr_t newPageDirectory;
	vm_translation_map_arch_info* toMap;
	if (toAddressSpace != NULL
		&& (toMap = static_cast<X86VMTranslationMap*>(
				toAddressSpace->TranslationMap())->ArchData()) != activeMap) {
		// update on which CPUs the address space is used
		int cpu = cpuData->cpu_num;
		atomic_and(&activeMap->active_on_cpus, ~((uint32)1 << cpu));
		atomic_or(&toMap->active_on_cpus, (uint32)1 << cpu);

		activeMap->RemoveReference();
			// this might cause the map to be deferred deleted - ie. it won't
			// be deleted when it is still in use

		// assign the new map to the CPU
		toMap->AddReference();
		cpuData->arch.active_translation_map = toMap;

		// get the new page directory
		newPageDirectory = (addr_t)toMap->pgdir_phys;
	} else {
		newPageDirectory = 0;
			// this means no change
	}

	// swap the FPU state, then switch stacks (and page directory, if any)
	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
	i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);
}
401 
402 
403 void
404 arch_thread_dump_info(void *info)
405 {
406 	struct arch_thread *at = (struct arch_thread *)info;
407 
408 	kprintf("\tesp: %p\n", at->current_stack.esp);
409 	kprintf("\tss: %p\n", at->current_stack.ss);
410 	kprintf("\tfpu_state at %p\n", at->fpu_state);
411 }
412 
413 
/** Sets up the initial userland stack (exit stub plus the entry function's
 *	arguments) and enters user space. Does not return on success.
 */

status_t
arch_thread_enter_userspace(struct thread *t, addr_t entry, void *args1,
	void *args2)
{
	addr_t stackTop = t->user_stack_base + t->user_stack_size;
	// size of the userland exit stub copied onto the stack below
	uint32 codeSize = (addr_t)x86_end_userspace_thread_exit
		- (addr_t)x86_userspace_thread_exit;
	uint32 args[3];

	TRACE(("arch_thread_enter_uspace: entry 0x%lx, args %p %p, ustack_top 0x%lx\n",
		entry, args1, args2, stackTop));

	// copy the little stub that calls exit_thread() when the thread entry
	// function returns, as well as the arguments of the entry function
	stackTop -= codeSize;

	if (user_memcpy((void *)stackTop, (const void *)&x86_userspace_thread_exit, codeSize) < B_OK)
		return B_BAD_ADDRESS;

	// args[0] acts as the entry function's return address: the exit stub
	args[0] = stackTop;
	args[1] = (uint32)args1;
	args[2] = (uint32)args2;
	stackTop -= sizeof(args);

	if (user_memcpy((void *)stackTop, args, sizeof(args)) < B_OK)
		return B_BAD_ADDRESS;

	thread_at_kernel_exit();
		// also disables interrupts

	// install user breakpoints, if any
	if ((t->flags & THREAD_FLAGS_BREAKPOINTS_DEFINED) != 0)
		x86_init_user_debug_at_kernel_exit(NULL);

	i386_set_tss_and_kstack(t->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS
	set_tls_context(t);

	x86_set_syscall_stack(t->kernel_stack_top);
	x86_enter_userspace(entry, stackTop);

	return B_OK;
		// never gets here
}
462 
463 
464 bool
465 arch_on_signal_stack(struct thread *thread)
466 {
467 	struct iframe *frame = get_current_iframe();
468 
469 	return frame->user_esp >= thread->signal_stack_base
470 		&& frame->user_esp < thread->signal_stack_base
471 			+ thread->signal_stack_size;
472 }
473 
474 
/*!	Builds the signal handler frame on the user stack: syscall restart
	info, the saved register state (vregs), a return stub, and finally the
	handler's arguments plus restore data. Then redirects the iframe so the
	thread enters \a action->sa_handler on return to userland.
*/
status_t
arch_setup_signal_frame(struct thread *thread, struct sigaction *action,
	int signal, int signalMask)
{
	struct iframe *frame = get_current_iframe();
	if (!IFRAME_IS_USER(frame)) {
		panic("arch_setup_signal_frame(): No user iframe!");
		return B_BAD_VALUE;
	}

	uint32 *signalCode;
	uint32 *userRegs;
	struct vregs regs;
	uint32 buffer[6];
	status_t status;

	// start stuffing stuff on the user stack
	uint32* userStack = get_signal_stack(thread, frame, signal);

	// copy syscall restart info onto the user stack
	// (12 bytes = flags + orig_eax + orig_edx; "+ 3) / 4" rounds the byte
	// count up to whole 32 bit stack words)
	userStack -= (sizeof(thread->syscall_restart.parameters) + 12 + 3) / 4;
	uint32 threadFlags = atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	if (user_memcpy(userStack, &threadFlags, 4) < B_OK
		|| user_memcpy(userStack + 1, &frame->orig_eax, 4) < B_OK
		|| user_memcpy(userStack + 2, &frame->orig_edx, 4) < B_OK)
		return B_BAD_ADDRESS;
	status = user_memcpy(userStack + 3, thread->syscall_restart.parameters,
		sizeof(thread->syscall_restart.parameters));
	if (status < B_OK)
		return status;

	// store the saved regs onto the user stack
	regs.eip = frame->eip;
	regs.eflags = frame->flags;
	regs.eax = frame->eax;
	regs.ecx = frame->ecx;
	regs.edx = frame->edx;
	regs.ebp = frame->ebp;
	regs.esp = frame->esp;
	regs._reserved_1 = frame->user_esp;
	regs._reserved_2[0] = frame->edi;
	regs._reserved_2[1] = frame->esi;
	regs._reserved_2[2] = frame->ebx;
	// save the FPU state into the vregs as well
	i386_fnsave((void *)(&regs.xregs));

	userStack -= (sizeof(struct vregs) + 3) / 4;
	userRegs = userStack;
	status = user_memcpy(userRegs, &regs, sizeof(regs));
	if (status < B_OK)
		return status;

	// now store a code snippet on the stack
	// (the stub the handler returns into, see i386_return_from_signal)
	userStack -= ((uint32)i386_end_return_from_signal + 3
		- (uint32)i386_return_from_signal) / 4;
	signalCode = userStack;
	status = user_memcpy(signalCode, (const void *)&i386_return_from_signal,
		((uint32)i386_end_return_from_signal
			- (uint32)i386_return_from_signal));
	if (status < B_OK)
		return status;

	// now set up the final part
	buffer[0] = (uint32)signalCode;	// return address when sa_handler done
	buffer[1] = signal;				// arguments to sa_handler
	buffer[2] = (uint32)action->sa_userdata;
	buffer[3] = (uint32)userRegs;

	buffer[4] = signalMask;			// Old signal mask to restore
	buffer[5] = (uint32)userRegs;	// Int frame + extra regs to restore

	userStack -= sizeof(buffer) / 4;

	status = user_memcpy(userStack, buffer, sizeof(buffer));
	if (status < B_OK)
		return status;

	// redirect userland execution to the handler on kernel exit
	frame->user_esp = (uint32)userStack;
	frame->eip = (uint32)action->sa_handler;

	return B_OK;
}
557 
558 
/*!	Counterpart to arch_setup_signal_frame(): reads the signal mask, the
	saved vregs and the syscall restart info back from the user stack and
	restores them into the current iframe and thread.
	\return The interrupted syscall's 64 bit return value (eax:edx).
*/
int64
arch_restore_signal_frame(void)
{
	struct thread *thread = thread_get_current_thread();
	struct iframe *frame = get_current_iframe();
	int32 signalMask;
	uint32 *userStack;
	struct vregs* regsPointer;
	struct vregs regs;

	TRACE(("### arch_restore_signal_frame: entry\n"));

	// NOTE(review): assumes user_esp now points at the {old mask, vregs
	// pointer} words of the buffer laid out by arch_setup_signal_frame()
	// (i.e. the return stub consumed the handler arguments) -- confirm
	// against i386_return_from_signal in arch_interrupts.S
	userStack = (uint32 *)frame->user_esp;
	if (user_memcpy(&signalMask, &userStack[0], 4) < B_OK
		|| user_memcpy(&regsPointer, &userStack[1], 4) < B_OK
		|| user_memcpy(&regs, regsPointer, sizeof(vregs)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// the syscall restart info was stored directly above the vregs
	uint32* syscallRestartInfo
		= (uint32*)regsPointer + (sizeof(struct vregs) + 3) / 4;
	uint32 threadFlags;
	if (user_memcpy(&threadFlags, syscallRestartInfo, 4) < B_OK
		|| user_memcpy(&frame->orig_eax, syscallRestartInfo + 1, 4) < B_OK
		|| user_memcpy(&frame->orig_edx, syscallRestartInfo + 2, 4) < B_OK
		|| user_memcpy(thread->syscall_restart.parameters,
			syscallRestartInfo + 3,
			sizeof(thread->syscall_restart.parameters)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// set restart/64bit return value flags from previous syscall
	atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	atomic_or(&thread->flags, threadFlags
		& (THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));

	// TODO: Verify that just restoring the old signal mask is right! Bash for
	// instance changes the procmask in a signal handler. Those changes are
	// lost the way we do it.
	atomic_set(&thread->sig_block_mask, signalMask);
	update_current_thread_signals_flag();

	// restore the interrupted register state into the iframe
	frame->eip = regs.eip;
	frame->flags = regs.eflags;
	frame->eax = regs.eax;
	frame->ecx = regs.ecx;
	frame->edx = regs.edx;
	frame->ebp = regs.ebp;
	frame->esp = regs.esp;
	frame->user_esp = regs._reserved_1;
	frame->edi = regs._reserved_2[0];
	frame->esi = regs._reserved_2[1];
	frame->ebx = regs._reserved_2[2];

	// restore the FPU state saved in arch_setup_signal_frame()
	i386_frstor((void *)(&regs.xregs));

	TRACE(("### arch_restore_signal_frame: exit\n"));

	return (int64)frame->eax | ((int64)frame->edx << 32);
}
620 
621 
622 /**	Saves everything needed to restore the frame in the child fork in the
623  *	arch_fork_arg structure to be passed to arch_restore_fork_frame().
624  *	Also makes sure to return the right value.
625  */
626 
627 void
628 arch_store_fork_frame(struct arch_fork_arg *arg)
629 {
630 	struct iframe *frame = get_current_iframe();
631 
632 	// we need to copy the threads current iframe
633 	arg->iframe = *frame;
634 
635 	// we also want fork() to return 0 for the child
636 	arg->iframe.eax = 0;
637 }
638 
639 
640 /** Restores the frame from a forked team as specified by the provided
641  *	arch_fork_arg structure.
642  *	Needs to be called from within the child team, ie. instead of
643  *	arch_thread_enter_uspace() as thread "starter".
644  *	This function does not return to the caller, but will enter userland
 *	in the child team at the same position where the parent team left off.
646  */
647 
void
arch_restore_fork_frame(struct arch_fork_arg *arg)
{
	struct thread *thread = thread_get_current_thread();

	// interrupts stay disabled until the iframe is restored on kernel exit
	disable_interrupts();

	// register this thread's kernel stack for interrupts/syscalls
	i386_set_tss_and_kstack(thread->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS (set the current %fs register)
	set_tls_context(thread);

	// does not return: enters userland with the parent's copied iframe
	i386_restore_frame_from_syscall(arg->iframe);
}
662 
663 
664 void
665 arch_syscall_64_bit_return_value(void)
666 {
667 	struct thread* thread = thread_get_current_thread();
668 	atomic_or(&thread->flags, THREAD_FLAGS_64_BIT_SYSCALL_RETURN);
669 }
670