xref: /haiku/src/system/kernel/arch/x86/arch_thread.cpp (revision 526e86ac79f9899c43e2def55f82754a5c449f8a)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 
10 #include <arch/thread.h>
11 
12 #include <string.h>
13 
14 #include <arch/user_debugger.h>
15 #include <arch_cpu.h>
16 #include <cpu.h>
17 #include <debug.h>
18 #include <kernel.h>
19 #include <ksignal.h>
20 #include <int.h>
21 #include <team.h>
22 #include <thread.h>
23 #include <tls.h>
24 #include <tracing.h>
25 #include <vm/vm_types.h>
26 #include <vm/VMAddressSpace.h>
27 
28 #include "paging/X86PagingStructures.h"
29 #include "paging/X86VMTranslationMap.h"
30 #include "x86_syscalls.h"
31 
32 
33 //#define TRACE_ARCH_THREAD
34 #ifdef TRACE_ARCH_THREAD
35 #	define TRACE(x) dprintf x
36 #else
37 #	define TRACE(x) ;
38 #endif
39 
40 
41 #ifdef SYSCALL_TRACING
42 
43 namespace SyscallTracing {
44 
// Trace entry recorded whenever a syscall is restarted; emitted from
// x86_restart_syscall() via the TSYSCALL() macro. Carries no payload —
// its mere presence in the trace marks the restart.
class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			// Publish the entry to the tracing buffer.
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};
57 
58 }
59 
60 #	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x
61 
62 #else
63 #	define TSYSCALL(x)
64 #endif	// SYSCALL_TRACING
65 
66 
67 // from arch_interrupts.S
68 extern "C" void i386_stack_init(struct farcall *interrupt_stack_offset);
69 extern "C" void i386_restore_frame_from_syscall(struct iframe frame);
70 
71 // from arch_cpu.c
72 extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
73 extern bool gHasSSE;
74 
75 static struct arch_thread sInitialState _ALIGNED(16);
76 	// the fpu_state must be aligned on a 16 byte boundary, so that fxsave can use it
77 
78 
/*!	Captures one pristine FPU state into \c sInitialState, which
	arch_thread_init_thread_struct() later copies into every new thread.
	\param args kernel boot arguments (unused here)
	\return \c B_OK always
*/
status_t
arch_thread_init(struct kernel_args *args)
{
	// save one global valid FPU state; it will be copied in the arch dependent
	// part of each new thread

	// clts: clear the TS flag so FPU access doesn't fault;
	// fninit/fnclex: reset the FPU and clear pending exceptions
	asm volatile ("clts; fninit; fnclex;");
	if (gHasSSE)
		i386_fxsave(sInitialState.fpu_state);
			// fxsave requires the 16-byte alignment of sInitialState
	else
		i386_fnsave(sInitialState.fpu_state);

	return B_OK;
}
93 
94 
/*!	Walks the chain of saved frame pointers on \a thread's kernel stack,
	starting at \a frame, until it encounters an iframe.
	An iframe is recognized by its first word: instead of a stack address
	it holds only the iframe type bits (i.e. nothing outside
	IFRAME_TYPE_MASK is set).
	\return the iframe found, or \c NULL if the walk leaves the kernel
		stack or hits a 0 terminator first.
*/
static struct iframe *
find_previous_iframe(struct thread *thread, addr_t frame)
{
	// iterate backwards through the stack frames, until we hit an iframe
	while (frame >= thread->kernel_stack_base
		&& frame < thread->kernel_stack_top) {
		addr_t previousFrame = *(addr_t*)frame;
		if ((previousFrame & ~IFRAME_TYPE_MASK) == 0) {
			// only type bits left -> this is an iframe (or the end marker)
			if (previousFrame == 0)
				return NULL;
			return (struct iframe*)frame;
		}

		frame = previousFrame;
	}

	return NULL;
}
113 
114 
115 static struct iframe*
116 get_previous_iframe(struct iframe* frame)
117 {
118 	if (frame == NULL)
119 		return NULL;
120 
121 	return find_previous_iframe(thread_get_current_thread(), frame->ebp);
122 }
123 
124 
/*!
	Returns the current iframe structure of the running thread.
	This function must only be called in a context where it's actually
	sure that such iframe exists; ie. from syscalls, but usually not
	from standard kernel threads.
*/
static struct iframe*
get_current_iframe(void)
{
	// start the frame-pointer walk from our own %ebp
	return find_previous_iframe(thread_get_current_thread(), x86_read_ebp());
}
136 
137 
138 /*!
139 	\brief Returns the current thread's topmost (i.e. most recent)
140 	userland->kernel transition iframe (usually the first one, save for
141 	interrupts in signal handlers).
142 	\return The iframe, or \c NULL, if there is no such iframe (e.g. when
143 			the thread is a kernel thread).
144 */
145 struct iframe *
146 i386_get_user_iframe(void)
147 {
148 	struct iframe* frame = get_current_iframe();
149 
150 	while (frame != NULL) {
151 		if (IFRAME_IS_USER(frame))
152 			return frame;
153 		frame = get_previous_iframe(frame);
154 	}
155 
156 	return NULL;
157 }
158 
159 
/*!	\brief Like i386_get_user_iframe(), just for the given thread.
	The thread must not be running and the thread's spinlock must be held.
	\return the topmost userland iframe, or \c NULL (kernel thread, or the
		thread is currently running).
*/
struct iframe *
i386_get_thread_user_iframe(struct thread *thread)
{
	// a running thread's saved stack state is stale -- refuse
	if (thread->state == B_THREAD_RUNNING)
		return NULL;

	// read %ebp from the thread's stack stored by a pushad
	addr_t ebp = thread->arch_info.current_stack.esp[2];

	// find the user iframe
	struct iframe *frame = find_previous_iframe(thread, ebp);

	while (frame != NULL) {
		if (IFRAME_IS_USER(frame))
			return frame;
		frame = get_previous_iframe(frame);
	}

	return NULL;
}
183 
184 
/*!	Public wrapper around the file-local get_current_iframe(); same
	preconditions apply (an iframe must actually exist).
*/
struct iframe *
i386_get_current_iframe(void)
{
	return get_current_iframe();
}
190 
191 
192 uint32
193 x86_next_page_directory(struct thread *from, struct thread *to)
194 {
195 	VMAddressSpace* toAddressSpace = to->team->address_space;
196 	if (from->team->address_space == toAddressSpace) {
197 		// don't change the pgdir, same address space
198 		return 0;
199 	}
200 
201 	if (toAddressSpace == NULL)
202 		toAddressSpace = VMAddressSpace::Kernel();
203 
204 	return static_cast<X86VMTranslationMap*>(toAddressSpace->TranslationMap())
205 		->PagingStructures()->pgdir_phys;
206 }
207 
208 
/*!	Loads \a segment (a GDT selector) into the %fs segment register.
	Userland TLS accesses on x86 go through %fs.
*/
static inline void
set_fs_register(uint32 segment)
{
	asm("movl %0,%%fs" :: "r" (segment));
}
214 
215 
/*!	Points the current CPU's TLS GDT entry at \a thread's userland TLS
	area and reloads %fs accordingly. Must run on the CPU the thread is
	(about to be) running on, since each CPU has its own TLS descriptor.
*/
static void
set_tls_context(struct thread *thread)
{
	// one TLS descriptor per CPU, starting at TLS_BASE_SEGMENT
	int entry = smp_get_current_cpu() + TLS_BASE_SEGMENT;

	set_segment_descriptor_base(&gGDT[entry], thread->user_local_storage);
	// selector = index << 3, ring 3 so userland may use it
	set_fs_register((entry << 3) | DPL_USER);
}
224 
225 
/*!	Rewinds \a frame so that the interrupted syscall is re-executed when
	the thread returns to userland: restores the original syscall number
	and argument registers and backs %eip up over the syscall instruction.
*/
void
x86_restart_syscall(struct iframe* frame)
{
	struct thread* thread = thread_get_current_thread();

	// the restart request is consumed; mark the syscall as restarted instead
	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);

	// eax/edx were clobbered by the syscall's return value
	frame->eax = frame->orig_eax;
	frame->edx = frame->orig_edx;
	frame->eip -= 2;
		// undoes the "int $99"/"sysenter"/"syscall" instruction
		// (so that it'll be executed again)

	TSYSCALL(RestartSyscall());
}
242 
243 
244 static uint32 *
245 get_signal_stack(struct thread *thread, struct iframe *frame, int signal)
246 {
247 	// use the alternate signal stack if we should and can
248 	if (thread->signal_stack_enabled
249 		&& (thread->sig_action[signal - 1].sa_flags & SA_ONSTACK) != 0
250 		&& (frame->user_esp < thread->signal_stack_base
251 			|| frame->user_esp >= thread->signal_stack_base
252 				+ thread->signal_stack_size)) {
253 		return (uint32 *)(thread->signal_stack_base
254 			+ thread->signal_stack_size);
255 	}
256 
257 	return (uint32 *)frame->user_esp;
258 }
259 
260 
261 //	#pragma mark -
262 
263 
/*!	x86 has no per-team architecture state to set up; nothing to do.
	\param p the team being initialized (unused)
	\param kernel whether this is the kernel team (unused)
	\return \c B_OK always
*/
status_t
arch_team_init_team_struct(struct team *p, bool kernel)
{
	return B_OK;
}
269 
270 
/*!	Initializes \a thread's architecture-dependent part from the template
	captured by arch_thread_init() (valid FPU state, zeroed stack info).
	\return \c B_OK always
*/
status_t
arch_thread_init_thread_struct(struct thread *thread)
{
	// set up an initial state (stack & fpu)
	memcpy(&thread->arch_info, &sInitialState, sizeof(struct arch_thread));
	return B_OK;
}
278 
279 
/*!	Prepares a kernel thread's stack so that the first context switch to
	it "returns" into \a entry_func, which returns into \a start_func,
	which finally returns into \a exit_func. The layout must match what
	x86_context_switch()/the assembly glue expects (popad + ret).
	\return \c B_OK always
*/
status_t
arch_thread_init_kthread_stack(struct thread *t, int (*start_func)(void),
	void (*entry_func)(void), void (*exit_func)(void))
{
	addr_t *kstack = (addr_t *)t->kernel_stack_base;
	addr_t *kstack_top = (addr_t *)t->kernel_stack_top;
	int i;

	TRACE(("arch_thread_initialize_kthread_stack: kstack 0x%p, start_func 0x%p, entry_func 0x%p\n",
		kstack, start_func, entry_func));

	// clear the kernel stack
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
	// skip the guard pages at the bottom of the stack
	memset((void *)((addr_t)kstack + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE), 0,
		KERNEL_STACK_SIZE);
#	else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#	endif
#else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#endif

	// set the final return address to be thread_kthread_exit
	kstack_top--;
	*kstack_top = (unsigned int)exit_func;

	// set the return address to be the start of the first function
	kstack_top--;
	*kstack_top = (unsigned int)start_func;

	// set the return address to be the start of the entry (thread setup)
	// function
	kstack_top--;
	*kstack_top = (unsigned int)entry_func;

	// simulate pushfl
//	kstack_top--;
//	*kstack_top = 0x00; // interrupts still disabled after the switch

	// simulate initial popad
	for (i = 0; i < 8; i++) {
		kstack_top--;
		*kstack_top = 0;
	}

	// save the stack position
	t->arch_info.current_stack.esp = kstack_top;
	t->arch_info.current_stack.ss = (addr_t *)KERNEL_DATA_SEG;

	return B_OK;
}
332 
333 
/** Initializes the user-space TLS local storage pointer in
 *	the thread structure, and the reserved TLS slots.
 *
 *	The TLS area is placed directly above the thread's user stack; the
 *	reserved slots (base address, thread ID, user_thread pointer) are
 *	filled in and copied out with user_memcpy().
 *
 *	Is called from _create_user_thread_kentry().
 *
 *	\return \c B_OK, or \c B_BAD_ADDRESS if the copy to userland fails
 */

status_t
arch_thread_init_tls(struct thread *thread)
{
	uint32 tls[TLS_USER_THREAD_SLOT + 1];

	// TLS lives right at the top of the user stack area
	thread->user_local_storage = thread->user_stack_base
		+ thread->user_stack_size;

	// initialize default TLS fields
	memset(tls, 0, sizeof(tls));
	tls[TLS_BASE_ADDRESS_SLOT] = thread->user_local_storage;
	tls[TLS_THREAD_ID_SLOT] = thread->id;
	tls[TLS_USER_THREAD_SLOT] = (addr_t)thread->user_thread;

	return user_memcpy((void *)thread->user_local_storage, tls, sizeof(tls));
}
356 
357 
/*!	Performs the architecture-specific part of a context switch from
	\a from to \a to on the current CPU: updates TSS/syscall stacks and
	TLS, switches paging structures (with reference counting and an
	actual pgdir swap only when needed), swaps FPU state, and finally
	switches the register context.
	Called with interrupts disabled by the scheduler.
*/
void
arch_thread_context_switch(struct thread *from, struct thread *to)
{
	// kernel stack entry points for interrupts and syscalls
	i386_set_tss_and_kstack(to->kernel_stack_top);
	x86_set_syscall_stack(to->kernel_stack_top);

	// set TLS GDT entry to the current thread - since this action is
	// dependent on the current CPU, we have to do it here
	if (to->user_local_storage != 0)
		set_tls_context(to);

	struct cpu_ent* cpuData = to->cpu;
	X86PagingStructures* activePagingStructures
		= cpuData->arch.active_paging_structures;
	VMAddressSpace* toAddressSpace = to->team->address_space;

	X86PagingStructures* toPagingStructures;
	if (toAddressSpace != NULL
		&& (toPagingStructures = static_cast<X86VMTranslationMap*>(
				toAddressSpace->TranslationMap())->PagingStructures())
					!= activePagingStructures) {
		// update on which CPUs the address space is used
		int cpu = cpuData->cpu_num;
		atomic_and(&activePagingStructures->active_on_cpus,
			~((uint32)1 << cpu));
		atomic_or(&toPagingStructures->active_on_cpus, (uint32)1 << cpu);

		// assign the new paging structures to the CPU
		// (take the new reference before dropping the old one)
		toPagingStructures->AddReference();
		cpuData->arch.active_paging_structures = toPagingStructures;

		// set the page directory, if it changes
		uint32 newPageDirectory = toPagingStructures->pgdir_phys;
		if (newPageDirectory != activePagingStructures->pgdir_phys)
			x86_swap_pgdir(newPageDirectory);

		// This CPU no longer uses the previous paging structures.
		activePagingStructures->RemoveReference();
	}

	// save "from"'s FPU state, restore "to"'s, then switch registers/stack
	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
	x86_context_switch(&from->arch_info, &to->arch_info);
}
401 
402 
403 void
404 arch_thread_dump_info(void *info)
405 {
406 	struct arch_thread *at = (struct arch_thread *)info;
407 
408 	kprintf("\tesp: %p\n", at->current_stack.esp);
409 	kprintf("\tss: %p\n", at->current_stack.ss);
410 	kprintf("\tfpu_state at %p\n", at->fpu_state);
411 }
412 
413 
/** Sets up initial thread context and enters user space
 *
 *	Copies a small exit stub plus the entry function's arguments onto the
 *	user stack, performs the kernel-exit bookkeeping, and jumps to
 *	\a entry in userland. On success this function does not return.
 *
 *	\param t the thread to start
 *	\param entry userland entry point
 *	\param args1, args2 arguments passed to the entry function
 *	\return \c B_BAD_ADDRESS if copying to the user stack fails;
 *		otherwise never returns
 */

status_t
arch_thread_enter_userspace(struct thread *t, addr_t entry, void *args1,
	void *args2)
{
	addr_t stackTop = t->user_stack_base + t->user_stack_size;
	// size of the userland thread-exit stub (delimited by two symbols)
	uint32 codeSize = (addr_t)x86_end_userspace_thread_exit
		- (addr_t)x86_userspace_thread_exit;
	uint32 args[3];

	TRACE(("arch_thread_enter_uspace: entry 0x%lx, args %p %p, ustack_top 0x%lx\n",
		entry, args1, args2, stackTop));

	// copy the little stub that calls exit_thread() when the thread entry
	// function returns, as well as the arguments of the entry function
	stackTop -= codeSize;

	if (user_memcpy((void *)stackTop, (const void *)&x86_userspace_thread_exit, codeSize) < B_OK)
		return B_BAD_ADDRESS;

	// args[0] doubles as the return address: the stub copied above
	args[0] = stackTop;
	args[1] = (uint32)args1;
	args[2] = (uint32)args2;
	stackTop -= sizeof(args);

	if (user_memcpy((void *)stackTop, args, sizeof(args)) < B_OK)
		return B_BAD_ADDRESS;

	thread_at_kernel_exit();
		// also disables interrupts

	// install user breakpoints, if any
	if ((t->flags & THREAD_FLAGS_BREAKPOINTS_DEFINED) != 0)
		x86_init_user_debug_at_kernel_exit(NULL);

	i386_set_tss_and_kstack(t->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS
	set_tls_context(t);

	x86_set_syscall_stack(t->kernel_stack_top);
	x86_enter_userspace(entry, stackTop);

	return B_OK;
		// never gets here
}
462 
463 
464 bool
465 arch_on_signal_stack(struct thread *thread)
466 {
467 	struct iframe *frame = get_current_iframe();
468 
469 	return frame->user_esp >= thread->signal_stack_base
470 		&& frame->user_esp < thread->signal_stack_base
471 			+ thread->signal_stack_size;
472 }
473 
474 
/*!	Builds the userland signal handler frame for \a signal on the
	appropriate user stack (see get_signal_stack()).
	The stack ends up containing, from higher to lower addresses:
	syscall restart info (flags, orig eax/edx, syscall parameters), the
	saved registers (\c struct \c vregs incl. FPU state), the
	return-from-signal code snippet, and finally the handler's return
	address, arguments, and restore info. The iframe is then redirected
	to the handler.
	\return \c B_OK, or \c B_BAD_ADDRESS / the user_memcpy() error on
		failure
*/
status_t
arch_setup_signal_frame(struct thread *thread, struct sigaction *action,
	int signal, int signalMask)
{
	struct iframe *frame = get_current_iframe();
	if (!IFRAME_IS_USER(frame)) {
		panic("arch_setup_signal_frame(): No user iframe!");
		return B_BAD_VALUE;
	}

	uint32 *signalCode;
	uint32 *userRegs;
	struct vregs regs;
	uint32 buffer[6];
	status_t status;

	// start stuffing stuff on the user stack
	uint32* userStack = get_signal_stack(thread, frame, signal);

	// copy syscall restart info onto the user stack
	// (12 bytes for flags + orig_eax + orig_edx; "+ 3" rounds up to
	// whole uint32 slots)
	userStack -= (sizeof(thread->syscall_restart.parameters) + 12 + 3) / 4;
	uint32 threadFlags = atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	if (user_memcpy(userStack, &threadFlags, 4) < B_OK
		|| user_memcpy(userStack + 1, &frame->orig_eax, 4) < B_OK
		|| user_memcpy(userStack + 2, &frame->orig_edx, 4) < B_OK)
		return B_BAD_ADDRESS;
	status = user_memcpy(userStack + 3, thread->syscall_restart.parameters,
		sizeof(thread->syscall_restart.parameters));
	if (status < B_OK)
		return status;

	// store the saved regs onto the user stack
	regs.eip = frame->eip;
	regs.eflags = frame->flags;
	regs.eax = frame->eax;
	regs.ecx = frame->ecx;
	regs.edx = frame->edx;
	regs.ebp = frame->ebp;
	regs.esp = frame->esp;
	regs._reserved_1 = frame->user_esp;
	regs._reserved_2[0] = frame->edi;
	regs._reserved_2[1] = frame->esi;
	regs._reserved_2[2] = frame->ebx;
	i386_fnsave((void *)(&regs.xregs));

	userStack -= (sizeof(struct vregs) + 3) / 4;
	userRegs = userStack;
	status = user_memcpy(userRegs, &regs, sizeof(regs));
	if (status < B_OK)
		return status;

	// now store a code snippet on the stack
	// (the i386_return_from_signal trampoline, copied verbatim)
	userStack -= ((uint32)i386_end_return_from_signal + 3
		- (uint32)i386_return_from_signal) / 4;
	signalCode = userStack;
	status = user_memcpy(signalCode, (const void *)&i386_return_from_signal,
		((uint32)i386_end_return_from_signal
			- (uint32)i386_return_from_signal));
	if (status < B_OK)
		return status;

	// now set up the final part
	buffer[0] = (uint32)signalCode;	// return address when sa_handler done
	buffer[1] = signal;				// arguments to sa_handler
	buffer[2] = (uint32)action->sa_userdata;
	buffer[3] = (uint32)userRegs;

	buffer[4] = signalMask;			// Old signal mask to restore
	buffer[5] = (uint32)userRegs;	// Int frame + extra regs to restore

	userStack -= sizeof(buffer) / 4;

	status = user_memcpy(userStack, buffer, sizeof(buffer));
	if (status < B_OK)
		return status;

	// redirect the iframe so the return to userland enters sa_handler
	frame->user_esp = (uint32)userStack;
	frame->eip = (uint32)action->sa_handler;

	return B_OK;
}
557 
558 
/*!	Undoes arch_setup_signal_frame() when the userland trampoline makes
	its restore syscall: reads the saved registers, signal mask, and
	syscall restart info back off the user stack and restores them into
	the iframe and thread.
	\return the 64-bit eax:edx return value to hand back to the
		interrupted code, or \c B_BAD_ADDRESS on a bad user pointer
*/
int64
arch_restore_signal_frame(void)
{
	struct thread *thread = thread_get_current_thread();
	struct iframe *frame = get_current_iframe();
	int32 signalMask;
	uint32 *userStack;
	struct vregs* regsPointer;
	struct vregs regs;

	TRACE(("### arch_restore_signal_frame: entry\n"));

	// user_esp points at [signalMask, userRegs] left by the trampoline
	userStack = (uint32 *)frame->user_esp;
	if (user_memcpy(&signalMask, &userStack[0], 4) < B_OK
		|| user_memcpy(&regsPointer, &userStack[1], 4) < B_OK
		|| user_memcpy(&regs, regsPointer, sizeof(vregs)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// the restart info sits directly above the vregs
	// (same layout as written by arch_setup_signal_frame())
	uint32* syscallRestartInfo
		= (uint32*)regsPointer + (sizeof(struct vregs) + 3) / 4;
	uint32 threadFlags;
	if (user_memcpy(&threadFlags, syscallRestartInfo, 4) < B_OK
		|| user_memcpy(&frame->orig_eax, syscallRestartInfo + 1, 4) < B_OK
		|| user_memcpy(&frame->orig_edx, syscallRestartInfo + 2, 4) < B_OK
		|| user_memcpy(thread->syscall_restart.parameters,
			syscallRestartInfo + 3,
			sizeof(thread->syscall_restart.parameters)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// set restart/64bit return value flags from previous syscall
	atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	atomic_or(&thread->flags, threadFlags
		& (THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));

	// TODO: Verify that just restoring the old signal mask is right! Bash for
	// instance changes the procmask in a signal handler. Those changes are
	// lost the way we do it.
	atomic_set(&thread->sig_block_mask, signalMask);
	update_current_thread_signals_flag();

	// restore the register state saved before the handler ran
	frame->eip = regs.eip;
	frame->flags = regs.eflags;
	frame->eax = regs.eax;
	frame->ecx = regs.ecx;
	frame->edx = regs.edx;
	frame->ebp = regs.ebp;
	frame->esp = regs.esp;
	frame->user_esp = regs._reserved_1;
	frame->edi = regs._reserved_2[0];
	frame->esi = regs._reserved_2[1];
	frame->ebx = regs._reserved_2[2];

	i386_frstor((void *)(&regs.xregs));

	TRACE(("### arch_restore_signal_frame: exit\n"));

	// reconstruct the (possibly 64-bit) syscall return value
	return (int64)frame->eax | ((int64)frame->edx << 32);
}
620 
621 
/**	Saves everything needed to restore the frame in the child fork in the
 *	arch_fork_arg structure to be passed to arch_restore_fork_frame().
 *	Also makes sure to return the right value.
 */

void
arch_store_fork_frame(struct arch_fork_arg *arg)
{
	struct iframe *frame = get_current_iframe();

	// we need to copy the threads current iframe
	arg->iframe = *frame;

	// we also want fork() to return 0 for the child
	arg->iframe.eax = 0;
}
638 
639 
/** Restores the frame from a forked team as specified by the provided
 *	arch_fork_arg structure.
 *	Needs to be called from within the child team, ie. instead of
 *	arch_thread_enter_uspace() as thread "starter".
 *	This function does not return to the caller, but will enter userland
 *	in the child team at the same position where the parent team left off.
 */

void
arch_restore_fork_frame(struct arch_fork_arg *arg)
{
	struct thread *thread = thread_get_current_thread();

	disable_interrupts();

	// kernel stack for the next userland->kernel transition
	i386_set_tss_and_kstack(thread->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS (set the current %fs register)
	set_tls_context(thread);

	// jumps to userland; does not return
	i386_restore_frame_from_syscall(arg->iframe);
}
662 
663 
664 void
665 arch_syscall_64_bit_return_value(void)
666 {
667 	struct thread* thread = thread_get_current_thread();
668 	atomic_or(&thread->flags, THREAD_FLAGS_64_BIT_SYSCALL_RETURN);
669 }
670