xref: /haiku/src/system/kernel/arch/x86/arch_thread.cpp (revision 03187b607b2b5eec7ee059f1ead09bdba14991fb)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 #include <arch/thread.h>
10 
11 #include <string.h>
12 
13 #include <arch/user_debugger.h>
14 #include <arch_cpu.h>
15 #include <cpu.h>
16 #include <debug.h>
17 #include <kernel.h>
18 #include <ksignal.h>
19 #include <int.h>
20 #include <team.h>
21 #include <thread.h>
22 #include <tls.h>
23 #include <tracing.h>
24 #include <vm_address_space.h>
25 #include <vm_types.h>
26 
27 #include "x86_paging.h"
28 
29 
30 //#define TRACE_ARCH_THREAD
31 #ifdef TRACE_ARCH_THREAD
32 #	define TRACE(x) dprintf x
33 #else
34 #	define TRACE(x) ;
35 #endif
36 
37 
38 #ifdef SYSCALL_TRACING
39 
40 namespace SyscallTracing {
41 
// Trace entry recording that a syscall is being restarted; emitted by
// x86_restart_syscall() via the TSYSCALL() macro when SYSCALL_TRACING is on.
class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			Initialized();
		}

		// Prints this entry for the tracing output.
		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};
54 
55 }
56 
57 #	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x
58 
59 #else
60 #	define TSYSCALL(x)
61 #endif	// SYSCALL_TRACING
62 
63 
64 // from arch_interrupts.S
65 extern "C" void i386_stack_init(struct farcall *interrupt_stack_offset);
66 extern "C" void i386_restore_frame_from_syscall(struct iframe frame);
67 
68 // from arch_cpu.c
69 extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
70 extern bool gHasSSE;
71 
72 static struct arch_thread sInitialState _ALIGNED(16);
73 	// the fpu_state must be aligned on a 16 byte boundary, so that fxsave can use it
74 
75 
76 status_t
77 arch_thread_init(struct kernel_args *args)
78 {
79 	// save one global valid FPU state; it will be copied in the arch dependent
80 	// part of each new thread
81 
82 	asm volatile ("clts; fninit; fnclex;");
83 	if (gHasSSE)
84 		i386_fxsave(sInitialState.fpu_state);
85 	else
86 		i386_fnsave(sInitialState.fpu_state);
87 
88 	return B_OK;
89 }
90 
91 
92 static struct iframe *
93 find_previous_iframe(struct thread *thread, addr_t frame)
94 {
95 	// iterate backwards through the stack frames, until we hit an iframe
96 	while (frame >= thread->kernel_stack_base
97 		&& frame < thread->kernel_stack_top) {
98 		addr_t previousFrame = *(addr_t*)frame;
99 		if ((previousFrame & ~IFRAME_TYPE_MASK) == 0) {
100 			if (previousFrame == 0)
101 				return NULL;
102 			return (struct iframe*)frame;
103 		}
104 
105 		frame = previousFrame;
106 	}
107 
108 	return NULL;
109 }
110 
111 
112 static struct iframe*
113 get_previous_iframe(struct iframe* frame)
114 {
115 	if (frame == NULL)
116 		return NULL;
117 
118 	return find_previous_iframe(thread_get_current_thread(), frame->ebp);
119 }
120 
121 
122 /*!
123 	Returns the current iframe structure of the running thread.
124 	This function must only be called in a context where it's actually
125 	sure that such iframe exists; ie. from syscalls, but usually not
126 	from standard kernel threads.
127 */
128 static struct iframe*
129 get_current_iframe(void)
130 {
131 	return find_previous_iframe(thread_get_current_thread(), x86_read_ebp());
132 }
133 
134 
135 /*!
136 	\brief Returns the current thread's topmost (i.e. most recent)
137 	userland->kernel transition iframe (usually the first one, save for
138 	interrupts in signal handlers).
139 	\return The iframe, or \c NULL, if there is no such iframe (e.g. when
140 			the thread is a kernel thread).
141 */
142 struct iframe *
143 i386_get_user_iframe(void)
144 {
145 	struct iframe* frame = get_current_iframe();
146 
147 	while (frame != NULL) {
148 		if (IFRAME_IS_USER(frame))
149 			return frame;
150 		frame = get_previous_iframe(frame);
151 	}
152 
153 	return NULL;
154 }
155 
156 
157 /*!	\brief Like i386_get_user_iframe(), just for the given thread.
158 	The thread must not be running and the threads spinlock must be held.
159 */
160 struct iframe *
161 i386_get_thread_user_iframe(struct thread *thread)
162 {
163 	if (thread->state == B_THREAD_RUNNING)
164 		return NULL;
165 
166 	// read %ebp from the thread's stack stored by a pushad
167 	addr_t ebp = thread->arch_info.current_stack.esp[2];
168 
169 	// find the user iframe
170 	struct iframe *frame = find_previous_iframe(thread, ebp);
171 
172 	while (frame != NULL) {
173 		if (IFRAME_IS_USER(frame))
174 			return frame;
175 		frame = get_previous_iframe(frame);
176 	}
177 
178 	return NULL;
179 }
180 
181 
/*!	Public accessor for the running thread's most recent iframe; see
	get_current_iframe() for the constraints on when this may be called.
*/
struct iframe *
i386_get_current_iframe(void)
{
	return get_current_iframe();
}
187 
188 
189 void *
190 x86_next_page_directory(struct thread *from, struct thread *to)
191 {
192 	vm_address_space* toAddressSpace = to->team->address_space;
193 	if (from->team->address_space == toAddressSpace) {
194 		// don't change the pgdir, same address space
195 		return NULL;
196 	}
197 
198 	if (toAddressSpace == NULL)
199 		toAddressSpace = vm_kernel_address_space();
200 
201 	return i386_translation_map_get_pgdir(&toAddressSpace->translation_map);
202 }
203 
204 
// Loads the given selector into %fs; the TLS base address is reached via
// %fs-relative addressing in userland (see set_tls_context()).
static inline void
set_fs_register(uint32 segment)
{
	asm("movl %0,%%fs" :: "r" (segment));
}
210 
211 
212 static void
213 set_tls_context(struct thread *thread)
214 {
215 	int entry = smp_get_current_cpu() + TLS_BASE_SEGMENT;
216 
217 	set_segment_descriptor_base(&gGDT[entry], thread->user_local_storage);
218 	set_fs_register((entry << 3) | DPL_USER);
219 }
220 
221 
/*!	Rewinds the given syscall iframe so that the interrupted syscall is
	executed again when the thread returns to userland: restores the
	original syscall number/parameter registers and backs %eip up over the
	syscall instruction. Also updates the thread's restart flags.
*/
void
x86_restart_syscall(struct iframe* frame)
{
	struct thread* thread = thread_get_current_thread();

	// the restart request is consumed; mark the syscall as restarted
	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);

	// restore the registers the first syscall run clobbered
	frame->eax = frame->orig_eax;
	frame->edx = frame->orig_edx;
	frame->eip -= 2;
		// undoes the "int $99"/"sysenter"/"syscall" instruction
		// (so that it'll be executed again)

	TSYSCALL(RestartSyscall());
}
238 
239 
240 static uint32 *
241 get_signal_stack(struct thread *thread, struct iframe *frame, int signal)
242 {
243 	// use the alternate signal stack if we should and can
244 	if (thread->signal_stack_enabled
245 		&& (thread->sig_action[signal - 1].sa_flags & SA_ONSTACK) != 0
246 		&& (frame->user_esp < thread->signal_stack_base
247 			|| frame->user_esp >= thread->signal_stack_base
248 				+ thread->signal_stack_size)) {
249 		return (uint32 *)(thread->signal_stack_base
250 			+ thread->signal_stack_size);
251 	}
252 
253 	return (uint32 *)frame->user_esp;
254 }
255 
256 
257 //	#pragma mark -
258 
259 
// Nothing architecture specific to initialize for a team on x86.
status_t
arch_team_init_team_struct(struct team *p, bool kernel)
{
	return B_OK;
}
265 
266 
267 status_t
268 arch_thread_init_thread_struct(struct thread *thread)
269 {
270 	// set up an initial state (stack & fpu)
271 	memcpy(&thread->arch_info, &sInitialState, sizeof(struct arch_thread));
272 	return B_OK;
273 }
274 
275 
/*!	Prepares a fresh kernel thread's stack so that the first context switch
	to it "returns" through \a entry_func, then \a start_func, and finally
	\a exit_func, with all registers restored by the initial popad zeroed.
	The resulting stack position is stored in the thread's arch data.
*/
status_t
arch_thread_init_kthread_stack(struct thread *t, int (*start_func)(void),
	void (*entry_func)(void), void (*exit_func)(void))
{
	addr_t *kstack = (addr_t *)t->kernel_stack_base;
	addr_t *kstack_top = (addr_t *)t->kernel_stack_top;
	int i;

	TRACE(("arch_thread_initialize_kthread_stack: kstack 0x%p, start_func 0x%p, entry_func 0x%p\n",
		kstack, start_func, entry_func));

	// clear the kernel stack
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
	// skip the guard pages at the low end of the stack area
	memset((void *)((addr_t)kstack + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE), 0,
		KERNEL_STACK_SIZE);
#	else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#	endif
#else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#endif

	// set the final return address to be thread_kthread_exit
	kstack_top--;
	*kstack_top = (unsigned int)exit_func;

	// set the return address to be the start of the first function
	kstack_top--;
	*kstack_top = (unsigned int)start_func;

	// set the return address to be the start of the entry (thread setup)
	// function
	kstack_top--;
	*kstack_top = (unsigned int)entry_func;

	// simulate pushfl
//	kstack_top--;
//	*kstack_top = 0x00; // interrupts still disabled after the switch

	// simulate initial popad: eight zeroed register slots
	for (i = 0; i < 8; i++) {
		kstack_top--;
		*kstack_top = 0;
	}

	// save the stack position
	t->arch_info.current_stack.esp = kstack_top;
	t->arch_info.current_stack.ss = (addr_t *)KERNEL_DATA_SEG;

	return B_OK;
}
328 
329 
330 /** Initializes the user-space TLS local storage pointer in
331  *	the thread structure, and the reserved TLS slots.
332  *
333  *	Is called from _create_user_thread_kentry().
334  */
335 
336 status_t
337 arch_thread_init_tls(struct thread *thread)
338 {
339 	uint32 tls[TLS_USER_THREAD_SLOT + 1];
340 
341 	thread->user_local_storage = thread->user_stack_base
342 		+ thread->user_stack_size;
343 
344 	// initialize default TLS fields
345 	memset(tls, 0, sizeof(tls));
346 	tls[TLS_BASE_ADDRESS_SLOT] = thread->user_local_storage;
347 	tls[TLS_THREAD_ID_SLOT] = thread->id;
348 	tls[TLS_USER_THREAD_SLOT] = (addr_t)thread->user_thread;
349 
350 	return user_memcpy((void *)thread->user_local_storage, tls, sizeof(tls));
351 }
352 
353 
/*!	Switches to the given kernel stack and calls \a func(arg) on it; thin
	wrapper around the assembly routine i386_switch_stack_and_call().
*/
void
arch_thread_switch_kstack_and_call(struct thread *t, addr_t new_kstack,
	void (*func)(void *), void *arg)
{
	i386_switch_stack_and_call(new_kstack, func, arg);
}
360 
361 
/*!	Performs the architecture specific part of a context switch from thread
	\a from to thread \a to: updates the TSS kernel stack, the per-CPU TLS
	GDT entry, the active translation map (with reference accounting and
	page directory switch, if the address space changes), swaps the FPU
	state and finally switches the CPU context.
*/
void
arch_thread_context_switch(struct thread *from, struct thread *to)
{
	i386_set_tss_and_kstack(to->kernel_stack_top);

	// set TLS GDT entry to the current thread - since this action is
	// dependent on the current CPU, we have to do it here
	if (to->user_local_storage != 0)
		set_tls_context(to);

	struct cpu_ent* cpuData = to->cpu;
	vm_translation_map_arch_info* activeMap
		= cpuData->arch.active_translation_map;
	vm_address_space* toAddressSpace = to->team->address_space;

	addr_t newPageDirectory;
	vm_translation_map_arch_info* toMap;
	if (toAddressSpace != NULL
		&& (toMap = toAddressSpace->translation_map.arch_data) != activeMap) {
		// update on which CPUs the address space is used
		int cpu = cpuData->cpu_num;
		atomic_and(&activeMap->active_on_cpus, ~((uint32)1 << cpu));
		atomic_or(&toMap->active_on_cpus, (uint32)1 << cpu);

		// assign the new map to the CPU
		toMap->AddReference();
		cpuData->arch.active_translation_map = toMap;

		// get the new page directory
		newPageDirectory = (addr_t)toMap->pgdir_phys;
	} else {
		// address space unchanged (or kernel-only): keep the current page
		// directory and skip the reference juggling below
		activeMap = NULL;
		newPageDirectory = 0;
	}

	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
	i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);

	// drop the reference on the previously active map only after the
	// actual switch (activeMap is non-NULL only if the map was replaced)
	if (activeMap != NULL)
		activeMap->RemoveReference();
}
403 
404 
405 void
406 arch_thread_dump_info(void *info)
407 {
408 	struct arch_thread *at = (struct arch_thread *)info;
409 
410 	kprintf("\tesp: %p\n", at->current_stack.esp);
411 	kprintf("\tss: %p\n", at->current_stack.ss);
412 	kprintf("\tfpu_state at %p\n", at->fpu_state);
413 }
414 
415 
/** Sets up initial thread context and enters user space. Copies a small
 *	exit stub and the entry function's arguments onto the user stack, then
 *	jumps to \a entry in userland. Does not return on success (the final
 *	return statement is unreachable); returns B_BAD_ADDRESS if the user
 *	stack cannot be written.
 */

status_t
arch_thread_enter_userspace(struct thread *t, addr_t entry, void *args1,
	void *args2)
{
	addr_t stackTop = t->user_stack_base + t->user_stack_size;
	uint32 codeSize = (addr_t)x86_end_userspace_thread_exit
		- (addr_t)x86_userspace_thread_exit;
	uint32 args[3];

	TRACE(("arch_thread_enter_uspace: entry 0x%lx, args %p %p, ustack_top 0x%lx\n",
		entry, args1, args2, stackTop));

	// copy the little stub that calls exit_thread() when the thread entry
	// function returns, as well as the arguments of the entry function
	stackTop -= codeSize;

	if (user_memcpy((void *)stackTop, (const void *)&x86_userspace_thread_exit, codeSize) < B_OK)
		return B_BAD_ADDRESS;

	args[0] = stackTop;
		// the entry function "returns" into the copied exit stub
	args[1] = (uint32)args1;
	args[2] = (uint32)args2;
	stackTop -= sizeof(args);

	if (user_memcpy((void *)stackTop, args, sizeof(args)) < B_OK)
		return B_BAD_ADDRESS;

	thread_at_kernel_exit();
		// also disables interrupts

	i386_set_tss_and_kstack(t->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS
	set_tls_context(t);

	x86_enter_userspace(entry, stackTop);

	return B_OK;
		// never gets here
}
459 
460 
461 bool
462 arch_on_signal_stack(struct thread *thread)
463 {
464 	struct iframe *frame = get_current_iframe();
465 
466 	return frame->user_esp >= thread->signal_stack_base
467 		&& frame->user_esp < thread->signal_stack_base
468 			+ thread->signal_stack_size;
469 }
470 
471 
/*!	Builds the signal frame on the user stack for delivering \a signal to
	\a thread with the given \a action: pushes (top to bottom) the syscall
	restart info, the saved register set (vregs), a copy of the return
	code snippet i386_return_from_signal, and finally the handler's
	arguments plus return address. Then redirects the user iframe to the
	signal handler. The layout must stay in sync with
	arch_restore_signal_frame() and the i386_return_from_signal stub.
	Returns B_BAD_ADDRESS/an error if the user stack cannot be written.
*/
status_t
arch_setup_signal_frame(struct thread *thread, struct sigaction *action,
	int signal, int signalMask)
{
	struct iframe *frame = get_current_iframe();
	if (!IFRAME_IS_USER(frame)) {
		panic("arch_setup_signal_frame(): No user iframe!");
		return B_BAD_VALUE;
	}

	uint32 *signalCode;
	uint32 *userRegs;
	struct vregs regs;
	uint32 buffer[6];
	status_t status;

	// start stuffing stuff on the user stack
	uint32* userStack = get_signal_stack(thread, frame, signal);

	// copy syscall restart info onto the user stack
	// (+12 for threadFlags/orig_eax/orig_edx, +3 rounds up to uint32 units)
	userStack -= (sizeof(thread->syscall_restart.parameters) + 12 + 3) / 4;
	uint32 threadFlags = atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	if (user_memcpy(userStack, &threadFlags, 4) < B_OK
		|| user_memcpy(userStack + 1, &frame->orig_eax, 4) < B_OK
		|| user_memcpy(userStack + 2, &frame->orig_edx, 4) < B_OK)
		return B_BAD_ADDRESS;
	status = user_memcpy(userStack + 3, thread->syscall_restart.parameters,
		sizeof(thread->syscall_restart.parameters));
	if (status < B_OK)
		return status;

	// store the saved regs onto the user stack
	regs.eip = frame->eip;
	regs.eflags = frame->flags;
	regs.eax = frame->eax;
	regs.ecx = frame->ecx;
	regs.edx = frame->edx;
	regs.ebp = frame->ebp;
	regs.esp = frame->esp;
	regs._reserved_1 = frame->user_esp;
	regs._reserved_2[0] = frame->edi;
	regs._reserved_2[1] = frame->esi;
	regs._reserved_2[2] = frame->ebx;
	i386_fnsave((void *)(&regs.xregs));

	// +3 rounds the vregs size up to whole uint32 slots
	userStack -= (sizeof(struct vregs) + 3) / 4;
	userRegs = userStack;
	status = user_memcpy(userRegs, &regs, sizeof(regs));
	if (status < B_OK)
		return status;

	// now store a code snippet on the stack
	userStack -= ((uint32)i386_end_return_from_signal + 3
		- (uint32)i386_return_from_signal) / 4;
	signalCode = userStack;
	status = user_memcpy(signalCode, (const void *)&i386_return_from_signal,
		((uint32)i386_end_return_from_signal
			- (uint32)i386_return_from_signal));
	if (status < B_OK)
		return status;

	// now set up the final part
	buffer[0] = (uint32)signalCode;	// return address when sa_handler done
	buffer[1] = signal;				// arguments to sa_handler
	buffer[2] = (uint32)action->sa_userdata;
	buffer[3] = (uint32)userRegs;

	buffer[4] = signalMask;			// Old signal mask to restore
	buffer[5] = (uint32)userRegs;	// Int frame + extra regs to restore

	userStack -= sizeof(buffer) / 4;

	status = user_memcpy(userStack, buffer, sizeof(buffer));
	if (status < B_OK)
		return status;

	// divert the user iframe into the signal handler
	frame->user_esp = (uint32)userStack;
	frame->eip = (uint32)action->sa_handler;

	return B_OK;
}
554 
555 
/*!	Counterpart to arch_setup_signal_frame(): called when a signal handler
	returns through the i386_return_from_signal stub. Reads the saved
	register set, signal mask and syscall restart info back from the user
	stack, restores them into the thread/iframe, and returns the combined
	64 bit syscall return value (%eax | %edx << 32) so the interrupted
	syscall's result is preserved.
*/
int64
arch_restore_signal_frame(void)
{
	struct thread *thread = thread_get_current_thread();
	struct iframe *frame = get_current_iframe();
	int32 signalMask;
	uint32 *userStack;
	struct vregs* regsPointer;
	struct vregs regs;

	TRACE(("### arch_restore_signal_frame: entry\n"));

	// the stub left the old signal mask and the vregs pointer on top of
	// the user stack (buffer[4]/buffer[5] in arch_setup_signal_frame())
	userStack = (uint32 *)frame->user_esp;
	if (user_memcpy(&signalMask, &userStack[0], 4) < B_OK
		|| user_memcpy(&regsPointer, &userStack[1], 4) < B_OK
		|| user_memcpy(&regs, regsPointer, sizeof(vregs)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// the restart info was pushed directly above the vregs
	uint32* syscallRestartInfo
		= (uint32*)regsPointer + (sizeof(struct vregs) + 3) / 4;
	uint32 threadFlags;
	if (user_memcpy(&threadFlags, syscallRestartInfo, 4) < B_OK
		|| user_memcpy(&frame->orig_eax, syscallRestartInfo + 1, 4) < B_OK
		|| user_memcpy(&frame->orig_edx, syscallRestartInfo + 2, 4) < B_OK
		|| user_memcpy(thread->syscall_restart.parameters,
			syscallRestartInfo + 3,
			sizeof(thread->syscall_restart.parameters)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// set restart/64bit return value flags from previous syscall
	atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	atomic_or(&thread->flags, threadFlags
		& (THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));

	// TODO: Verify that just restoring the old signal mask is right! Bash for
	// instance changes the procmask in a signal handler. Those changes are
	// lost the way we do it.
	atomic_set(&thread->sig_block_mask, signalMask);
	update_current_thread_signals_flag();

	// restore the register set saved at signal delivery time
	frame->eip = regs.eip;
	frame->flags = regs.eflags;
	frame->eax = regs.eax;
	frame->ecx = regs.ecx;
	frame->edx = regs.edx;
	frame->ebp = regs.ebp;
	frame->esp = regs.esp;
	frame->user_esp = regs._reserved_1;
	frame->edi = regs._reserved_2[0];
	frame->esi = regs._reserved_2[1];
	frame->ebx = regs._reserved_2[2];

	i386_frstor((void *)(&regs.xregs));

	TRACE(("### arch_restore_signal_frame: exit\n"));

	return (int64)frame->eax | ((int64)frame->edx << 32);
}
617 
618 
619 /**	Saves everything needed to restore the frame in the child fork in the
620  *	arch_fork_arg structure to be passed to arch_restore_fork_frame().
621  *	Also makes sure to return the right value.
622  */
623 
624 void
625 arch_store_fork_frame(struct arch_fork_arg *arg)
626 {
627 	struct iframe *frame = get_current_iframe();
628 
629 	// we need to copy the threads current iframe
630 	arg->iframe = *frame;
631 
632 	// we also want fork() to return 0 for the child
633 	arg->iframe.eax = 0;
634 }
635 
636 
/** Restores the frame from a forked team as specified by the provided
 *	arch_fork_arg structure.
 *	Needs to be called from within the child team, ie. instead of
 *	arch_thread_enter_uspace() as thread "starter".
 *	This function does not return to the caller, but will enter userland
 *	in the child team at the same position where the parent team left of.
 */

void
arch_restore_fork_frame(struct arch_fork_arg *arg)
{
	struct thread *thread = thread_get_current_thread();

	// interrupts must stay off until the iframe is restored
	disable_interrupts();

	i386_set_tss_and_kstack(thread->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS (set the current %fs register)
	set_tls_context(thread);

	// enters userland via the stored iframe; does not return
	i386_restore_frame_from_syscall(arg->iframe);
}
659 
660 
661 void
662 arch_syscall_64_bit_return_value(void)
663 {
664 	struct thread* thread = thread_get_current_thread();
665 	atomic_or(&thread->flags, THREAD_FLAGS_64_BIT_SYSCALL_RETURN);
666 }
667