xref: /haiku/src/system/kernel/arch/x86/arch_thread.cpp (revision 82a8a20999118b748396cf16a33c47c3b0c0222d)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 #include <arch/thread.h>
10 
11 #include <string.h>
12 
13 #include <arch/user_debugger.h>
14 #include <arch_cpu.h>
15 #include <cpu.h>
16 #include <debug.h>
17 #include <kernel.h>
18 #include <ksignal.h>
19 #include <int.h>
20 #include <team.h>
21 #include <thread.h>
22 #include <tls.h>
23 #include <tracing.h>
24 #include <vm_address_space.h>
25 #include <vm_types.h>
26 
27 #include "x86_paging.h"
28 
29 
// Uncomment the following define to turn the TRACE() statements in this file
// into dprintf() calls. TRACE() takes a complete, parenthesized dprintf()
// argument list, i.e. it is used as TRACE(("format", args...)).
//#define TRACE_ARCH_THREAD
#ifdef TRACE_ARCH_THREAD
#	define TRACE(x) dprintf x
#else
	// tracing disabled: TRACE() expands to an empty statement
#	define TRACE(x) ;
#endif
36 
37 
#ifdef SYSCALL_TRACING

namespace SyscallTracing {

/*!	Trace entry recording that a syscall is being restarted.
	Its dump output is the fixed string "syscall restart".
*/
class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			// NOTE(review): presumably marks the entry as completely
			// constructed for the tracing framework -- confirm against
			// AbstractTraceEntry.
			Initialized();
		}

		//!	Prints the textual representation of this entry to \a out.
		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};

}

// Creates a trace entry of the given SyscallTracing class; the allocation
// uses the non-throwing operator new.
#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
	// syscall tracing disabled: TSYSCALL() expands to nothing
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING
62 
63 
// from arch_interrupts.S
extern "C" void i386_stack_init(struct farcall *interrupt_stack_offset);
extern "C" void i386_restore_frame_from_syscall(struct iframe frame);

// from arch_cpu.c
// gX86SwapFPUFunc is called on every context switch with the FPU state
// buffers of the old and the new thread (see arch_thread_context_switch()).
extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
// gHasSSE selects between fxsave and fnsave in arch_thread_init().
extern bool gHasSSE;

// Template arch_thread state: filled in once by arch_thread_init() and copied
// into every new thread by arch_thread_init_thread_struct().
static struct arch_thread sInitialState _ALIGNED(16);
	// the fpu_state must be aligned on a 16 byte boundary, so that fxsave can use it
74 
75 
76 status_t
77 arch_thread_init(struct kernel_args *args)
78 {
79 	// save one global valid FPU state; it will be copied in the arch dependent
80 	// part of each new thread
81 
82 	asm volatile ("clts; fninit; fnclex;");
83 	if (gHasSSE)
84 		i386_fxsave(sInitialState.fpu_state);
85 	else
86 		i386_fnsave(sInitialState.fpu_state);
87 
88 	return B_OK;
89 }
90 
91 
92 static struct iframe *
93 find_previous_iframe(struct thread *thread, addr_t frame)
94 {
95 	// iterate backwards through the stack frames, until we hit an iframe
96 	while (frame >= thread->kernel_stack_base
97 		&& frame < thread->kernel_stack_top) {
98 		addr_t previousFrame = *(addr_t*)frame;
99 		if ((previousFrame & ~IFRAME_TYPE_MASK) == 0) {
100 			if (previousFrame == 0)
101 				return NULL;
102 			return (struct iframe*)frame;
103 		}
104 
105 		frame = previousFrame;
106 	}
107 
108 	return NULL;
109 }
110 
111 
112 static struct iframe*
113 get_previous_iframe(struct iframe* frame)
114 {
115 	if (frame == NULL)
116 		return NULL;
117 
118 	return find_previous_iframe(thread_get_current_thread(), frame->ebp);
119 }
120 
121 
122 /*!
123 	Returns the current iframe structure of the running thread.
124 	This function must only be called in a context where it's actually
125 	sure that such iframe exists; ie. from syscalls, but usually not
126 	from standard kernel threads.
127 */
128 static struct iframe*
129 get_current_iframe(void)
130 {
131 	return find_previous_iframe(thread_get_current_thread(), x86_read_ebp());
132 }
133 
134 
135 /*!
136 	\brief Returns the current thread's topmost (i.e. most recent)
137 	userland->kernel transition iframe (usually the first one, save for
138 	interrupts in signal handlers).
139 	\return The iframe, or \c NULL, if there is no such iframe (e.g. when
140 			the thread is a kernel thread).
141 */
142 struct iframe *
143 i386_get_user_iframe(void)
144 {
145 	struct iframe* frame = get_current_iframe();
146 
147 	while (frame != NULL) {
148 		if (IFRAME_IS_USER(frame))
149 			return frame;
150 		frame = get_previous_iframe(frame);
151 	}
152 
153 	return NULL;
154 }
155 
156 
157 /*!	\brief Like i386_get_user_iframe(), just for the given thread.
158 	The thread must not be running and the threads spinlock must be held.
159 */
160 struct iframe *
161 i386_get_thread_user_iframe(struct thread *thread)
162 {
163 	if (thread->state == B_THREAD_RUNNING)
164 		return NULL;
165 
166 	// read %ebp from the thread's stack stored by a pushad
167 	addr_t ebp = thread->arch_info.current_stack.esp[2];
168 
169 	// find the user iframe
170 	struct iframe *frame = find_previous_iframe(thread, ebp);
171 
172 	while (frame != NULL) {
173 		if (IFRAME_IS_USER(frame))
174 			return frame;
175 		frame = get_previous_iframe(frame);
176 	}
177 
178 	return NULL;
179 }
180 
181 
/*!	Public wrapper around get_current_iframe(): returns the most recent
	iframe of the running thread, or \c NULL if none is found.
*/
struct iframe *
i386_get_current_iframe(void)
{
	return get_current_iframe();
}
188 
189 void *
190 x86_next_page_directory(struct thread *from, struct thread *to)
191 {
192 	vm_address_space* toAddressSpace = to->team->address_space;
193 	if (from->team->address_space == toAddressSpace) {
194 		// don't change the pgdir, same address space
195 		return NULL;
196 	}
197 
198 	if (toAddressSpace == NULL)
199 		toAddressSpace = vm_kernel_address_space();
200 
201 	return i386_translation_map_get_pgdir(&toAddressSpace->translation_map);
202 }
203 
204 
/*!	Loads the segment selector \a segment into the %fs register.
	\param segment The selector value to load.
*/
static inline void
set_fs_register(uint32 segment)
{
	asm("movl %0,%%fs" :: "r" (segment));
}
210 
211 
212 static void
213 set_tls_context(struct thread *thread)
214 {
215 	int entry = smp_get_current_cpu() + TLS_BASE_SEGMENT;
216 
217 	set_segment_descriptor_base(&gGDT[entry], thread->user_local_storage);
218 	set_fs_register((entry << 3) | DPL_USER);
219 }
220 
221 
222 void
223 x86_restart_syscall(struct iframe* frame)
224 {
225 	struct thread* thread = thread_get_current_thread();
226 
227 	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
228 	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);
229 
230 	frame->eax = frame->orig_eax;
231 	frame->edx = frame->orig_edx;
232 	frame->eip -= 2;
233 		// undoes the "int $99"/"sysenter"/"syscall" instruction
234 		// (so that it'll be executed again)
235 
236 	TSYSCALL(RestartSyscall());
237 }
238 
239 
240 static uint32 *
241 get_signal_stack(struct thread *thread, struct iframe *frame, int signal)
242 {
243 	// use the alternate signal stack if we should and can
244 	if (thread->signal_stack_enabled
245 		&& (thread->sig_action[signal - 1].sa_flags & SA_ONSTACK) != 0
246 		&& (frame->user_esp < thread->signal_stack_base
247 			|| frame->user_esp >= thread->signal_stack_base
248 				+ thread->signal_stack_size)) {
249 		return (uint32 *)(thread->signal_stack_base
250 			+ thread->signal_stack_size);
251 	}
252 
253 	return (uint32 *)frame->user_esp;
254 }
255 
256 
257 //	#pragma mark -
258 
259 
/*!	Architecture specific initialization of a team structure. Nothing needs
	to be done on x86; both parameters are ignored.
	\return Always \c B_OK.
*/
status_t
arch_team_init_team_struct(struct team *p, bool kernel)
{
	return B_OK;
}
265 
266 
267 status_t
268 arch_thread_init_thread_struct(struct thread *thread)
269 {
270 	// set up an initial state (stack & fpu)
271 	memcpy(&thread->arch_info, &sInitialState, sizeof(struct arch_thread));
272 	return B_OK;
273 }
274 
275 
276 status_t
277 arch_thread_init_kthread_stack(struct thread *t, int (*start_func)(void),
278 	void (*entry_func)(void), void (*exit_func)(void))
279 {
280 	addr_t *kstack = (addr_t *)t->kernel_stack_base;
281 	addr_t *kstack_top = (addr_t *)t->kernel_stack_top;
282 	int i;
283 
284 	TRACE(("arch_thread_initialize_kthread_stack: kstack 0x%p, start_func 0x%p, entry_func 0x%p\n",
285 		kstack, start_func, entry_func));
286 
287 	// clear the kernel stack
288 #ifdef DEBUG_KERNEL_STACKS
289 #	ifdef STACK_GROWS_DOWNWARDS
290 	memset((void *)((addr_t)kstack + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE), 0,
291 		KERNEL_STACK_SIZE);
292 #	else
293 	memset(kstack, 0, KERNEL_STACK_SIZE);
294 #	endif
295 #else
296 	memset(kstack, 0, KERNEL_STACK_SIZE);
297 #endif
298 
299 	// set the final return address to be thread_kthread_exit
300 	kstack_top--;
301 	*kstack_top = (unsigned int)exit_func;
302 
303 	// set the return address to be the start of the first function
304 	kstack_top--;
305 	*kstack_top = (unsigned int)start_func;
306 
307 	// set the return address to be the start of the entry (thread setup)
308 	// function
309 	kstack_top--;
310 	*kstack_top = (unsigned int)entry_func;
311 
312 	// simulate pushfl
313 //	kstack_top--;
314 //	*kstack_top = 0x00; // interrupts still disabled after the switch
315 
316 	// simulate initial popad
317 	for (i = 0; i < 8; i++) {
318 		kstack_top--;
319 		*kstack_top = 0;
320 	}
321 
322 	// save the stack position
323 	t->arch_info.current_stack.esp = kstack_top;
324 	t->arch_info.current_stack.ss = (addr_t *)KERNEL_DATA_SEG;
325 
326 	return B_OK;
327 }
328 
329 
330 /** Initializes the user-space TLS local storage pointer in
331  *	the thread structure, and the reserved TLS slots.
332  *
333  *	Is called from _create_user_thread_kentry().
334  */
335 
336 status_t
337 arch_thread_init_tls(struct thread *thread)
338 {
339 	uint32 tls[TLS_USER_THREAD_SLOT + 1];
340 
341 	thread->user_local_storage = thread->user_stack_base
342 		+ thread->user_stack_size;
343 
344 	// initialize default TLS fields
345 	memset(tls, 0, sizeof(tls));
346 	tls[TLS_BASE_ADDRESS_SLOT] = thread->user_local_storage;
347 	tls[TLS_THREAD_ID_SLOT] = thread->id;
348 	tls[TLS_USER_THREAD_SLOT] = (addr_t)thread->user_thread;
349 
350 	return user_memcpy((void *)thread->user_local_storage, tls, sizeof(tls));
351 }
352 
353 
/*!	Calls \a func with argument \a arg on the kernel stack \a new_kstack by
	forwarding to i386_switch_stack_and_call().
	\param t Not used by this implementation.
	\param new_kstack The stack to switch to.
	\param func The function to call on the new stack.
	\param arg The argument passed to \a func.
*/
void
arch_thread_switch_kstack_and_call(struct thread *t, addr_t new_kstack,
	void (*func)(void *), void *arg)
{
	i386_switch_stack_and_call(new_kstack, func, arg);
}
360 
361 
/*!	Performs the architecture specific part of a context switch from thread
	\a from to thread \a to: updates the TSS kernel stack, the per-CPU TLS
	GDT entry, the bookkeeping of the translation map active on this CPU,
	swaps the FPU state, and finally switches the register context (and the
	page directory, if it changes).
	\param from The thread being switched away from.
	\param to The thread being switched to.
*/
void
arch_thread_context_switch(struct thread *from, struct thread *to)
{
	i386_set_tss_and_kstack(to->kernel_stack_top);

	// set TLS GDT entry to the current thread - since this action is
	// dependent on the current CPU, we have to do it here
	if (to->user_local_storage != 0)
		set_tls_context(to);

	struct cpu_ent* cpuData = to->cpu;
	vm_translation_map_arch_info* activeMap
		= cpuData->arch.active_translation_map;
	vm_address_space* toAddressSpace = to->team->address_space;

	// Only switch the translation map, if the target team has an address
	// space whose map differs from the one currently active on this CPU.
	// Note that toMap is assigned inside the condition.
	addr_t newPageDirectory;
	vm_translation_map_arch_info* toMap;
	if (toAddressSpace != NULL
		&& (toMap = toAddressSpace->translation_map.arch_data) != activeMap) {
		// update on which CPUs the address space is used
		int cpu = cpuData->cpu_num;
		atomic_and(&activeMap->active_on_cpus, ~((uint32)1 << cpu));
		atomic_or(&toMap->active_on_cpus, (uint32)1 << cpu);

		// assign the new map to the CPU
		activeMap->RemoveReference();
		toMap->AddReference();
		cpuData->arch.active_translation_map = toMap;

		// get the new page directory
		newPageDirectory = (addr_t)toMap->pgdir_phys;
	} else
		newPageDirectory = 0;
			// 0 is passed on when the map does not change

	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
	i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);
}
399 
400 
401 void
402 arch_thread_dump_info(void *info)
403 {
404 	struct arch_thread *at = (struct arch_thread *)info;
405 
406 	kprintf("\tesp: %p\n", at->current_stack.esp);
407 	kprintf("\tss: %p\n", at->current_stack.ss);
408 	kprintf("\tfpu_state at %p\n", at->fpu_state);
409 }
410 
411 
/** Sets up initial thread context and enters user space
 *
 *	The following is placed at the top of the thread's user stack (from
 *	high to low addresses): a copy of the x86_userspace_thread_exit stub,
 *	then three words -- the address of that stub copy (the return address
 *	of the entry function), \a args1, and \a args2.
 *
 *	Returns B_BAD_ADDRESS, if copying to the user stack fails. Otherwise
 *	control is passed to x86_enter_userspace().
 */

status_t
arch_thread_enter_userspace(struct thread *t, addr_t entry, void *args1,
	void *args2)
{
	addr_t stackTop = t->user_stack_base + t->user_stack_size;
	// size of the exit stub in bytes
	uint32 codeSize = (addr_t)x86_end_userspace_thread_exit
		- (addr_t)x86_userspace_thread_exit;
	uint32 args[3];

	TRACE(("arch_thread_enter_uspace: entry 0x%lx, args %p %p, ustack_top 0x%lx\n",
		entry, args1, args2, stackTop));

	// copy the little stub that calls exit_thread() when the thread entry
	// function returns, as well as the arguments of the entry function
	// NOTE(review): codeSize is not rounded up, so stackTop only stays
	// word aligned, if the stub size is a multiple of 4 -- confirm.
	stackTop -= codeSize;

	if (user_memcpy((void *)stackTop, (const void *)&x86_userspace_thread_exit, codeSize) < B_OK)
		return B_BAD_ADDRESS;

	// args[0] is the return address of the entry function: the stub copy
	args[0] = stackTop;
	args[1] = (uint32)args1;
	args[2] = (uint32)args2;
	stackTop -= sizeof(args);

	if (user_memcpy((void *)stackTop, args, sizeof(args)) < B_OK)
		return B_BAD_ADDRESS;

	thread_at_kernel_exit();
		// also disables interrupts

	i386_set_tss_and_kstack(t->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS
	set_tls_context(t);

	x86_enter_userspace(entry, stackTop);

	return B_OK;
		// never gets here
}
455 
456 
457 bool
458 arch_on_signal_stack(struct thread *thread)
459 {
460 	struct iframe *frame = get_current_iframe();
461 
462 	return frame->user_esp >= thread->signal_stack_base
463 		&& frame->user_esp < thread->signal_stack_base
464 			+ thread->signal_stack_size;
465 }
466 
467 
/*!	Builds the signal handler frame on the thread's user stack and redirects
	the current iframe, so that the handler runs on return to userland.

	Pushed onto the user stack (from high to low addresses):
	- the syscall restart info: thread flags, orig_eax, orig_edx, and the
	  saved syscall parameters,
	- a \c vregs structure with the interrupted register and FPU state,
	- a copy of the i386_return_from_signal code snippet,
	- six words: the handler's return address (the snippet), its three
	  arguments (signal, sa_userdata, pointer to the vregs), the old signal
	  mask, and once more the pointer to the vregs.

	arch_restore_signal_frame() reads this layout back.

	\param thread The thread the signal is delivered to.
	\param action The signal action; supplies handler and userdata.
	\param signal The signal number.
	\param signalMask The signal mask to be restored after the handler.
	\return \c B_OK on success, \c B_BAD_VALUE if the current iframe is no
		user iframe, or an error if copying to the user stack fails.
*/
status_t
arch_setup_signal_frame(struct thread *thread, struct sigaction *action,
	int signal, int signalMask)
{
	struct iframe *frame = get_current_iframe();
	if (!IFRAME_IS_USER(frame)) {
		panic("arch_setup_signal_frame(): No user iframe!");
		return B_BAD_VALUE;
	}

	uint32 *signalCode;
	uint32 *userRegs;
	struct vregs regs;
	uint32 buffer[6];
	status_t status;

	// start stuffing stuff on the user stack
	uint32* userStack = get_signal_stack(thread, frame, signal);

	// copy syscall restart info onto the user stack
	// (the 12 bytes are the three words preceding the parameters; + 3
	// rounds the byte count up to whole words)
	userStack -= (sizeof(thread->syscall_restart.parameters) + 12 + 3) / 4;
	// fetch the previous flags and clear the restart/64 bit return flags in
	// one atomic operation; note that they stay cleared, even if one of the
	// copies below fails
	uint32 threadFlags = atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	if (user_memcpy(userStack, &threadFlags, 4) < B_OK
		|| user_memcpy(userStack + 1, &frame->orig_eax, 4) < B_OK
		|| user_memcpy(userStack + 2, &frame->orig_edx, 4) < B_OK)
		return B_BAD_ADDRESS;
	status = user_memcpy(userStack + 3, thread->syscall_restart.parameters,
		sizeof(thread->syscall_restart.parameters));
	if (status < B_OK)
		return status;

	// store the saved regs onto the user stack
	regs.eip = frame->eip;
	regs.eflags = frame->flags;
	regs.eax = frame->eax;
	regs.ecx = frame->ecx;
	regs.edx = frame->edx;
	regs.ebp = frame->ebp;
	regs.esp = frame->esp;
	// the registers vregs has no dedicated fields for go into the reserved
	// ones
	regs._reserved_1 = frame->user_esp;
	regs._reserved_2[0] = frame->edi;
	regs._reserved_2[1] = frame->esi;
	regs._reserved_2[2] = frame->ebx;
	i386_fnsave((void *)(&regs.xregs));

	userStack -= (sizeof(struct vregs) + 3) / 4;
	userRegs = userStack;
	status = user_memcpy(userRegs, &regs, sizeof(regs));
	if (status < B_OK)
		return status;

	// now store a code snippet on the stack
	userStack -= ((uint32)i386_end_return_from_signal + 3
		- (uint32)i386_return_from_signal) / 4;
	signalCode = userStack;
	status = user_memcpy(signalCode, (const void *)&i386_return_from_signal,
		((uint32)i386_end_return_from_signal
			- (uint32)i386_return_from_signal));
	if (status < B_OK)
		return status;

	// now set up the final part
	buffer[0] = (uint32)signalCode;	// return address when sa_handler done
	buffer[1] = signal;				// arguments to sa_handler
	buffer[2] = (uint32)action->sa_userdata;
	buffer[3] = (uint32)userRegs;

	buffer[4] = signalMask;			// Old signal mask to restore
	buffer[5] = (uint32)userRegs;	// Int frame + extra regs to restore

	userStack -= sizeof(buffer) / 4;

	status = user_memcpy(userStack, buffer, sizeof(buffer));
	if (status < B_OK)
		return status;

	// let the thread continue in the handler, on the stack prepared above
	frame->user_esp = (uint32)userStack;
	frame->eip = (uint32)action->sa_handler;

	return B_OK;
}
550 
551 
/*!	Undoes what arch_setup_signal_frame() has set up: reads the old signal
	mask, the saved registers, and the syscall restart info back from the
	user stack, and restores them into the thread and the current iframe.

	Expects the user stack pointer to point at two words: the old signal
	mask, followed by the pointer to the saved \c vregs. The syscall restart
	info is read from directly behind the \c vregs (rounded up to whole
	words).

	\return The restored eax/edx pair as a 64 bit value, or
		\c B_BAD_ADDRESS if reading from the user stack fails.
*/
int64
arch_restore_signal_frame(void)
{
	struct thread *thread = thread_get_current_thread();
	struct iframe *frame = get_current_iframe();
	int32 signalMask;
	uint32 *userStack;
	struct vregs* regsPointer;
	struct vregs regs;

	TRACE(("### arch_restore_signal_frame: entry\n"));

	// fetch the signal mask and the register set via the userland pointer
	userStack = (uint32 *)frame->user_esp;
	if (user_memcpy(&signalMask, &userStack[0], 4) < B_OK
		|| user_memcpy(&regsPointer, &userStack[1], 4) < B_OK
		|| user_memcpy(&regs, regsPointer, sizeof(vregs)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// the syscall restart info follows the vregs on the user stack
	uint32* syscallRestartInfo
		= (uint32*)regsPointer + (sizeof(struct vregs) + 3) / 4;
	uint32 threadFlags;
	if (user_memcpy(&threadFlags, syscallRestartInfo, 4) < B_OK
		|| user_memcpy(&frame->orig_eax, syscallRestartInfo + 1, 4) < B_OK
		|| user_memcpy(&frame->orig_edx, syscallRestartInfo + 2, 4) < B_OK
		|| user_memcpy(thread->syscall_restart.parameters,
			syscallRestartInfo + 3,
			sizeof(thread->syscall_restart.parameters)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// set restart/64bit return value flags from previous syscall
	// (only these two bits of the user supplied value are taken over)
	atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	atomic_or(&thread->flags, threadFlags
		& (THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));

	// TODO: Verify that just restoring the old signal mask is right! Bash for
	// instance changes the procmask in a signal handler. Those changes are
	// lost the way we do it.
	atomic_set(&thread->sig_block_mask, signalMask);
	update_current_thread_signals_flag();

	// NOTE(review): regs was read from user memory and regs.eflags is
	// written to the iframe unfiltered -- confirm that privileged flag bits
	// are sanitized elsewhere.
	frame->eip = regs.eip;
	frame->flags = regs.eflags;
	frame->eax = regs.eax;
	frame->ecx = regs.ecx;
	frame->edx = regs.edx;
	frame->ebp = regs.ebp;
	frame->esp = regs.esp;
	frame->user_esp = regs._reserved_1;
	frame->edi = regs._reserved_2[0];
	frame->esi = regs._reserved_2[1];
	frame->ebx = regs._reserved_2[2];

	i386_frstor((void *)(&regs.xregs));

	TRACE(("### arch_restore_signal_frame: exit\n"));

	// return the restored eax/edx combined as a 64 bit value
	return (int64)frame->eax | ((int64)frame->edx << 32);
}
613 
614 
615 /**	Saves everything needed to restore the frame in the child fork in the
616  *	arch_fork_arg structure to be passed to arch_restore_fork_frame().
617  *	Also makes sure to return the right value.
618  */
619 
620 void
621 arch_store_fork_frame(struct arch_fork_arg *arg)
622 {
623 	struct iframe *frame = get_current_iframe();
624 
625 	// we need to copy the threads current iframe
626 	arg->iframe = *frame;
627 
628 	// we also want fork() to return 0 for the child
629 	arg->iframe.eax = 0;
630 }
631 
632 
633 /** Restores the frame from a forked team as specified by the provided
634  *	arch_fork_arg structure.
635  *	Needs to be called from within the child team, ie. instead of
636  *	arch_thread_enter_uspace() as thread "starter".
637  *	This function does not return to the caller, but will enter userland
638  *	in the child team at the same position where the parent team left of.
639  */
640 
641 void
642 arch_restore_fork_frame(struct arch_fork_arg *arg)
643 {
644 	struct thread *thread = thread_get_current_thread();
645 
646 	disable_interrupts();
647 
648 	i386_set_tss_and_kstack(thread->kernel_stack_top);
649 
650 	// set the CPU dependent GDT entry for TLS (set the current %fs register)
651 	set_tls_context(thread);
652 
653 	i386_restore_frame_from_syscall(arg->iframe);
654 }
655 
656 
657 void
658 arch_syscall_64_bit_return_value(void)
659 {
660 	struct thread* thread = thread_get_current_thread();
661 	atomic_or(&thread->flags, THREAD_FLAGS_64_BIT_SYSCALL_RETURN);
662 }
663