xref: /haiku/src/system/kernel/arch/x86/arch_thread.cpp (revision 89755088d790ff4fe36f8aa77dacb2bd15507108)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 #include <arch/thread.h>
10 
11 #include <arch/user_debugger.h>
12 #include <arch_cpu.h>
13 #include <debug.h>
14 #include <kernel.h>
15 #include <ksignal.h>
16 #include <int.h>
17 #include <thread.h>
18 #include <tls.h>
19 #include <tracing.h>
20 #include <vm_address_space.h>
21 #include <vm_types.h>
22 
23 #include <string.h>
24 
25 
26 //#define TRACE_ARCH_THREAD
27 #ifdef TRACE_ARCH_THREAD
28 #	define TRACE(x) dprintf x
29 #else
30 #	define TRACE(x) ;
31 #endif
32 
33 
34 #ifdef SYSCALL_TRACING
35 
36 namespace SyscallTracing {
37 
// Trace entry recorded whenever a syscall is restarted (see
// x86_restart_syscall()); carries no payload, only marks the event.
class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};
50 
51 }
52 
53 #	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x
54 
55 #else
56 #	define TSYSCALL(x)
57 #endif	// SYSCALL_TRACING
58 
59 
60 // from arch_interrupts.S
61 extern "C" void i386_stack_init(struct farcall *interrupt_stack_offset);
62 extern "C" void i386_restore_frame_from_syscall(struct iframe frame);
63 
64 // from arch_cpu.c
65 extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
66 extern bool gHasSSE;
67 
68 static struct arch_thread sInitialState _ALIGNED(16);
69 	// the fpu_state must be aligned on a 16 byte boundary, so that fxsave can use it
70 
71 
/*!	Architecture specific thread-subsystem init: captures one known-good FPU
	state into \c sInitialState, which arch_thread_init_thread_struct() later
	copies into every new thread.
	\param args kernel boot arguments (unused here)
	\return \c B_OK always
*/
status_t
arch_thread_init(struct kernel_args *args)
{
	// save one global valid FPU state; it will be copied in the arch dependent
	// part of each new thread

	// clts: clear the task-switched flag so FPU instructions don't fault;
	// fninit/fnclex: reset the FPU and clear pending exceptions
	asm volatile ("clts; fninit; fnclex;");
	if (gHasSSE)
		i386_fxsave(sInitialState.fpu_state);
	else
		i386_fnsave(sInitialState.fpu_state);

	return B_OK;
}
86 
87 
88 static struct iframe *
89 find_previous_iframe(addr_t frame)
90 {
91 	struct thread *thread = thread_get_current_thread();
92 
93 	// iterate backwards through the stack frames, until we hit an iframe
94 	while (frame >= thread->kernel_stack_base
95 		&& frame < thread->kernel_stack_base + KERNEL_STACK_SIZE) {
96 		addr_t previousFrame = *(addr_t*)frame;
97 		if ((previousFrame & ~IFRAME_TYPE_MASK) == 0) {
98 			if (previousFrame == 0)
99 				return NULL;
100 			return (struct iframe*)frame;
101 		}
102 
103 		frame = previousFrame;
104 	}
105 
106 	return NULL;
107 }
108 
109 
110 static struct iframe*
111 get_previous_iframe(struct iframe* frame)
112 {
113 	if (frame == NULL)
114 		return NULL;
115 
116 	return find_previous_iframe(frame->ebp);
117 }
118 
119 
120 /*!
121 	Returns the current iframe structure of the running thread.
122 	This function must only be called in a context where it's actually
123 	sure that such iframe exists; ie. from syscalls, but usually not
124 	from standard kernel threads.
125 */
126 static struct iframe*
127 get_current_iframe(void)
128 {
129 	return find_previous_iframe(x86_read_ebp());
130 }
131 
132 
133 /*!
134 	\brief Returns the current thread's topmost (i.e. most recent)
135 	userland->kernel transition iframe (usually the first one, save for
136 	interrupts in signal handlers).
137 	\return The iframe, or \c NULL, if there is no such iframe (e.g. when
138 			the thread is a kernel thread).
139 */
140 struct iframe *
141 i386_get_user_iframe(void)
142 {
143 	struct iframe* frame = get_current_iframe();
144 
145 	while (frame != NULL) {
146 		if (frame->cs == USER_CODE_SEG)
147 			return frame;
148 		frame = get_previous_iframe(frame);
149 	}
150 
151 	return NULL;
152 }
153 
154 
155 void *
156 x86_next_page_directory(struct thread *from, struct thread *to)
157 {
158 	if (from->team->address_space != NULL && to->team->address_space != NULL) {
159 		// they are both user space threads
160 		if (from->team == to->team) {
161 			// dont change the pgdir, same address space
162 			return NULL;
163 		}
164 		// switching to a new address space
165 		return i386_translation_map_get_pgdir(&to->team->address_space->translation_map);
166 	} else if (from->team->address_space == NULL && to->team->address_space == NULL) {
167 		// they must both be kernel space threads
168 		return NULL;
169 	} else if (to->team->address_space == NULL) {
170 		// the one we're switching to is kernel space
171 		return i386_translation_map_get_pgdir(&vm_kernel_address_space()->translation_map);
172 	}
173 
174 	return i386_translation_map_get_pgdir(&to->team->address_space->translation_map);
175 }
176 
177 
/*!	Loads \a segment (a full selector: (GDT index << 3) | privilege level)
	into the %fs segment register; used by set_tls_context() to point %fs at
	the thread's TLS area.
*/
static inline void
set_fs_register(uint32 segment)
{
	asm("movl %0,%%fs" :: "r" (segment));
}
183 
184 
185 static void
186 set_tls_context(struct thread *thread)
187 {
188 	int entry = smp_get_current_cpu() + TLS_BASE_SEGMENT;
189 
190 	set_segment_descriptor_base(&gGDT[entry], thread->user_local_storage);
191 	set_fs_register((entry << 3) | DPL_USER);
192 }
193 
194 
195 void
196 x86_restart_syscall(struct iframe* frame)
197 {
198 	struct thread* thread = thread_get_current_thread();
199 
200 	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
201 	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);
202 
203 	frame->eax = frame->orig_eax;
204 	frame->edx = frame->orig_edx;
205 	frame->eip -= 2;
206 		// undoes the "int $99"/"sysenter"/"syscall" instruction
207 		// (so that it'll be executed again)
208 
209 	TSYSCALL(RestartSyscall());
210 }
211 
212 
213 static uint32 *
214 get_signal_stack(struct thread *thread, struct iframe *frame, int signal)
215 {
216 	// use the alternate signal stack if we should and can
217 	if (thread->signal_stack_enabled
218 		&& (thread->sig_action[signal - 1].sa_flags & SA_ONSTACK) != 0
219 		&& (frame->user_esp < thread->signal_stack_base
220 			|| frame->user_esp >= thread->signal_stack_base
221 				+ thread->signal_stack_size)) {
222 		return (uint32 *)(thread->signal_stack_base
223 			+ thread->signal_stack_size);
224 	}
225 
226 	return (uint32 *)frame->user_esp;
227 }
228 
229 
230 //	#pragma mark -
231 
232 
/*!	Initializes the architecture specific part of a team structure.
	Nothing to do on x86.
	\param p the team being set up
	\param kernel whether this is the kernel team
	\return \c B_OK always
*/
status_t
arch_team_init_team_struct(struct team *p, bool kernel)
{
	return B_OK;
}
238 
239 
240 status_t
241 arch_thread_init_thread_struct(struct thread *thread)
242 {
243 	// set up an initial state (stack & fpu)
244 	memcpy(&thread->arch_info, &sInitialState, sizeof(struct arch_thread));
245 	return B_OK;
246 }
247 
248 
249 status_t
250 arch_thread_init_kthread_stack(struct thread *t, int (*start_func)(void),
251 	void (*entry_func)(void), void (*exit_func)(void))
252 {
253 	addr_t *kstack = (addr_t *)t->kernel_stack_base;
254 	addr_t *kstack_top = kstack + KERNEL_STACK_SIZE / sizeof(addr_t);
255 	int i;
256 
257 	TRACE(("arch_thread_initialize_kthread_stack: kstack 0x%p, start_func 0x%p, entry_func 0x%p\n",
258 		kstack, start_func, entry_func));
259 
260 	// clear the kernel stack
261 #ifdef DEBUG_KERNEL_STACKS
262 #	ifdef STACK_GROWS_DOWNWARDS
263 	memset((void *)((addr_t)kstack + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE), 0,
264 		KERNEL_STACK_SIZE - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE);
265 #	else
266 	memset(kstack, 0, KERNEL_STACK_SIZE - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE);
267 #	endif
268 #else
269 	memset(kstack, 0, KERNEL_STACK_SIZE);
270 #endif
271 
272 	// set the final return address to be thread_kthread_exit
273 	kstack_top--;
274 	*kstack_top = (unsigned int)exit_func;
275 
276 	// set the return address to be the start of the first function
277 	kstack_top--;
278 	*kstack_top = (unsigned int)start_func;
279 
280 	// set the return address to be the start of the entry (thread setup)
281 	// function
282 	kstack_top--;
283 	*kstack_top = (unsigned int)entry_func;
284 
285 	// simulate pushfl
286 //	kstack_top--;
287 //	*kstack_top = 0x00; // interrupts still disabled after the switch
288 
289 	// simulate initial popad
290 	for (i = 0; i < 8; i++) {
291 		kstack_top--;
292 		*kstack_top = 0;
293 	}
294 
295 	// save the stack position
296 	t->arch_info.current_stack.esp = kstack_top;
297 	t->arch_info.current_stack.ss = (addr_t *)KERNEL_DATA_SEG;
298 
299 	return B_OK;
300 }
301 
302 
303 /** Initializes the user-space TLS local storage pointer in
304  *	the thread structure, and the reserved TLS slots.
305  *
306  *	Is called from _create_user_thread_kentry().
307  */
308 
309 status_t
310 arch_thread_init_tls(struct thread *thread)
311 {
312 	uint32 tls[TLS_THREAD_ID_SLOT + 1];
313 	int32 i;
314 
315 	thread->user_local_storage = thread->user_stack_base
316 		+ thread->user_stack_size;
317 
318 	// initialize default TLS fields
319 	tls[TLS_BASE_ADDRESS_SLOT] = thread->user_local_storage;
320 	tls[TLS_THREAD_ID_SLOT] = thread->id;
321 
322 	return user_memcpy((void *)thread->user_local_storage, tls, sizeof(tls));
323 }
324 
325 
/*!	Switches to the kernel stack \a new_kstack and invokes \a func(\a arg)
	on it; thin wrapper around the assembly routine
	i386_switch_stack_and_call().
	NOTE(review): presumably does not return to the caller since the old
	stack is abandoned - confirm against the assembly implementation.
*/
void
arch_thread_switch_kstack_and_call(struct thread *t, addr_t new_kstack,
	void (*func)(void *), void *arg)
{
	i386_switch_stack_and_call(new_kstack, func, arg);
}
332 
333 
334 void
335 arch_thread_context_switch(struct thread *from, struct thread *to)
336 {
337 	addr_t newPageDirectory;
338 
339 #if 0
340 	int i;
341 
342 	dprintf("arch_thread_context_switch: cpu %d 0x%x -> 0x%x, aspace 0x%x -> 0x%x, old stack = 0x%x:0x%x, stack = 0x%x:0x%x\n",
343 		smp_get_current_cpu(), t_from->id, t_to->id,
344 		t_from->team->address_space, t_to->team->address_space,
345 		t_from->arch_info.current_stack.ss, t_from->arch_info.current_stack.esp,
346 		t_to->arch_info.current_stack.ss, t_to->arch_info.current_stack.esp);
347 #endif
348 #if 0
349 	for (i = 0; i < 11; i++)
350 		dprintf("*esp[%d] (0x%x) = 0x%x\n", i, ((unsigned int *)new_at->esp + i), *((unsigned int *)new_at->esp + i));
351 #endif
352 	i386_set_tss_and_kstack(to->kernel_stack_base + KERNEL_STACK_SIZE);
353 
354 	// set TLS GDT entry to the current thread - since this action is
355 	// dependent on the current CPU, we have to do it here
356 	if (to->user_local_storage != 0)
357 		set_tls_context(to);
358 
359 	newPageDirectory = (addr_t)x86_next_page_directory(from, to);
360 
361 	if ((newPageDirectory % B_PAGE_SIZE) != 0)
362 		panic("arch_thread_context_switch: bad pgdir 0x%lx\n", newPageDirectory);
363 
364 	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
365 	i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);
366 }
367 
368 
369 void
370 arch_thread_dump_info(void *info)
371 {
372 	struct arch_thread *at = (struct arch_thread *)info;
373 
374 	kprintf("\tesp: %p\n", at->current_stack.esp);
375 	kprintf("\tss: %p\n", at->current_stack.ss);
376 	kprintf("\tfpu_state at %p\n", at->fpu_state);
377 }
378 
379 
380 /** Sets up initial thread context and enters user space
381  */
382 
383 status_t
384 arch_thread_enter_userspace(struct thread *t, addr_t entry, void *args1,
385 	void *args2)
386 {
387 	addr_t stackTop = t->user_stack_base + t->user_stack_size;
388 	uint32 codeSize = (addr_t)x86_end_userspace_thread_exit
389 		- (addr_t)x86_userspace_thread_exit;
390 	uint32 args[3];
391 
392 	TRACE(("arch_thread_enter_uspace: entry 0x%lx, args %p %p, ustack_top 0x%lx\n",
393 		entry, args1, args2, stackTop));
394 
395 	// copy the little stub that calls exit_thread() when the thread entry
396 	// function returns, as well as the arguments of the entry function
397 	stackTop -= codeSize;
398 
399 	if (user_memcpy((void *)stackTop, (const void *)&x86_userspace_thread_exit, codeSize) < B_OK)
400 		return B_BAD_ADDRESS;
401 
402 	args[0] = stackTop;
403 	args[1] = (uint32)args1;
404 	args[2] = (uint32)args2;
405 	stackTop -= sizeof(args);
406 
407 	if (user_memcpy((void *)stackTop, args, sizeof(args)) < B_OK)
408 		return B_BAD_ADDRESS;
409 
410 	disable_interrupts();
411 
412 	i386_set_tss_and_kstack(t->kernel_stack_base + KERNEL_STACK_SIZE);
413 
414 	// set the CPU dependent GDT entry for TLS
415 	set_tls_context(t);
416 
417 	x86_enter_userspace(entry, stackTop);
418 
419 	return B_OK;
420 		// never gets here
421 }
422 
423 
424 bool
425 arch_on_signal_stack(struct thread *thread)
426 {
427 	struct iframe *frame = get_current_iframe();
428 
429 	return frame->user_esp >= thread->signal_stack_base
430 		&& frame->user_esp < thread->signal_stack_base
431 			+ thread->signal_stack_size;
432 }
433 
434 
/*!	Prepares the thread to run the userland handler for \a signal: builds a
	frame on the (possibly alternate) user stack containing - from high to
	low addresses - the syscall restart info, the saved registers (vregs),
	a return stub, and finally the handler's return address, arguments, and
	restore info; then redirects the iframe to the handler.
	\param thread the current thread
	\param action the sigaction for \a signal (handler, flags, user data)
	\param signal the signal number (1-based)
	\param signalMask the signal mask to restore after the handler returns
	\return \c B_OK, or \c B_BAD_ADDRESS/another error if writing the user
		stack failed
*/
status_t
arch_setup_signal_frame(struct thread *thread, struct sigaction *action,
	int signal, int signalMask)
{
	struct iframe *frame = get_current_iframe();
	uint32 *signalCode;
	uint32 *userRegs;
	struct vregs regs;
	uint32 buffer[6];
	status_t status;

	// start stuffing stuff on the user stack
	uint32* userStack = get_signal_stack(thread, frame, signal);

	// copy syscall restart info onto the user stack
	// (12 bytes for thread flags + orig_eax + orig_edx, rounded up to a
	// multiple of 4)
	userStack -= (sizeof(thread->syscall_restart.parameters) + 12 + 3) / 4;
	uint32 threadFlags = atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	if (user_memcpy(userStack, &threadFlags, 4) < B_OK
		|| user_memcpy(userStack + 1, &frame->orig_eax, 4) < B_OK
		|| user_memcpy(userStack + 2, &frame->orig_edx, 4) < B_OK)
		return B_BAD_ADDRESS;
	status = user_memcpy(userStack + 3, thread->syscall_restart.parameters,
		sizeof(thread->syscall_restart.parameters));
	if (status < B_OK)
		return status;

	// store the saved regs onto the user stack
	regs.eip = frame->eip;
	regs.eflags = frame->flags;
	regs.eax = frame->eax;
	regs.ecx = frame->ecx;
	regs.edx = frame->edx;
	regs.esp = frame->esp;
	regs._reserved_1 = frame->user_esp;
	regs._reserved_2[0] = frame->edi;
	regs._reserved_2[1] = frame->esi;
	regs._reserved_2[2] = frame->ebp;
	i386_fnsave((void *)(&regs.xregs));
		// save the FPU state as well; restored in arch_restore_signal_frame()

	userStack -= (sizeof(struct vregs) + 3) / 4;
	userRegs = userStack;
	status = user_memcpy(userRegs, &regs, sizeof(regs));
	if (status < B_OK)
		return status;

	// now store a code snippet on the stack (the stub the handler returns
	// into, which performs the restore-signal-frame syscall)
	userStack -= ((uint32)i386_end_return_from_signal + 3
		- (uint32)i386_return_from_signal) / 4;
	signalCode = userStack;
	status = user_memcpy(signalCode, (const void *)&i386_return_from_signal,
		((uint32)i386_end_return_from_signal
			- (uint32)i386_return_from_signal));
	if (status < B_OK)
		return status;

	// now set up the final part
	buffer[0] = (uint32)signalCode;	// return address when sa_handler done
	buffer[1] = signal;				// arguments to sa_handler
	buffer[2] = (uint32)action->sa_userdata;
	buffer[3] = (uint32)userRegs;

	buffer[4] = signalMask;			// Old signal mask to restore
	buffer[5] = (uint32)userRegs;	// Int frame + extra regs to restore

	userStack -= sizeof(buffer) / 4;

	status = user_memcpy(userStack, buffer, sizeof(buffer));
	if (status < B_OK)
		return status;

	// divert the thread: on return to userland it executes the handler
	frame->user_esp = (uint32)userStack;
	frame->eip = (uint32)action->sa_handler;

	return B_OK;
}
511 
512 
/*!	Counterpart of arch_setup_signal_frame(): invoked (via syscall from the
	i386_return_from_signal stub) after a userland signal handler returned.
	Restores the saved registers, FPU state, syscall restart info, thread
	flags, and signal mask from the frame on the user stack.
	\return the interrupted syscall's 64-bit return value (eax low, edx high)
		so the syscall return path passes it through unchanged, or
		\c B_BAD_ADDRESS if reading the user stack failed
*/
int64
arch_restore_signal_frame(void)
{
	struct thread *thread = thread_get_current_thread();
	struct iframe *frame = get_current_iframe();
	int32 signalMask;
	uint32 *userStack;
	struct vregs* regsPointer;
	struct vregs regs;

	TRACE(("### arch_restore_signal_frame: entry\n"));

	// the stub left the old signal mask and a pointer to the saved vregs
	// at the top of the user stack
	userStack = (uint32 *)frame->user_esp;
	if (user_memcpy(&signalMask, &userStack[0], 4) < B_OK
		|| user_memcpy(&regsPointer, &userStack[1], 4) < B_OK
		|| user_memcpy(&regs, regsPointer, sizeof(vregs)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// the syscall restart info lies directly above the (4-byte aligned)
	// vregs, as laid out by arch_setup_signal_frame()
	uint32* syscallRestartInfo
		= (uint32*)regsPointer + (sizeof(struct vregs) + 3) / 4;
	uint32 threadFlags;
	if (user_memcpy(&threadFlags, syscallRestartInfo, 4) < B_OK
		|| user_memcpy(&frame->orig_eax, syscallRestartInfo + 1, 4) < B_OK
		|| user_memcpy(&frame->orig_edx, syscallRestartInfo + 2, 4) < B_OK
		|| user_memcpy(thread->syscall_restart.parameters,
			syscallRestartInfo + 3,
			sizeof(thread->syscall_restart.parameters)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// set restart/64bit return value flags from previous syscall
	atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	atomic_or(&thread->flags, threadFlags
		& (THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));

	// TODO: Verify that just restoring the old signal mask is right! Bash for
	// instance changes the procmask in a signal handler. Those changes are
	// lost the way we do it.
	atomic_set(&thread->sig_block_mask, signalMask);
	update_current_thread_signals_flag();

	// write the saved register values back into the iframe, so that
	// returning to userland resumes the interrupted code
	frame->eip = regs.eip;
	frame->flags = regs.eflags;
	frame->eax = regs.eax;
	frame->ecx = regs.ecx;
	frame->edx = regs.edx;
	frame->esp = regs.esp;
	frame->user_esp = regs._reserved_1;
	frame->edi = regs._reserved_2[0];
	frame->esi = regs._reserved_2[1];
	frame->ebp = regs._reserved_2[2];

	i386_frstor((void *)(&regs.xregs));
		// restore the FPU state saved by arch_setup_signal_frame()

	TRACE(("### arch_restore_signal_frame: exit\n"));

	return (int64)frame->eax | ((int64)frame->edx << 32);
}
573 
574 
575 /**	Saves everything needed to restore the frame in the child fork in the
576  *	arch_fork_arg structure to be passed to arch_restore_fork_frame().
577  *	Also makes sure to return the right value.
578  */
579 
580 void
581 arch_store_fork_frame(struct arch_fork_arg *arg)
582 {
583 	struct iframe *frame = get_current_iframe();
584 
585 	// we need to copy the threads current iframe
586 	arg->iframe = *frame;
587 
588 	// we also want fork() to return 0 for the child
589 	arg->iframe.eax = 0;
590 }
591 
592 
593 /** Restores the frame from a forked team as specified by the provided
594  *	arch_fork_arg structure.
595  *	Needs to be called from within the child team, ie. instead of
596  *	arch_thread_enter_uspace() as thread "starter".
597  *	This function does not return to the caller, but will enter userland
598  *	in the child team at the same position where the parent team left of.
599  */
600 
601 void
602 arch_restore_fork_frame(struct arch_fork_arg *arg)
603 {
604 	struct thread *thread = thread_get_current_thread();
605 
606 	disable_interrupts();
607 
608 	i386_set_tss_and_kstack(thread->kernel_stack_base + KERNEL_STACK_SIZE);
609 
610 	// set the CPU dependent GDT entry for TLS (set the current %fs register)
611 	set_tls_context(thread);
612 
613 	i386_restore_frame_from_syscall(arg->iframe);
614 }
615 
616 
617 void
618 arch_syscall_64_bit_return_value(void)
619 {
620 	struct thread* thread = thread_get_current_thread();
621 	atomic_or(&thread->flags, THREAD_FLAGS_64_BIT_SYSCALL_RETURN);
622 }
623