xref: /haiku/src/system/kernel/arch/x86/64/thread.cpp (revision 9a6a20d4689307142a7ed26a1437ba47e244e73f)
1 /*
2  * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com.
3  * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
4  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
5  * Distributed under the terms of the MIT License.
6  *
7  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
8  * Distributed under the terms of the NewOS License.
9  */
10 
11 
12 #include <arch/thread.h>
13 
14 #include <string.h>
15 
16 #include <arch_thread_defs.h>
17 #include <commpage.h>
18 #include <cpu.h>
19 #include <debug.h>
20 #include <generic_syscall.h>
21 #include <kernel.h>
22 #include <ksignal.h>
23 #include <int.h>
24 #include <team.h>
25 #include <thread.h>
26 #include <tls.h>
27 #include <tracing.h>
28 #include <util/Random.h>
29 #include <vm/vm_types.h>
30 #include <vm/VMAddressSpace.h>
31 
32 #include "paging/X86PagingStructures.h"
33 #include "paging/X86VMTranslationMap.h"
34 
35 
// Debug tracing for this file: uncomment TRACE_ARCH_THREAD to make TRACE()
// forward its arguments to dprintf(). Otherwise TRACE() expands to an empty
// statement.
//#define TRACE_ARCH_THREAD
#ifdef TRACE_ARCH_THREAD
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif
42 
43 
// Syscall tracing support. Only compiled in when SYSCALL_TRACING is defined;
// otherwise TSYSCALL() expands to nothing.
#ifdef SYSCALL_TRACING

namespace SyscallTracing {

// Trace entry that is dumped as "syscall restart". It is created by
// x86_restart_syscall() via TSYSCALL().
class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			// NOTE(review): Initialized() is not defined in this file;
			// presumably inherited from AbstractTraceEntry -- confirm.
			Initialized();
		}

		// Writes the textual representation of this entry to \a out.
		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};

}

// Allocates the given SyscallTracing entry with the non-throwing new.
#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING
68 
69 
// Defined elsewhere; its address is stored as the initial instruction pointer
// of new kernel threads (see arch_thread_init_kthread_stack()).
extern "C" void x86_64_thread_entry();

// Initial thread saved state. Filled in by arch_thread_init() and copied into
// the arch_info of every thread by arch_thread_init_thread_struct().
static arch_thread sInitialState _ALIGNED(64);
// x87 control word and MXCSR found in the initial FPU state; both are set by
// arch_thread_init().
uint16 gFPUControlDefault;
uint32 gFPUMXCSRDefault;
// Number of bytes of FPU state that are cleared/copied in this file.
extern uint64 gFPUSaveLength;
// Select which instruction arch_thread_init() uses to save the FPU state
// (xsavec64, xsave64, or fxsaveq if neither is set).
extern bool gHasXsave;
extern bool gHasXsavec;
79 
80 
81 void
82 x86_restart_syscall(iframe* frame)
83 {
84 	Thread* thread = thread_get_current_thread();
85 
86 	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
87 	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);
88 
89 	// Get back the original system call number and modify the frame to
90 	// re-execute the syscall instruction.
91 	frame->ax = frame->orig_rax;
92 	frame->ip -= 2;
93 
94 	TSYSCALL(RestartSyscall());
95 }
96 
97 
98 void
99 x86_set_tls_context(Thread* thread)
100 {
101 	// Set FS segment base address to the TLS segment.
102 	x86_write_msr(IA32_MSR_FS_BASE, thread->user_local_storage);
103 	x86_write_msr(IA32_MSR_KERNEL_GS_BASE, thread->arch_info.user_gs_base);
104 }
105 
106 
107 static addr_t
108 arch_randomize_stack_pointer(addr_t value)
109 {
110 	static_assert(MAX_RANDOM_VALUE >= B_PAGE_SIZE - 1,
111 		"randomization range is too big");
112 	value -= random_value() & (B_PAGE_SIZE - 1);
113 	return (value & ~addr_t(0xf)) - 8;
114 		// This means, result % 16 == 8, which is what rsp should adhere to
115 		// when a function is entered for the stack to be considered aligned to
116 		// 16 byte.
117 }
118 
119 
120 static uint8*
121 get_signal_stack(Thread* thread, iframe* frame, struct sigaction* action,
122 	size_t spaceNeeded)
123 {
124 	// Use the alternate signal stack if we should and can.
125 	if (thread->signal_stack_enabled
126 			&& (action->sa_flags & SA_ONSTACK) != 0
127 			&& (frame->user_sp < thread->signal_stack_base
128 				|| frame->user_sp >= thread->signal_stack_base
129 					+ thread->signal_stack_size)) {
130 		addr_t stackTop = thread->signal_stack_base + thread->signal_stack_size;
131 		return (uint8*)arch_randomize_stack_pointer(stackTop - spaceNeeded);
132 	}
133 
134 	// We are going to use the stack that we are already on. We must not touch
135 	// the red zone (128 byte area below the stack pointer, reserved for use
136 	// by functions to store temporary data and guaranteed not to be modified
137 	// by signal handlers).
138 	return (uint8*)((frame->user_sp - 128 - spaceNeeded) & ~addr_t(0xf)) - 8;
139 		// align stack pointer (cf. arch_randomize_stack_pointer())
140 }
141 
142 
143 static status_t
144 arch_thread_control(const char* subsystem, uint32 function, void* buffer,
145 	size_t bufferSize)
146 {
147 	switch (function) {
148 		case THREAD_SET_GS_BASE:
149 		{
150 			uint64 base;
151 			if (bufferSize != sizeof(base))
152 				return B_BAD_VALUE;
153 
154 			if (!IS_USER_ADDRESS(buffer)
155 				|| user_memcpy(&base, buffer, sizeof(base)) < B_OK) {
156 				return B_BAD_ADDRESS;
157 			}
158 
159 			Thread* thread = thread_get_current_thread();
160 			thread->arch_info.user_gs_base = base;
161 			x86_write_msr(IA32_MSR_KERNEL_GS_BASE, base);
162 			return B_OK;
163 		}
164 	}
165 	return B_BAD_HANDLER;
166 }
167 
168 
169 //	#pragma mark -
170 
171 
172 status_t
173 arch_thread_init(kernel_args* args)
174 {
175 	// Save one global valid FPU state; it will be copied in the arch dependent
176 	// part of each new thread.
177 	if (gHasXsave || gHasXsavec) {
178 		memset(sInitialState.fpu_state, 0, gFPUSaveLength);
179 		if (gHasXsavec) {
180 			asm volatile (
181 				"clts;"		\
182 				"fninit;"	\
183 				"fnclex;"	\
184 				"movl $0x7,%%eax;"	\
185 				"movl $0x0,%%edx;"	\
186 				"xsavec64 %0"
187 				:: "m" (sInitialState.fpu_state));
188 		} else {
189 			asm volatile (
190 				"clts;"		\
191 				"fninit;"	\
192 				"fnclex;"	\
193 				"movl $0x7,%%eax;"	\
194 				"movl $0x0,%%edx;"	\
195 				"xsave64 %0"
196 				:: "m" (sInitialState.fpu_state));
197 		}
198 	} else {
199 		asm volatile (
200 			"clts;"		\
201 			"fninit;"	\
202 			"fnclex;"	\
203 			"fxsaveq %0"
204 			:: "m" (sInitialState.fpu_state));
205 	}
206 	gFPUControlDefault = ((savefpu*)&sInitialState.fpu_state)->fp_fxsave.control;
207 	gFPUMXCSRDefault = ((savefpu*)&sInitialState.fpu_state)->fp_fxsave.mxcsr;
208 
209 	register_generic_syscall(THREAD_SYSCALLS, arch_thread_control, 1, 0);
210 
211 	return B_OK;
212 }
213 
214 
215 status_t
216 arch_thread_init_thread_struct(Thread* thread)
217 {
218 	// Copy the initial saved FPU state to the new thread.
219 	memcpy(&thread->arch_info, &sInitialState, sizeof(arch_thread));
220 
221 	// Initialise the current thread pointer.
222 	thread->arch_info.thread = thread;
223 
224 	return B_OK;
225 }
226 
227 
228 /*!	Prepares the given thread's kernel stack for executing its entry function.
229 
230 	\param thread The thread.
231 	\param stack The usable bottom of the thread's kernel stack.
232 	\param stackTop The usable top of the thread's kernel stack.
233 	\param function The entry function the thread shall execute.
234 	\param data Pointer to be passed to the entry function.
235 */
236 void
237 arch_thread_init_kthread_stack(Thread* thread, void* _stack, void* _stackTop,
238 	void (*function)(void*), const void* data)
239 {
240 	uintptr_t* stackTop = static_cast<uintptr_t*>(_stackTop);
241 
242 	TRACE("arch_thread_init_kthread_stack: stack top %p, function %p, data: "
243 		"%p\n", _stackTop, function, data);
244 
245 	// Save the stack top for system call entry.
246 	thread->arch_info.syscall_rsp = (uint64*)thread->kernel_stack_top;
247 
248 	thread->arch_info.instruction_pointer
249 		= reinterpret_cast<uintptr_t>(x86_64_thread_entry);
250 
251 	*--stackTop = uintptr_t(data);
252 	*--stackTop = uintptr_t(function);
253 
254 	// Save the stack position.
255 	thread->arch_info.current_stack = stackTop;
256 }
257 
258 
259 void
260 arch_thread_dump_info(void* info)
261 {
262 	arch_thread* thread = (arch_thread*)info;
263 
264 	kprintf("\trsp: %p\n", thread->current_stack);
265 	kprintf("\tsyscall_rsp: %p\n", thread->syscall_rsp);
266 	kprintf("\tuser_rsp: %p\n", thread->user_rsp);
267 	kprintf("\tfpu_state at %p\n", thread->fpu_state);
268 }
269 
270 
271 /*!	Sets up initial thread context and enters user space
272 */
273 status_t
274 arch_thread_enter_userspace(Thread* thread, addr_t entry, void* args1,
275 	void* args2)
276 {
277 	addr_t stackTop = thread->user_stack_base + thread->user_stack_size;
278 	addr_t codeAddr;
279 
280 	TRACE("arch_thread_enter_userspace: entry %#lx, args %p %p, "
281 		"stackTop %#lx\n", entry, args1, args2, stackTop);
282 
283 	stackTop = arch_randomize_stack_pointer(stackTop - sizeof(codeAddr));
284 
285 	// Copy the address of the stub that calls exit_thread() when the thread
286 	// entry function returns to the top of the stack to act as the return
287 	// address. The stub is inside commpage.
288 	addr_t commPageAddress = (addr_t)thread->team->commpage_address;
289 	arch_cpu_enable_user_access();
290 	codeAddr = ((addr_t*)commPageAddress)[COMMPAGE_ENTRY_X86_THREAD_EXIT]
291 		+ commPageAddress;
292 	arch_cpu_disable_user_access();
293 	if (user_memcpy((void*)stackTop, (const void*)&codeAddr, sizeof(codeAddr))
294 			!= B_OK)
295 		return B_BAD_ADDRESS;
296 
297 	// Prepare the user iframe.
298 	iframe frame = {};
299 	frame.type = IFRAME_TYPE_SYSCALL;
300 	frame.si = (uint64)args2;
301 	frame.di = (uint64)args1;
302 	frame.ip = entry;
303 	frame.cs = USER_CODE_SELECTOR;
304 	frame.flags = X86_EFLAGS_RESERVED1 | X86_EFLAGS_INTERRUPT;
305 	frame.sp = stackTop;
306 	frame.ss = USER_DATA_SELECTOR;
307 
308 	// Return to userland. Never returns.
309 	x86_initial_return_to_userland(thread, &frame);
310 
311 	return B_OK;
312 }
313 
314 
315 /*!	Sets up the user iframe for invoking a signal handler.
316 
317 	The function fills in the remaining fields of the given \a signalFrameData,
318 	copies it to the thread's userland stack (the one on which the signal shall
319 	be handled), and sets up the user iframe so that when returning to userland
320 	a wrapper function is executed that calls the user-defined signal handler.
321 	When the signal handler returns, the wrapper function shall call the
322 	"restore signal frame" syscall with the (possibly modified) signal frame
323 	data.
324 
325 	The following fields of the \a signalFrameData structure still need to be
326 	filled in:
327 	- \c context.uc_stack: The stack currently used by the thread.
328 	- \c context.uc_mcontext: The current userland state of the registers.
329 	- \c syscall_restart_return_value: Architecture specific use. On x86_64 the
330 		value of rax which is overwritten by the syscall return value.
331 
332 	Furthermore the function needs to set \c thread->user_signal_context to the
333 	userland pointer to the \c ucontext_t on the user stack.
334 
335 	\param thread The current thread.
336 	\param action The signal action specified for the signal to be handled.
337 	\param signalFrameData A partially initialized structure of all the data
338 		that need to be copied to userland.
339 	\return \c B_OK on success, another error code, if something goes wrong.
340 */
341 status_t
342 arch_setup_signal_frame(Thread* thread, struct sigaction* action,
343 	struct signal_frame_data* signalFrameData)
344 {
345 	iframe* frame = x86_get_current_iframe();
346 	if (!IFRAME_IS_USER(frame)) {
347 		panic("arch_setup_signal_frame(): No user iframe!");
348 		return B_BAD_VALUE;
349 	}
350 
351 	// Store the register state.
352 	signalFrameData->context.uc_mcontext.rax = frame->ax;
353 	signalFrameData->context.uc_mcontext.rbx = frame->bx;
354 	signalFrameData->context.uc_mcontext.rcx = frame->cx;
355 	signalFrameData->context.uc_mcontext.rdx = frame->dx;
356 	signalFrameData->context.uc_mcontext.rdi = frame->di;
357 	signalFrameData->context.uc_mcontext.rsi = frame->si;
358 	signalFrameData->context.uc_mcontext.rbp = frame->bp;
359 	signalFrameData->context.uc_mcontext.r8 = frame->r8;
360 	signalFrameData->context.uc_mcontext.r9 = frame->r9;
361 	signalFrameData->context.uc_mcontext.r10 = frame->r10;
362 	signalFrameData->context.uc_mcontext.r11 = frame->r11;
363 	signalFrameData->context.uc_mcontext.r12 = frame->r12;
364 	signalFrameData->context.uc_mcontext.r13 = frame->r13;
365 	signalFrameData->context.uc_mcontext.r14 = frame->r14;
366 	signalFrameData->context.uc_mcontext.r15 = frame->r15;
367 	signalFrameData->context.uc_mcontext.rsp = frame->user_sp;
368 	signalFrameData->context.uc_mcontext.rip = frame->ip;
369 	signalFrameData->context.uc_mcontext.rflags = frame->flags;
370 
371 	if (frame->fpu != nullptr) {
372 		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, frame->fpu,
373 			gFPUSaveLength);
374 	} else {
375 		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu,
376 			sInitialState.fpu_state, gFPUSaveLength);
377 	}
378 
379 	// Fill in signalFrameData->context.uc_stack.
380 	signal_get_user_stack(frame->user_sp, &signalFrameData->context.uc_stack);
381 
382 	// Store syscall_restart_return_value.
383 	signalFrameData->syscall_restart_return_value = frame->orig_rax;
384 
385 	// Get the stack to use and copy the frame data to it.
386 	uint8* userStack = get_signal_stack(thread, frame, action,
387 		sizeof(*signalFrameData) + sizeof(frame->ip));
388 
389 	signal_frame_data* userSignalFrameData
390 		= (signal_frame_data*)(userStack + sizeof(frame->ip));
391 
392 	if (user_memcpy(userSignalFrameData, signalFrameData,
393 			sizeof(*signalFrameData)) != B_OK) {
394 		return B_BAD_ADDRESS;
395 	}
396 
397 	// Copy a return address to the stack so that backtraces will be correct.
398 	if (user_memcpy(userStack, &frame->ip, sizeof(frame->ip)) != B_OK)
399 		return B_BAD_ADDRESS;
400 
401 	// Update Thread::user_signal_context, now that everything seems to have
402 	// gone fine.
403 	thread->user_signal_context = &userSignalFrameData->context;
404 
405 	// Set up the iframe to execute the signal handler wrapper on our prepared
406 	// stack. First argument points to the frame data.
407 	addr_t* commPageAddress = (addr_t*)thread->team->commpage_address;
408 	frame->user_sp = (addr_t)userStack;
409 	arch_cpu_enable_user_access();
410 	frame->ip = commPageAddress[COMMPAGE_ENTRY_X86_SIGNAL_HANDLER]
411 		+ (addr_t)commPageAddress;
412 	arch_cpu_disable_user_access();
413 	frame->di = (addr_t)userSignalFrameData;
414 	frame->flags &= ~(uint64)(X86_EFLAGS_TRAP | X86_EFLAGS_DIRECTION);
415 
416 	return B_OK;
417 }
418 
419 
420 int64
421 arch_restore_signal_frame(struct signal_frame_data* signalFrameData)
422 {
423 	iframe* frame = x86_get_current_iframe();
424 
425 	frame->orig_rax = signalFrameData->syscall_restart_return_value;
426 	frame->ax = signalFrameData->context.uc_mcontext.rax;
427 	frame->bx = signalFrameData->context.uc_mcontext.rbx;
428 	frame->cx = signalFrameData->context.uc_mcontext.rcx;
429 	frame->dx = signalFrameData->context.uc_mcontext.rdx;
430 	frame->di = signalFrameData->context.uc_mcontext.rdi;
431 	frame->si = signalFrameData->context.uc_mcontext.rsi;
432 	frame->bp = signalFrameData->context.uc_mcontext.rbp;
433 	frame->r8 = signalFrameData->context.uc_mcontext.r8;
434 	frame->r9 = signalFrameData->context.uc_mcontext.r9;
435 	frame->r10 = signalFrameData->context.uc_mcontext.r10;
436 	frame->r11 = signalFrameData->context.uc_mcontext.r11;
437 	frame->r12 = signalFrameData->context.uc_mcontext.r12;
438 	frame->r13 = signalFrameData->context.uc_mcontext.r13;
439 	frame->r14 = signalFrameData->context.uc_mcontext.r14;
440 	frame->r15 = signalFrameData->context.uc_mcontext.r15;
441 	frame->user_sp = signalFrameData->context.uc_mcontext.rsp;
442 	frame->ip = signalFrameData->context.uc_mcontext.rip;
443 	frame->flags = (frame->flags & ~(uint64)X86_EFLAGS_USER_FLAGS)
444 		| (signalFrameData->context.uc_mcontext.rflags & X86_EFLAGS_USER_FLAGS);
445 
446 	Thread* thread = thread_get_current_thread();
447 
448 	memcpy(thread->arch_info.fpu_state,
449 		(void*)&signalFrameData->context.uc_mcontext.fpu, gFPUSaveLength);
450 	frame->fpu = &thread->arch_info.fpu_state;
451 
452 	// The syscall return code overwrites frame->ax with the return value of
453 	// the syscall, need to return it here to ensure the correct value is
454 	// restored.
455 	return frame->ax;
456 }
457