xref: /haiku/src/system/kernel/arch/x86/64/thread.cpp (revision 52f7c9389475e19fc21487b38064b4390eeb6fea)
1 /*
2  * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com.
3  * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
4  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
5  * Distributed under the terms of the MIT License.
6  *
7  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
8  * Distributed under the terms of the NewOS License.
9  */
10 
11 
12 #include <arch/thread.h>
13 
14 #include <string.h>
15 
16 #include <arch_thread_defs.h>
17 #include <commpage.h>
18 #include <cpu.h>
19 #include <debug.h>
20 #include <generic_syscall.h>
21 #include <kernel.h>
22 #include <ksignal.h>
23 #include <int.h>
24 #include <team.h>
25 #include <thread.h>
26 #include <tls.h>
27 #include <tracing.h>
28 #include <util/Random.h>
29 #include <vm/vm_types.h>
30 #include <vm/VMAddressSpace.h>
31 
32 #include "paging/X86PagingStructures.h"
33 #include "paging/X86VMTranslationMap.h"
34 
35 
// Uncomment the following define to have TRACE() forward its arguments to
// dprintf(). Without it, TRACE() expands to an empty statement.
//#define TRACE_ARCH_THREAD
#if defined(TRACE_ARCH_THREAD)
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif
42 
43 
#if defined(SYSCALL_TRACING)

namespace SyscallTracing {

// Trace entry that is created by x86_restart_syscall() each time a syscall is
// set up for being restarted.
class RestartSyscall : public AbstractTraceEntry {
public:
	RestartSyscall()
	{
		Initialized();
	}

	// Prints the fixed description of this entry to the trace output.
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("syscall restart");
	}
};

}	// namespace SyscallTracing

// Creates the given SyscallTracing entry using a non-throwing new.
#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
// Syscall tracing is disabled: TSYSCALL() expands to nothing.
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING
68 
69 
70 extern "C" void x86_64_thread_entry();
71 
72 // Initial thread saved state.
73 static arch_thread sInitialState _ALIGNED(64);
74 extern uint64 gFPUSaveLength;
75 extern bool gHasXsave;
76 extern bool gHasXsavec;
77 
78 
79 void
80 x86_restart_syscall(iframe* frame)
81 {
82 	Thread* thread = thread_get_current_thread();
83 
84 	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
85 	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);
86 
87 	// Get back the original system call number and modify the frame to
88 	// re-execute the syscall instruction.
89 	frame->ax = frame->orig_rax;
90 	frame->ip -= 2;
91 
92 	TSYSCALL(RestartSyscall());
93 }
94 
95 
96 void
97 x86_set_tls_context(Thread* thread)
98 {
99 	// Set FS segment base address to the TLS segment.
100 	x86_write_msr(IA32_MSR_FS_BASE, thread->user_local_storage);
101 	x86_write_msr(IA32_MSR_KERNEL_GS_BASE, thread->arch_info.user_gs_base);
102 }
103 
104 
105 static addr_t
106 arch_randomize_stack_pointer(addr_t value)
107 {
108 	static_assert(MAX_RANDOM_VALUE >= B_PAGE_SIZE - 1,
109 		"randomization range is too big");
110 	value -= random_value() & (B_PAGE_SIZE - 1);
111 	return (value & ~addr_t(0xf)) - 8;
112 		// This means, result % 16 == 8, which is what rsp should adhere to
113 		// when a function is entered for the stack to be considered aligned to
114 		// 16 byte.
115 }
116 
117 
118 static uint8*
119 get_signal_stack(Thread* thread, iframe* frame, struct sigaction* action,
120 	size_t spaceNeeded)
121 {
122 	// Use the alternate signal stack if we should and can.
123 	if (thread->signal_stack_enabled
124 			&& (action->sa_flags & SA_ONSTACK) != 0
125 			&& (frame->user_sp < thread->signal_stack_base
126 				|| frame->user_sp >= thread->signal_stack_base
127 					+ thread->signal_stack_size)) {
128 		addr_t stackTop = thread->signal_stack_base + thread->signal_stack_size;
129 		return (uint8*)arch_randomize_stack_pointer(stackTop - spaceNeeded);
130 	}
131 
132 	// We are going to use the stack that we are already on. We must not touch
133 	// the red zone (128 byte area below the stack pointer, reserved for use
134 	// by functions to store temporary data and guaranteed not to be modified
135 	// by signal handlers).
136 	return (uint8*)((frame->user_sp - 128 - spaceNeeded) & ~addr_t(0xf)) - 8;
137 		// align stack pointer (cf. arch_randomize_stack_pointer())
138 }
139 
140 
141 static status_t
142 arch_thread_control(const char* subsystem, uint32 function, void* buffer,
143 	size_t bufferSize)
144 {
145 	switch (function) {
146 		case THREAD_SET_GS_BASE:
147 		{
148 			uint64 base;
149 			if (bufferSize != sizeof(base))
150 				return B_BAD_VALUE;
151 
152 			if (!IS_USER_ADDRESS(buffer)
153 				|| user_memcpy(&base, buffer, sizeof(base)) < B_OK) {
154 				return B_BAD_ADDRESS;
155 			}
156 
157 			Thread* thread = thread_get_current_thread();
158 			thread->arch_info.user_gs_base = base;
159 			x86_write_msr(IA32_MSR_KERNEL_GS_BASE, base);
160 			return B_OK;
161 		}
162 	}
163 	return B_BAD_HANDLER;
164 }
165 
166 
167 //	#pragma mark -
168 
169 
170 status_t
171 arch_thread_init(kernel_args* args)
172 {
173 	// Save one global valid FPU state; it will be copied in the arch dependent
174 	// part of each new thread.
175 	if (gHasXsave || gHasXsavec) {
176 		memset(sInitialState.fpu_state, 0, gFPUSaveLength);
177 		if (gHasXsavec) {
178 			asm volatile (
179 				"clts;"		\
180 				"fninit;"	\
181 				"fnclex;"	\
182 				"movl $0x7,%%eax;"	\
183 				"movl $0x0,%%edx;"	\
184 				"xsavec64 %0"
185 				:: "m" (sInitialState.fpu_state));
186 		} else {
187 			asm volatile (
188 				"clts;"		\
189 				"fninit;"	\
190 				"fnclex;"	\
191 				"movl $0x7,%%eax;"	\
192 				"movl $0x0,%%edx;"	\
193 				"xsave64 %0"
194 				:: "m" (sInitialState.fpu_state));
195 		}
196 	} else {
197 		asm volatile (
198 			"clts;"		\
199 			"fninit;"	\
200 			"fnclex;"	\
201 			"fxsaveq %0"
202 			:: "m" (sInitialState.fpu_state));
203 	}
204 
205 	register_generic_syscall(THREAD_SYSCALLS, arch_thread_control, 1, 0);
206 
207 	return B_OK;
208 }
209 
210 
211 status_t
212 arch_thread_init_thread_struct(Thread* thread)
213 {
214 	// Copy the initial saved FPU state to the new thread.
215 	memcpy(&thread->arch_info, &sInitialState, sizeof(arch_thread));
216 
217 	// Initialise the current thread pointer.
218 	thread->arch_info.thread = thread;
219 
220 	return B_OK;
221 }
222 
223 
224 /*!	Prepares the given thread's kernel stack for executing its entry function.
225 
226 	\param thread The thread.
227 	\param stack The usable bottom of the thread's kernel stack.
228 	\param stackTop The usable top of the thread's kernel stack.
229 	\param function The entry function the thread shall execute.
230 	\param data Pointer to be passed to the entry function.
231 */
232 void
233 arch_thread_init_kthread_stack(Thread* thread, void* _stack, void* _stackTop,
234 	void (*function)(void*), const void* data)
235 {
236 	uintptr_t* stackTop = static_cast<uintptr_t*>(_stackTop);
237 
238 	TRACE("arch_thread_init_kthread_stack: stack top %p, function %p, data: "
239 		"%p\n", _stackTop, function, data);
240 
241 	// Save the stack top for system call entry.
242 	thread->arch_info.syscall_rsp = (uint64*)thread->kernel_stack_top;
243 
244 	thread->arch_info.instruction_pointer
245 		= reinterpret_cast<uintptr_t>(x86_64_thread_entry);
246 
247 	*--stackTop = uintptr_t(data);
248 	*--stackTop = uintptr_t(function);
249 
250 	// Save the stack position.
251 	thread->arch_info.current_stack = stackTop;
252 }
253 
254 
255 void
256 arch_thread_dump_info(void* info)
257 {
258 	arch_thread* thread = (arch_thread*)info;
259 
260 	kprintf("\trsp: %p\n", thread->current_stack);
261 	kprintf("\tsyscall_rsp: %p\n", thread->syscall_rsp);
262 	kprintf("\tuser_rsp: %p\n", thread->user_rsp);
263 	kprintf("\tfpu_state at %p\n", thread->fpu_state);
264 }
265 
266 
267 /*!	Sets up initial thread context and enters user space
268 */
269 status_t
270 arch_thread_enter_userspace(Thread* thread, addr_t entry, void* args1,
271 	void* args2)
272 {
273 	addr_t stackTop = thread->user_stack_base + thread->user_stack_size;
274 	addr_t codeAddr;
275 
276 	TRACE("arch_thread_enter_userspace: entry %#lx, args %p %p, "
277 		"stackTop %#lx\n", entry, args1, args2, stackTop);
278 
279 	stackTop = arch_randomize_stack_pointer(stackTop - sizeof(codeAddr));
280 
281 	// Copy the address of the stub that calls exit_thread() when the thread
282 	// entry function returns to the top of the stack to act as the return
283 	// address. The stub is inside commpage.
284 	addr_t commPageAddress = (addr_t)thread->team->commpage_address;
285 	set_ac();
286 	codeAddr = ((addr_t*)commPageAddress)[COMMPAGE_ENTRY_X86_THREAD_EXIT]
287 		+ commPageAddress;
288 	clear_ac();
289 	if (user_memcpy((void*)stackTop, (const void*)&codeAddr, sizeof(codeAddr))
290 			!= B_OK)
291 		return B_BAD_ADDRESS;
292 
293 	// Prepare the user iframe.
294 	iframe frame = {};
295 	frame.type = IFRAME_TYPE_SYSCALL;
296 	frame.si = (uint64)args2;
297 	frame.di = (uint64)args1;
298 	frame.ip = entry;
299 	frame.cs = USER_CODE_SELECTOR;
300 	frame.flags = X86_EFLAGS_RESERVED1 | X86_EFLAGS_INTERRUPT;
301 	frame.sp = stackTop;
302 	frame.ss = USER_DATA_SELECTOR;
303 
304 	// Return to userland. Never returns.
305 	x86_initial_return_to_userland(thread, &frame);
306 
307 	return B_OK;
308 }
309 
310 
311 /*!	Sets up the user iframe for invoking a signal handler.
312 
313 	The function fills in the remaining fields of the given \a signalFrameData,
314 	copies it to the thread's userland stack (the one on which the signal shall
315 	be handled), and sets up the user iframe so that when returning to userland
316 	a wrapper function is executed that calls the user-defined signal handler.
317 	When the signal handler returns, the wrapper function shall call the
318 	"restore signal frame" syscall with the (possibly modified) signal frame
319 	data.
320 
321 	The following fields of the \a signalFrameData structure still need to be
322 	filled in:
323 	- \c context.uc_stack: The stack currently used by the thread.
324 	- \c context.uc_mcontext: The current userland state of the registers.
325 	- \c syscall_restart_return_value: Architecture specific use. On x86_64 the
326 		value of rax which is overwritten by the syscall return value.
327 
328 	Furthermore the function needs to set \c thread->user_signal_context to the
329 	userland pointer to the \c ucontext_t on the user stack.
330 
331 	\param thread The current thread.
332 	\param action The signal action specified for the signal to be handled.
333 	\param signalFrameData A partially initialized structure of all the data
334 		that need to be copied to userland.
335 	\return \c B_OK on success, another error code, if something goes wrong.
336 */
337 status_t
338 arch_setup_signal_frame(Thread* thread, struct sigaction* action,
339 	struct signal_frame_data* signalFrameData)
340 {
341 	iframe* frame = x86_get_current_iframe();
342 	if (!IFRAME_IS_USER(frame)) {
343 		panic("arch_setup_signal_frame(): No user iframe!");
344 		return B_BAD_VALUE;
345 	}
346 
347 	// Store the register state.
348 	signalFrameData->context.uc_mcontext.rax = frame->ax;
349 	signalFrameData->context.uc_mcontext.rbx = frame->bx;
350 	signalFrameData->context.uc_mcontext.rcx = frame->cx;
351 	signalFrameData->context.uc_mcontext.rdx = frame->dx;
352 	signalFrameData->context.uc_mcontext.rdi = frame->di;
353 	signalFrameData->context.uc_mcontext.rsi = frame->si;
354 	signalFrameData->context.uc_mcontext.rbp = frame->bp;
355 	signalFrameData->context.uc_mcontext.r8 = frame->r8;
356 	signalFrameData->context.uc_mcontext.r9 = frame->r9;
357 	signalFrameData->context.uc_mcontext.r10 = frame->r10;
358 	signalFrameData->context.uc_mcontext.r11 = frame->r11;
359 	signalFrameData->context.uc_mcontext.r12 = frame->r12;
360 	signalFrameData->context.uc_mcontext.r13 = frame->r13;
361 	signalFrameData->context.uc_mcontext.r14 = frame->r14;
362 	signalFrameData->context.uc_mcontext.r15 = frame->r15;
363 	signalFrameData->context.uc_mcontext.rsp = frame->user_sp;
364 	signalFrameData->context.uc_mcontext.rip = frame->ip;
365 	signalFrameData->context.uc_mcontext.rflags = frame->flags;
366 
367 	if (frame->fpu != nullptr) {
368 		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, frame->fpu,
369 			gFPUSaveLength);
370 	} else {
371 		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu,
372 			sInitialState.fpu_state, gFPUSaveLength);
373 	}
374 
375 	// Fill in signalFrameData->context.uc_stack.
376 	signal_get_user_stack(frame->user_sp, &signalFrameData->context.uc_stack);
377 
378 	// Store syscall_restart_return_value.
379 	signalFrameData->syscall_restart_return_value = frame->orig_rax;
380 
381 	// Get the stack to use and copy the frame data to it.
382 	uint8* userStack = get_signal_stack(thread, frame, action,
383 		sizeof(*signalFrameData) + sizeof(frame->ip));
384 
385 	signal_frame_data* userSignalFrameData
386 		= (signal_frame_data*)(userStack + sizeof(frame->ip));
387 
388 	if (user_memcpy(userSignalFrameData, signalFrameData,
389 			sizeof(*signalFrameData)) != B_OK) {
390 		return B_BAD_ADDRESS;
391 	}
392 
393 	// Copy a return address to the stack so that backtraces will be correct.
394 	if (user_memcpy(userStack, &frame->ip, sizeof(frame->ip)) != B_OK)
395 		return B_BAD_ADDRESS;
396 
397 	// Update Thread::user_signal_context, now that everything seems to have
398 	// gone fine.
399 	thread->user_signal_context = &userSignalFrameData->context;
400 
401 	// Set up the iframe to execute the signal handler wrapper on our prepared
402 	// stack. First argument points to the frame data.
403 	addr_t* commPageAddress = (addr_t*)thread->team->commpage_address;
404 	frame->user_sp = (addr_t)userStack;
405 	set_ac();
406 	frame->ip = commPageAddress[COMMPAGE_ENTRY_X86_SIGNAL_HANDLER]
407 		+ (addr_t)commPageAddress;
408 	clear_ac();
409 	frame->di = (addr_t)userSignalFrameData;
410 	frame->flags &= ~(uint64)(X86_EFLAGS_TRAP | X86_EFLAGS_DIRECTION);
411 
412 	return B_OK;
413 }
414 
415 
416 int64
417 arch_restore_signal_frame(struct signal_frame_data* signalFrameData)
418 {
419 	iframe* frame = x86_get_current_iframe();
420 
421 	frame->orig_rax = signalFrameData->syscall_restart_return_value;
422 	frame->ax = signalFrameData->context.uc_mcontext.rax;
423 	frame->bx = signalFrameData->context.uc_mcontext.rbx;
424 	frame->cx = signalFrameData->context.uc_mcontext.rcx;
425 	frame->dx = signalFrameData->context.uc_mcontext.rdx;
426 	frame->di = signalFrameData->context.uc_mcontext.rdi;
427 	frame->si = signalFrameData->context.uc_mcontext.rsi;
428 	frame->bp = signalFrameData->context.uc_mcontext.rbp;
429 	frame->r8 = signalFrameData->context.uc_mcontext.r8;
430 	frame->r9 = signalFrameData->context.uc_mcontext.r9;
431 	frame->r10 = signalFrameData->context.uc_mcontext.r10;
432 	frame->r11 = signalFrameData->context.uc_mcontext.r11;
433 	frame->r12 = signalFrameData->context.uc_mcontext.r12;
434 	frame->r13 = signalFrameData->context.uc_mcontext.r13;
435 	frame->r14 = signalFrameData->context.uc_mcontext.r14;
436 	frame->r15 = signalFrameData->context.uc_mcontext.r15;
437 	frame->user_sp = signalFrameData->context.uc_mcontext.rsp;
438 	frame->ip = signalFrameData->context.uc_mcontext.rip;
439 	frame->flags = (frame->flags & ~(uint64)X86_EFLAGS_USER_FLAGS)
440 		| (signalFrameData->context.uc_mcontext.rflags & X86_EFLAGS_USER_FLAGS);
441 
442 	Thread* thread = thread_get_current_thread();
443 
444 	memcpy(thread->arch_info.fpu_state,
445 		(void*)&signalFrameData->context.uc_mcontext.fpu, gFPUSaveLength);
446 	frame->fpu = &thread->arch_info.fpu_state;
447 
448 	// The syscall return code overwrites frame->ax with the return value of
449 	// the syscall, need to return it here to ensure the correct value is
450 	// restored.
451 	return frame->ax;
452 }
453