xref: /haiku/src/system/kernel/arch/x86/64/thread.cpp (revision 60a6f1d5d7a8715cd3897dd0b626f2e4a64984a8)
1 /*
2  * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com.
3  * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
4  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
5  * Distributed under the terms of the MIT License.
6  *
7  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
8  * Distributed under the terms of the NewOS License.
9  */
10 
11 
12 #include <arch/thread.h>
13 
14 #include <string.h>
15 
16 #include <commpage.h>
17 #include <cpu.h>
18 #include <debug.h>
19 #include <kernel.h>
20 #include <ksignal.h>
21 #include <int.h>
22 #include <team.h>
23 #include <thread.h>
24 #include <tls.h>
25 #include <tracing.h>
26 #include <util/Random.h>
27 #include <vm/vm_types.h>
28 #include <vm/VMAddressSpace.h>
29 
30 #include "paging/X86PagingStructures.h"
31 #include "paging/X86VMTranslationMap.h"
32 
33 
// Debug tracing switch for this file: uncomment the following define to have
// TRACE() forward its arguments to dprintf(). While it stays commented out,
// TRACE() expands to an empty statement.
//#define TRACE_ARCH_THREAD
#ifdef TRACE_ARCH_THREAD
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif
40 
41 
#ifdef SYSCALL_TRACING

namespace SyscallTracing {

// Trace entry that is recorded each time a system call gets restarted (see
// x86_restart_syscall()). It carries no payload; its dump is a fixed string.
class RestartSyscall : public AbstractTraceEntry {
public:
	RestartSyscall()
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("syscall restart");
	}
};

}	// namespace SyscallTracing

// With syscall tracing enabled TSYSCALL(x) allocates the trace entry x,
// otherwise it expands to nothing.
#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING
66 
67 
// Defined elsewhere; its address is installed as the initial instruction
// pointer of new kernel threads by arch_thread_init_kthread_stack().
extern "C" void x86_64_thread_entry();

// Initial thread saved state. Its fpu_state is filled in once by
// arch_thread_init() and the whole structure is copied into every new thread
// by arch_thread_init_thread_struct().
static arch_thread sInitialState _ALIGNED(64);
// Number of bytes of FPU state that this file clears/copies (defined
// elsewhere).
extern uint64 gFPUSaveLength;
// Select which save instruction arch_thread_init() uses: xsavec64 if
// gHasXsavec, else xsave64 if gHasXsave, else fxsaveq (defined elsewhere).
extern bool gHasXsave;
extern bool gHasXsavec;
75 
76 
77 void
78 x86_restart_syscall(iframe* frame)
79 {
80 	Thread* thread = thread_get_current_thread();
81 
82 	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
83 	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);
84 
85 	// Get back the original system call number and modify the frame to
86 	// re-execute the syscall instruction.
87 	frame->ax = frame->orig_rax;
88 	frame->ip -= 2;
89 
90 	TSYSCALL(RestartSyscall());
91 }
92 
93 
94 void
95 x86_set_tls_context(Thread* thread)
96 {
97 	// Set FS segment base address to the TLS segment.
98 	x86_write_msr(IA32_MSR_FS_BASE, thread->user_local_storage);
99 }
100 
101 
102 static addr_t
103 arch_randomize_stack_pointer(addr_t value)
104 {
105 	static_assert(MAX_RANDOM_VALUE >= B_PAGE_SIZE - 1,
106 		"randomization range is too big");
107 	value -= random_value() & (B_PAGE_SIZE - 1);
108 	return (value & ~addr_t(0xf)) - 8;
109 		// This means, result % 16 == 8, which is what rsp should adhere to
110 		// when a function is entered for the stack to be considered aligned to
111 		// 16 byte.
112 }
113 
114 
115 static uint8*
116 get_signal_stack(Thread* thread, iframe* frame, struct sigaction* action,
117 	size_t spaceNeeded)
118 {
119 	// Use the alternate signal stack if we should and can.
120 	if (thread->signal_stack_enabled
121 			&& (action->sa_flags & SA_ONSTACK) != 0
122 			&& (frame->user_sp < thread->signal_stack_base
123 				|| frame->user_sp >= thread->signal_stack_base
124 					+ thread->signal_stack_size)) {
125 		addr_t stackTop = thread->signal_stack_base + thread->signal_stack_size;
126 		return (uint8*)arch_randomize_stack_pointer(stackTop - spaceNeeded);
127 	}
128 
129 	// We are going to use the stack that we are already on. We must not touch
130 	// the red zone (128 byte area below the stack pointer, reserved for use
131 	// by functions to store temporary data and guaranteed not to be modified
132 	// by signal handlers).
133 	return (uint8*)((frame->user_sp - 128 - spaceNeeded) & ~addr_t(0xf)) - 8;
134 		// align stack pointer (cf. arch_randomize_stack_pointer())
135 }
136 
137 
138 //	#pragma mark -
139 
140 
141 status_t
142 arch_thread_init(kernel_args* args)
143 {
144 	// Save one global valid FPU state; it will be copied in the arch dependent
145 	// part of each new thread.
146 	if (gHasXsave || gHasXsavec) {
147 		ASSERT(gFPUSaveLength <= sizeof(sInitialState.fpu_state));
148 		memset(sInitialState.fpu_state, 0, gFPUSaveLength);
149 		if (gHasXsavec) {
150 			asm volatile (
151 				"clts;"		\
152 				"fninit;"	\
153 				"fnclex;"	\
154 				"movl $0x7,%%eax;"	\
155 				"movl $0x0,%%edx;"	\
156 				"xsavec64 %0"
157 				:: "m" (sInitialState.fpu_state));
158 		} else {
159 			asm volatile (
160 				"clts;"		\
161 				"fninit;"	\
162 				"fnclex;"	\
163 				"movl $0x7,%%eax;"	\
164 				"movl $0x0,%%edx;"	\
165 				"xsave64 %0"
166 				:: "m" (sInitialState.fpu_state));
167 		}
168 	} else {
169 		asm volatile (
170 			"clts;"		\
171 			"fninit;"	\
172 			"fnclex;"	\
173 			"fxsaveq %0"
174 			:: "m" (sInitialState.fpu_state));
175 	}
176 	return B_OK;
177 }
178 
179 
180 status_t
181 arch_thread_init_thread_struct(Thread* thread)
182 {
183 	// Copy the initial saved FPU state to the new thread.
184 	memcpy(&thread->arch_info, &sInitialState, sizeof(arch_thread));
185 
186 	// Initialise the current thread pointer.
187 	thread->arch_info.thread = thread;
188 
189 	return B_OK;
190 }
191 
192 
193 /*!	Prepares the given thread's kernel stack for executing its entry function.
194 
195 	\param thread The thread.
196 	\param stack The usable bottom of the thread's kernel stack.
197 	\param stackTop The usable top of the thread's kernel stack.
198 	\param function The entry function the thread shall execute.
199 	\param data Pointer to be passed to the entry function.
200 */
201 void
202 arch_thread_init_kthread_stack(Thread* thread, void* _stack, void* _stackTop,
203 	void (*function)(void*), const void* data)
204 {
205 	uintptr_t* stackTop = static_cast<uintptr_t*>(_stackTop);
206 
207 	TRACE("arch_thread_init_kthread_stack: stack top %p, function %p, data: "
208 		"%p\n", _stackTop, function, data);
209 
210 	// Save the stack top for system call entry.
211 	thread->arch_info.syscall_rsp = (uint64*)thread->kernel_stack_top;
212 
213 	thread->arch_info.instruction_pointer
214 		= reinterpret_cast<uintptr_t>(x86_64_thread_entry);
215 
216 	*--stackTop = uintptr_t(data);
217 	*--stackTop = uintptr_t(function);
218 
219 	// Save the stack position.
220 	thread->arch_info.current_stack = stackTop;
221 }
222 
223 
224 void
225 arch_thread_dump_info(void* info)
226 {
227 	arch_thread* thread = (arch_thread*)info;
228 
229 	kprintf("\trsp: %p\n", thread->current_stack);
230 	kprintf("\tsyscall_rsp: %p\n", thread->syscall_rsp);
231 	kprintf("\tuser_rsp: %p\n", thread->user_rsp);
232 	kprintf("\tfpu_state at %p\n", thread->fpu_state);
233 }
234 
235 
236 /*!	Sets up initial thread context and enters user space
237 */
238 status_t
239 arch_thread_enter_userspace(Thread* thread, addr_t entry, void* args1,
240 	void* args2)
241 {
242 	addr_t stackTop = thread->user_stack_base + thread->user_stack_size;
243 	addr_t codeAddr;
244 
245 	TRACE("arch_thread_enter_userspace: entry %#lx, args %p %p, "
246 		"stackTop %#lx\n", entry, args1, args2, stackTop);
247 
248 	stackTop = arch_randomize_stack_pointer(stackTop - sizeof(codeAddr));
249 
250 	// Copy the address of the stub that calls exit_thread() when the thread
251 	// entry function returns to the top of the stack to act as the return
252 	// address. The stub is inside commpage.
253 	addr_t commPageAddress = (addr_t)thread->team->commpage_address;
254 	set_ac();
255 	codeAddr = ((addr_t*)commPageAddress)[COMMPAGE_ENTRY_X86_THREAD_EXIT]
256 		+ commPageAddress;
257 	clear_ac();
258 	if (user_memcpy((void*)stackTop, (const void*)&codeAddr, sizeof(codeAddr))
259 			!= B_OK)
260 		return B_BAD_ADDRESS;
261 
262 	// Prepare the user iframe.
263 	iframe frame = {};
264 	frame.type = IFRAME_TYPE_SYSCALL;
265 	frame.si = (uint64)args2;
266 	frame.di = (uint64)args1;
267 	frame.ip = entry;
268 	frame.cs = USER_CODE_SELECTOR;
269 	frame.flags = X86_EFLAGS_RESERVED1 | X86_EFLAGS_INTERRUPT
270 		| (3 << X86_EFLAGS_IO_PRIVILEG_LEVEL_SHIFT);
271 	frame.sp = stackTop;
272 	frame.ss = USER_DATA_SELECTOR;
273 
274 	// Return to userland. Never returns.
275 	x86_initial_return_to_userland(thread, &frame);
276 
277 	return B_OK;
278 }
279 
280 
281 /*!	Sets up the user iframe for invoking a signal handler.
282 
283 	The function fills in the remaining fields of the given \a signalFrameData,
284 	copies it to the thread's userland stack (the one on which the signal shall
285 	be handled), and sets up the user iframe so that when returning to userland
286 	a wrapper function is executed that calls the user-defined signal handler.
287 	When the signal handler returns, the wrapper function shall call the
288 	"restore signal frame" syscall with the (possibly modified) signal frame
289 	data.
290 
291 	The following fields of the \a signalFrameData structure still need to be
292 	filled in:
293 	- \c context.uc_stack: The stack currently used by the thread.
294 	- \c context.uc_mcontext: The current userland state of the registers.
295 	- \c syscall_restart_return_value: Architecture specific use. On x86_64 the
296 		value of rax which is overwritten by the syscall return value.
297 
298 	Furthermore the function needs to set \c thread->user_signal_context to the
299 	userland pointer to the \c ucontext_t on the user stack.
300 
301 	\param thread The current thread.
302 	\param action The signal action specified for the signal to be handled.
303 	\param signalFrameData A partially initialized structure of all the data
304 		that need to be copied to userland.
305 	\return \c B_OK on success, another error code, if something goes wrong.
306 */
307 status_t
308 arch_setup_signal_frame(Thread* thread, struct sigaction* action,
309 	struct signal_frame_data* signalFrameData)
310 {
311 	iframe* frame = x86_get_current_iframe();
312 	if (!IFRAME_IS_USER(frame)) {
313 		panic("arch_setup_signal_frame(): No user iframe!");
314 		return B_BAD_VALUE;
315 	}
316 
317 	// Store the register state.
318 	signalFrameData->context.uc_mcontext.rax = frame->ax;
319 	signalFrameData->context.uc_mcontext.rbx = frame->bx;
320 	signalFrameData->context.uc_mcontext.rcx = frame->cx;
321 	signalFrameData->context.uc_mcontext.rdx = frame->dx;
322 	signalFrameData->context.uc_mcontext.rdi = frame->di;
323 	signalFrameData->context.uc_mcontext.rsi = frame->si;
324 	signalFrameData->context.uc_mcontext.rbp = frame->bp;
325 	signalFrameData->context.uc_mcontext.r8 = frame->r8;
326 	signalFrameData->context.uc_mcontext.r9 = frame->r9;
327 	signalFrameData->context.uc_mcontext.r10 = frame->r10;
328 	signalFrameData->context.uc_mcontext.r11 = frame->r11;
329 	signalFrameData->context.uc_mcontext.r12 = frame->r12;
330 	signalFrameData->context.uc_mcontext.r13 = frame->r13;
331 	signalFrameData->context.uc_mcontext.r14 = frame->r14;
332 	signalFrameData->context.uc_mcontext.r15 = frame->r15;
333 	signalFrameData->context.uc_mcontext.rsp = frame->user_sp;
334 	signalFrameData->context.uc_mcontext.rip = frame->ip;
335 	signalFrameData->context.uc_mcontext.rflags = frame->flags;
336 
337 	if (frame->fpu != nullptr) {
338 		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, frame->fpu,
339 			gFPUSaveLength);
340 	} else {
341 		memcpy((void*)&signalFrameData->context.uc_mcontext.fpu,
342 			sInitialState.fpu_state, gFPUSaveLength);
343 	}
344 
345 	// Fill in signalFrameData->context.uc_stack.
346 	signal_get_user_stack(frame->user_sp, &signalFrameData->context.uc_stack);
347 
348 	// Store syscall_restart_return_value.
349 	signalFrameData->syscall_restart_return_value = frame->orig_rax;
350 
351 	// Get the stack to use and copy the frame data to it.
352 	uint8* userStack = get_signal_stack(thread, frame, action,
353 		sizeof(*signalFrameData) + sizeof(frame->ip));
354 
355 	signal_frame_data* userSignalFrameData
356 		= (signal_frame_data*)(userStack + sizeof(frame->ip));
357 
358 	if (user_memcpy(userSignalFrameData, signalFrameData,
359 			sizeof(*signalFrameData)) != B_OK) {
360 		return B_BAD_ADDRESS;
361 	}
362 
363 	// Copy a return address to the stack so that backtraces will be correct.
364 	if (user_memcpy(userStack, &frame->ip, sizeof(frame->ip)) != B_OK)
365 		return B_BAD_ADDRESS;
366 
367 	// Update Thread::user_signal_context, now that everything seems to have
368 	// gone fine.
369 	thread->user_signal_context = &userSignalFrameData->context;
370 
371 	// Set up the iframe to execute the signal handler wrapper on our prepared
372 	// stack. First argument points to the frame data.
373 	addr_t* commPageAddress = (addr_t*)thread->team->commpage_address;
374 	frame->user_sp = (addr_t)userStack;
375 	set_ac();
376 	frame->ip = commPageAddress[COMMPAGE_ENTRY_X86_SIGNAL_HANDLER]
377 		+ (addr_t)commPageAddress;
378 	clear_ac();
379 	frame->di = (addr_t)userSignalFrameData;
380 
381 	return B_OK;
382 }
383 
384 
385 int64
386 arch_restore_signal_frame(struct signal_frame_data* signalFrameData)
387 {
388 	iframe* frame = x86_get_current_iframe();
389 
390 	frame->orig_rax = signalFrameData->syscall_restart_return_value;
391 	frame->ax = signalFrameData->context.uc_mcontext.rax;
392 	frame->bx = signalFrameData->context.uc_mcontext.rbx;
393 	frame->cx = signalFrameData->context.uc_mcontext.rcx;
394 	frame->dx = signalFrameData->context.uc_mcontext.rdx;
395 	frame->di = signalFrameData->context.uc_mcontext.rdi;
396 	frame->si = signalFrameData->context.uc_mcontext.rsi;
397 	frame->bp = signalFrameData->context.uc_mcontext.rbp;
398 	frame->r8 = signalFrameData->context.uc_mcontext.r8;
399 	frame->r9 = signalFrameData->context.uc_mcontext.r9;
400 	frame->r10 = signalFrameData->context.uc_mcontext.r10;
401 	frame->r11 = signalFrameData->context.uc_mcontext.r11;
402 	frame->r12 = signalFrameData->context.uc_mcontext.r12;
403 	frame->r13 = signalFrameData->context.uc_mcontext.r13;
404 	frame->r14 = signalFrameData->context.uc_mcontext.r14;
405 	frame->r15 = signalFrameData->context.uc_mcontext.r15;
406 	frame->user_sp = signalFrameData->context.uc_mcontext.rsp;
407 	frame->ip = signalFrameData->context.uc_mcontext.rip;
408 	frame->flags = (frame->flags & ~(uint64)X86_EFLAGS_USER_FLAGS)
409 		| (signalFrameData->context.uc_mcontext.rflags & X86_EFLAGS_USER_FLAGS);
410 
411 	Thread* thread = thread_get_current_thread();
412 
413 	memcpy(thread->arch_info.fpu_state,
414 		(void*)&signalFrameData->context.uc_mcontext.fpu, gFPUSaveLength);
415 	frame->fpu = &thread->arch_info.fpu_state;
416 
417 	// The syscall return code overwrites frame->ax with the return value of
418 	// the syscall, need to return it here to ensure the correct value is
419 	// restored.
420 	return frame->ax;
421 }
422