/*
 * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com.
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <arch/thread.h>

#include <string.h>

#include <arch_thread_defs.h>
#include <commpage.h>
#include <cpu.h>
#include <debug.h>
#include <generic_syscall.h>
#include <kernel.h>
#include <ksignal.h>
#include <int.h>
#include <team.h>
#include <thread.h>
#include <tls.h>
#include <tracing.h>
#include <util/Random.h>
#include <vm/vm_types.h>
#include <vm/VMAddressSpace.h>

#include "paging/X86PagingStructures.h"
#include "paging/X86VMTranslationMap.h"


// Debug output for this file: uncomment the define below to have TRACE()
// forward its arguments to dprintf(). Otherwise TRACE() expands to an empty
// statement.
//#define TRACE_ARCH_THREAD
#ifdef TRACE_ARCH_THREAD
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


#ifdef SYSCALL_TRACING

namespace SyscallTracing {

// Trace entry for a restarted syscall. It carries no data of its own; its
// dump output is the fixed text "syscall restart".
class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			// NOTE(review): Initialized() comes from the trace entry base
			// class (not visible here) -- presumably it marks the entry as
			// completely constructed; confirm against <tracing.h>.
			Initialized();
		}

		// Writes the textual representation of this entry to \a out.
		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};

}

// TSYSCALL(x) allocates the SyscallTracing entry \a x with new(std::nothrow)
// when SYSCALL_TRACING is defined and expands to nothing otherwise.
#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING


// Defined outside of this file; its address is stored as the initial
// instruction pointer of newly created kernel threads.
extern "C" void x86_64_thread_entry();

// Initial thread saved state.
73 static arch_thread sInitialState _ALIGNED(64); 74 uint16 gFPUControlDefault; 75 uint32 gFPUMXCSRDefault; 76 extern uint64 gFPUSaveLength; 77 extern bool gHasXsave; 78 extern bool gHasXsavec; 79 80 81 void 82 x86_restart_syscall(iframe* frame) 83 { 84 Thread* thread = thread_get_current_thread(); 85 86 atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL); 87 atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED); 88 89 // Get back the original system call number and modify the frame to 90 // re-execute the syscall instruction. 91 frame->ax = frame->orig_rax; 92 frame->ip -= 2; 93 94 TSYSCALL(RestartSyscall()); 95 } 96 97 98 void 99 x86_set_tls_context(Thread* thread) 100 { 101 // Set FS segment base address to the TLS segment. 102 x86_write_msr(IA32_MSR_FS_BASE, thread->user_local_storage); 103 x86_write_msr(IA32_MSR_KERNEL_GS_BASE, thread->arch_info.user_gs_base); 104 } 105 106 107 static addr_t 108 arch_randomize_stack_pointer(addr_t value) 109 { 110 static_assert(MAX_RANDOM_VALUE >= B_PAGE_SIZE - 1, 111 "randomization range is too big"); 112 value -= random_value() & (B_PAGE_SIZE - 1); 113 return (value & ~addr_t(0xf)) - 8; 114 // This means, result % 16 == 8, which is what rsp should adhere to 115 // when a function is entered for the stack to be considered aligned to 116 // 16 byte. 117 } 118 119 120 static uint8* 121 get_signal_stack(Thread* thread, iframe* frame, struct sigaction* action, 122 size_t spaceNeeded) 123 { 124 // Use the alternate signal stack if we should and can. 125 if (thread->signal_stack_enabled 126 && (action->sa_flags & SA_ONSTACK) != 0 127 && (frame->user_sp < thread->signal_stack_base 128 || frame->user_sp >= thread->signal_stack_base 129 + thread->signal_stack_size)) { 130 addr_t stackTop = thread->signal_stack_base + thread->signal_stack_size; 131 return (uint8*)arch_randomize_stack_pointer(stackTop - spaceNeeded); 132 } 133 134 // We are going to use the stack that we are already on. 
We must not touch 135 // the red zone (128 byte area below the stack pointer, reserved for use 136 // by functions to store temporary data and guaranteed not to be modified 137 // by signal handlers). 138 return (uint8*)((frame->user_sp - 128 - spaceNeeded) & ~addr_t(0xf)) - 8; 139 // align stack pointer (cf. arch_randomize_stack_pointer()) 140 } 141 142 143 static status_t 144 arch_thread_control(const char* subsystem, uint32 function, void* buffer, 145 size_t bufferSize) 146 { 147 switch (function) { 148 case THREAD_SET_GS_BASE: 149 { 150 uint64 base; 151 if (bufferSize != sizeof(base)) 152 return B_BAD_VALUE; 153 154 if (!IS_USER_ADDRESS(buffer) 155 || user_memcpy(&base, buffer, sizeof(base)) < B_OK) { 156 return B_BAD_ADDRESS; 157 } 158 159 Thread* thread = thread_get_current_thread(); 160 thread->arch_info.user_gs_base = base; 161 x86_write_msr(IA32_MSR_KERNEL_GS_BASE, base); 162 return B_OK; 163 } 164 } 165 return B_BAD_HANDLER; 166 } 167 168 169 // #pragma mark - 170 171 172 status_t 173 arch_thread_init(kernel_args* args) 174 { 175 // Save one global valid FPU state; it will be copied in the arch dependent 176 // part of each new thread. 
177 if (gHasXsave || gHasXsavec) { 178 memset(sInitialState.fpu_state, 0, gFPUSaveLength); 179 if (gHasXsavec) { 180 asm volatile ( 181 "clts;" \ 182 "fninit;" \ 183 "fnclex;" \ 184 "movl $0x7,%%eax;" \ 185 "movl $0x0,%%edx;" \ 186 "xsavec64 %0" 187 :: "m" (sInitialState.fpu_state)); 188 } else { 189 asm volatile ( 190 "clts;" \ 191 "fninit;" \ 192 "fnclex;" \ 193 "movl $0x7,%%eax;" \ 194 "movl $0x0,%%edx;" \ 195 "xsave64 %0" 196 :: "m" (sInitialState.fpu_state)); 197 } 198 } else { 199 asm volatile ( 200 "clts;" \ 201 "fninit;" \ 202 "fnclex;" \ 203 "fxsaveq %0" 204 :: "m" (sInitialState.fpu_state)); 205 } 206 gFPUControlDefault = ((savefpu*)&sInitialState.fpu_state)->fp_fxsave.control; 207 gFPUMXCSRDefault = ((savefpu*)&sInitialState.fpu_state)->fp_fxsave.mxcsr; 208 209 register_generic_syscall(THREAD_SYSCALLS, arch_thread_control, 1, 0); 210 211 return B_OK; 212 } 213 214 215 status_t 216 arch_thread_init_thread_struct(Thread* thread) 217 { 218 // Copy the initial saved FPU state to the new thread. 219 memcpy(&thread->arch_info, &sInitialState, sizeof(arch_thread)); 220 221 // Initialise the current thread pointer. 222 thread->arch_info.thread = thread; 223 224 return B_OK; 225 } 226 227 228 /*! Prepares the given thread's kernel stack for executing its entry function. 229 230 \param thread The thread. 231 \param stack The usable bottom of the thread's kernel stack. 232 \param stackTop The usable top of the thread's kernel stack. 233 \param function The entry function the thread shall execute. 234 \param data Pointer to be passed to the entry function. 235 */ 236 void 237 arch_thread_init_kthread_stack(Thread* thread, void* _stack, void* _stackTop, 238 void (*function)(void*), const void* data) 239 { 240 uintptr_t* stackTop = static_cast<uintptr_t*>(_stackTop); 241 242 TRACE("arch_thread_init_kthread_stack: stack top %p, function %p, data: " 243 "%p\n", _stackTop, function, data); 244 245 // Save the stack top for system call entry. 
246 thread->arch_info.syscall_rsp = (uint64*)thread->kernel_stack_top; 247 248 thread->arch_info.instruction_pointer 249 = reinterpret_cast<uintptr_t>(x86_64_thread_entry); 250 251 *--stackTop = uintptr_t(data); 252 *--stackTop = uintptr_t(function); 253 254 // Save the stack position. 255 thread->arch_info.current_stack = stackTop; 256 } 257 258 259 void 260 arch_thread_dump_info(void* info) 261 { 262 arch_thread* thread = (arch_thread*)info; 263 264 kprintf("\trsp: %p\n", thread->current_stack); 265 kprintf("\tsyscall_rsp: %p\n", thread->syscall_rsp); 266 kprintf("\tuser_rsp: %p\n", thread->user_rsp); 267 kprintf("\tfpu_state at %p\n", thread->fpu_state); 268 } 269 270 271 /*! Sets up initial thread context and enters user space 272 */ 273 status_t 274 arch_thread_enter_userspace(Thread* thread, addr_t entry, void* args1, 275 void* args2) 276 { 277 addr_t stackTop = thread->user_stack_base + thread->user_stack_size; 278 addr_t codeAddr; 279 280 TRACE("arch_thread_enter_userspace: entry %#lx, args %p %p, " 281 "stackTop %#lx\n", entry, args1, args2, stackTop); 282 283 stackTop = arch_randomize_stack_pointer(stackTop - sizeof(codeAddr)); 284 285 // Copy the address of the stub that calls exit_thread() when the thread 286 // entry function returns to the top of the stack to act as the return 287 // address. The stub is inside commpage. 288 addr_t commPageAddress = (addr_t)thread->team->commpage_address; 289 arch_cpu_enable_user_access(); 290 codeAddr = ((addr_t*)commPageAddress)[COMMPAGE_ENTRY_X86_THREAD_EXIT] 291 + commPageAddress; 292 arch_cpu_disable_user_access(); 293 if (user_memcpy((void*)stackTop, (const void*)&codeAddr, sizeof(codeAddr)) 294 != B_OK) 295 return B_BAD_ADDRESS; 296 297 // Prepare the user iframe. 
298 iframe frame = {}; 299 frame.type = IFRAME_TYPE_SYSCALL; 300 frame.si = (uint64)args2; 301 frame.di = (uint64)args1; 302 frame.ip = entry; 303 frame.cs = USER_CODE_SELECTOR; 304 frame.flags = X86_EFLAGS_RESERVED1 | X86_EFLAGS_INTERRUPT; 305 frame.sp = stackTop; 306 frame.ss = USER_DATA_SELECTOR; 307 308 // Return to userland. Never returns. 309 x86_initial_return_to_userland(thread, &frame); 310 311 return B_OK; 312 } 313 314 315 /*! Sets up the user iframe for invoking a signal handler. 316 317 The function fills in the remaining fields of the given \a signalFrameData, 318 copies it to the thread's userland stack (the one on which the signal shall 319 be handled), and sets up the user iframe so that when returning to userland 320 a wrapper function is executed that calls the user-defined signal handler. 321 When the signal handler returns, the wrapper function shall call the 322 "restore signal frame" syscall with the (possibly modified) signal frame 323 data. 324 325 The following fields of the \a signalFrameData structure still need to be 326 filled in: 327 - \c context.uc_stack: The stack currently used by the thread. 328 - \c context.uc_mcontext: The current userland state of the registers. 329 - \c syscall_restart_return_value: Architecture specific use. On x86_64 the 330 value of rax which is overwritten by the syscall return value. 331 332 Furthermore the function needs to set \c thread->user_signal_context to the 333 userland pointer to the \c ucontext_t on the user stack. 334 335 \param thread The current thread. 336 \param action The signal action specified for the signal to be handled. 337 \param signalFrameData A partially initialized structure of all the data 338 that need to be copied to userland. 339 \return \c B_OK on success, another error code, if something goes wrong. 
340 */ 341 status_t 342 arch_setup_signal_frame(Thread* thread, struct sigaction* action, 343 struct signal_frame_data* signalFrameData) 344 { 345 iframe* frame = x86_get_current_iframe(); 346 if (!IFRAME_IS_USER(frame)) { 347 panic("arch_setup_signal_frame(): No user iframe!"); 348 return B_BAD_VALUE; 349 } 350 351 // Store the register state. 352 signalFrameData->context.uc_mcontext.rax = frame->ax; 353 signalFrameData->context.uc_mcontext.rbx = frame->bx; 354 signalFrameData->context.uc_mcontext.rcx = frame->cx; 355 signalFrameData->context.uc_mcontext.rdx = frame->dx; 356 signalFrameData->context.uc_mcontext.rdi = frame->di; 357 signalFrameData->context.uc_mcontext.rsi = frame->si; 358 signalFrameData->context.uc_mcontext.rbp = frame->bp; 359 signalFrameData->context.uc_mcontext.r8 = frame->r8; 360 signalFrameData->context.uc_mcontext.r9 = frame->r9; 361 signalFrameData->context.uc_mcontext.r10 = frame->r10; 362 signalFrameData->context.uc_mcontext.r11 = frame->r11; 363 signalFrameData->context.uc_mcontext.r12 = frame->r12; 364 signalFrameData->context.uc_mcontext.r13 = frame->r13; 365 signalFrameData->context.uc_mcontext.r14 = frame->r14; 366 signalFrameData->context.uc_mcontext.r15 = frame->r15; 367 signalFrameData->context.uc_mcontext.rsp = frame->user_sp; 368 signalFrameData->context.uc_mcontext.rip = frame->ip; 369 signalFrameData->context.uc_mcontext.rflags = frame->flags; 370 371 if (frame->fpu != nullptr) { 372 memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, frame->fpu, 373 gFPUSaveLength); 374 } else { 375 memcpy((void*)&signalFrameData->context.uc_mcontext.fpu, 376 sInitialState.fpu_state, gFPUSaveLength); 377 } 378 379 // Fill in signalFrameData->context.uc_stack. 380 signal_get_user_stack(frame->user_sp, &signalFrameData->context.uc_stack); 381 382 // Store syscall_restart_return_value. 383 signalFrameData->syscall_restart_return_value = frame->orig_rax; 384 385 // Get the stack to use and copy the frame data to it. 
386 uint8* userStack = get_signal_stack(thread, frame, action, 387 sizeof(*signalFrameData) + sizeof(frame->ip)); 388 389 signal_frame_data* userSignalFrameData 390 = (signal_frame_data*)(userStack + sizeof(frame->ip)); 391 392 if (user_memcpy(userSignalFrameData, signalFrameData, 393 sizeof(*signalFrameData)) != B_OK) { 394 return B_BAD_ADDRESS; 395 } 396 397 // Copy a return address to the stack so that backtraces will be correct. 398 if (user_memcpy(userStack, &frame->ip, sizeof(frame->ip)) != B_OK) 399 return B_BAD_ADDRESS; 400 401 // Update Thread::user_signal_context, now that everything seems to have 402 // gone fine. 403 thread->user_signal_context = &userSignalFrameData->context; 404 405 // Set up the iframe to execute the signal handler wrapper on our prepared 406 // stack. First argument points to the frame data. 407 addr_t* commPageAddress = (addr_t*)thread->team->commpage_address; 408 frame->user_sp = (addr_t)userStack; 409 arch_cpu_enable_user_access(); 410 frame->ip = commPageAddress[COMMPAGE_ENTRY_X86_SIGNAL_HANDLER] 411 + (addr_t)commPageAddress; 412 arch_cpu_disable_user_access(); 413 frame->di = (addr_t)userSignalFrameData; 414 frame->flags &= ~(uint64)(X86_EFLAGS_TRAP | X86_EFLAGS_DIRECTION); 415 416 return B_OK; 417 } 418 419 420 int64 421 arch_restore_signal_frame(struct signal_frame_data* signalFrameData) 422 { 423 iframe* frame = x86_get_current_iframe(); 424 425 frame->orig_rax = signalFrameData->syscall_restart_return_value; 426 frame->ax = signalFrameData->context.uc_mcontext.rax; 427 frame->bx = signalFrameData->context.uc_mcontext.rbx; 428 frame->cx = signalFrameData->context.uc_mcontext.rcx; 429 frame->dx = signalFrameData->context.uc_mcontext.rdx; 430 frame->di = signalFrameData->context.uc_mcontext.rdi; 431 frame->si = signalFrameData->context.uc_mcontext.rsi; 432 frame->bp = signalFrameData->context.uc_mcontext.rbp; 433 frame->r8 = signalFrameData->context.uc_mcontext.r8; 434 frame->r9 = signalFrameData->context.uc_mcontext.r9; 435 
frame->r10 = signalFrameData->context.uc_mcontext.r10; 436 frame->r11 = signalFrameData->context.uc_mcontext.r11; 437 frame->r12 = signalFrameData->context.uc_mcontext.r12; 438 frame->r13 = signalFrameData->context.uc_mcontext.r13; 439 frame->r14 = signalFrameData->context.uc_mcontext.r14; 440 frame->r15 = signalFrameData->context.uc_mcontext.r15; 441 frame->user_sp = signalFrameData->context.uc_mcontext.rsp; 442 frame->ip = signalFrameData->context.uc_mcontext.rip; 443 frame->flags = (frame->flags & ~(uint64)X86_EFLAGS_USER_FLAGS) 444 | (signalFrameData->context.uc_mcontext.rflags & X86_EFLAGS_USER_FLAGS); 445 446 Thread* thread = thread_get_current_thread(); 447 448 memcpy(thread->arch_info.fpu_state, 449 (void*)&signalFrameData->context.uc_mcontext.fpu, gFPUSaveLength); 450 frame->fpu = &thread->arch_info.fpu_state; 451 452 // The syscall return code overwrites frame->ax with the return value of 453 // the syscall, need to return it here to ensure the correct value is 454 // restored. 455 return frame->ax; 456 } 457