/*
 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <arch/thread.h>

#include <string.h>

#include <arch/user_debugger.h>
#include <arch_cpu.h>
#include <cpu.h>
#include <debug.h>
#include <kernel.h>
#include <ksignal.h>
#include <int.h>
#include <team.h>
#include <thread.h>
#include <tls.h>
#include <tracing.h>
#include <vm/vm_types.h>
#include <vm/VMAddressSpace.h>

#include "x86_paging.h"
#include "x86_syscalls.h"
#include "X86VMTranslationMap.h"


//#define TRACE_ARCH_THREAD
#ifdef TRACE_ARCH_THREAD
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif


#ifdef SYSCALL_TRACING

namespace SyscallTracing {

class RestartSyscall : public AbstractTraceEntry {
	public:
		RestartSyscall()
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("syscall restart");
		}
};

}

#	define TSYSCALL(x)	new(std::nothrow) SyscallTracing::x

#else
#	define TSYSCALL(x)
#endif	// SYSCALL_TRACING


// from arch_interrupts.S
extern "C" void i386_stack_init(struct farcall *interrupt_stack_offset);
extern "C" void i386_restore_frame_from_syscall(struct iframe frame);

// from arch_cpu.c
extern void (*gX86SwapFPUFunc)(void *oldState, const void *newState);
extern bool gHasSSE;

static struct arch_thread sInitialState _ALIGNED(16);
	// the fpu_state must be aligned on a 16 byte boundary, so that fxsave
	// can use it


status_t
arch_thread_init(struct kernel_args *args)
{
	// save one global valid FPU state; it will be copied into the
	// arch-dependent part of each new thread

	asm volatile ("clts; fninit; fnclex;");
	if (gHasSSE)
		i386_fxsave(sInitialState.fpu_state);
	else
		i386_fnsave(sInitialState.fpu_state);

	return B_OK;
}
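

// A note on the stack walkers below: an ordinary stack frame begins with the
// caller's saved %ebp, but the interrupt/syscall entry code (presumably in
// arch_interrupts.S) pushes a struct iframe instead, whose first word is a
// small iframe type value covered by IFRAME_TYPE_MASK. So a "previous frame"
// word with no bits outside that mask set cannot be a valid frame pointer:
// it marks the current position as an iframe, and a value of 0 terminates
// the chain.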
static struct iframe *
find_previous_iframe(struct thread *thread, addr_t frame)
{
	// iterate backwards through the stack frames, until we hit an iframe
	while (frame >= thread->kernel_stack_base
		&& frame < thread->kernel_stack_top) {
		addr_t previousFrame = *(addr_t*)frame;
		if ((previousFrame & ~IFRAME_TYPE_MASK) == 0) {
			if (previousFrame == 0)
				return NULL;
			return (struct iframe*)frame;
		}

		frame = previousFrame;
	}

	return NULL;
}


static struct iframe*
get_previous_iframe(struct iframe* frame)
{
	if (frame == NULL)
		return NULL;

	return find_previous_iframe(thread_get_current_thread(), frame->ebp);
}


/*!
	Returns the current iframe structure of the running thread.
	This function must only be called in a context where it's actually
	sure that such an iframe exists; i.e. from syscalls, but usually not
	from standard kernel threads.
*/
static struct iframe*
get_current_iframe(void)
{
	return find_previous_iframe(thread_get_current_thread(), x86_read_ebp());
}


/*!
	\brief Returns the current thread's topmost (i.e. most recent)
		userland->kernel transition iframe (usually the first one, save for
		interrupts in signal handlers).
	\return The iframe, or \c NULL, if there is no such iframe (e.g. when
		the thread is a kernel thread).
*/
struct iframe *
i386_get_user_iframe(void)
{
	struct iframe* frame = get_current_iframe();

	while (frame != NULL) {
		if (IFRAME_IS_USER(frame))
			return frame;
		frame = get_previous_iframe(frame);
	}

	return NULL;
}


/*!	\brief Like i386_get_user_iframe(), just for the given thread.
	The thread must not be running, and the thread's spinlock must be held.
*/
struct iframe *
i386_get_thread_user_iframe(struct thread *thread)
{
	if (thread->state == B_THREAD_RUNNING)
		return NULL;

	// read %ebp from the thread's stack, as stored there by a pushad
	addr_t ebp = thread->arch_info.current_stack.esp[2];

	// find the user iframe
	struct iframe *frame = find_previous_iframe(thread, ebp);

	while (frame != NULL) {
		if (IFRAME_IS_USER(frame))
			return frame;
		frame = get_previous_iframe(frame);
	}

	return NULL;
}


struct iframe *
i386_get_current_iframe(void)
{
	return get_current_iframe();
}


void *
x86_next_page_directory(struct thread *from, struct thread *to)
{
	VMAddressSpace* toAddressSpace = to->team->address_space;
	if (from->team->address_space == toAddressSpace) {
		// don't change the pgdir, same address space
		return NULL;
	}

	if (toAddressSpace == NULL)
		toAddressSpace = VMAddressSpace::Kernel();

	return i386_translation_map_get_pgdir(toAddressSpace->TranslationMap());
}


static inline void
set_fs_register(uint32 segment)
{
	asm("movl %0,%%fs" :: "r" (segment));
}


static void
set_tls_context(struct thread *thread)
{
	int entry = smp_get_current_cpu() + TLS_BASE_SEGMENT;

	set_segment_descriptor_base(&gGDT[entry], thread->user_local_storage);
	set_fs_register((entry << 3) | DPL_USER);
}


void
x86_restart_syscall(struct iframe* frame)
{
	struct thread* thread = thread_get_current_thread();

	atomic_and(&thread->flags, ~THREAD_FLAGS_RESTART_SYSCALL);
	atomic_or(&thread->flags, THREAD_FLAGS_SYSCALL_RESTARTED);

	frame->eax = frame->orig_eax;
	frame->edx = frame->orig_edx;
	frame->eip -= 2;
		// undoes the "int $99"/"sysenter"/"syscall" instruction
		// (so that it'll be executed again)

	TSYSCALL(RestartSyscall());
}


static uint32 *
get_signal_stack(struct thread *thread, struct iframe *frame, int signal)
{
	// use the alternate signal stack if we should and can
	if (thread->signal_stack_enabled
		&& (thread->sig_action[signal - 1].sa_flags & SA_ONSTACK) != 0
		&& (frame->user_esp < thread->signal_stack_base
			|| frame->user_esp >= thread->signal_stack_base
				+ thread->signal_stack_size)) {
		return (uint32 *)(thread->signal_stack_base
			+ thread->signal_stack_size);
	}

	return (uint32 *)frame->user_esp;
}


//	#pragma mark -


status_t
arch_team_init_team_struct(struct team *p, bool kernel)
{
	return B_OK;
}


status_t
arch_thread_init_thread_struct(struct thread *thread)
{
	// set up an initial state (stack & fpu)
	memcpy(&thread->arch_info, &sInitialState, sizeof(struct arch_thread));
	return B_OK;
}
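

// Initial kernel stack layout built by arch_thread_init_kthread_stack(),
// from higher to lower addresses:
//
//	exit_func	<- returned to when start_func returns
//	start_func	<- returned to when entry_func returns
//	entry_func	<- popped by the first "ret" after the switch
//	8 * 0		<- register values consumed by "popad"
//			<- arch_info.current_stack.esp
//
// This is a sketch of what i386_context_switch presumably pops (popad, then
// ret) when switching to this thread for the first time.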
status_t
arch_thread_init_kthread_stack(struct thread *t, int (*start_func)(void),
	void (*entry_func)(void), void (*exit_func)(void))
{
	addr_t *kstack = (addr_t *)t->kernel_stack_base;
	addr_t *kstack_top = (addr_t *)t->kernel_stack_top;
	int i;

	TRACE(("arch_thread_init_kthread_stack: kstack 0x%p, start_func 0x%p, "
		"entry_func 0x%p\n", kstack, start_func, entry_func));

	// clear the kernel stack
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
	memset((void *)((addr_t)kstack + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE),
		0, KERNEL_STACK_SIZE);
#	else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#	endif
#else
	memset(kstack, 0, KERNEL_STACK_SIZE);
#endif

	// set the final return address to be thread_kthread_exit
	kstack_top--;
	*kstack_top = (unsigned int)exit_func;

	// set the return address to be the start of the first function
	kstack_top--;
	*kstack_top = (unsigned int)start_func;

	// set the return address to be the start of the entry (thread setup)
	// function
	kstack_top--;
	*kstack_top = (unsigned int)entry_func;

	// simulate pushfl
//	kstack_top--;
//	*kstack_top = 0x00;	// interrupts still disabled after the switch

	// simulate initial popad
	for (i = 0; i < 8; i++) {
		kstack_top--;
		*kstack_top = 0;
	}

	// save the stack position
	t->arch_info.current_stack.esp = kstack_top;
	t->arch_info.current_stack.ss = (addr_t *)KERNEL_DATA_SEG;

	return B_OK;
}


/**	Initializes the user-space TLS local storage pointer in
 *	the thread structure, and the reserved TLS slots.
 *
 *	Called from _create_user_thread_kentry().
 */

status_t
arch_thread_init_tls(struct thread *thread)
{
	uint32 tls[TLS_USER_THREAD_SLOT + 1];

	thread->user_local_storage = thread->user_stack_base
		+ thread->user_stack_size;

	// initialize default TLS fields
	memset(tls, 0, sizeof(tls));
	tls[TLS_BASE_ADDRESS_SLOT] = thread->user_local_storage;
	tls[TLS_THREAD_ID_SLOT] = thread->id;
	tls[TLS_USER_THREAD_SLOT] = (addr_t)thread->user_thread;

	return user_memcpy((void *)thread->user_local_storage, tls, sizeof(tls));
}
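

// A context switch below consists of: pointing the TSS and the syscall
// stack at the new thread's kernel stack, updating the per-CPU TLS GDT
// entry, swapping the FPU state, and - only when the new thread lives in a
// different address space - handing a new page directory to
// i386_context_switch (a value of 0 means "keep the current one").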
void
arch_thread_context_switch(struct thread *from, struct thread *to)
{
	i386_set_tss_and_kstack(to->kernel_stack_top);
	x86_set_syscall_stack(to->kernel_stack_top);

	// set TLS GDT entry to the current thread - since this action is
	// dependent on the current CPU, we have to do it here
	if (to->user_local_storage != 0)
		set_tls_context(to);

	struct cpu_ent* cpuData = to->cpu;
	vm_translation_map_arch_info* activeMap
		= cpuData->arch.active_translation_map;
	VMAddressSpace* toAddressSpace = to->team->address_space;

	addr_t newPageDirectory;
	vm_translation_map_arch_info* toMap;
	if (toAddressSpace != NULL
		&& (toMap = static_cast<X86VMTranslationMap*>(
				toAddressSpace->TranslationMap())->ArchData()) != activeMap) {
		// update on which CPUs the address space is used
		int cpu = cpuData->cpu_num;
		atomic_and(&activeMap->active_on_cpus, ~((uint32)1 << cpu));
		atomic_or(&toMap->active_on_cpus, (uint32)1 << cpu);

		activeMap->RemoveReference();
			// this might cause the map to be deferred deleted - i.e. it
			// won't be deleted while it is still in use

		// assign the new map to the CPU
		toMap->AddReference();
		cpuData->arch.active_translation_map = toMap;

		// get the new page directory
		newPageDirectory = (addr_t)toMap->pgdir_phys;
	} else {
		newPageDirectory = 0;
			// this means no change
	}

	gX86SwapFPUFunc(from->arch_info.fpu_state, to->arch_info.fpu_state);
	i386_context_switch(&from->arch_info, &to->arch_info, newPageDirectory);
}


void
arch_thread_dump_info(void *info)
{
	struct arch_thread *at = (struct arch_thread *)info;

	kprintf("\tesp: %p\n", at->current_stack.esp);
	kprintf("\tss: %p\n", at->current_stack.ss);
	kprintf("\tfpu_state at %p\n", at->fpu_state);
}


/**	Sets up the initial thread context and enters user space
 */

status_t
arch_thread_enter_userspace(struct thread *t, addr_t entry, void *args1,
	void *args2)
{
	addr_t stackTop = t->user_stack_base + t->user_stack_size;
	uint32 codeSize = (addr_t)x86_end_userspace_thread_exit
		- (addr_t)x86_userspace_thread_exit;
	uint32 args[3];

	TRACE(("arch_thread_enter_userspace: entry 0x%lx, args %p %p, "
		"ustack_top 0x%lx\n", entry, args1, args2, stackTop));

	// copy the little stub that calls exit_thread() when the thread entry
	// function returns, as well as the arguments of the entry function
	stackTop -= codeSize;

	if (user_memcpy((void *)stackTop,
			(const void *)&x86_userspace_thread_exit, codeSize) < B_OK)
		return B_BAD_ADDRESS;

	args[0] = stackTop;
	args[1] = (uint32)args1;
	args[2] = (uint32)args2;
	stackTop -= sizeof(args);

	if (user_memcpy((void *)stackTop, args, sizeof(args)) < B_OK)
		return B_BAD_ADDRESS;

	thread_at_kernel_exit();
		// also disables interrupts

	// install user breakpoints, if any
	if ((t->flags & THREAD_FLAGS_BREAKPOINTS_DEFINED) != 0)
		x86_init_user_debug_at_kernel_exit(NULL);

	i386_set_tss_and_kstack(t->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS
	set_tls_context(t);

	x86_set_syscall_stack(t->kernel_stack_top);
	x86_enter_userspace(entry, stackTop);

	return B_OK;
		// never gets here
}


bool
arch_on_signal_stack(struct thread *thread)
{
	struct iframe *frame = get_current_iframe();

	return frame->user_esp >= thread->signal_stack_base
		&& frame->user_esp < thread->signal_stack_base
			+ thread->signal_stack_size;
}
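

// User stack layout built by arch_setup_signal_frame(), from higher to
// lower addresses:
//
//	thread flags, orig_eax, orig_edx, syscall restart parameters
//	struct vregs		<- the interrupted user state, restored on return
//	code snippet		<- copy of i386_return_from_signal
//	handler frame		<- return address (the snippet above), signal
//						   number, sa_userdata, vregs pointer, old signal
//						   mask, vregs pointer
//				<- new user_esp; eip is set to sa_handler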
status_t
arch_setup_signal_frame(struct thread *thread, struct sigaction *action,
	int signal, int signalMask)
{
	struct iframe *frame = get_current_iframe();
	if (!IFRAME_IS_USER(frame)) {
		panic("arch_setup_signal_frame(): No user iframe!");
		return B_BAD_VALUE;
	}

	uint32 *signalCode;
	uint32 *userRegs;
	struct vregs regs;
	uint32 buffer[6];
	status_t status;

	// start building the signal frame on the user stack
	uint32* userStack = get_signal_stack(thread, frame, signal);

	// copy syscall restart info onto the user stack
	userStack -= (sizeof(thread->syscall_restart.parameters) + 12 + 3) / 4;
	uint32 threadFlags = atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	if (user_memcpy(userStack, &threadFlags, 4) < B_OK
		|| user_memcpy(userStack + 1, &frame->orig_eax, 4) < B_OK
		|| user_memcpy(userStack + 2, &frame->orig_edx, 4) < B_OK)
		return B_BAD_ADDRESS;
	status = user_memcpy(userStack + 3, thread->syscall_restart.parameters,
		sizeof(thread->syscall_restart.parameters));
	if (status < B_OK)
		return status;

	// store the saved regs onto the user stack
	regs.eip = frame->eip;
	regs.eflags = frame->flags;
	regs.eax = frame->eax;
	regs.ecx = frame->ecx;
	regs.edx = frame->edx;
	regs.ebp = frame->ebp;
	regs.esp = frame->esp;
	regs._reserved_1 = frame->user_esp;
	regs._reserved_2[0] = frame->edi;
	regs._reserved_2[1] = frame->esi;
	regs._reserved_2[2] = frame->ebx;
	i386_fnsave((void *)(&regs.xregs));

	userStack -= (sizeof(struct vregs) + 3) / 4;
	userRegs = userStack;
	status = user_memcpy(userRegs, &regs, sizeof(regs));
	if (status < B_OK)
		return status;

	// now store a code snippet on the stack
	userStack -= ((uint32)i386_end_return_from_signal + 3
		- (uint32)i386_return_from_signal) / 4;
	signalCode = userStack;
	status = user_memcpy(signalCode, (const void *)&i386_return_from_signal,
		((uint32)i386_end_return_from_signal
			- (uint32)i386_return_from_signal));
	if (status < B_OK)
		return status;

	// now set up the final part
	buffer[0] = (uint32)signalCode;	// return address when sa_handler done
	buffer[1] = signal;				// arguments to sa_handler
	buffer[2] = (uint32)action->sa_userdata;
	buffer[3] = (uint32)userRegs;

	buffer[4] = signalMask;			// old signal mask to restore
	buffer[5] = (uint32)userRegs;	// int frame + extra regs to restore

	userStack -= sizeof(buffer) / 4;

	status = user_memcpy(userStack, buffer, sizeof(buffer));
	if (status < B_OK)
		return status;

	frame->user_esp = (uint32)userStack;
	frame->eip = (uint32)action->sa_handler;

	return B_OK;
}
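

// Counterpart to arch_setup_signal_frame(): this is reached once the signal
// handler returns through the i386_return_from_signal snippet that was
// copied onto the user stack. It tears down the frame described above and
// returns the interrupted syscall's return value packed as
// eax | (edx << 32), so that the syscall return path restores both
// registers.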
int64
arch_restore_signal_frame(void)
{
	struct thread *thread = thread_get_current_thread();
	struct iframe *frame = get_current_iframe();
	int32 signalMask;
	uint32 *userStack;
	struct vregs* regsPointer;
	struct vregs regs;

	TRACE(("### arch_restore_signal_frame: entry\n"));

	userStack = (uint32 *)frame->user_esp;
	if (user_memcpy(&signalMask, &userStack[0], 4) < B_OK
		|| user_memcpy(&regsPointer, &userStack[1], 4) < B_OK
		|| user_memcpy(&regs, regsPointer, sizeof(vregs)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	uint32* syscallRestartInfo
		= (uint32*)regsPointer + (sizeof(struct vregs) + 3) / 4;
	uint32 threadFlags;
	if (user_memcpy(&threadFlags, syscallRestartInfo, 4) < B_OK
		|| user_memcpy(&frame->orig_eax, syscallRestartInfo + 1, 4) < B_OK
		|| user_memcpy(&frame->orig_edx, syscallRestartInfo + 2, 4) < B_OK
		|| user_memcpy(thread->syscall_restart.parameters,
				syscallRestartInfo + 3,
				sizeof(thread->syscall_restart.parameters)) < B_OK) {
		return B_BAD_ADDRESS;
	}

	// set restart/64 bit return value flags from the previous syscall
	atomic_and(&thread->flags,
		~(THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));
	atomic_or(&thread->flags, threadFlags
		& (THREAD_FLAGS_RESTART_SYSCALL | THREAD_FLAGS_64_BIT_SYSCALL_RETURN));

	// TODO: Verify that just restoring the old signal mask is right! Bash,
	// for instance, changes the procmask in a signal handler. Those changes
	// are lost the way we do it.
	atomic_set(&thread->sig_block_mask, signalMask);
	update_current_thread_signals_flag();

	frame->eip = regs.eip;
	frame->flags = regs.eflags;
	frame->eax = regs.eax;
	frame->ecx = regs.ecx;
	frame->edx = regs.edx;
	frame->ebp = regs.ebp;
	frame->esp = regs.esp;
	frame->user_esp = regs._reserved_1;
	frame->edi = regs._reserved_2[0];
	frame->esi = regs._reserved_2[1];
	frame->ebx = regs._reserved_2[2];

	i386_frstor((void *)(&regs.xregs));

	TRACE(("### arch_restore_signal_frame: exit\n"));

	return (int64)frame->eax | ((int64)frame->edx << 32);
}


/**	Saves everything needed to restore the frame in the child fork in the
 *	arch_fork_arg structure to be passed to arch_restore_fork_frame().
 *	Also makes sure to return the right value.
 */

void
arch_store_fork_frame(struct arch_fork_arg *arg)
{
	struct iframe *frame = get_current_iframe();

	// we need to copy the thread's current iframe
	arg->iframe = *frame;

	// we also want fork() to return 0 for the child
	arg->iframe.eax = 0;
}


/**	Restores the frame from a forked team as specified by the provided
 *	arch_fork_arg structure.
 *	Needs to be called from within the child team, i.e. instead of
 *	arch_thread_enter_userspace() as thread "starter".
 *	This function does not return to the caller, but will enter userland
 *	in the child team at the same position where the parent team left off.
 */

void
arch_restore_fork_frame(struct arch_fork_arg *arg)
{
	struct thread *thread = thread_get_current_thread();

	disable_interrupts();

	i386_set_tss_and_kstack(thread->kernel_stack_top);

	// set the CPU dependent GDT entry for TLS (set the current %fs register)
	set_tls_context(thread);

	i386_restore_frame_from_syscall(arg->iframe);
}


void
arch_syscall_64_bit_return_value(void)
{
	struct thread* thread = thread_get_current_thread();
	atomic_or(&thread->flags, THREAD_FLAGS_64_BIT_SYSCALL_RETURN);
}