/*
 * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com.
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 */


#include <asm_defs.h>

#include <thread_types.h>

#include <arch/x86/descriptors.h>
#include <arch/x86/arch_altcodepatch.h>
#include <arch/x86/arch_cpu.h>
#include <arch/x86/arch_kernel.h>

#include "asm_offsets.h"
#include "syscall_numbers.h"
#include "syscall_table.h"


// Push the remainder of the interrupt frame onto the stack.
//
// On entry the upper part of the frame (ss, rsp, flags, cs, ip, error code,
// vector) must already have been pushed, either by the CPU and the ISR stubs
// or by hand in the SYSCALL entry path. This macro then pushes, in order:
//   orig_rax, rax, rbx, rcx, rdx, rdi, rsi, rbp, r8 .. r15,
//   one zero quadword, and the frame type passed as iframeType.
// The zero quadword is presumably the slot later written through
// IFRAME_fpu - confirm against asm_offsets.h.
// RESTORE_IFRAME() below must be kept in sync with this layout.
#define PUSH_IFRAME_BOTTOM(iframeType)	\
	push	%rax;	/* orig_rax */		\
	push	%rax;						\
	push	%rbx;						\
	push	%rcx;						\
	push	%rdx;						\
	push	%rdi;						\
	push	%rsi;						\
	push	%rbp;						\
	push	%r8;						\
	push	%r9;						\
	push	%r10;						\
	push	%r11;						\
	push	%r12;						\
	push	%r13;						\
	push	%r14;						\
	push	%r15;						\
	pushq	$0;							\
	push	$iframeType;


// Restore the interrupt frame.
//
// Expects RSP to point at the bottom of a frame built by
// PUSH_IFRAME_BOTTOM(). Skips the two lowest quadwords (frame type and the
// zero slot), pops R15 .. RAX in reverse push order and finally skips three
// more quadwords (orig_rax, vector, error code), leaving RSP at the saved
// instruction pointer.
#define RESTORE_IFRAME()				\
	addq	$16, %rsp;					\
	pop		%r15;						\
	pop		%r14;						\
	pop		%r13;						\
	pop		%r12;						\
	pop		%r11;						\
	pop		%r10;						\
	pop		%r9;						\
	pop		%r8;						\
	pop		%rbp;						\
	pop		%rsi;						\
	pop		%rdi;						\
	pop		%rdx;						\
	pop		%rcx;						\
	pop		%rbx;						\
	pop		%rax;						\
	addq	$24, %rsp;


// The macros below require R12 to contain the current thread pointer. R12 is
// callee-save so will be preserved through all function calls and only needs
// to be obtained once. R13 is used to store the system call start time, will
// also be preserved.

// Acquire the time lock of the thread in R12.
// Clobbers RDI and whatever acquire_spinlock() clobbers.
#define LOCK_THREAD_TIME()										\
	leaq	THREAD_time_lock(%r12), %rdi;						\
	call	acquire_spinlock;

// Release the time lock of the thread in R12.
// Clobbers RDI and whatever release_spinlock() clobbers.
#define UNLOCK_THREAD_TIME()									\
	leaq	THREAD_time_lock(%r12), %rdi;						\
	call	release_spinlock;

// Account the time since thread->last_time as user time and mark the thread
// in R12 as being in the kernel. The current system_time() is left in R13.
#define UPDATE_THREAD_USER_TIME()								\
	LOCK_THREAD_TIME()											\
																\
	call	system_time;										\
																\
	/* Preserve system_time for post syscall debug */			\
	movq	%rax, %r13;											\
																\
	/* thread->user_time += now - thread->last_time; */			\
	subq	THREAD_last_time(%r12), %rax;						\
	addq	%rax, THREAD_user_time(%r12);						\
																\
	/* thread->last_time = now; */								\
	movq	%r13, THREAD_last_time(%r12);						\
																\
	/* thread->in_kernel = true; */								\
	movb	$1, THREAD_in_kernel(%r12);							\
																\
	UNLOCK_THREAD_TIME()

// Account the time since thread->last_time as kernel time and mark the
// thread in R12 as no longer being in the kernel. Overwrites R13 with the
// current system_time().
#define UPDATE_THREAD_KERNEL_TIME()								\
	LOCK_THREAD_TIME()											\
																\
	call	system_time;										\
	movq	%rax, %r13;											\
																\
	/* thread->kernel_time += now - thread->last_time; */		\
	subq	THREAD_last_time(%r12), %rax;						\
	addq	%rax, THREAD_kernel_time(%r12);						\
																\
	/* thread->last_time = now; */								\
	movq	%r13, THREAD_last_time(%r12);						\
																\
	/* thread->in_kernel = false; */							\
	movb	$0, THREAD_in_kernel(%r12);							\
																\
	UNLOCK_THREAD_TIME()

// Call x86_exit_user_debug_at_kernel_entry() if the thread in R12 has
// breakpoints installed or is single stepping. Uses the numeric local
// label 1.
#define STOP_USER_DEBUGGING()									\
	testl	$(THREAD_FLAGS_BREAKPOINTS_INSTALLED				\
			| THREAD_FLAGS_SINGLE_STEP), THREAD_flags(%r12);	\
	jz		1f;													\
	call	x86_exit_user_debug_at_kernel_entry;				\
  1:

// Zero XMM0 - XMM15. Only the XMM registers are cleared here; no other
// FPU/SSE state is touched by this macro.
#define CLEAR_FPU_STATE()		\
	pxor	%xmm0, %xmm0;		\
	pxor	%xmm1, %xmm1;		\
	pxor	%xmm2, %xmm2;		\
	pxor	%xmm3, %xmm3;		\
	pxor	%xmm4, %xmm4;		\
	pxor	%xmm5, %xmm5;		\
	pxor	%xmm6, %xmm6;		\
	pxor	%xmm7, %xmm7;		\
	pxor	%xmm8, %xmm8;		\
	pxor	%xmm9, %xmm9;		\
	pxor	%xmm10, %xmm10;		\
	pxor	%xmm11, %xmm11;		\
	pxor	%xmm12, %xmm12;		\
	pxor	%xmm13, %xmm13;		\
	pxor	%xmm14, %xmm14;		\
	pxor	%xmm15, %xmm15

// The following code defines the interrupt service routines for all 256
// interrupts.
// It creates a block of handlers, each 16 bytes, that the IDT
// initialization code just loops through.

// Interrupt with no error code, pushes a 0 error code.
// ASM_CLAC is expected to supply its own statement separator.
#define DEFINE_ISR(nr)					\
	.align 16;							\
	ASM_CLAC							\
	push	$0;							\
	push	$nr;						\
	jmp		int_bottom;

// Interrupt with an error code.
#define DEFINE_ISR_E(nr)				\
	.align 16;							\
	ASM_CLAC							\
	push	$nr;						\
	jmp		int_bottom;

// Array of interrupt service routines.
.align 16
SYMBOL(isr_array):
	// Exceptions (0-19) and reserved interrupts (20-31).
	DEFINE_ISR(0)
	DEFINE_ISR(1)
	DEFINE_ISR(2)
	DEFINE_ISR(3)
	DEFINE_ISR(4)
	DEFINE_ISR(5)
	DEFINE_ISR(6)
	DEFINE_ISR(7)
	DEFINE_ISR_E(8)
	DEFINE_ISR(9)
	DEFINE_ISR_E(10)
	DEFINE_ISR_E(11)
	DEFINE_ISR_E(12)
	DEFINE_ISR_E(13)
	DEFINE_ISR_E(14)
	DEFINE_ISR(15)
	DEFINE_ISR(16)
	DEFINE_ISR_E(17)
	DEFINE_ISR(18)
	DEFINE_ISR(19)
	DEFINE_ISR(20)
	DEFINE_ISR(21)
	DEFINE_ISR(22)
	DEFINE_ISR(23)
	DEFINE_ISR(24)
	DEFINE_ISR(25)
	DEFINE_ISR(26)
	DEFINE_ISR(27)
	DEFINE_ISR(28)
	DEFINE_ISR(29)
	DEFINE_ISR(30)
	DEFINE_ISR(31)

	// User-defined ISRs (32-255) - none take an error code.
	.Lintr = 32
	.rept 224
		DEFINE_ISR(.Lintr)
		.Lintr = .Lintr+1
	.endr


// Common interrupt handling code.
//
// Entered from the ISR stubs with the vector at 0(%rsp), the error code at
// 8(%rsp) and the CPU-pushed ip/cs/flags/rsp/ss above that.
STATIC_FUNCTION(int_bottom):
	// Coming from user-mode requires special handling. 24(%rsp) is the saved
	// CS; its low two bits are non-zero when the interrupted code was not
	// running at privilege level 0.
	testl	$3, 24(%rsp)
	jnz		int_bottom_user

	// Push the rest of the interrupt frame to the stack.
	PUSH_IFRAME_BOTTOM(IFRAME_TYPE_OTHER)

	cld

	// Frame pointer is the iframe.
	movq	%rsp, %rbp

	// Set the RF (resume flag) in RFLAGS. This prevents an instruction
	// breakpoint on the instruction we're returning to to trigger a debug
	// exception.
	orq		$X86_EFLAGS_RESUME, IFRAME_flags(%rbp)

	// Save the FPU state in a 16-byte aligned, 512-byte area below the frame.
	subq	$512, %rsp
	andq	$~15, %rsp
	fxsaveq	(%rsp)

	// Call the interrupt handler.
	movq	%rbp, %rdi
	movq	IFRAME_vector(%rbp), %rax
	call	*gInterruptHandlerTable(, %rax, 8)

	// RSP points at the save area again once the handler has returned.
	fxrstorq	(%rsp)
	movq	%rbp, %rsp

	// Restore the saved registers.
	RESTORE_IFRAME()

	iretq
FUNCTION_END(int_bottom)


// Handler for an interrupt that occurred in user-mode.
STATIC_FUNCTION(int_bottom_user):
	// Load the kernel GS segment base.
	swapgs
	lfence

	// Push the rest of the interrupt frame to the stack.
	PUSH_IFRAME_BOTTOM(IFRAME_TYPE_OTHER)
	cld

	// Frame pointer is the iframe.
	movq	%rsp, %rbp

	// Save the FPU state below the frame and record where it lives in the
	// iframe.
	subq	$512, %rsp
	andq	$~15, %rsp
	fxsaveq	(%rsp)
	movq	%rsp, IFRAME_fpu(%rbp)

	// Set the RF (resume flag) in RFLAGS. This prevents an instruction
	// breakpoint on the instruction we're returning to to trigger a debug
	// exception.
	orq		$X86_EFLAGS_RESUME, IFRAME_flags(%rbp)

	// Get thread pointer.
	movq	%gs:0, %r12

	STOP_USER_DEBUGGING()
	UPDATE_THREAD_USER_TIME()

	// Call the interrupt handler.
	movq	%rbp, %rdi
	movq	IFRAME_vector(%rbp), %rax
	call	*gInterruptHandlerTable(, %rax, 8)

	// If there are no signals pending or we're not debugging, we can avoid
	// most of the work here, just need to update the kernel time.
	testl	$(THREAD_FLAGS_DEBUGGER_INSTALLED | THREAD_FLAGS_SIGNALS_PENDING \
			| THREAD_FLAGS_DEBUG_THREAD | THREAD_FLAGS_BREAKPOINTS_DEFINED \
			| THREAD_FLAGS_TRAP_FOR_CORE_DUMP) \
			, THREAD_flags(%r12)
	jnz		.Lkernel_exit_work

	cli

	UPDATE_THREAD_KERNEL_TIME()

	fxrstorq	(%rsp)
	movq	%rbp, %rsp

	// Restore the saved registers.
	RESTORE_IFRAME()

	// Restore the previous GS base and return.
	swapgs
	lfence
	iretq

.Lkernel_exit_work:
	// Slow path for return to userland.

	// Do we need to handle signals?
	testl	$(THREAD_FLAGS_SIGNALS_PENDING | THREAD_FLAGS_DEBUG_THREAD \
			| THREAD_FLAGS_TRAP_FOR_CORE_DUMP) \
			, THREAD_flags(%r12)
	jnz		.Lkernel_exit_handle_signals
	cli
	call	thread_at_kernel_exit_no_signals

.Lkernel_exit_work_done:
	// Install breakpoints, if defined.
	testl	$THREAD_FLAGS_BREAKPOINTS_DEFINED, THREAD_flags(%r12)
	jz		1f
	movq	%rbp, %rdi
	call	x86_init_user_debug_at_kernel_exit
1:
	fxrstorq	(%rsp)
	movq	%rbp, %rsp

	// Restore the saved registers.
	RESTORE_IFRAME()

	// Restore the previous GS base and return.
	swapgs
	lfence
	iretq

.Lkernel_exit_handle_signals:
	// thread_at_kernel_exit requires interrupts to be enabled, it will disable
	// them after.
	sti
	call	thread_at_kernel_exit
	jmp		.Lkernel_exit_work_done
FUNCTION_END(int_bottom_user)


// SYSCALL entry point.
//
// Register roles for the whole function:
//   RBP = iframe, R12 = current thread, R13 = system_time() at entry,
//   R14 = syscall number, RAX = syscall table entry (between the table
//   lookup and the indirect call).
FUNCTION(x86_64_syscall_entry):
	// Upon entry, RSP still points at the user stack. Load the kernel GS
	// segment base address, which points at the current thread's arch_thread
	// structure. This contains our kernel stack pointer and a temporary
	// scratch space to store the user stack pointer in before we can push it
	// to the stack.
	swapgs
	lfence
	movq	%rsp, %gs:ARCH_THREAD_user_rsp
	movq	%gs:ARCH_THREAD_syscall_rsp, %rsp

	// The following pushes de-align the stack by 8 bytes, so account for that
	// first.
	subq	$8, %rsp

	// Set up an iframe on the stack (R11 = saved RFLAGS, RCX = saved RIP).
	push	$USER_DATA_SELECTOR			// ss
	push	%gs:ARCH_THREAD_user_rsp	// rsp
	push	%r11						// flags
	push	$USER_CODE_SELECTOR			// cs
	push	%rcx						// ip
	push	$0							// error_code
	push	$99							// vector
	PUSH_IFRAME_BOTTOM(IFRAME_TYPE_SYSCALL)

	cld

	// Frame pointer is the iframe.
	movq	%rsp, %rbp

	// Preserve call number (R14 is callee-save), get thread pointer.
	movq	%rax, %r14
	movq	%gs:0, %r12

	STOP_USER_DEBUGGING()
	UPDATE_THREAD_USER_TIME()

	// No longer need interrupts disabled.
	sti

	// Check whether the syscall number is valid. For an invalid number the
	// call is skipped and IFRAME_ax is left as pushed at entry.
	cmpq	$SYSCALL_COUNT, %r14
	jae		.Lsyscall_return

	// Get the system call table entry. Note I'm hardcoding the shift because
	// sizeof(syscall_info) is 16 and scale factors of 16 aren't supported,
	// so can't just do leaq kSyscallInfos(, %rax, SYSCALL_INFO_sizeof).
	movq	%r14, %rax
	shlq	$4, %rax
	leaq	kSyscallInfos(, %rax, 1), %rax

	// Check the number of call arguments, greater than 6 (6 * 8 = 48) requires
	// a stack copy.
	movq	SYSCALL_INFO_parameter_size(%rax), %rcx
	cmpq	$48, %rcx
	ja		.Lsyscall_stack_args

.Lperform_syscall:
	testl	$THREAD_FLAGS_DEBUGGER_INSTALLED, THREAD_flags(%r12)
	jnz		.Lpre_syscall_debug

.Lpre_syscall_debug_done:
	// Restore the arguments from the iframe. STOP_USER_DEBUGGING() and
	// UPDATE_THREAD_USER_TIME() make several function calls, which means the
	// argument registers may have been overwritten. Note that argument 4 is
	// in R10 on the frame rather than RCX as RCX is used by SYSCALL.
	movq	IFRAME_di(%rbp), %rdi
	movq	IFRAME_si(%rbp), %rsi
	movq	IFRAME_dx(%rbp), %rdx
	movq	IFRAME_r10(%rbp), %rcx
	movq	IFRAME_r8(%rbp), %r8
	movq	IFRAME_r9(%rbp), %r9

	// TODO: pre-syscall tracing

	// Call the function and save its return value.
	call	*SYSCALL_INFO_function(%rax)
	movq	%rax, IFRAME_ax(%rbp)

	// TODO: post-syscall tracing

.Lsyscall_return:
	// Restore the original stack pointer and return.
	movq	%rbp, %rsp

	// Clear the restarted flag. The flags word is updated with a locked
	// compare-and-exchange loop so that concurrent changes to other bits are
	// not lost.
	testl	$THREAD_FLAGS_SYSCALL_RESTARTED, THREAD_flags(%r12)
	jz		2f
1:
	movl	THREAD_flags(%r12), %eax
	movl	%eax, %edx
	andl	$~THREAD_FLAGS_SYSCALL_RESTARTED, %edx
	lock
	cmpxchgl	%edx, THREAD_flags(%r12)
	jnz		1b
2:
	testl	$(THREAD_FLAGS_DEBUGGER_INSTALLED | THREAD_FLAGS_SIGNALS_PENDING \
			| THREAD_FLAGS_DEBUG_THREAD | THREAD_FLAGS_BREAKPOINTS_DEFINED \
			| THREAD_FLAGS_TRAP_FOR_CORE_DUMP | THREAD_FLAGS_RESTART_SYSCALL) \
			, THREAD_flags(%r12)
	jnz		.Lpost_syscall_work

	cli

	UPDATE_THREAD_KERNEL_TIME()

	// If we've just restored a signal frame, use the IRET path.
	cmpq	$SYSCALL_RESTORE_SIGNAL_FRAME, %r14
	je		.Lrestore_fpu

	CLEAR_FPU_STATE()

	// Restore the iframe and RCX/R11 for SYSRET. After RESTORE_IFRAME() RSP
	// points at the saved ip; the slots are consumed in the order they were
	// pushed at entry: ip, cs (skipped), flags, user rsp.
	RESTORE_IFRAME()
	pop		%rcx
	addq	$8, %rsp
	pop		%r11
	pop		%rsp

	// Restore previous GS base and return.
	swapgs
	lfence
	sysretq

.Lpre_syscall_debug:
	// user_debug_pre_syscall expects a pointer to a block of arguments, need
	// to push the register arguments onto the stack.
	push	IFRAME_r9(%rbp)
	push	IFRAME_r8(%rbp)
	push	IFRAME_r10(%rbp)
	push	IFRAME_dx(%rbp)
	push	IFRAME_si(%rbp)
	push	IFRAME_di(%rbp)
	movq	%r14, %rdi				// syscall number
	movq	%rsp, %rsi
	push	%rax					// keep the syscall table entry
	call	user_debug_pre_syscall
	pop		%rax
	addq	$48, %rsp
	jmp		.Lpre_syscall_debug_done

.Lpost_syscall_work:
	testl	$THREAD_FLAGS_DEBUGGER_INSTALLED, THREAD_flags(%r12)
	jz		1f

	// Post-syscall debugging. Same as above, need a block of arguments.
	push	IFRAME_r9(%rbp)
	push	IFRAME_r8(%rbp)
	push	IFRAME_r10(%rbp)
	push	IFRAME_dx(%rbp)
	push	IFRAME_si(%rbp)
	push	IFRAME_di(%rbp)
	movq	%r14, %rdi				// syscall number
	movq	%rsp, %rsi
	movq	IFRAME_ax(%rbp), %rdx	// return value
	movq	%r13, %rcx				// start time, preserved earlier
	call	user_debug_post_syscall
	addq	$48, %rsp
1:
	// Do we need to handle signals?
	testl	$(THREAD_FLAGS_SIGNALS_PENDING | THREAD_FLAGS_DEBUG_THREAD \
			| THREAD_FLAGS_TRAP_FOR_CORE_DUMP) \
			, THREAD_flags(%r12)
	jnz		.Lpost_syscall_handle_signals
	cli
	call	thread_at_kernel_exit_no_signals

.Lpost_syscall_work_done:
	// Handle syscall restarting. RSP equals RBP (the iframe) here: it was
	// reset at .Lsyscall_return and every push since has been undone.
	testl	$THREAD_FLAGS_RESTART_SYSCALL, THREAD_flags(%r12)
	jz		1f
	movq	%rsp, %rdi
	call	x86_restart_syscall
1:
	// Install breakpoints, if defined.
	testl	$THREAD_FLAGS_BREAKPOINTS_DEFINED, THREAD_flags(%r12)
	jz		1f
	movq	%rbp, %rdi
	call	x86_init_user_debug_at_kernel_exit
1:
	// On this return path it is possible that the frame has been modified,
	// for example to execute a signal handler. In this case it is safer to
	// return via IRET.
	CLEAR_FPU_STATE()
	jmp		.Liret

.Lrestore_fpu:
	// Reload the FPU state from the area referenced by the iframe.
	movq	IFRAME_fpu(%rbp), %rax
	fxrstorq	(%rax)
.Liret:
	// Restore the saved registers.
	RESTORE_IFRAME()

	// Restore the previous GS base and return.
	swapgs
	lfence
	iretq

.Lpost_syscall_handle_signals:
	call	thread_at_kernel_exit
	jmp		.Lpost_syscall_work_done

.Lsyscall_stack_args:
	// Some arguments are on the stack, work out what we need to copy. 6
	// arguments (48 bytes) are already in registers.
	// RAX = syscall table entry address, RCX = argument size.
	subq	$48, %rcx

	// Get the address to copy from: skip one quadword on the user stack, then
	// reject a start address at or above the end of the user address range.
	movq	IFRAME_user_sp(%rbp), %rsi
	addq	$8, %rsi
	movabs	$(USER_BASE + USER_SIZE), %rdx
	cmp		%rdx, %rsi
	jae		.Lbad_syscall_args

	// Make space on the stack.
	subq	%rcx, %rsp
	andq	$~15, %rsp
	movq	%rsp, %rdi

	// Set a fault handler.
	movq	$.Lbad_syscall_args, THREAD_fault_handler(%r12)

	ASM_STAC

	// Copy them by quadwords.
	shrq	$3, %rcx
	rep
	movsq
	ASM_CLAC
	movq	$0, THREAD_fault_handler(%r12)

	// Perform the call.
	jmp		.Lperform_syscall

.Lbad_syscall_args:
	// Reached either directly from the range check above or through the fault
	// handler while copying. In the latter case the fault was taken between
	// ASM_STAC and ASM_CLAC; whether we are resumed with AC already cleared
	// depends on fault handling code not visible in this file, so revoke
	// user access explicitly. This is a no-op on the direct path.
	ASM_CLAC
	movq	$0, THREAD_fault_handler(%r12)
	movq	%rbp, %rsp
	jmp		.Lsyscall_return
FUNCTION_END(x86_64_syscall_entry)


/*!	\fn void x86_return_to_userland(iframe* frame)
	\brief Returns to the userland environment given by \a frame.

	Before returning to userland all potentially necessary kernel exit work is
	done.

	\a frame must point to a location somewhere on the caller's stack (e.g. a
	local variable).
	The function must be called with interrupts disabled.

	\param frame The iframe defining the userland environment.
*/
FUNCTION(x86_return_to_userland):
	// The frame becomes both the frame pointer and the stack pointer.
	movq	%rdi, %rbp
	movq	%rbp, %rsp

	// Perform kernel exit work.
	movq	%gs:0, %r12
	testl	$(THREAD_FLAGS_DEBUGGER_INSTALLED | THREAD_FLAGS_SIGNALS_PENDING \
			| THREAD_FLAGS_DEBUG_THREAD | THREAD_FLAGS_BREAKPOINTS_DEFINED \
			| THREAD_FLAGS_TRAP_FOR_CORE_DUMP) \
			, THREAD_flags(%r12)
	jnz		.Luserland_return_work

	// update the thread's kernel time and return
	UPDATE_THREAD_KERNEL_TIME()

	// Restore the frame and return.
	RESTORE_IFRAME()
	swapgs
	lfence
	iretq
.Luserland_return_work:
	// Slow path for return to userland.

	// Do we need to handle signals?
	testl	$(THREAD_FLAGS_SIGNALS_PENDING | THREAD_FLAGS_DEBUG_THREAD \
			| THREAD_FLAGS_TRAP_FOR_CORE_DUMP) \
			, THREAD_flags(%r12)
	jnz		.Luserland_return_handle_signals
	cli
	call	thread_at_kernel_exit_no_signals

.Luserland_return_work_done:
	// Install breakpoints, if defined.
	testl	$THREAD_FLAGS_BREAKPOINTS_DEFINED, THREAD_flags(%r12)
	jz		1f
	movq	%rbp, %rdi
	call	x86_init_user_debug_at_kernel_exit
1:
	// Restore the saved registers.
	RESTORE_IFRAME()

	// Restore the previous GS base and return.
	swapgs
	lfence
	iretq
.Luserland_return_handle_signals:
	// thread_at_kernel_exit requires interrupts to be enabled, it will disable
	// them after.
	sti
	call	thread_at_kernel_exit
	jmp		.Luserland_return_work_done
FUNCTION_END(x86_return_to_userland)