1 /* 2 * Copyright 2008, Dustin Howett, dustin.howett@gmail.com. All rights reserved. 3 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include "smp.h" 12 13 #include <string.h> 14 15 #include <KernelExport.h> 16 17 #include <kernel.h> 18 #include <safemode.h> 19 #include <boot/stage2.h> 20 #include <boot/menu.h> 21 #include <arch/x86/arch_acpi.h> 22 #include <arch/x86/arch_apic.h> 23 #include <arch/x86/arch_system_info.h> 24 25 #include "mmu.h" 26 #include "acpi.h" 27 #include "hpet.h" 28 29 30 #define NO_SMP 0 31 32 #define TRACE_SMP 33 #ifdef TRACE_SMP 34 # define TRACE(x) dprintf x 35 #else 36 # define TRACE(x) ; 37 #endif 38 39 struct gdt_idt_descr { 40 uint16 a; 41 uint32 *b; 42 } _PACKED; 43 44 static struct scan_spots_struct smp_scan_spots[] = { 45 { 0x9fc00, 0xa0000, 0xa0000 - 0x9fc00 }, 46 { 0xf0000, 0x100000, 0x100000 - 0xf0000 }, 47 { 0, 0, 0 } 48 }; 49 50 extern "C" void execute_n_instructions(int count); 51 52 extern "C" void smp_trampoline(void); 53 extern "C" void smp_trampoline_end(void); 54 55 56 static int smp_get_current_cpu(void); 57 58 59 static uint32 60 apic_read(uint32 offset) 61 { 62 return *(uint32 *)((uint32)gKernelArgs.arch_args.apic + offset); 63 } 64 65 66 static void 67 apic_write(uint32 offset, uint32 data) 68 { 69 uint32 *addr = (uint32 *)((uint32)gKernelArgs.arch_args.apic + offset); 70 *addr = data; 71 } 72 73 74 static int 75 smp_get_current_cpu(void) 76 { 77 if (gKernelArgs.arch_args.apic == NULL) 78 return 0; 79 80 return gKernelArgs.arch_args.cpu_os_id[ 81 (apic_read(APIC_ID) & 0xffffffff) >> 24]; 82 } 83 84 85 static mp_floating_struct * 86 smp_mp_probe(uint32 base, uint32 limit) 87 { 88 TRACE(("smp_mp_probe: entry base 0x%lx, limit 0x%lx\n", base, limit)); 89 for (uint32 *pointer = (uint32 *)base; (uint32)pointer < limit; pointer++) { 90 if (*pointer == MP_FLOATING_SIGNATURE) { 91 TRACE(("smp_mp_probe: found floating pointer structure at %p\n", 92 pointer)); 93 return (mp_floating_struct *)pointer; 94 } 95 } 96 97 return NULL; 98 } 99 100 101 static status_t 102 smp_do_mp_config(mp_floating_struct *floatingStruct) 103 { 104 TRACE(("smp: intel mp version %s, %s", 105 (floatingStruct->spec_revision == 1) ? "1.1" : "1.4", 106 (floatingStruct->mp_feature_2 & 0x80) 107 ? "imcr and pic compatibility mode.\n" 108 : "virtual wire compatibility mode.\n")); 109 110 if (floatingStruct->config_table == NULL) { 111 #if 1 112 // TODO: need to implement 113 TRACE(("smp: standard configuration %d unimplemented\n", 114 floatingStruct->mp_feature_1)); 115 gKernelArgs.num_cpus = 1; 116 return B_OK; 117 #else 118 // this system conforms to one of the default configurations 119 TRACE(("smp: standard configuration %d\n", floatingStruct->mp_feature_1)); 120 gKernelArgs.num_cpus = 2; 121 gKernelArgs.cpu_apic_id[0] = 0; 122 gKernelArgs.cpu_apic_id[1] = 1; 123 apic_phys = (unsigned int *)0xfee00000; 124 ioapic_phys = (unsigned int *)0xfec00000; 125 dprintf("smp: WARNING: standard configuration code is untested"); 126 return B_OK; 127 #endif 128 } 129 130 // We are not running in standard configuration, so we have to look through 131 // all of the mp configuration table crap to figure out how many processors 132 // we have, where our apics are, etc. 133 134 mp_config_table *config = floatingStruct->config_table; 135 gKernelArgs.num_cpus = 0; 136 137 // print our new found configuration. 138 TRACE(("smp: oem id: %.8s product id: %.12s\n", config->oem, 139 config->product)); 140 TRACE(("smp: base table has %d entries, extended section %d bytes\n", 141 config->num_base_entries, config->ext_length)); 142 143 gKernelArgs.arch_args.apic_phys = (uint32)config->apic; 144 145 char *pointer = (char *)((uint32)config + sizeof(struct mp_config_table)); 146 for (int32 i = 0; i < config->num_base_entries; i++) { 147 switch (*pointer) { 148 case MP_BASE_PROCESSOR: 149 { 150 struct mp_base_processor *processor 151 = (struct mp_base_processor *)pointer; 152 pointer += sizeof(struct mp_base_processor); 153 154 if (gKernelArgs.num_cpus == MAX_BOOT_CPUS) { 155 TRACE(("smp: already reached maximum boot CPUs (%d)\n", 156 MAX_BOOT_CPUS)); 157 continue; 158 } 159 160 // skip if the processor is not enabled. 161 if (!(processor->cpu_flags & 0x1)) { 162 TRACE(("smp: skip apic id %d: disabled\n", 163 processor->apic_id)); 164 continue; 165 } 166 167 gKernelArgs.arch_args.cpu_apic_id[gKernelArgs.num_cpus] 168 = processor->apic_id; 169 gKernelArgs.arch_args.cpu_os_id[processor->apic_id] 170 = gKernelArgs.num_cpus; 171 gKernelArgs.arch_args.cpu_apic_version[gKernelArgs.num_cpus] 172 = processor->apic_version; 173 174 #ifdef TRACE_SMP 175 const char *cpuFamily[] = { "", "", "", "", "Intel 486", 176 "Intel Pentium", "Intel Pentium Pro", "Intel Pentium II" }; 177 #endif 178 TRACE(("smp: cpu#%ld: %s, apic id %d, version %d%s\n", 179 gKernelArgs.num_cpus, 180 cpuFamily[(processor->signature & 0xf00) >> 8], 181 processor->apic_id, processor->apic_version, 182 (processor->cpu_flags & 0x2) ? ", BSP" : "")); 183 184 gKernelArgs.num_cpus++; 185 break; 186 } 187 case MP_BASE_BUS: 188 { 189 struct mp_base_bus *bus = (struct mp_base_bus *)pointer; 190 pointer += sizeof(struct mp_base_bus); 191 192 TRACE(("smp: bus %d: %c%c%c%c%c%c\n", bus->bus_id, 193 bus->name[0], bus->name[1], bus->name[2], bus->name[3], 194 bus->name[4], bus->name[5])); 195 196 break; 197 } 198 case MP_BASE_IO_APIC: 199 { 200 struct mp_base_ioapic *io = (struct mp_base_ioapic *)pointer; 201 pointer += sizeof(struct mp_base_ioapic); 202 203 gKernelArgs.arch_args.ioapic_phys = (uint32)io->addr; 204 205 TRACE(("smp: found io apic with apic id %d, version %d\n", 206 io->ioapic_id, io->ioapic_version)); 207 208 break; 209 } 210 case MP_BASE_IO_INTR: 211 case MP_BASE_LOCAL_INTR: 212 { 213 struct mp_base_interrupt *interrupt 214 = (struct mp_base_interrupt *)pointer; 215 pointer += sizeof(struct mp_base_interrupt); 216 217 dprintf("smp: %s int: type %d, source bus %d, irq %3d, dest " 218 "apic %d, int %3d, polarity %d, trigger mode %d\n", 219 interrupt->type == MP_BASE_IO_INTR ? "I/O" : "local", 220 interrupt->interrupt_type, interrupt->source_bus_id, 221 interrupt->source_bus_irq, interrupt->dest_apic_id, 222 interrupt->dest_apic_int, interrupt->polarity, 223 interrupt->trigger_mode); 224 break; 225 } 226 } 227 } 228 229 dprintf("smp: apic @ %p, i/o apic @ %p, total %ld processors detected\n", 230 (void *)gKernelArgs.arch_args.apic_phys, 231 (void *)gKernelArgs.arch_args.ioapic_phys, 232 gKernelArgs.num_cpus); 233 234 return gKernelArgs.num_cpus > 0 ? B_OK : B_ERROR; 235 } 236 237 238 static status_t 239 smp_do_acpi_config(void) 240 { 241 TRACE(("smp: using ACPI to detect MP configuration\n")); 242 243 // reset CPU count 244 gKernelArgs.num_cpus = 0; 245 246 acpi_madt *madt = (acpi_madt *)acpi_find_table(ACPI_MADT_SIGNATURE); 247 248 if (madt == NULL) { 249 TRACE(("smp: Failed to find MADT!\n")); 250 return B_ERROR; 251 } 252 253 gKernelArgs.arch_args.apic_phys = madt->local_apic_address; 254 TRACE(("smp: local apic address is 0x%lx\n", madt->local_apic_address)); 255 256 acpi_apic *apic = (acpi_apic *)((uint8 *)madt + sizeof(acpi_madt)); 257 acpi_apic *end = (acpi_apic *)((uint8 *)madt + madt->header.length); 258 while (apic < end) { 259 switch (apic->type) { 260 case ACPI_MADT_LOCAL_APIC: 261 { 262 if (gKernelArgs.num_cpus == MAX_BOOT_CPUS) { 263 TRACE(("smp: already reached maximum boot CPUs (%d)\n", 264 MAX_BOOT_CPUS)); 265 break; 266 } 267 268 acpi_local_apic *localApic = (acpi_local_apic *)apic; 269 TRACE(("smp: found local APIC with id %u\n", 270 localApic->apic_id)); 271 if ((localApic->flags & ACPI_LOCAL_APIC_ENABLED) == 0) { 272 TRACE(("smp: APIC is disabled and will not be used\n")); 273 break; 274 } 275 276 gKernelArgs.arch_args.cpu_apic_id[gKernelArgs.num_cpus] 277 = localApic->apic_id; 278 gKernelArgs.arch_args.cpu_os_id[localApic->apic_id] 279 = gKernelArgs.num_cpus; 280 // TODO: how to find out? putting 0x10 in to indicate a local apic 281 gKernelArgs.arch_args.cpu_apic_version[gKernelArgs.num_cpus] 282 = 0x10; 283 gKernelArgs.num_cpus++; 284 break; 285 } 286 287 case ACPI_MADT_IO_APIC: { 288 acpi_io_apic *ioApic = (acpi_io_apic *)apic; 289 TRACE(("smp: found io APIC with id %u and address 0x%lx\n", 290 ioApic->io_apic_id, ioApic->io_apic_address)); 291 gKernelArgs.arch_args.ioapic_phys = ioApic->io_apic_address; 292 break; 293 } 294 } 295 296 apic = (acpi_apic *)((uint8 *)apic + apic->length); 297 } 298 299 return gKernelArgs.num_cpus > 0 ? B_OK : B_ERROR; 300 } 301 302 303 /*! Target function of the trampoline code. 304 The trampoline code should have the pgdir and a gdt set up for us, 305 along with us being on the final stack for this processor. We need 306 to set up the local APIC and load the global idt and gdt. When we're 307 done, we'll jump into the kernel with the cpu number as an argument. 308 */ 309 static int 310 smp_cpu_ready(void) 311 { 312 uint32 curr_cpu = smp_get_current_cpu(); 313 struct gdt_idt_descr idt_descr; 314 struct gdt_idt_descr gdt_descr; 315 316 //TRACE(("smp_cpu_ready: entry cpu %ld\n", curr_cpu)); 317 318 // Important. Make sure supervisor threads can fault on read only pages... 319 asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1)); 320 asm("cld"); 321 asm("fninit"); 322 323 // Set up the final idt 324 idt_descr.a = IDT_LIMIT - 1; 325 idt_descr.b = (uint32 *)gKernelArgs.arch_args.vir_idt; 326 327 asm("lidt %0;" 328 : : "m" (idt_descr)); 329 330 // Set up the final gdt 331 gdt_descr.a = GDT_LIMIT - 1; 332 gdt_descr.b = (uint32 *)gKernelArgs.arch_args.vir_gdt; 333 334 asm("lgdt %0;" 335 : : "m" (gdt_descr)); 336 337 asm("pushl %0; " // push the cpu number 338 "pushl %1; " // kernel args 339 "pushl $0x0;" // dummy retval for call to main 340 "pushl %2; " // this is the start address 341 "ret; " // jump. 342 : : "g" (curr_cpu), "g" (&gKernelArgs), 343 "g" (gKernelArgs.kernel_image.elf_header.e_entry)); 344 345 // no where to return to 346 return 0; 347 } 348 349 350 static void 351 calculate_apic_timer_conversion_factor(void) 352 { 353 int64 t1, t2; 354 uint32 config; 355 uint32 count; 356 357 // setup the timer 358 config = apic_read(APIC_LVT_TIMER); 359 config = (config & APIC_LVT_TIMER_MASK) + APIC_LVT_MASKED; 360 // timer masked, vector 0 361 apic_write(APIC_LVT_TIMER, config); 362 363 config = (apic_read(APIC_TIMER_DIVIDE_CONFIG) & ~0x0000000f); 364 apic_write(APIC_TIMER_DIVIDE_CONFIG, config | APIC_TIMER_DIVIDE_CONFIG_1); 365 // divide clock by one 366 367 t1 = system_time(); 368 apic_write(APIC_INITIAL_TIMER_COUNT, 0xffffffff); // start the counter 369 370 execute_n_instructions(128 * 20000); 371 372 count = apic_read(APIC_CURRENT_TIMER_COUNT); 373 t2 = system_time(); 374 375 count = 0xffffffff - count; 376 377 gKernelArgs.arch_args.apic_time_cv_factor 378 = (uint32)((1000000.0/(t2 - t1)) * count); 379 380 TRACE(("APIC ticks/sec = %ld\n", 381 gKernelArgs.arch_args.apic_time_cv_factor)); 382 } 383 384 385 // #pragma mark - 386 387 388 void 389 smp_init_other_cpus(void) 390 { 391 if (get_safemode_boolean(B_SAFEMODE_DISABLE_SMP, false)) { 392 // SMP has been disabled! 393 TRACE(("smp disabled per safemode setting\n")); 394 gKernelArgs.num_cpus = 1; 395 } 396 397 if (gKernelArgs.arch_args.apic_phys == 0) 398 return; 399 400 TRACE(("smp: found %ld cpu%s\n", gKernelArgs.num_cpus, 401 gKernelArgs.num_cpus != 1 ? "s" : "")); 402 TRACE(("smp: apic_phys = %p\n", (void *)gKernelArgs.arch_args.apic_phys)); 403 TRACE(("smp: ioapic_phys = %p\n", 404 (void *)gKernelArgs.arch_args.ioapic_phys)); 405 406 // map in the apic & ioapic (if available) 407 gKernelArgs.arch_args.apic = (uint32 *)mmu_map_physical_memory( 408 gKernelArgs.arch_args.apic_phys, B_PAGE_SIZE, kDefaultPageFlags); 409 if (gKernelArgs.arch_args.ioapic_phys != 0) { 410 gKernelArgs.arch_args.ioapic = (uint32 *)mmu_map_physical_memory( 411 gKernelArgs.arch_args.ioapic_phys, B_PAGE_SIZE, kDefaultPageFlags); 412 } 413 414 TRACE(("smp: apic = %p\n", gKernelArgs.arch_args.apic)); 415 TRACE(("smp: ioapic = %p\n", gKernelArgs.arch_args.ioapic)); 416 417 // calculate how fast the apic timer is 418 calculate_apic_timer_conversion_factor(); 419 420 if (gKernelArgs.num_cpus < 2) 421 return; 422 423 for (uint32 i = 1; i < gKernelArgs.num_cpus; i++) { 424 // create a final stack the trampoline code will put the ap processor on 425 gKernelArgs.cpu_kstack[i].start = (addr_t)mmu_allocate(NULL, 426 KERNEL_STACK_SIZE + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE); 427 gKernelArgs.cpu_kstack[i].size = KERNEL_STACK_SIZE 428 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE; 429 } 430 } 431 432 433 void 434 smp_boot_other_cpus(void) 435 { 436 if (gKernelArgs.num_cpus < 2) 437 return; 438 439 TRACE(("trampolining other cpus\n")); 440 441 // The first 8 MB are identity mapped, either 0x9e000-0x9ffff is reserved 442 // for this, or when PXE services are used 0x8b000-0x8cfff. 443 444 // allocate a stack and a code area for the smp trampoline 445 // (these have to be < 1M physical, 0xa0000-0xfffff is reserved by the BIOS, 446 // and when PXE services are used, the 0x8d000-0x9ffff is also reserved) 447 #ifdef _PXE_ENV 448 uint32 trampolineCode = 0x8b000; 449 uint32 trampolineStack = 0x8c000; 450 #else 451 uint32 trampolineCode = 0x9f000; 452 uint32 trampolineStack = 0x9e000; 453 #endif 454 455 // copy the trampoline code over 456 memcpy((char *)trampolineCode, (const void*)&smp_trampoline, 457 (uint32)&smp_trampoline_end - (uint32)&smp_trampoline); 458 459 // boot the cpus 460 for (uint32 i = 1; i < gKernelArgs.num_cpus; i++) { 461 uint32 *finalStack; 462 uint32 *tempStack; 463 uint32 config; 464 uint32 numStartups; 465 uint32 j; 466 467 // set this stack up 468 finalStack = (uint32 *)gKernelArgs.cpu_kstack[i].start; 469 memset((uint8*)finalStack + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE, 0, 470 KERNEL_STACK_SIZE); 471 tempStack = (finalStack 472 + (KERNEL_STACK_SIZE + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 473 / sizeof(uint32)) - 1; 474 *tempStack = (uint32)&smp_cpu_ready; 475 476 // set the trampoline stack up 477 tempStack = (uint32 *)(trampolineStack + B_PAGE_SIZE - 4); 478 // final location of the stack 479 *tempStack = ((uint32)finalStack) + KERNEL_STACK_SIZE 480 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE - sizeof(uint32); 481 tempStack--; 482 // page dir 483 *tempStack = gKernelArgs.arch_args.phys_pgdir; 484 485 // put a gdt descriptor at the bottom of the stack 486 *((uint16 *)trampolineStack) = 0x18 - 1; // LIMIT 487 *((uint32 *)(trampolineStack + 2)) = trampolineStack + 8; 488 489 // put the gdt at the bottom 490 memcpy(&((uint32 *)trampolineStack)[2], 491 (void *)gKernelArgs.arch_args.vir_gdt, 6 * 4); 492 493 /* clear apic errors */ 494 if (gKernelArgs.arch_args.cpu_apic_version[i] & 0xf0) { 495 apic_write(APIC_ERROR_STATUS, 0); 496 apic_read(APIC_ERROR_STATUS); 497 } 498 499 //dprintf("assert INIT\n"); 500 /* send (aka assert) INIT IPI */ 501 config = (apic_read(APIC_INTR_COMMAND_2) & APIC_INTR_COMMAND_2_MASK) 502 | (gKernelArgs.arch_args.cpu_apic_id[i] << 24); 503 apic_write(APIC_INTR_COMMAND_2, config); /* set target pe */ 504 config = (apic_read(APIC_INTR_COMMAND_1) & 0xfff00000) 505 | APIC_TRIGGER_MODE_LEVEL | APIC_INTR_COMMAND_1_ASSERT 506 | APIC_DELIVERY_MODE_INIT; 507 apic_write(APIC_INTR_COMMAND_1, config); 508 509 dprintf("wait for delivery\n"); 510 // wait for pending to end 511 while ((apic_read(APIC_INTR_COMMAND_1) & APIC_DELIVERY_STATUS) != 0) 512 asm volatile ("pause;"); 513 514 dprintf("deassert INIT\n"); 515 /* deassert INIT */ 516 config = (apic_read(APIC_INTR_COMMAND_2) & APIC_INTR_COMMAND_2_MASK) 517 | (gKernelArgs.arch_args.cpu_apic_id[i] << 24); 518 apic_write(APIC_INTR_COMMAND_2, config); 519 config = (apic_read(APIC_INTR_COMMAND_1) & 0xfff00000) 520 | APIC_TRIGGER_MODE_LEVEL | APIC_DELIVERY_MODE_INIT; 521 apic_write(APIC_INTR_COMMAND_1, config); 522 523 dprintf("wait for delivery\n"); 524 // wait for pending to end 525 while ((apic_read(APIC_INTR_COMMAND_1) & APIC_DELIVERY_STATUS) != 0) 526 asm volatile ("pause;"); 527 528 /* wait 10ms */ 529 spin(10000); 530 531 /* is this a local apic or an 82489dx ? */ 532 numStartups = (gKernelArgs.arch_args.cpu_apic_version[i] & 0xf0) 533 ? 2 : 0; 534 dprintf("num startups = %ld\n", numStartups); 535 for (j = 0; j < numStartups; j++) { 536 /* it's a local apic, so send STARTUP IPIs */ 537 dprintf("send STARTUP\n"); 538 apic_write(APIC_ERROR_STATUS, 0); 539 540 /* set target pe */ 541 config = (apic_read(APIC_INTR_COMMAND_2) & APIC_INTR_COMMAND_2_MASK) 542 | (gKernelArgs.arch_args.cpu_apic_id[i] << 24); 543 apic_write(APIC_INTR_COMMAND_2, config); 544 545 /* send the IPI */ 546 config = (apic_read(APIC_INTR_COMMAND_1) & 0xfff0f800) 547 | APIC_DELIVERY_MODE_STARTUP | (trampolineCode >> 12); 548 apic_write(APIC_INTR_COMMAND_1, config); 549 550 /* wait */ 551 spin(200); 552 553 dprintf("wait for delivery\n"); 554 while ((apic_read(APIC_INTR_COMMAND_1) & APIC_DELIVERY_STATUS) != 0) 555 asm volatile ("pause;"); 556 } 557 558 // Wait for the trampoline code to clear the final stack location. 559 // This serves as a notification for us that it has loaded the address 560 // and it is safe for us to overwrite it to trampoline the next CPU. 561 tempStack++; 562 while (*tempStack != 0) 563 spin(1000); 564 } 565 566 TRACE(("done trampolining\n")); 567 } 568 569 570 void 571 smp_add_safemode_menus(Menu *menu) 572 { 573 if (gKernelArgs.num_cpus < 2) 574 return; 575 576 MenuItem *item = new(nothrow) MenuItem("Disable SMP"); 577 menu->AddItem(item); 578 item->SetData(B_SAFEMODE_DISABLE_SMP); 579 item->SetType(MENU_ITEM_MARKABLE); 580 item->SetHelpText("Disables all but one CPU core."); 581 } 582 583 584 void 585 smp_init(void) 586 { 587 #if NO_SMP 588 gKernelArgs.num_cpus = 1; 589 return; 590 #endif 591 592 // first try to find ACPI tables to get MP configuration as it handles 593 // physical as well as logical MP configurations as in multiple cpus, 594 // multiple cores or hyper threading. 595 if (smp_do_acpi_config() == B_OK) 596 return; 597 598 // then try to find MPS tables and do configuration based on them 599 for (int32 i = 0; smp_scan_spots[i].length > 0; i++) { 600 mp_floating_struct *floatingStruct = smp_mp_probe( 601 smp_scan_spots[i].start, smp_scan_spots[i].stop); 602 if (floatingStruct != NULL && smp_do_mp_config(floatingStruct) == B_OK) 603 return; 604 } 605 606 // everything failed or we are not running an SMP system 607 gKernelArgs.num_cpus = 1; 608 } 609