1 /* 2 * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Copyright 2013, Paweł Dziepak, pdziepak@quarnos.org. 5 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk. 6 * Distributed under the terms of the MIT License. 7 * 8 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 9 * Distributed under the terms of the NewOS License. 10 */ 11 12 13 #include <cpu.h> 14 15 #include <string.h> 16 #include <stdlib.h> 17 #include <stdio.h> 18 19 #include <algorithm> 20 21 #include <ACPI.h> 22 23 #include <boot_device.h> 24 #include <commpage.h> 25 #include <debug.h> 26 #include <elf.h> 27 #include <safemode.h> 28 #include <smp.h> 29 #include <util/BitUtils.h> 30 #include <vm/vm.h> 31 #include <vm/vm_types.h> 32 #include <vm/VMAddressSpace.h> 33 34 #include <arch_system_info.h> 35 #include <arch/x86/apic.h> 36 #include <boot/kernel_args.h> 37 38 #include "paging/X86PagingStructures.h" 39 #include "paging/X86VMTranslationMap.h" 40 41 42 #define DUMP_FEATURE_STRING 1 43 #define DUMP_CPU_TOPOLOGY 1 44 #define DUMP_CPU_PATCHLEVEL_TYPE 1 45 46 47 /* cpu vendor info */ 48 struct cpu_vendor_info { 49 const char *vendor; 50 const char *ident_string[2]; 51 }; 52 53 static const struct cpu_vendor_info vendor_info[VENDOR_NUM] = { 54 { "Intel", { "GenuineIntel" } }, 55 { "AMD", { "AuthenticAMD" } }, 56 { "Cyrix", { "CyrixInstead" } }, 57 { "UMC", { "UMC UMC UMC" } }, 58 { "NexGen", { "NexGenDriven" } }, 59 { "Centaur", { "CentaurHauls" } }, 60 { "Rise", { "RiseRiseRise" } }, 61 { "Transmeta", { "GenuineTMx86", "TransmetaCPU" } }, 62 { "NSC", { "Geode by NSC" } }, 63 { "Hygon", { "HygonGenuine" } }, 64 }; 65 66 #define K8_SMIONCMPHALT (1ULL << 27) 67 #define K8_C1EONCMPHALT (1ULL << 28) 68 69 #define K8_CMPHALT (K8_SMIONCMPHALT | K8_C1EONCMPHALT) 70 71 struct set_mtrr_parameter { 72 int32 index; 73 uint64 base; 74 uint64 length; 75 uint8 type; 76 }; 77 78 struct set_mtrrs_parameter { 79 const x86_mtrr_info* infos; 80 uint32 count; 81 uint8 defaultType; 82 }; 83 84 85 #ifdef __x86_64__ 86 extern addr_t _stac; 87 extern addr_t _clac; 88 extern addr_t _xsave; 89 extern addr_t _xsavec; 90 extern addr_t _xrstor; 91 uint64 gXsaveMask; 92 uint64 gFPUSaveLength = 512; 93 bool gHasXsave = false; 94 bool gHasXsavec = false; 95 #endif 96 97 extern "C" void x86_reboot(void); 98 // from arch.S 99 100 void (*gCpuIdleFunc)(void); 101 #ifndef __x86_64__ 102 void (*gX86SwapFPUFunc)(void* oldState, const void* newState) = x86_noop_swap; 103 bool gHasSSE = false; 104 #endif 105 106 static uint32 sCpuRendezvous; 107 static uint32 sCpuRendezvous2; 108 static uint32 sCpuRendezvous3; 109 static vint32 sTSCSyncRendezvous; 110 111 /* Some specials for the double fault handler */ 112 static addr_t sDoubleFaultStacks = 0; 113 static const size_t kDoubleFaultStackSize = 4096; // size per CPU 114 115 static x86_cpu_module_info* sCpuModule; 116 117 118 /* CPU topology information */ 119 static uint32 (*sGetCPUTopologyID)(int currentCPU); 120 static uint32 sHierarchyMask[CPU_TOPOLOGY_LEVELS]; 121 static uint32 sHierarchyShift[CPU_TOPOLOGY_LEVELS]; 122 123 /* Cache topology information */ 124 static uint32 sCacheSharingMask[CPU_MAX_CACHE_LEVEL]; 125 126 static void* sUcodeData = NULL; 127 static size_t sUcodeDataSize = 0; 128 static void* sLoadedUcodeUpdate; 129 static spinlock sUcodeUpdateLock = B_SPINLOCK_INITIALIZER; 130 131 static bool sUsePAT = false; 132 133 134 static status_t 135 acpi_shutdown(bool rebootSystem) 136 { 137 if (debug_debugger_running() || !are_interrupts_enabled()) 138 return B_ERROR; 139 140 acpi_module_info* acpi; 141 if (get_module(B_ACPI_MODULE_NAME, (module_info**)&acpi) != B_OK) 142 return B_NOT_SUPPORTED; 143 144 status_t status; 145 if (rebootSystem) { 146 status = acpi->reboot(); 147 } else { 148 status = acpi->prepare_sleep_state(ACPI_POWER_STATE_OFF, NULL, 0); 149 if (status == B_OK) { 150 //cpu_status state = disable_interrupts(); 151 status = acpi->enter_sleep_state(ACPI_POWER_STATE_OFF); 152 //restore_interrupts(state); 153 } 154 } 155 156 put_module(B_ACPI_MODULE_NAME); 157 return status; 158 } 159 160 161 /*! Disable CPU caches, and invalidate them. */ 162 static void 163 disable_caches() 164 { 165 x86_write_cr0((x86_read_cr0() | CR0_CACHE_DISABLE) 166 & ~CR0_NOT_WRITE_THROUGH); 167 wbinvd(); 168 arch_cpu_global_TLB_invalidate(); 169 } 170 171 172 /*! Invalidate CPU caches, and enable them. */ 173 static void 174 enable_caches() 175 { 176 wbinvd(); 177 arch_cpu_global_TLB_invalidate(); 178 x86_write_cr0(x86_read_cr0() 179 & ~(CR0_CACHE_DISABLE | CR0_NOT_WRITE_THROUGH)); 180 } 181 182 183 static void 184 set_mtrr(void* _parameter, int cpu) 185 { 186 struct set_mtrr_parameter* parameter 187 = (struct set_mtrr_parameter*)_parameter; 188 189 // wait until all CPUs have arrived here 190 smp_cpu_rendezvous(&sCpuRendezvous); 191 192 // One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU 193 // that initiated the call_all_cpus() from doing that again and clearing 194 // sCpuRendezvous2 before the last CPU has actually left the loop in 195 // smp_cpu_rendezvous(); 196 if (cpu == 0) 197 atomic_set((int32*)&sCpuRendezvous3, 0); 198 199 disable_caches(); 200 201 sCpuModule->set_mtrr(parameter->index, parameter->base, parameter->length, 202 parameter->type); 203 204 enable_caches(); 205 206 // wait until all CPUs have arrived here 207 smp_cpu_rendezvous(&sCpuRendezvous2); 208 smp_cpu_rendezvous(&sCpuRendezvous3); 209 } 210 211 212 static void 213 set_mtrrs(void* _parameter, int cpu) 214 { 215 set_mtrrs_parameter* parameter = (set_mtrrs_parameter*)_parameter; 216 217 // wait until all CPUs have arrived here 218 smp_cpu_rendezvous(&sCpuRendezvous); 219 220 // One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU 221 // that initiated the call_all_cpus() from doing that again and clearing 222 // sCpuRendezvous2 before the last CPU has actually left the loop in 223 // smp_cpu_rendezvous(); 224 if (cpu == 0) 225 atomic_set((int32*)&sCpuRendezvous3, 0); 226 227 disable_caches(); 228 229 sCpuModule->set_mtrrs(parameter->defaultType, parameter->infos, 230 parameter->count); 231 232 enable_caches(); 233 234 // wait until all CPUs have arrived here 235 smp_cpu_rendezvous(&sCpuRendezvous2); 236 smp_cpu_rendezvous(&sCpuRendezvous3); 237 } 238 239 240 static void 241 init_mtrrs(void* _unused, int cpu) 242 { 243 // wait until all CPUs have arrived here 244 smp_cpu_rendezvous(&sCpuRendezvous); 245 246 // One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU 247 // that initiated the call_all_cpus() from doing that again and clearing 248 // sCpuRendezvous2 before the last CPU has actually left the loop in 249 // smp_cpu_rendezvous(); 250 if (cpu == 0) 251 atomic_set((int32*)&sCpuRendezvous3, 0); 252 253 disable_caches(); 254 255 sCpuModule->init_mtrrs(); 256 257 enable_caches(); 258 259 // wait until all CPUs have arrived here 260 smp_cpu_rendezvous(&sCpuRendezvous2); 261 smp_cpu_rendezvous(&sCpuRendezvous3); 262 } 263 264 265 uint32 266 x86_count_mtrrs(void) 267 { 268 if (sUsePAT) { 269 // When PAT is supported, we completely ignore MTRRs and leave them as 270 // initialized by firmware. This follows the suggestion in Intel SDM 271 // that these don't usually need to be touched by anything after system 272 // init. Using page attributes is the more flexible and modern approach 273 // to memory type handling and they can override MTRRs in the critical 274 // case of write-combining, usually used for framebuffers. 275 dprintf("ignoring MTRRs due to PAT support\n"); 276 return 0; 277 } 278 279 if (sCpuModule == NULL) 280 return 0; 281 282 return sCpuModule->count_mtrrs(); 283 } 284 285 286 void 287 x86_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type) 288 { 289 struct set_mtrr_parameter parameter; 290 parameter.index = index; 291 parameter.base = base; 292 parameter.length = length; 293 parameter.type = type; 294 295 sCpuRendezvous = sCpuRendezvous2 = 0; 296 call_all_cpus(&set_mtrr, ¶meter); 297 } 298 299 300 status_t 301 x86_get_mtrr(uint32 index, uint64* _base, uint64* _length, uint8* _type) 302 { 303 // the MTRRs are identical on all CPUs, so it doesn't matter 304 // on which CPU this runs 305 return sCpuModule->get_mtrr(index, _base, _length, _type); 306 } 307 308 309 void 310 x86_set_mtrrs(uint8 defaultType, const x86_mtrr_info* infos, uint32 count) 311 { 312 if (sCpuModule == NULL) 313 return; 314 315 struct set_mtrrs_parameter parameter; 316 parameter.defaultType = defaultType; 317 parameter.infos = infos; 318 parameter.count = count; 319 320 sCpuRendezvous = sCpuRendezvous2 = 0; 321 call_all_cpus(&set_mtrrs, ¶meter); 322 } 323 324 325 static void 326 init_pat(int cpu) 327 { 328 disable_caches(); 329 330 uint64 value = x86_read_msr(IA32_MSR_PAT); 331 dprintf("PAT MSR on CPU %d before init: %#" B_PRIx64 "\n", cpu, value); 332 333 // Use PAT entry 4 for write-combining, leave the rest as is 334 value &= ~(IA32_MSR_PAT_ENTRY_MASK << IA32_MSR_PAT_ENTRY_SHIFT(4)); 335 value |= IA32_MSR_PAT_TYPE_WRITE_COMBINING << IA32_MSR_PAT_ENTRY_SHIFT(4); 336 337 dprintf("PAT MSR on CPU %d after init: %#" B_PRIx64 "\n", cpu, value); 338 x86_write_msr(IA32_MSR_PAT, value); 339 340 enable_caches(); 341 } 342 343 344 void 345 x86_init_fpu(void) 346 { 347 // All x86_64 CPUs support SSE, don't need to bother checking for it. 348 #ifndef __x86_64__ 349 if (!x86_check_feature(IA32_FEATURE_FPU, FEATURE_COMMON)) { 350 // No FPU... time to install one in your 386? 351 dprintf("%s: Warning: CPU has no reported FPU.\n", __func__); 352 gX86SwapFPUFunc = x86_noop_swap; 353 return; 354 } 355 356 if (!x86_check_feature(IA32_FEATURE_SSE, FEATURE_COMMON) 357 || !x86_check_feature(IA32_FEATURE_FXSR, FEATURE_COMMON)) { 358 dprintf("%s: CPU has no SSE... just enabling FPU.\n", __func__); 359 // we don't have proper SSE support, just enable FPU 360 x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU)); 361 gX86SwapFPUFunc = x86_fnsave_swap; 362 return; 363 } 364 #endif 365 366 dprintf("%s: CPU has SSE... enabling FXSR and XMM.\n", __func__); 367 #ifndef __x86_64__ 368 // enable OS support for SSE 369 x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION); 370 x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU)); 371 372 gX86SwapFPUFunc = x86_fxsave_swap; 373 gHasSSE = true; 374 #endif 375 } 376 377 378 #if DUMP_FEATURE_STRING 379 static void 380 dump_feature_string(int currentCPU, cpu_ent* cpu) 381 { 382 char features[768]; 383 features[0] = 0; 384 385 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FPU) 386 strlcat(features, "fpu ", sizeof(features)); 387 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_VME) 388 strlcat(features, "vme ", sizeof(features)); 389 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DE) 390 strlcat(features, "de ", sizeof(features)); 391 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE) 392 strlcat(features, "pse ", sizeof(features)); 393 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TSC) 394 strlcat(features, "tsc ", sizeof(features)); 395 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MSR) 396 strlcat(features, "msr ", sizeof(features)); 397 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAE) 398 strlcat(features, "pae ", sizeof(features)); 399 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCE) 400 strlcat(features, "mce ", sizeof(features)); 401 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CX8) 402 strlcat(features, "cx8 ", sizeof(features)); 403 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_APIC) 404 strlcat(features, "apic ", sizeof(features)); 405 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SEP) 406 strlcat(features, "sep ", sizeof(features)); 407 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MTRR) 408 strlcat(features, "mtrr ", sizeof(features)); 409 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PGE) 410 strlcat(features, "pge ", sizeof(features)); 411 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCA) 412 strlcat(features, "mca ", sizeof(features)); 413 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CMOV) 414 strlcat(features, "cmov ", sizeof(features)); 415 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAT) 416 strlcat(features, "pat ", sizeof(features)); 417 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE36) 418 strlcat(features, "pse36 ", sizeof(features)); 419 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSN) 420 strlcat(features, "psn ", sizeof(features)); 421 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CLFSH) 422 strlcat(features, "clfsh ", sizeof(features)); 423 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DS) 424 strlcat(features, "ds ", sizeof(features)); 425 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_ACPI) 426 strlcat(features, "acpi ", sizeof(features)); 427 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MMX) 428 strlcat(features, "mmx ", sizeof(features)); 429 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FXSR) 430 strlcat(features, "fxsr ", sizeof(features)); 431 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE) 432 strlcat(features, "sse ", sizeof(features)); 433 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE2) 434 strlcat(features, "sse2 ", sizeof(features)); 435 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SS) 436 strlcat(features, "ss ", sizeof(features)); 437 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_HTT) 438 strlcat(features, "htt ", sizeof(features)); 439 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TM) 440 strlcat(features, "tm ", sizeof(features)); 441 if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PBE) 442 strlcat(features, "pbe ", sizeof(features)); 443 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE3) 444 strlcat(features, "sse3 ", sizeof(features)); 445 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCLMULQDQ) 446 strlcat(features, "pclmulqdq ", sizeof(features)); 447 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DTES64) 448 strlcat(features, "dtes64 ", sizeof(features)); 449 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MONITOR) 450 strlcat(features, "monitor ", sizeof(features)); 451 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DSCPL) 452 strlcat(features, "dscpl ", sizeof(features)); 453 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_VMX) 454 strlcat(features, "vmx ", sizeof(features)); 455 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SMX) 456 strlcat(features, "smx ", sizeof(features)); 457 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_EST) 458 strlcat(features, "est ", sizeof(features)); 459 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TM2) 460 strlcat(features, "tm2 ", sizeof(features)); 461 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSSE3) 462 strlcat(features, "ssse3 ", sizeof(features)); 463 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CNXTID) 464 strlcat(features, "cnxtid ", sizeof(features)); 465 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_FMA) 466 strlcat(features, "fma ", sizeof(features)); 467 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CX16) 468 strlcat(features, "cx16 ", sizeof(features)); 469 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XTPR) 470 strlcat(features, "xtpr ", sizeof(features)); 471 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PDCM) 472 strlcat(features, "pdcm ", sizeof(features)); 473 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCID) 474 strlcat(features, "pcid ", sizeof(features)); 475 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DCA) 476 strlcat(features, "dca ", sizeof(features)); 477 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_1) 478 strlcat(features, "sse4_1 ", sizeof(features)); 479 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_2) 480 strlcat(features, "sse4_2 ", sizeof(features)); 481 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_X2APIC) 482 strlcat(features, "x2apic ", sizeof(features)); 483 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MOVBE) 484 strlcat(features, "movbe ", sizeof(features)); 485 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_POPCNT) 486 strlcat(features, "popcnt ", sizeof(features)); 487 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TSCDEADLINE) 488 strlcat(features, "tscdeadline ", sizeof(features)); 489 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AES) 490 strlcat(features, "aes ", sizeof(features)); 491 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XSAVE) 492 strlcat(features, "xsave ", sizeof(features)); 493 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_OSXSAVE) 494 strlcat(features, "osxsave ", sizeof(features)); 495 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AVX) 496 strlcat(features, "avx ", sizeof(features)); 497 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_F16C) 498 strlcat(features, "f16c ", sizeof(features)); 499 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_RDRND) 500 strlcat(features, "rdrnd ", sizeof(features)); 501 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR) 502 strlcat(features, "hypervisor ", sizeof(features)); 503 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_SYSCALL) 504 strlcat(features, "syscall ", sizeof(features)); 505 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_NX) 506 strlcat(features, "nx ", sizeof(features)); 507 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_MMXEXT) 508 strlcat(features, "mmxext ", sizeof(features)); 509 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_FFXSR) 510 strlcat(features, "ffxsr ", sizeof(features)); 511 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_PDPE1GB) 512 strlcat(features, "pdpe1gb ", sizeof(features)); 513 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_LONG) 514 strlcat(features, "long ", sizeof(features)); 515 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOWEXT) 516 strlcat(features, "3dnowext ", sizeof(features)); 517 if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOW) 518 strlcat(features, "3dnow ", sizeof(features)); 519 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_DTS) 520 strlcat(features, "dts ", sizeof(features)); 521 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ITB) 522 strlcat(features, "itb ", sizeof(features)); 523 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ARAT) 524 strlcat(features, "arat ", sizeof(features)); 525 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PLN) 526 strlcat(features, "pln ", sizeof(features)); 527 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ECMD) 528 strlcat(features, "ecmd ", sizeof(features)); 529 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PTM) 530 strlcat(features, "ptm ", sizeof(features)); 531 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP) 532 strlcat(features, "hwp ", sizeof(features)); 533 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_NOTIFY) 534 strlcat(features, "hwp_notify ", sizeof(features)); 535 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_ACTWIN) 536 strlcat(features, "hwp_actwin ", sizeof(features)); 537 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_EPP) 538 strlcat(features, "hwp_epp ", sizeof(features)); 539 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_PLR) 540 strlcat(features, "hwp_plr ", sizeof(features)); 541 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HDC) 542 strlcat(features, "hdc ", sizeof(features)); 543 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_TBMT3) 544 strlcat(features, "tbmt3 ", sizeof(features)); 545 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_CAP) 546 strlcat(features, "hwp_cap ", sizeof(features)); 547 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_PECI) 548 strlcat(features, "hwp_peci ", sizeof(features)); 549 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_FLEX) 550 strlcat(features, "hwp_flex ", sizeof(features)); 551 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_FAST) 552 strlcat(features, "hwp_fast ", sizeof(features)); 553 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HW_FEEDBACK) 554 strlcat(features, "hw_feedback ", sizeof(features)); 555 if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_IGNIDL) 556 strlcat(features, "hwp_ignidl ", sizeof(features)); 557 if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_APERFMPERF) 558 strlcat(features, "aperfmperf ", sizeof(features)); 559 if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_EPB) 560 strlcat(features, "epb ", sizeof(features)); 561 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_TSC_ADJUST) 562 strlcat(features, "tsc_adjust ", sizeof(features)); 563 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SGX) 564 strlcat(features, "sgx ", sizeof(features)); 565 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_BMI1) 566 strlcat(features, "bmi1 ", sizeof(features)); 567 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_HLE) 568 strlcat(features, "hle ", sizeof(features)); 569 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX2) 570 strlcat(features, "avx2 ", sizeof(features)); 571 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SMEP) 572 strlcat(features, "smep ", sizeof(features)); 573 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_BMI2) 574 strlcat(features, "bmi2 ", sizeof(features)); 575 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_ERMS) 576 strlcat(features, "erms ", sizeof(features)); 577 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_INVPCID) 578 strlcat(features, "invpcid ", sizeof(features)); 579 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_RTM) 580 strlcat(features, "rtm ", sizeof(features)); 581 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_CQM) 582 strlcat(features, "cqm ", sizeof(features)); 583 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_MPX) 584 strlcat(features, "mpx ", sizeof(features)); 585 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_RDT_A) 586 strlcat(features, "rdt_a ", sizeof(features)); 587 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512F) 588 strlcat(features, "avx512f ", sizeof(features)); 589 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512DQ) 590 strlcat(features, "avx512dq ", sizeof(features)); 591 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_RDSEED) 592 strlcat(features, "rdseed ", sizeof(features)); 593 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_ADX) 594 strlcat(features, "adx ", sizeof(features)); 595 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SMAP) 596 strlcat(features, "smap ", sizeof(features)); 597 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512IFMA) 598 strlcat(features, "avx512ifma ", sizeof(features)); 599 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_PCOMMIT) 600 strlcat(features, "pcommit ", sizeof(features)); 601 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_CLFLUSHOPT) 602 strlcat(features, "cflushopt ", sizeof(features)); 603 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_CLWB) 604 strlcat(features, "clwb ", sizeof(features)); 605 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_INTEL_PT) 606 strlcat(features, "intel_pt ", sizeof(features)); 607 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512PF) 608 strlcat(features, "avx512pf ", sizeof(features)); 609 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512ER) 610 strlcat(features, "avx512er ", sizeof(features)); 611 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512CD) 612 strlcat(features, "avx512cd ", sizeof(features)); 613 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SHA_NI) 614 strlcat(features, "sha_ni ", sizeof(features)); 615 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512BW) 616 strlcat(features, "avx512bw ", sizeof(features)); 617 if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512VI) 618 strlcat(features, "avx512vi ", sizeof(features)); 619 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512VMBI) 620 strlcat(features, "avx512vmbi ", sizeof(features)); 621 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_UMIP) 622 strlcat(features, "umip ", sizeof(features)); 623 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_PKU) 624 strlcat(features, "pku ", sizeof(features)); 625 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_OSPKE) 626 strlcat(features, "ospke ", sizeof(features)); 627 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512VMBI2) 628 strlcat(features, "avx512vmbi2 ", sizeof(features)); 629 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_GFNI) 630 strlcat(features, "gfni ", sizeof(features)); 631 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_VAES) 632 strlcat(features, "vaes ", sizeof(features)); 633 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_VPCLMULQDQ) 634 strlcat(features, "vpclmulqdq ", sizeof(features)); 635 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512_VNNI) 636 strlcat(features, "avx512vnni ", sizeof(features)); 637 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512_BITALG) 638 strlcat(features, "avx512bitalg ", sizeof(features)); 639 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512_VPOPCNTDQ) 640 strlcat(features, "avx512vpopcntdq ", sizeof(features)); 641 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_LA57) 642 strlcat(features, "la57 ", sizeof(features)); 643 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_RDPID) 644 strlcat(features, "rdpid ", sizeof(features)); 645 if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_SGX_LC) 646 strlcat(features, "sgx_lc ", sizeof(features)); 647 if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_HYBRID_CPU) 648 strlcat(features, "hybrid ", sizeof(features)); 649 if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_IBRS) 650 strlcat(features, "ibrs ", sizeof(features)); 651 if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_STIBP) 652 strlcat(features, "stibp ", sizeof(features)); 653 if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_L1D_FLUSH) 654 strlcat(features, "l1d_flush ", sizeof(features)); 655 if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_ARCH_CAPABILITIES) 656 strlcat(features, "msr_arch ", sizeof(features)); 657 if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_SSBD) 658 strlcat(features, "ssbd ", sizeof(features)); 659 if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_AMD_HW_PSTATE) 660 strlcat(features, "hwpstate ", sizeof(features)); 661 if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_INVARIANT_TSC) 662 strlcat(features, "constant_tsc ", sizeof(features)); 663 if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_CPB) 664 strlcat(features, "cpb ", sizeof(features)); 665 if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_PROC_FEEDBACK) 666 strlcat(features, "proc_feedback ", sizeof(features)); 667 if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XSAVEOPT) 668 strlcat(features, "xsaveopt ", sizeof(features)); 669 if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XSAVEC) 670 strlcat(features, "xsavec ", sizeof(features)); 671 if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XGETBV1) 672 strlcat(features, "xgetbv1 ", sizeof(features)); 673 if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XSAVES) 674 strlcat(features, "xsaves ", sizeof(features)); 675 if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_CLZERO) 676 strlcat(features, "clzero ", sizeof(features)); 677 if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_IBPB) 678 strlcat(features, "ibpb ", sizeof(features)); 679 if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_AMD_SSBD) 680 strlcat(features, "amd_ssbd ", sizeof(features)); 681 if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_VIRT_SSBD) 682 strlcat(features, "virt_ssbd ", sizeof(features)); 683 if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_AMD_SSB_NO) 684 strlcat(features, "amd_ssb_no ", sizeof(features)); 685 if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_CPPC) 686 strlcat(features, "cppc ", sizeof(features)); 687 dprintf("CPU %d: features: %s\n", currentCPU, features); 688 } 689 #endif // DUMP_FEATURE_STRING 690 691 692 static void 693 compute_cpu_hierarchy_masks(int maxLogicalID, int maxCoreID) 694 { 695 ASSERT(maxLogicalID >= maxCoreID); 696 const int kMaxSMTID = maxLogicalID / maxCoreID; 697 698 sHierarchyMask[CPU_TOPOLOGY_SMT] = kMaxSMTID - 1; 699 sHierarchyShift[CPU_TOPOLOGY_SMT] = 0; 700 701 sHierarchyMask[CPU_TOPOLOGY_CORE] = (maxCoreID - 1) * kMaxSMTID; 702 sHierarchyShift[CPU_TOPOLOGY_CORE] 703 = count_set_bits(sHierarchyMask[CPU_TOPOLOGY_SMT]); 704 705 const uint32 kSinglePackageMask = sHierarchyMask[CPU_TOPOLOGY_SMT] 706 | sHierarchyMask[CPU_TOPOLOGY_CORE]; 707 sHierarchyMask[CPU_TOPOLOGY_PACKAGE] = ~kSinglePackageMask; 708 sHierarchyShift[CPU_TOPOLOGY_PACKAGE] = count_set_bits(kSinglePackageMask); 709 } 710 711 712 static uint32 713 get_cpu_legacy_initial_apic_id(int /* currentCPU */) 714 { 715 cpuid_info cpuid; 716 get_current_cpuid(&cpuid, 1, 0); 717 return cpuid.regs.ebx >> 24; 718 } 719 720 721 static inline status_t 722 detect_amd_cpu_topology(uint32 maxBasicLeaf, uint32 maxExtendedLeaf) 723 { 724 sGetCPUTopologyID = get_cpu_legacy_initial_apic_id; 725 726 cpuid_info cpuid; 727 get_current_cpuid(&cpuid, 1, 0); 728 int maxLogicalID = next_power_of_2((cpuid.regs.ebx >> 16) & 0xff); 729 730 int maxCoreID = 1; 731 if (maxExtendedLeaf >= 0x80000008) { 732 get_current_cpuid(&cpuid, 0x80000008, 0); 733 maxCoreID = (cpuid.regs.ecx >> 12) & 0xf; 734 if (maxCoreID != 0) 735 maxCoreID = 1 << maxCoreID; 736 else 737 maxCoreID = next_power_of_2((cpuid.regs.edx & 0xf) + 1); 738 } 739 740 if (maxExtendedLeaf >= 0x80000001) { 741 get_current_cpuid(&cpuid, 0x80000001, 0); 742 if (x86_check_feature(IA32_FEATURE_AMD_EXT_CMPLEGACY, 743 FEATURE_EXT_AMD_ECX)) 744 maxCoreID = maxLogicalID; 745 } 746 747 compute_cpu_hierarchy_masks(maxLogicalID, maxCoreID); 748 749 return B_OK; 750 } 751 752 753 static void 754 detect_amd_cache_topology(uint32 maxExtendedLeaf) 755 { 756 if (!x86_check_feature(IA32_FEATURE_AMD_EXT_TOPOLOGY, FEATURE_EXT_AMD_ECX)) 757 return; 758 759 if (maxExtendedLeaf < 0x8000001d) 760 return; 761 762 uint8 hierarchyLevels[CPU_MAX_CACHE_LEVEL]; 763 int maxCacheLevel = 0; 764 765 int currentLevel = 0; 766 int cacheType; 767 do { 768 cpuid_info cpuid; 769 get_current_cpuid(&cpuid, 0x8000001d, currentLevel); 770 771 cacheType = cpuid.regs.eax & 0x1f; 772 if (cacheType == 0) 773 break; 774 775 int cacheLevel = (cpuid.regs.eax >> 5) & 0x7; 776 int coresCount = next_power_of_2(((cpuid.regs.eax >> 14) & 0x3f) + 1); 777 hierarchyLevels[cacheLevel - 1] 778 = coresCount * (sHierarchyMask[CPU_TOPOLOGY_SMT] + 1); 779 maxCacheLevel = std::max(maxCacheLevel, cacheLevel); 780 781 currentLevel++; 782 } while (true); 783 784 for (int i = 0; i < maxCacheLevel; i++) 785 sCacheSharingMask[i] = ~uint32(hierarchyLevels[i] - 1); 786 gCPUCacheLevelCount = maxCacheLevel; 787 } 788 789 790 static uint32 791 get_intel_cpu_initial_x2apic_id(int /* currentCPU */) 792 { 793 cpuid_info cpuid; 794 get_current_cpuid(&cpuid, 11, 0); 795 return cpuid.regs.edx; 796 } 797 798 799 static inline status_t 800 detect_intel_cpu_topology_x2apic(uint32 maxBasicLeaf) 801 { 802 803 uint32 leaf = 0; 804 cpuid_info cpuid; 805 if (maxBasicLeaf >= 0x1f) { 806 get_current_cpuid(&cpuid, 0x1f, 0); 807 if (cpuid.regs.ebx != 0) 808 leaf = 0x1f; 809 } 810 if (maxBasicLeaf >= 0xb && leaf == 0) { 811 get_current_cpuid(&cpuid, 0xb, 0); 812 if (cpuid.regs.ebx != 0) 813 leaf = 0xb; 814 } 815 if (leaf == 0) 816 return B_UNSUPPORTED; 817 818 uint8 hierarchyLevels[CPU_TOPOLOGY_LEVELS] = { 0 }; 819 820 int currentLevel = 0; 821 unsigned int levelsSet = 0; 822 do { 823 cpuid_info cpuid; 824 get_current_cpuid(&cpuid, leaf, currentLevel++); 825 int levelType = (cpuid.regs.ecx >> 8) & 0xff; 826 int levelValue = cpuid.regs.eax & 0x1f; 827 828 if (levelType == 0) 829 break; 830 831 switch (levelType) { 832 case 1: // SMT 833 hierarchyLevels[CPU_TOPOLOGY_SMT] = levelValue; 834 levelsSet |= 1; 835 break; 836 case 2: // core 837 hierarchyLevels[CPU_TOPOLOGY_CORE] = levelValue; 838 levelsSet |= 2; 839 break; 840 } 841 842 } while (levelsSet != 3); 843 844 sGetCPUTopologyID = get_intel_cpu_initial_x2apic_id; 845 846 for (int i = 1; i < CPU_TOPOLOGY_LEVELS; i++) { 847 if ((levelsSet & (1u << i)) != 0) 848 continue; 849 hierarchyLevels[i] = hierarchyLevels[i - 1]; 850 } 851 852 for (int i = 0; i < CPU_TOPOLOGY_LEVELS; i++) { 853 uint32 mask = ~uint32(0); 854 if (i < CPU_TOPOLOGY_LEVELS - 1) 855 mask = (1u << hierarchyLevels[i]) - 1; 856 if (i > 0) 857 mask &= ~sHierarchyMask[i - 1]; 858 sHierarchyMask[i] = mask; 859 sHierarchyShift[i] = i > 0 ? hierarchyLevels[i - 1] : 0; 860 } 861 862 return B_OK; 863 } 864 865 866 static inline status_t 867 detect_intel_cpu_topology_legacy(uint32 maxBasicLeaf) 868 { 869 sGetCPUTopologyID = get_cpu_legacy_initial_apic_id; 870 871 cpuid_info cpuid; 872 873 get_current_cpuid(&cpuid, 1, 0); 874 int maxLogicalID = next_power_of_2((cpuid.regs.ebx >> 16) & 0xff); 875 876 int maxCoreID = 1; 877 if (maxBasicLeaf >= 4) { 878 get_current_cpuid(&cpuid, 4, 0); 879 maxCoreID = next_power_of_2((cpuid.regs.eax >> 26) + 1); 880 } 881 882 compute_cpu_hierarchy_masks(maxLogicalID, maxCoreID); 883 884 return B_OK; 885 } 886 887 888 static void 889 detect_intel_cache_topology(uint32 maxBasicLeaf) 890 { 891 if (maxBasicLeaf < 4) 892 return; 893 894 uint8 hierarchyLevels[CPU_MAX_CACHE_LEVEL]; 895 int maxCacheLevel = 0; 896 897 int currentLevel = 0; 898 int cacheType; 899 do { 900 cpuid_info cpuid; 901 get_current_cpuid(&cpuid, 4, currentLevel); 902 903 cacheType = cpuid.regs.eax & 0x1f; 904 if (cacheType == 0) 905 break; 906 907 int cacheLevel = (cpuid.regs.eax >> 5) & 0x7; 908 hierarchyLevels[cacheLevel - 1] 909 = next_power_of_2(((cpuid.regs.eax >> 14) & 0x3f) + 1); 910 maxCacheLevel = std::max(maxCacheLevel, cacheLevel); 911 912 currentLevel++; 913 } while (true); 914 915 for (int i = 0; i < maxCacheLevel; i++) 916 sCacheSharingMask[i] = ~uint32(hierarchyLevels[i] - 1); 917 918 gCPUCacheLevelCount = maxCacheLevel; 919 } 920 921 922 static uint32 923 get_simple_cpu_topology_id(int currentCPU) 924 { 925 return currentCPU; 926 } 927 928 929 static inline int 930 get_topology_level_id(uint32 id, cpu_topology_level level) 931 { 932 ASSERT(level < CPU_TOPOLOGY_LEVELS); 933 return (id & sHierarchyMask[level]) >> sHierarchyShift[level]; 934 } 935 936 937 static void 938 detect_cpu_topology(int currentCPU, cpu_ent* cpu, uint32 maxBasicLeaf, 939 uint32 maxExtendedLeaf) 940 { 941 if (currentCPU == 0) { 942 memset(sCacheSharingMask, 0xff, sizeof(sCacheSharingMask)); 943 944 status_t result = B_UNSUPPORTED; 945 if (x86_check_feature(IA32_FEATURE_HTT, FEATURE_COMMON)) { 946 if (cpu->arch.vendor == VENDOR_AMD 947 || cpu->arch.vendor == VENDOR_HYGON) { 948 result = detect_amd_cpu_topology(maxBasicLeaf, maxExtendedLeaf); 949 950 if (result == B_OK) 951 detect_amd_cache_topology(maxExtendedLeaf); 952 } 953 954 if (cpu->arch.vendor == VENDOR_INTEL) { 955 result = detect_intel_cpu_topology_x2apic(maxBasicLeaf); 956 if (result != B_OK) 957 result = detect_intel_cpu_topology_legacy(maxBasicLeaf); 958 959 if (result == B_OK) 960 detect_intel_cache_topology(maxBasicLeaf); 961 } 962 } 963 964 if (result != B_OK) { 965 dprintf("No CPU topology information available.\n"); 966 967 sGetCPUTopologyID = get_simple_cpu_topology_id; 968 969 sHierarchyMask[CPU_TOPOLOGY_PACKAGE] = ~uint32(0); 970 } 971 } 972 973 ASSERT(sGetCPUTopologyID != NULL); 974 int topologyID = sGetCPUTopologyID(currentCPU); 975 cpu->topology_id[CPU_TOPOLOGY_SMT] 976 = get_topology_level_id(topologyID, CPU_TOPOLOGY_SMT); 977 cpu->topology_id[CPU_TOPOLOGY_CORE] 978 = get_topology_level_id(topologyID, CPU_TOPOLOGY_CORE); 979 cpu->topology_id[CPU_TOPOLOGY_PACKAGE] 980 = get_topology_level_id(topologyID, CPU_TOPOLOGY_PACKAGE); 981 982 unsigned int i; 983 for (i = 0; i < gCPUCacheLevelCount; i++) 984 cpu->cache_id[i] = topologyID & sCacheSharingMask[i]; 985 for (; i < CPU_MAX_CACHE_LEVEL; i++) 986 cpu->cache_id[i] = -1; 987 988 #if DUMP_CPU_TOPOLOGY 989 dprintf("CPU %d: apic id %d, package %d, core %d, smt %d\n", currentCPU, 990 topologyID, cpu->topology_id[CPU_TOPOLOGY_PACKAGE], 991 cpu->topology_id[CPU_TOPOLOGY_CORE], 992 cpu->topology_id[CPU_TOPOLOGY_SMT]); 993 994 if (gCPUCacheLevelCount > 0) { 995 char cacheLevels[256]; 996 unsigned int offset = 0; 997 for (i = 0; i < gCPUCacheLevelCount; i++) { 998 offset += snprintf(cacheLevels + offset, 999 sizeof(cacheLevels) - offset, 1000 " L%d id %d%s", i + 1, cpu->cache_id[i], 1001 i < gCPUCacheLevelCount - 1 ? "," : ""); 1002 1003 if (offset >= sizeof(cacheLevels)) 1004 break; 1005 } 1006 1007 dprintf("CPU %d: cache sharing:%s\n", currentCPU, cacheLevels); 1008 } 1009 #endif 1010 } 1011 1012 1013 static void 1014 detect_intel_patch_level(cpu_ent* cpu) 1015 { 1016 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR) { 1017 cpu->arch.patch_level = 0; 1018 return; 1019 } 1020 1021 x86_write_msr(IA32_MSR_UCODE_REV, 0); 1022 cpuid_info cpuid; 1023 get_current_cpuid(&cpuid, 1, 0); 1024 1025 uint64 value = x86_read_msr(IA32_MSR_UCODE_REV); 1026 cpu->arch.patch_level = value >> 32; 1027 } 1028 1029 1030 static void 1031 detect_amd_patch_level(cpu_ent* cpu) 1032 { 1033 if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR) { 1034 cpu->arch.patch_level = 0; 1035 return; 1036 } 1037 1038 uint64 value = x86_read_msr(IA32_MSR_UCODE_REV); 1039 cpu->arch.patch_level = (uint32)value; 1040 } 1041 1042 1043 static struct intel_microcode_header* 1044 find_microcode_intel(addr_t data, size_t size, uint32 patchLevel) 1045 { 1046 // 9.11.3 Processor Identification 1047 cpuid_info cpuid; 1048 get_current_cpuid(&cpuid, 1, 0); 1049 uint32 signature = cpuid.regs.eax; 1050 // 9.11.4 Platform Identification 1051 uint64 platformBits = (x86_read_msr(IA32_MSR_PLATFORM_ID) >> 50) & 0x7; 1052 uint64 mask = 1 << platformBits; 1053 1054 while (size > 0) { 1055 if (size < sizeof(struct intel_microcode_header)) { 1056 dprintf("find_microcode_intel update is too small for header\n"); 1057 break; 1058 } 1059 struct intel_microcode_header* header = 1060 (struct intel_microcode_header*)data; 1061 1062 uint32 totalSize = header->total_size; 1063 uint32 dataSize = header->data_size; 1064 if (dataSize == 0) { 1065 dataSize = 2000; 1066 totalSize = sizeof(struct intel_microcode_header) 1067 + dataSize; 1068 } 1069 if (totalSize > size) { 1070 dprintf("find_microcode_intel update is too small for data\n"); 1071 break; 1072 } 1073 1074 uint32* dwords = (uint32*)data; 1075 // prepare the next update 1076 size -= totalSize; 1077 data += totalSize; 1078 1079 if (header->loader_revision != 1) { 1080 dprintf("find_microcode_intel incorrect loader version\n"); 1081 continue; 1082 } 1083 // 9.11.6 The microcode update data requires a 16-byte boundary 1084 // alignment. 1085 if (((addr_t)header % 16) != 0) { 1086 dprintf("find_microcode_intel incorrect alignment\n"); 1087 continue; 1088 } 1089 uint32 sum = 0; 1090 for (uint32 i = 0; i < totalSize / 4; i++) { 1091 sum += dwords[i]; 1092 } 1093 if (sum != 0) { 1094 dprintf("find_microcode_intel incorrect checksum\n"); 1095 continue; 1096 } 1097 if (patchLevel > header->update_revision) { 1098 dprintf("find_microcode_intel update_revision is lower\n"); 1099 continue; 1100 } 1101 if (signature == header->processor_signature 1102 && (mask & header->processor_flags) != 0) { 1103 return header; 1104 } 1105 if (totalSize <= (sizeof(struct intel_microcode_header) + dataSize 1106 + sizeof(struct intel_microcode_extended_signature_header))) { 1107 continue; 1108 } 1109 struct intel_microcode_extended_signature_header* extSigHeader = 1110 (struct intel_microcode_extended_signature_header*)((addr_t)header 1111 + sizeof(struct intel_microcode_header) + dataSize); 1112 struct intel_microcode_extended_signature* extended_signature = 1113 (struct intel_microcode_extended_signature*)((addr_t)extSigHeader 1114 + sizeof(struct intel_microcode_extended_signature_header)); 1115 for (uint32 i = 0; i < extSigHeader->extended_signature_count; i++) { 1116 if (signature == extended_signature[i].processor_signature 1117 && (mask & extended_signature[i].processor_flags) != 0) 1118 return header; 1119 } 1120 } 1121 return NULL; 1122 } 1123 1124 1125 static void 1126 load_microcode_intel(int currentCPU, cpu_ent* cpu) 1127 { 1128 // serialize for HT cores 1129 if (currentCPU != 0) 1130 acquire_spinlock(&sUcodeUpdateLock); 1131 detect_intel_patch_level(cpu); 1132 uint32 revision = cpu->arch.patch_level; 1133 struct intel_microcode_header* update = (struct intel_microcode_header*)sLoadedUcodeUpdate; 1134 if (update == NULL) { 1135 update = find_microcode_intel((addr_t)sUcodeData, sUcodeDataSize, 1136 revision); 1137 } 1138 if (update == NULL) { 1139 dprintf("CPU %d: no update found\n", currentCPU); 1140 } else if (update->update_revision != revision) { 1141 addr_t data = (addr_t)update + sizeof(struct intel_microcode_header); 1142 wbinvd(); 1143 x86_write_msr(IA32_MSR_UCODE_WRITE, data); 1144 detect_intel_patch_level(cpu); 1145 if (revision == cpu->arch.patch_level) { 1146 dprintf("CPU %d: update failed\n", currentCPU); 1147 } else { 1148 if (sLoadedUcodeUpdate == NULL) 1149 sLoadedUcodeUpdate = update; 1150 dprintf("CPU %d: updated from revision 0x%" B_PRIx32 " to 0x%" B_PRIx32 1151 "\n", currentCPU, revision, cpu->arch.patch_level); 1152 } 1153 } 1154 if (currentCPU != 0) 1155 release_spinlock(&sUcodeUpdateLock); 1156 } 1157 1158 1159 static struct amd_microcode_header* 1160 find_microcode_amd(addr_t data, size_t size, uint32 patchLevel) 1161 { 1162 // 9.11.3 Processor Identification 1163 cpuid_info cpuid; 1164 get_current_cpuid(&cpuid, 1, 0); 1165 uint32 signature = cpuid.regs.eax; 1166 1167 if (size < sizeof(struct amd_container_header)) { 1168 dprintf("find_microcode_amd update is too small for header\n"); 1169 return NULL; 1170 } 1171 struct amd_container_header* container = (struct amd_container_header*)data; 1172 if (container->magic != 0x414d44) { 1173 dprintf("find_microcode_amd update invalid magic\n"); 1174 return NULL; 1175 } 1176 1177 size -= sizeof(*container); 1178 data += sizeof(*container); 1179 1180 struct amd_section_header* section = 1181 (struct amd_section_header*)data; 1182 if (section->type != 0 || section->size == 0) { 1183 dprintf("find_microcode_amd update first section invalid\n"); 1184 return NULL; 1185 } 1186 1187 size -= sizeof(*section); 1188 data += sizeof(*section); 1189 1190 amd_equiv_cpu_entry* table = (amd_equiv_cpu_entry*)data; 1191 size -= section->size; 1192 data += section->size; 1193 1194 uint16 equiv_id = 0; 1195 for (uint32 i = 0; table[i].installed_cpu != 0; i++) { 1196 if (signature == table[i].equiv_cpu) { 1197 equiv_id = table[i].equiv_cpu; 1198 dprintf("find_microcode_amd found equiv cpu: %x\n", equiv_id); 1199 break; 1200 } 1201 } 1202 if (equiv_id == 0) { 1203 dprintf("find_microcode_amd update cpu not found in equiv table\n"); 1204 return NULL; 1205 } 1206 1207 while (size > sizeof(amd_section_header)) { 1208 struct amd_section_header* section = (struct amd_section_header*)data; 1209 size -= sizeof(*section); 1210 data += sizeof(*section); 1211 1212 if (section->type != 1 || section->size > size 1213 || section->size < sizeof(amd_microcode_header)) { 1214 dprintf("find_microcode_amd update firmware section invalid\n"); 1215 return NULL; 1216 } 1217 struct amd_microcode_header* header = (struct amd_microcode_header*)data; 1218 size -= section->size; 1219 data += section->size; 1220 1221 if (header->processor_rev_id != equiv_id) { 1222 dprintf("find_microcode_amd update found rev_id %x\n", header->processor_rev_id); 1223 continue; 1224 } 1225 if (patchLevel >= header->patch_id) { 1226 dprintf("find_microcode_intel update_revision is lower\n"); 1227 continue; 1228 } 1229 if (header->nb_dev_id != 0 || header->sb_dev_id != 0) { 1230 dprintf("find_microcode_amd update chipset specific firmware\n"); 1231 continue; 1232 } 1233 if (((addr_t)header % 16) != 0) { 1234 dprintf("find_microcode_amd incorrect alignment\n"); 1235 continue; 1236 } 1237 1238 return header; 1239 } 1240 dprintf("find_microcode_amd no fw update found for this cpu\n"); 1241 return NULL; 1242 } 1243 1244 1245 static void 1246 load_microcode_amd(int currentCPU, cpu_ent* cpu) 1247 { 1248 // serialize for HT cores 1249 if (currentCPU != 0) 1250 acquire_spinlock(&sUcodeUpdateLock); 1251 detect_amd_patch_level(cpu); 1252 uint32 revision = cpu->arch.patch_level; 1253 struct amd_microcode_header* update = (struct amd_microcode_header*)sLoadedUcodeUpdate; 1254 if (update == NULL) { 1255 update = find_microcode_amd((addr_t)sUcodeData, sUcodeDataSize, 1256 revision); 1257 } 1258 if (update != NULL) { 1259 addr_t data = (addr_t)update; 1260 wbinvd(); 1261 1262 x86_write_msr(MSR_K8_UCODE_UPDATE, data); 1263 1264 detect_amd_patch_level(cpu); 1265 if (revision == cpu->arch.patch_level) { 1266 dprintf("CPU %d: update failed\n", currentCPU); 1267 } else { 1268 if (sLoadedUcodeUpdate == NULL) 1269 sLoadedUcodeUpdate = update; 1270 dprintf("CPU %d: updated from revision 0x%" B_PRIx32 " to 0x%" B_PRIx32 1271 "\n", currentCPU, revision, cpu->arch.patch_level); 1272 } 1273 1274 } else { 1275 dprintf("CPU %d: no update found\n", currentCPU); 1276 } 1277 1278 if (currentCPU != 0) 1279 release_spinlock(&sUcodeUpdateLock); 1280 } 1281 1282 1283 static void 1284 load_microcode(int currentCPU) 1285 { 1286 if (sUcodeData == NULL) 1287 return; 1288 cpu_ent* cpu = get_cpu_struct(); 1289 if ((cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR) != 0) 1290 return; 1291 if (cpu->arch.vendor == VENDOR_INTEL) 1292 load_microcode_intel(currentCPU, cpu); 1293 else if (cpu->arch.vendor == VENDOR_AMD) 1294 load_microcode_amd(currentCPU, cpu); 1295 } 1296 1297 1298 static uint8 1299 get_hybrid_cpu_type() 1300 { 1301 cpu_ent* cpu = get_cpu_struct(); 1302 if ((cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_HYBRID_CPU) == 0) 1303 return 0; 1304 1305 #define X86_HYBRID_CPU_TYPE_ID_SHIFT 24 1306 cpuid_info cpuid; 1307 get_current_cpuid(&cpuid, 0x1a, 0); 1308 return cpuid.regs.eax >> X86_HYBRID_CPU_TYPE_ID_SHIFT; 1309 } 1310 1311 1312 static const char* 1313 get_hybrid_cpu_type_string(uint8 type) 1314 { 1315 switch (type) { 1316 case 0x20: 1317 return "Atom"; 1318 case 0x40: 1319 return "Core"; 1320 default: 1321 return ""; 1322 } 1323 } 1324 1325 1326 static void 1327 detect_cpu(int currentCPU, bool full = true) 1328 { 1329 cpu_ent* cpu = get_cpu_struct(); 1330 char vendorString[17]; 1331 cpuid_info cpuid; 1332 1333 // clear out the cpu info data 1334 cpu->arch.vendor = VENDOR_UNKNOWN; 1335 cpu->arch.vendor_name = "UNKNOWN VENDOR"; 1336 cpu->arch.feature[FEATURE_COMMON] = 0; 1337 cpu->arch.feature[FEATURE_EXT] = 0; 1338 cpu->arch.feature[FEATURE_EXT_AMD] = 0; 1339 cpu->arch.feature[FEATURE_7_EBX] = 0; 1340 cpu->arch.feature[FEATURE_7_ECX] = 0; 1341 cpu->arch.feature[FEATURE_7_EDX] = 0; 1342 cpu->arch.feature[FEATURE_D_1_EAX] = 0; 1343 cpu->arch.model_name[0] = 0; 1344 1345 // print some fun data 1346 get_current_cpuid(&cpuid, 0, 0); 1347 uint32 maxBasicLeaf = cpuid.eax_0.max_eax; 1348 1349 // build the vendor string 1350 memset(vendorString, 0, sizeof(vendorString)); 1351 memcpy(vendorString, cpuid.eax_0.vendor_id, sizeof(cpuid.eax_0.vendor_id)); 1352 1353 // get the family, model, stepping 1354 get_current_cpuid(&cpuid, 1, 0); 1355 cpu->arch.type = cpuid.eax_1.type; 1356 cpu->arch.family = cpuid.eax_1.family; 1357 cpu->arch.extended_family = cpuid.eax_1.extended_family; 1358 cpu->arch.model = cpuid.eax_1.model; 1359 cpu->arch.extended_model = cpuid.eax_1.extended_model; 1360 cpu->arch.stepping = cpuid.eax_1.stepping; 1361 if (full) { 1362 dprintf("CPU %d: type %d family %d extended_family %d model %d " 1363 "extended_model %d stepping %d, string '%s'\n", 1364 currentCPU, cpu->arch.type, cpu->arch.family, 1365 cpu->arch.extended_family, cpu->arch.model, 1366 cpu->arch.extended_model, cpu->arch.stepping, vendorString); 1367 } 1368 1369 // figure out what vendor we have here 1370 1371 for (int32 i = 0; i < VENDOR_NUM; i++) { 1372 if (vendor_info[i].ident_string[0] 1373 && !strcmp(vendorString, vendor_info[i].ident_string[0])) { 1374 cpu->arch.vendor = (x86_vendors)i; 1375 cpu->arch.vendor_name = vendor_info[i].vendor; 1376 break; 1377 } 1378 if (vendor_info[i].ident_string[1] 1379 && !strcmp(vendorString, vendor_info[i].ident_string[1])) { 1380 cpu->arch.vendor = (x86_vendors)i; 1381 cpu->arch.vendor_name = vendor_info[i].vendor; 1382 break; 1383 } 1384 } 1385 1386 // see if we can get the model name 1387 get_current_cpuid(&cpuid, 0x80000000, 0); 1388 uint32 maxExtendedLeaf = cpuid.eax_0.max_eax; 1389 if (maxExtendedLeaf >= 0x80000004) { 1390 // build the model string (need to swap ecx/edx data before copying) 1391 unsigned int temp; 1392 memset(cpu->arch.model_name, 0, sizeof(cpu->arch.model_name)); 1393 1394 get_current_cpuid(&cpuid, 0x80000002, 0); 1395 temp = cpuid.regs.edx; 1396 cpuid.regs.edx = cpuid.regs.ecx; 1397 cpuid.regs.ecx = temp; 1398 memcpy(cpu->arch.model_name, cpuid.as_chars, sizeof(cpuid.as_chars)); 1399 1400 get_current_cpuid(&cpuid, 0x80000003, 0); 1401 temp = cpuid.regs.edx; 1402 cpuid.regs.edx = cpuid.regs.ecx; 1403 cpuid.regs.ecx = temp; 1404 memcpy(cpu->arch.model_name + 16, cpuid.as_chars, 1405 sizeof(cpuid.as_chars)); 1406 1407 get_current_cpuid(&cpuid, 0x80000004, 0); 1408 temp = cpuid.regs.edx; 1409 cpuid.regs.edx = cpuid.regs.ecx; 1410 cpuid.regs.ecx = temp; 1411 memcpy(cpu->arch.model_name + 32, cpuid.as_chars, 1412 sizeof(cpuid.as_chars)); 1413 1414 // some cpus return a right-justified string 1415 int32 i = 0; 1416 while (cpu->arch.model_name[i] == ' ') 1417 i++; 1418 if (i > 0) { 1419 memmove(cpu->arch.model_name, &cpu->arch.model_name[i], 1420 strlen(&cpu->arch.model_name[i]) + 1); 1421 } 1422 1423 if (full) { 1424 dprintf("CPU %d: vendor '%s' model name '%s'\n", 1425 currentCPU, cpu->arch.vendor_name, cpu->arch.model_name); 1426 } 1427 } else { 1428 strlcpy(cpu->arch.model_name, "unknown", sizeof(cpu->arch.model_name)); 1429 } 1430 1431 // load feature bits 1432 get_current_cpuid(&cpuid, 1, 0); 1433 cpu->arch.feature[FEATURE_COMMON] = cpuid.eax_1.features; // edx 1434 cpu->arch.feature[FEATURE_EXT] = cpuid.eax_1.extended_features; // ecx 1435 1436 if (!full) 1437 return; 1438 1439 if (maxExtendedLeaf >= 0x80000001) { 1440 get_current_cpuid(&cpuid, 0x80000001, 0); 1441 if (cpu->arch.vendor == VENDOR_AMD) 1442 cpu->arch.feature[FEATURE_EXT_AMD_ECX] = cpuid.regs.ecx; // ecx 1443 cpu->arch.feature[FEATURE_EXT_AMD] = cpuid.regs.edx; // edx 1444 if (cpu->arch.vendor != VENDOR_AMD) 1445 cpu->arch.feature[FEATURE_EXT_AMD] &= IA32_FEATURES_INTEL_EXT; 1446 } 1447 1448 if (maxBasicLeaf >= 5) { 1449 get_current_cpuid(&cpuid, 5, 0); 1450 cpu->arch.feature[FEATURE_5_ECX] = cpuid.regs.ecx; 1451 } 1452 1453 if (maxBasicLeaf >= 6) { 1454 get_current_cpuid(&cpuid, 6, 0); 1455 cpu->arch.feature[FEATURE_6_EAX] = cpuid.regs.eax; 1456 cpu->arch.feature[FEATURE_6_ECX] = cpuid.regs.ecx; 1457 } 1458 1459 if (maxBasicLeaf >= 7) { 1460 get_current_cpuid(&cpuid, 7, 0); 1461 cpu->arch.feature[FEATURE_7_EBX] = cpuid.regs.ebx; 1462 cpu->arch.feature[FEATURE_7_ECX] = cpuid.regs.ecx; 1463 cpu->arch.feature[FEATURE_7_EDX] = cpuid.regs.edx; 1464 } 1465 1466 if (maxBasicLeaf >= 0xd) { 1467 get_current_cpuid(&cpuid, 0xd, 1); 1468 cpu->arch.feature[FEATURE_D_1_EAX] = cpuid.regs.eax; 1469 } 1470 1471 if (maxExtendedLeaf >= 0x80000007) { 1472 get_current_cpuid(&cpuid, 0x80000007, 0); 1473 cpu->arch.feature[FEATURE_EXT_7_EDX] = cpuid.regs.edx; 1474 } 1475 1476 if (maxExtendedLeaf >= 0x80000008) { 1477 get_current_cpuid(&cpuid, 0x80000008, 0); 1478 cpu->arch.feature[FEATURE_EXT_8_EBX] = cpuid.regs.ebx; 1479 } 1480 1481 detect_cpu_topology(currentCPU, cpu, maxBasicLeaf, maxExtendedLeaf); 1482 1483 if (cpu->arch.vendor == VENDOR_INTEL) 1484 detect_intel_patch_level(cpu); 1485 else if (cpu->arch.vendor == VENDOR_AMD) 1486 detect_amd_patch_level(cpu); 1487 1488 cpu->arch.hybrid_type = get_hybrid_cpu_type(); 1489 1490 #if DUMP_FEATURE_STRING 1491 dump_feature_string(currentCPU, cpu); 1492 #endif 1493 #if DUMP_CPU_PATCHLEVEL_TYPE 1494 dprintf("CPU %d: patch_level 0x%" B_PRIx32 "%s%s\n", currentCPU, 1495 cpu->arch.patch_level, 1496 cpu->arch.hybrid_type != 0 ? ", hybrid type ": "", 1497 get_hybrid_cpu_type_string(cpu->arch.hybrid_type)); 1498 #endif 1499 } 1500 1501 1502 bool 1503 x86_check_feature(uint32 feature, enum x86_feature_type type) 1504 { 1505 cpu_ent* cpu = get_cpu_struct(); 1506 1507 #if 0 1508 int i; 1509 dprintf("x86_check_feature: feature 0x%x, type %d\n", feature, type); 1510 for (i = 0; i < FEATURE_NUM; i++) { 1511 dprintf("features %d: 0x%x\n", i, cpu->arch.feature[i]); 1512 } 1513 #endif 1514 1515 return (cpu->arch.feature[type] & feature) != 0; 1516 } 1517 1518 1519 bool 1520 x86_use_pat() 1521 { 1522 return sUsePAT; 1523 } 1524 1525 1526 void* 1527 x86_get_double_fault_stack(int32 cpu, size_t* _size) 1528 { 1529 *_size = kDoubleFaultStackSize; 1530 return (void*)(sDoubleFaultStacks + kDoubleFaultStackSize * cpu); 1531 } 1532 1533 1534 /*! Returns the index of the current CPU. Can only be called from the double 1535 fault handler. 1536 */ 1537 int32 1538 x86_double_fault_get_cpu(void) 1539 { 1540 addr_t stack = x86_get_stack_frame(); 1541 return (stack - sDoubleFaultStacks) / kDoubleFaultStackSize; 1542 } 1543 1544 1545 // #pragma mark - 1546 1547 1548 status_t 1549 arch_cpu_preboot_init_percpu(kernel_args* args, int cpu) 1550 { 1551 if (cpu == 0) { 1552 // We can't allocate pages at this stage in the boot process, only virtual addresses. 1553 sDoubleFaultStacks = vm_allocate_early(args, 1554 kDoubleFaultStackSize * smp_get_num_cpus(), 0, 0, 0); 1555 } 1556 1557 // On SMP system we want to synchronize the CPUs' TSCs, so system_time() 1558 // will return consistent values. 1559 if (smp_get_num_cpus() > 1) { 1560 // let the first CPU prepare the rendezvous point 1561 if (cpu == 0) 1562 sTSCSyncRendezvous = smp_get_num_cpus() - 1; 1563 1564 // One CPU after the other will drop out of this loop and be caught by 1565 // the loop below, until the last CPU (0) gets there. Save for +/- a few 1566 // cycles the CPUs should pass the second loop at the same time. 1567 while (sTSCSyncRendezvous != cpu) { 1568 } 1569 1570 sTSCSyncRendezvous = cpu - 1; 1571 1572 while (sTSCSyncRendezvous != -1) { 1573 } 1574 1575 // reset TSC to 0 1576 x86_write_msr(IA32_MSR_TSC, 0); 1577 } 1578 1579 x86_descriptors_preboot_init_percpu(args, cpu); 1580 1581 return B_OK; 1582 } 1583 1584 1585 static void 1586 halt_idle(void) 1587 { 1588 asm("hlt"); 1589 } 1590 1591 1592 static void 1593 amdc1e_noarat_idle(void) 1594 { 1595 uint64 msr = x86_read_msr(K8_MSR_IPM); 1596 if (msr & K8_CMPHALT) 1597 x86_write_msr(K8_MSR_IPM, msr & ~K8_CMPHALT); 1598 halt_idle(); 1599 } 1600 1601 1602 static bool 1603 detect_amdc1e_noarat() 1604 { 1605 cpu_ent* cpu = get_cpu_struct(); 1606 1607 if (cpu->arch.vendor != VENDOR_AMD) 1608 return false; 1609 1610 // Family 0x12 and higher processors support ARAT 1611 // Family lower than 0xf processors doesn't support C1E 1612 // Family 0xf with model <= 0x40 procssors doesn't support C1E 1613 uint32 family = cpu->arch.family + cpu->arch.extended_family; 1614 uint32 model = (cpu->arch.extended_model << 4) | cpu->arch.model; 1615 return (family < 0x12 && family > 0xf) || (family == 0xf && model > 0x40); 1616 } 1617 1618 1619 static void 1620 init_tsc_with_cpuid(kernel_args* args, uint32* conversionFactor) 1621 { 1622 cpu_ent* cpu = get_cpu_struct(); 1623 if (cpu->arch.vendor != VENDOR_INTEL) 1624 return; 1625 1626 uint32 model = (cpu->arch.extended_model << 4) | cpu->arch.model; 1627 cpuid_info cpuid; 1628 get_current_cpuid(&cpuid, 0, 0); 1629 uint32 maxBasicLeaf = cpuid.eax_0.max_eax; 1630 if (maxBasicLeaf < 0x15) 1631 return; 1632 1633 get_current_cpuid(&cpuid, 0x15, 0); 1634 if (cpuid.regs.eax == 0 || cpuid.regs.ebx == 0) 1635 return; 1636 uint32 khz = cpuid.regs.ecx / 1000; 1637 uint32 denominator = cpuid.regs.eax; 1638 uint32 numerator = cpuid.regs.ebx; 1639 if (khz == 0 && model == 0x5f) { 1640 // CPUID 0x16 isn't supported, hardcoding 1641 khz = 25000; 1642 } 1643 1644 if (khz == 0 && maxBasicLeaf >= 0x16) { 1645 // for these CPUs the base frequency is also the tsc frequency 1646 get_current_cpuid(&cpuid, 0x16, 0); 1647 khz = cpuid.regs.eax * 1000 * denominator / numerator; 1648 } 1649 if (khz == 0) 1650 return; 1651 1652 dprintf("CPU: using TSC frequency from CPUID\n"); 1653 // compute for microseconds as follows (1000000 << 32) / (tsc freq in Hz), 1654 // or (1000 << 32) / (tsc freq in kHz) 1655 *conversionFactor = (1000ULL << 32) / (khz * numerator / denominator); 1656 // overwrite the bootloader value 1657 args->arch_args.system_time_cv_factor = *conversionFactor; 1658 } 1659 1660 1661 static void 1662 init_tsc_with_msr(kernel_args* args, uint32* conversionFactor) 1663 { 1664 cpu_ent* cpuEnt = get_cpu_struct(); 1665 if (cpuEnt->arch.vendor != VENDOR_AMD) 1666 return; 1667 1668 uint32 family = cpuEnt->arch.family + cpuEnt->arch.extended_family; 1669 if (family < 0x10) 1670 return; 1671 uint64 value = x86_read_msr(MSR_F10H_HWCR); 1672 if ((value & HWCR_TSCFREQSEL) == 0) 1673 return; 1674 1675 value = x86_read_msr(MSR_F10H_PSTATEDEF(0)); 1676 if ((value & PSTATEDEF_EN) == 0) 1677 return; 1678 if (family != 0x17 && family != 0x19) 1679 return; 1680 1681 uint64 khz = 200 * 1000; 1682 uint32 denominator = (value >> 8) & 0x3f; 1683 if (denominator < 0x8 || denominator > 0x2c) 1684 return; 1685 if (denominator > 0x1a && (denominator % 2) == 1) 1686 return; 1687 uint32 numerator = value & 0xff; 1688 if (numerator < 0x10) 1689 return; 1690 1691 dprintf("CPU: using TSC frequency from MSR %" B_PRIu64 "\n", khz * numerator / denominator); 1692 // compute for microseconds as follows (1000000 << 32) / (tsc freq in Hz), 1693 // or (1000 << 32) / (tsc freq in kHz) 1694 *conversionFactor = (1000ULL << 32) / (khz * numerator / denominator); 1695 // overwrite the bootloader value 1696 args->arch_args.system_time_cv_factor = *conversionFactor; 1697 } 1698 1699 1700 static void 1701 init_tsc(kernel_args* args) 1702 { 1703 // init the TSC -> system_time() conversion factors 1704 1705 // try to find the TSC frequency with CPUID 1706 uint32 conversionFactor = args->arch_args.system_time_cv_factor; 1707 init_tsc_with_cpuid(args, &conversionFactor); 1708 init_tsc_with_msr(args, &conversionFactor); 1709 uint64 conversionFactorNsecs = (uint64)conversionFactor * 1000; 1710 1711 #ifdef __x86_64__ 1712 // The x86_64 system_time() implementation uses 64-bit multiplication and 1713 // therefore shifting is not necessary for low frequencies (it's also not 1714 // too likely that there'll be any x86_64 CPUs clocked under 1GHz). 1715 __x86_setup_system_time((uint64)conversionFactor << 32, 1716 conversionFactorNsecs); 1717 #else 1718 if (conversionFactorNsecs >> 32 != 0) { 1719 // the TSC frequency is < 1 GHz, which forces us to shift the factor 1720 __x86_setup_system_time(conversionFactor, conversionFactorNsecs >> 16, 1721 true); 1722 } else { 1723 // the TSC frequency is >= 1 GHz 1724 __x86_setup_system_time(conversionFactor, conversionFactorNsecs, false); 1725 } 1726 #endif 1727 } 1728 1729 1730 status_t 1731 arch_cpu_init_percpu(kernel_args* args, int cpu) 1732 { 1733 detect_cpu(cpu, false); 1734 load_microcode(cpu); 1735 detect_cpu(cpu); 1736 1737 if (cpu == 0) 1738 init_tsc(args); 1739 1740 if (!gCpuIdleFunc) { 1741 if (detect_amdc1e_noarat()) 1742 gCpuIdleFunc = amdc1e_noarat_idle; 1743 else 1744 gCpuIdleFunc = halt_idle; 1745 } 1746 1747 if (x86_check_feature(IA32_FEATURE_MCE, FEATURE_COMMON)) 1748 x86_write_cr4(x86_read_cr4() | IA32_CR4_MCE); 1749 1750 cpu_ent* cpuEnt = get_cpu_struct(); 1751 if (cpu == 0) { 1752 bool supportsPAT = x86_check_feature(IA32_FEATURE_PAT, FEATURE_COMMON); 1753 1754 // Pentium II Errata A52 and Pentium III Errata E27 say the upper four 1755 // entries of the PAT are not useable as the PAT bit is ignored for 4K 1756 // PTEs. Pentium 4 Errata N46 says the PAT bit can be assumed 0 in some 1757 // specific cases. To avoid issues, disable PAT on such CPUs. 1758 bool brokenPAT = cpuEnt->arch.vendor == VENDOR_INTEL 1759 && cpuEnt->arch.extended_family == 0 1760 && cpuEnt->arch.extended_model == 0 1761 && ((cpuEnt->arch.family == 6 && cpuEnt->arch.model <= 13) 1762 || (cpuEnt->arch.family == 15 && cpuEnt->arch.model <= 6)); 1763 1764 sUsePAT = supportsPAT && !brokenPAT 1765 && !get_safemode_boolean_early(args, B_SAFEMODE_DISABLE_PAT, false); 1766 1767 if (sUsePAT) { 1768 dprintf("using PAT for memory type configuration\n"); 1769 } else { 1770 dprintf("not using PAT for memory type configuration (%s)\n", 1771 supportsPAT ? (brokenPAT ? "broken" : "disabled") 1772 : "unsupported"); 1773 } 1774 } 1775 1776 if (sUsePAT) 1777 init_pat(cpu); 1778 1779 #ifdef __x86_64__ 1780 // if RDTSCP or RDPID are available write cpu number in TSC_AUX 1781 if (x86_check_feature(IA32_FEATURE_AMD_EXT_RDTSCP, FEATURE_EXT_AMD) 1782 || x86_check_feature(IA32_FEATURE_RDPID, FEATURE_7_ECX)) { 1783 x86_write_msr(IA32_MSR_TSC_AUX, cpu); 1784 } 1785 1786 // make LFENCE a dispatch serializing instruction on AMD 64bit 1787 if (cpuEnt->arch.vendor == VENDOR_AMD) { 1788 uint32 family = cpuEnt->arch.family + cpuEnt->arch.extended_family; 1789 if (family >= 0x10 && family != 0x11) { 1790 uint64 value = x86_read_msr(MSR_F10H_DE_CFG); 1791 if ((value & DE_CFG_SERIALIZE_LFENCE) == 0) 1792 x86_write_msr(MSR_F10H_DE_CFG, value | DE_CFG_SERIALIZE_LFENCE); 1793 } 1794 } 1795 #endif 1796 1797 if (x86_check_feature(IA32_FEATURE_APERFMPERF, FEATURE_6_ECX)) { 1798 gCPU[cpu].arch.mperf_prev = x86_read_msr(IA32_MSR_MPERF); 1799 gCPU[cpu].arch.aperf_prev = x86_read_msr(IA32_MSR_APERF); 1800 gCPU[cpu].arch.frequency = 0; 1801 gCPU[cpu].arch.perf_timestamp = 0; 1802 } 1803 return __x86_patch_errata_percpu(cpu); 1804 } 1805 1806 1807 status_t 1808 arch_cpu_init(kernel_args* args) 1809 { 1810 if (args->ucode_data != NULL 1811 && args->ucode_data_size > 0) { 1812 sUcodeData = args->ucode_data; 1813 sUcodeDataSize = args->ucode_data_size; 1814 } else { 1815 dprintf("CPU: no microcode provided\n"); 1816 } 1817 1818 // Initialize descriptor tables. 1819 x86_descriptors_init(args); 1820 1821 return B_OK; 1822 } 1823 1824 1825 #ifdef __x86_64__ 1826 static void 1827 enable_smap(void* dummy, int cpu) 1828 { 1829 x86_write_cr4(x86_read_cr4() | IA32_CR4_SMAP); 1830 } 1831 1832 1833 static void 1834 enable_smep(void* dummy, int cpu) 1835 { 1836 x86_write_cr4(x86_read_cr4() | IA32_CR4_SMEP); 1837 } 1838 1839 1840 static void 1841 enable_osxsave(void* dummy, int cpu) 1842 { 1843 x86_write_cr4(x86_read_cr4() | IA32_CR4_OSXSAVE); 1844 } 1845 1846 1847 static void 1848 enable_xsavemask(void* dummy, int cpu) 1849 { 1850 xsetbv(0, gXsaveMask); 1851 } 1852 #endif 1853 1854 1855 status_t 1856 arch_cpu_init_post_vm(kernel_args* args) 1857 { 1858 // allocate the area for the double fault stacks 1859 area_id stacks = create_area("double fault stacks", 1860 (void**)&sDoubleFaultStacks, B_EXACT_ADDRESS, 1861 kDoubleFaultStackSize * smp_get_num_cpus(), 1862 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 1863 if (stacks < B_OK) 1864 panic("failed to create double fault stacks area: %" B_PRId32, stacks); 1865 1866 X86PagingStructures* kernelPagingStructures 1867 = static_cast<X86VMTranslationMap*>( 1868 VMAddressSpace::Kernel()->TranslationMap())->PagingStructures(); 1869 1870 // Set active translation map on each CPU. 1871 for (uint32 i = 0; i < args->num_cpus; i++) { 1872 gCPU[i].arch.active_paging_structures = kernelPagingStructures; 1873 kernelPagingStructures->AddReference(); 1874 } 1875 1876 if (!apic_available()) 1877 x86_init_fpu(); 1878 // else fpu gets set up in smp code 1879 1880 #ifdef __x86_64__ 1881 // if available enable SMEP (Supervisor Memory Execution Protection) 1882 if (x86_check_feature(IA32_FEATURE_SMEP, FEATURE_7_EBX)) { 1883 if (!get_safemode_boolean(B_SAFEMODE_DISABLE_SMEP_SMAP, false)) { 1884 dprintf("enable SMEP\n"); 1885 call_all_cpus_sync(&enable_smep, NULL); 1886 } else 1887 dprintf("SMEP disabled per safemode setting\n"); 1888 } 1889 1890 // if available enable SMAP (Supervisor Memory Access Protection) 1891 if (x86_check_feature(IA32_FEATURE_SMAP, FEATURE_7_EBX)) { 1892 if (!get_safemode_boolean(B_SAFEMODE_DISABLE_SMEP_SMAP, false)) { 1893 dprintf("enable SMAP\n"); 1894 call_all_cpus_sync(&enable_smap, NULL); 1895 1896 arch_altcodepatch_replace(ALTCODEPATCH_TAG_STAC, &_stac, 3); 1897 arch_altcodepatch_replace(ALTCODEPATCH_TAG_CLAC, &_clac, 3); 1898 } else 1899 dprintf("SMAP disabled per safemode setting\n"); 1900 } 1901 1902 // if available enable XSAVE (XSAVE and extended states) 1903 gHasXsave = x86_check_feature(IA32_FEATURE_EXT_XSAVE, FEATURE_EXT); 1904 if (gHasXsave) { 1905 gHasXsavec = x86_check_feature(IA32_FEATURE_XSAVEC, 1906 FEATURE_D_1_EAX); 1907 1908 call_all_cpus_sync(&enable_osxsave, NULL); 1909 gXsaveMask = IA32_XCR0_X87 | IA32_XCR0_SSE; 1910 cpuid_info cpuid; 1911 get_current_cpuid(&cpuid, 0xd, 0); 1912 gXsaveMask |= (cpuid.regs.eax & IA32_XCR0_AVX); 1913 call_all_cpus_sync(&enable_xsavemask, NULL); 1914 get_current_cpuid(&cpuid, 0xd, 0); 1915 gFPUSaveLength = cpuid.regs.ebx; 1916 if (gFPUSaveLength > sizeof(((struct arch_thread *)0)->fpu_state)) 1917 gFPUSaveLength = 832; 1918 1919 arch_altcodepatch_replace(ALTCODEPATCH_TAG_XSAVE, 1920 gHasXsavec ? &_xsavec : &_xsave, 4); 1921 arch_altcodepatch_replace(ALTCODEPATCH_TAG_XRSTOR, 1922 &_xrstor, 4); 1923 1924 dprintf("enable %s 0x%" B_PRIx64 " %" B_PRId64 "\n", 1925 gHasXsavec ? "XSAVEC" : "XSAVE", gXsaveMask, gFPUSaveLength); 1926 } 1927 #endif 1928 1929 return B_OK; 1930 } 1931 1932 1933 status_t 1934 arch_cpu_init_post_modules(kernel_args* args) 1935 { 1936 // initialize CPU module 1937 1938 void* cookie = open_module_list("cpu"); 1939 1940 while (true) { 1941 char name[B_FILE_NAME_LENGTH]; 1942 size_t nameLength = sizeof(name); 1943 1944 if (read_next_module_name(cookie, name, &nameLength) != B_OK 1945 || get_module(name, (module_info**)&sCpuModule) == B_OK) 1946 break; 1947 } 1948 1949 close_module_list(cookie); 1950 1951 // initialize MTRRs if available 1952 if (x86_count_mtrrs() > 0) { 1953 sCpuRendezvous = sCpuRendezvous2 = 0; 1954 call_all_cpus(&init_mtrrs, NULL); 1955 } 1956 1957 size_t threadExitLen = (addr_t)x86_end_userspace_thread_exit 1958 - (addr_t)x86_userspace_thread_exit; 1959 addr_t threadExitPosition = fill_commpage_entry( 1960 COMMPAGE_ENTRY_X86_THREAD_EXIT, (const void*)x86_userspace_thread_exit, 1961 threadExitLen); 1962 1963 // add the functions to the commpage image 1964 image_id image = get_commpage_image(); 1965 1966 elf_add_memory_image_symbol(image, "commpage_thread_exit", 1967 threadExitPosition, threadExitLen, B_SYMBOL_TYPE_TEXT); 1968 1969 return B_OK; 1970 } 1971 1972 1973 void 1974 arch_cpu_user_TLB_invalidate(void) 1975 { 1976 x86_write_cr3(x86_read_cr3()); 1977 } 1978 1979 1980 void 1981 arch_cpu_global_TLB_invalidate(void) 1982 { 1983 uint32 flags = x86_read_cr4(); 1984 1985 if (flags & IA32_CR4_GLOBAL_PAGES) { 1986 // disable and reenable the global pages to flush all TLBs regardless 1987 // of the global page bit 1988 x86_write_cr4(flags & ~IA32_CR4_GLOBAL_PAGES); 1989 x86_write_cr4(flags | IA32_CR4_GLOBAL_PAGES); 1990 } else { 1991 cpu_status state = disable_interrupts(); 1992 arch_cpu_user_TLB_invalidate(); 1993 restore_interrupts(state); 1994 } 1995 } 1996 1997 1998 void 1999 arch_cpu_invalidate_TLB_range(addr_t start, addr_t end) 2000 { 2001 int32 num_pages = end / B_PAGE_SIZE - start / B_PAGE_SIZE; 2002 while (num_pages-- >= 0) { 2003 invalidate_TLB(start); 2004 start += B_PAGE_SIZE; 2005 } 2006 } 2007 2008 2009 void 2010 arch_cpu_invalidate_TLB_list(addr_t pages[], int num_pages) 2011 { 2012 int i; 2013 for (i = 0; i < num_pages; i++) { 2014 invalidate_TLB(pages[i]); 2015 } 2016 } 2017 2018 2019 status_t 2020 arch_cpu_shutdown(bool rebootSystem) 2021 { 2022 if (acpi_shutdown(rebootSystem) == B_OK) 2023 return B_OK; 2024 2025 if (!rebootSystem) { 2026 #ifndef __x86_64__ 2027 return apm_shutdown(); 2028 #else 2029 return B_NOT_SUPPORTED; 2030 #endif 2031 } 2032 2033 cpu_status state = disable_interrupts(); 2034 2035 // try to reset the system using the keyboard controller 2036 out8(0xfe, 0x64); 2037 2038 // Give some time to the controller to do its job (0.5s) 2039 snooze(500000); 2040 2041 // if that didn't help, try it this way 2042 x86_reboot(); 2043 2044 restore_interrupts(state); 2045 return B_ERROR; 2046 } 2047 2048 2049 void 2050 arch_cpu_sync_icache(void* address, size_t length) 2051 { 2052 // instruction cache is always consistent on x86 2053 } 2054 2055