xref: /haiku/src/system/kernel/arch/x86/arch_cpu.cpp (revision 556aca31e907be45975dd1da23c19da44a148767)
1 /*
2  * Copyright 2018, Jérôme Duval, jerome.duval@gmail.com.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Copyright 2013, Paweł Dziepak, pdziepak@quarnos.org.
5  * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
6  * Distributed under the terms of the MIT License.
7  *
8  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
9  * Distributed under the terms of the NewOS License.
10  */
11 
12 
13 #include <cpu.h>
14 
15 #include <string.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 
19 #include <algorithm>
20 
21 #include <ACPI.h>
22 
23 #include <boot_device.h>
24 #include <commpage.h>
25 #include <debug.h>
26 #include <elf.h>
27 #include <safemode.h>
28 #include <smp.h>
29 #include <util/BitUtils.h>
30 #include <vm/vm.h>
31 #include <vm/vm_types.h>
32 #include <vm/VMAddressSpace.h>
33 
34 #include <arch_system_info.h>
35 #include <arch/x86/apic.h>
36 #include <boot/kernel_args.h>
37 
38 #include "paging/X86PagingStructures.h"
39 #include "paging/X86VMTranslationMap.h"
40 
41 
42 #define DUMP_FEATURE_STRING	1
43 #define DUMP_CPU_TOPOLOGY	1
44 #define DUMP_CPU_PATCHLEVEL_TYPE	1
45 
46 
47 /* cpu vendor info */
48 struct cpu_vendor_info {
49 	const char *vendor;
50 	const char *ident_string[2];
51 };
52 
53 static const struct cpu_vendor_info vendor_info[VENDOR_NUM] = {
54 	{ "Intel", { "GenuineIntel" } },
55 	{ "AMD", { "AuthenticAMD" } },
56 	{ "Cyrix", { "CyrixInstead" } },
57 	{ "UMC", { "UMC UMC UMC" } },
58 	{ "NexGen", { "NexGenDriven" } },
59 	{ "Centaur", { "CentaurHauls" } },
60 	{ "Rise", { "RiseRiseRise" } },
61 	{ "Transmeta", { "GenuineTMx86", "TransmetaCPU" } },
62 	{ "NSC", { "Geode by NSC" } },
63 	{ "Hygon", { "HygonGenuine" } },
64 };
65 
66 #define K8_SMIONCMPHALT			(1ULL << 27)
67 #define K8_C1EONCMPHALT			(1ULL << 28)
68 
69 #define K8_CMPHALT				(K8_SMIONCMPHALT | K8_C1EONCMPHALT)
70 
71 struct set_mtrr_parameter {
72 	int32	index;
73 	uint64	base;
74 	uint64	length;
75 	uint8	type;
76 };
77 
78 struct set_mtrrs_parameter {
79 	const x86_mtrr_info*	infos;
80 	uint32					count;
81 	uint8					defaultType;
82 };
83 
84 
85 #ifdef __x86_64__
86 extern addr_t _stac;
87 extern addr_t _clac;
88 extern addr_t _xsave;
89 extern addr_t _xsavec;
90 extern addr_t _xrstor;
91 uint64 gXsaveMask;
92 uint64 gFPUSaveLength = 512;
93 bool gHasXsave = false;
94 bool gHasXsavec = false;
95 #endif
96 
97 extern "C" void x86_reboot(void);
98 	// from arch.S
99 
100 void (*gCpuIdleFunc)(void);
101 #ifndef __x86_64__
102 void (*gX86SwapFPUFunc)(void* oldState, const void* newState) = x86_noop_swap;
103 bool gHasSSE = false;
104 #endif
105 
106 static uint32 sCpuRendezvous;
107 static uint32 sCpuRendezvous2;
108 static uint32 sCpuRendezvous3;
109 static vint32 sTSCSyncRendezvous;
110 
111 /* Special per-CPU stacks for the double fault handler */
112 static addr_t sDoubleFaultStacks = 0;
113 static const size_t kDoubleFaultStackSize = 4096;	// size per CPU
114 
115 static x86_cpu_module_info* sCpuModule;
116 
117 
118 /* CPU topology information */
119 static uint32 (*sGetCPUTopologyID)(int currentCPU);
120 static uint32 sHierarchyMask[CPU_TOPOLOGY_LEVELS];
121 static uint32 sHierarchyShift[CPU_TOPOLOGY_LEVELS];
122 
123 /* Cache topology information */
124 static uint32 sCacheSharingMask[CPU_MAX_CACHE_LEVEL];
125 
126 static void* sUcodeData = NULL;
127 static size_t sUcodeDataSize = 0;
128 static void* sLoadedUcodeUpdate;
129 static spinlock sUcodeUpdateLock = B_SPINLOCK_INITIALIZER;
130 
131 static bool sUsePAT = false;
132 
133 
134 static status_t
135 acpi_shutdown(bool rebootSystem)
136 {
137 	if (debug_debugger_running() || !are_interrupts_enabled())
138 		return B_ERROR;
139 
140 	acpi_module_info* acpi;
141 	if (get_module(B_ACPI_MODULE_NAME, (module_info**)&acpi) != B_OK)
142 		return B_NOT_SUPPORTED;
143 
144 	status_t status;
145 	if (rebootSystem) {
146 		status = acpi->reboot();
147 	} else {
148 		status = acpi->prepare_sleep_state(ACPI_POWER_STATE_OFF, NULL, 0);
149 		if (status == B_OK) {
150 			//cpu_status state = disable_interrupts();
151 			status = acpi->enter_sleep_state(ACPI_POWER_STATE_OFF);
152 			//restore_interrupts(state);
153 		}
154 	}
155 
156 	put_module(B_ACPI_MODULE_NAME);
157 	return status;
158 }
159 
160 
161 /*!	Disable CPU caches, and invalidate them. */
162 static void
163 disable_caches()
164 {
165 	x86_write_cr0((x86_read_cr0() | CR0_CACHE_DISABLE)
166 		& ~CR0_NOT_WRITE_THROUGH);
167 	wbinvd();
168 	arch_cpu_global_TLB_invalidate();
169 }
170 
171 
172 /*!	Invalidate CPU caches, and enable them. */
173 static void
174 enable_caches()
175 {
176 	wbinvd();
177 	arch_cpu_global_TLB_invalidate();
178 	x86_write_cr0(x86_read_cr0()
179 		& ~(CR0_CACHE_DISABLE | CR0_NOT_WRITE_THROUGH));
180 }
181 
182 
183 static void
184 set_mtrr(void* _parameter, int cpu)
185 {
186 	struct set_mtrr_parameter* parameter
187 		= (struct set_mtrr_parameter*)_parameter;
188 
189 	// wait until all CPUs have arrived here
190 	smp_cpu_rendezvous(&sCpuRendezvous);
191 
192 	// One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU
193 	// that initiated the call_all_cpus() from doing that again and clearing
194 	// sCpuRendezvous2 before the last CPU has actually left the loop in
195 	// smp_cpu_rendezvous();
196 	if (cpu == 0)
197 		atomic_set((int32*)&sCpuRendezvous3, 0);
198 
199 	disable_caches();
200 
201 	sCpuModule->set_mtrr(parameter->index, parameter->base, parameter->length,
202 		parameter->type);
203 
204 	enable_caches();
205 
206 	// wait until all CPUs have arrived here
207 	smp_cpu_rendezvous(&sCpuRendezvous2);
208 	smp_cpu_rendezvous(&sCpuRendezvous3);
209 }
210 
211 
212 static void
213 set_mtrrs(void* _parameter, int cpu)
214 {
215 	set_mtrrs_parameter* parameter = (set_mtrrs_parameter*)_parameter;
216 
217 	// wait until all CPUs have arrived here
218 	smp_cpu_rendezvous(&sCpuRendezvous);
219 
220 	// One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU
221 	// that initiated the call_all_cpus() from doing that again and clearing
222 	// sCpuRendezvous2 before the last CPU has actually left the loop in
223 	// smp_cpu_rendezvous();
224 	if (cpu == 0)
225 		atomic_set((int32*)&sCpuRendezvous3, 0);
226 
227 	disable_caches();
228 
229 	sCpuModule->set_mtrrs(parameter->defaultType, parameter->infos,
230 		parameter->count);
231 
232 	enable_caches();
233 
234 	// wait until all CPUs have arrived here
235 	smp_cpu_rendezvous(&sCpuRendezvous2);
236 	smp_cpu_rendezvous(&sCpuRendezvous3);
237 }
238 
239 
240 static void
241 init_mtrrs(void* _unused, int cpu)
242 {
243 	// wait until all CPUs have arrived here
244 	smp_cpu_rendezvous(&sCpuRendezvous);
245 
246 	// One CPU has to reset sCpuRendezvous3 -- it is needed to prevent the CPU
247 	// that initiated the call_all_cpus() from doing that again and clearing
248 	// sCpuRendezvous2 before the last CPU has actually left the loop in
249 	// smp_cpu_rendezvous();
250 	if (cpu == 0)
251 		atomic_set((int32*)&sCpuRendezvous3, 0);
252 
253 	disable_caches();
254 
255 	sCpuModule->init_mtrrs();
256 
257 	enable_caches();
258 
259 	// wait until all CPUs have arrived here
260 	smp_cpu_rendezvous(&sCpuRendezvous2);
261 	smp_cpu_rendezvous(&sCpuRendezvous3);
262 }
263 
264 
265 uint32
266 x86_count_mtrrs(void)
267 {
268 	if (sUsePAT) {
269 		// When PAT is supported, we completely ignore MTRRs and leave them as
270 		// initialized by firmware. This follows the suggestion in Intel SDM
271 		// that these don't usually need to be touched by anything after system
272 		// init. Using page attributes is the more flexible and modern approach
273 		// to memory type handling and they can override MTRRs in the critical
274 		// case of write-combining, usually used for framebuffers.
275 		dprintf("ignoring MTRRs due to PAT support\n");
276 		return 0;
277 	}
278 
279 	if (sCpuModule == NULL)
280 		return 0;
281 
282 	return sCpuModule->count_mtrrs();
283 }
284 
285 
286 void
287 x86_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type)
288 {
289 	struct set_mtrr_parameter parameter;
290 	parameter.index = index;
291 	parameter.base = base;
292 	parameter.length = length;
293 	parameter.type = type;
294 
295 	sCpuRendezvous = sCpuRendezvous2 = 0;
296 	call_all_cpus(&set_mtrr, &parameter);
297 }
298 
299 
300 status_t
301 x86_get_mtrr(uint32 index, uint64* _base, uint64* _length, uint8* _type)
302 {
303 	// the MTRRs are identical on all CPUs, so it doesn't matter
304 	// on which CPU this runs
305 	return sCpuModule->get_mtrr(index, _base, _length, _type);
306 }
307 
308 
309 void
310 x86_set_mtrrs(uint8 defaultType, const x86_mtrr_info* infos, uint32 count)
311 {
312 	if (sCpuModule == NULL)
313 		return;
314 
315 	struct set_mtrrs_parameter parameter;
316 	parameter.defaultType = defaultType;
317 	parameter.infos = infos;
318 	parameter.count = count;
319 
320 	sCpuRendezvous = sCpuRendezvous2 = 0;
321 	call_all_cpus(&set_mtrrs, &parameter);
322 }
323 
324 
325 static void
326 init_pat(int cpu)
327 {
328 	disable_caches();
329 
330 	uint64 value = x86_read_msr(IA32_MSR_PAT);
331 	dprintf("PAT MSR on CPU %d before init: %#" B_PRIx64 "\n", cpu, value);
332 
333 	// Use PAT entry 4 for write-combining, leave the rest as is
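	// (PAT entry 4 is the one selected by a mapping that sets the PAT bit
	// with PCD and PWT clear, since the PAT index is PAT<<2 | PCD<<1 | PWT.)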
334 	value &= ~(IA32_MSR_PAT_ENTRY_MASK << IA32_MSR_PAT_ENTRY_SHIFT(4));
335 	value |= IA32_MSR_PAT_TYPE_WRITE_COMBINING << IA32_MSR_PAT_ENTRY_SHIFT(4);
336 
337 	dprintf("PAT MSR on CPU %d after init: %#" B_PRIx64 "\n", cpu, value);
338 	x86_write_msr(IA32_MSR_PAT, value);
339 
340 	enable_caches();
341 }
342 
343 
344 void
345 x86_init_fpu(void)
346 {
347 	// All x86_64 CPUs support SSE, don't need to bother checking for it.
348 #ifndef __x86_64__
349 	if (!x86_check_feature(IA32_FEATURE_FPU, FEATURE_COMMON)) {
350 		// No FPU... time to install one in your 386?
351 		dprintf("%s: Warning: CPU has no reported FPU.\n", __func__);
352 		gX86SwapFPUFunc = x86_noop_swap;
353 		return;
354 	}
355 
356 	if (!x86_check_feature(IA32_FEATURE_SSE, FEATURE_COMMON)
357 		|| !x86_check_feature(IA32_FEATURE_FXSR, FEATURE_COMMON)) {
358 		dprintf("%s: CPU has no SSE... just enabling FPU.\n", __func__);
359 		// we don't have proper SSE support, just enable FPU
360 		x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
361 		gX86SwapFPUFunc = x86_fnsave_swap;
362 		return;
363 	}
364 #endif
365 
366 	dprintf("%s: CPU has SSE... enabling FXSR and XMM.\n", __func__);
367 #ifndef __x86_64__
368 	// enable OS support for SSE
369 	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
370 	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
371 
372 	gX86SwapFPUFunc = x86_fxsave_swap;
373 	gHasSSE = true;
374 #endif
375 }
376 
377 
378 #if DUMP_FEATURE_STRING
379 static void
380 dump_feature_string(int currentCPU, cpu_ent* cpu)
381 {
382 	char features[768];
383 	features[0] = 0;
384 
385 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FPU)
386 		strlcat(features, "fpu ", sizeof(features));
387 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_VME)
388 		strlcat(features, "vme ", sizeof(features));
389 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DE)
390 		strlcat(features, "de ", sizeof(features));
391 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE)
392 		strlcat(features, "pse ", sizeof(features));
393 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TSC)
394 		strlcat(features, "tsc ", sizeof(features));
395 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MSR)
396 		strlcat(features, "msr ", sizeof(features));
397 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAE)
398 		strlcat(features, "pae ", sizeof(features));
399 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCE)
400 		strlcat(features, "mce ", sizeof(features));
401 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CX8)
402 		strlcat(features, "cx8 ", sizeof(features));
403 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_APIC)
404 		strlcat(features, "apic ", sizeof(features));
405 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SEP)
406 		strlcat(features, "sep ", sizeof(features));
407 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MTRR)
408 		strlcat(features, "mtrr ", sizeof(features));
409 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PGE)
410 		strlcat(features, "pge ", sizeof(features));
411 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCA)
412 		strlcat(features, "mca ", sizeof(features));
413 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CMOV)
414 		strlcat(features, "cmov ", sizeof(features));
415 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAT)
416 		strlcat(features, "pat ", sizeof(features));
417 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE36)
418 		strlcat(features, "pse36 ", sizeof(features));
419 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSN)
420 		strlcat(features, "psn ", sizeof(features));
421 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CLFSH)
422 		strlcat(features, "clfsh ", sizeof(features));
423 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DS)
424 		strlcat(features, "ds ", sizeof(features));
425 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_ACPI)
426 		strlcat(features, "acpi ", sizeof(features));
427 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MMX)
428 		strlcat(features, "mmx ", sizeof(features));
429 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FXSR)
430 		strlcat(features, "fxsr ", sizeof(features));
431 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE)
432 		strlcat(features, "sse ", sizeof(features));
433 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE2)
434 		strlcat(features, "sse2 ", sizeof(features));
435 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SS)
436 		strlcat(features, "ss ", sizeof(features));
437 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_HTT)
438 		strlcat(features, "htt ", sizeof(features));
439 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TM)
440 		strlcat(features, "tm ", sizeof(features));
441 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PBE)
442 		strlcat(features, "pbe ", sizeof(features));
443 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE3)
444 		strlcat(features, "sse3 ", sizeof(features));
445 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCLMULQDQ)
446 		strlcat(features, "pclmulqdq ", sizeof(features));
447 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DTES64)
448 		strlcat(features, "dtes64 ", sizeof(features));
449 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MONITOR)
450 		strlcat(features, "monitor ", sizeof(features));
451 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DSCPL)
452 		strlcat(features, "dscpl ", sizeof(features));
453 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_VMX)
454 		strlcat(features, "vmx ", sizeof(features));
455 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SMX)
456 		strlcat(features, "smx ", sizeof(features));
457 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_EST)
458 		strlcat(features, "est ", sizeof(features));
459 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TM2)
460 		strlcat(features, "tm2 ", sizeof(features));
461 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSSE3)
462 		strlcat(features, "ssse3 ", sizeof(features));
463 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CNXTID)
464 		strlcat(features, "cnxtid ", sizeof(features));
465 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_FMA)
466 		strlcat(features, "fma ", sizeof(features));
467 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CX16)
468 		strlcat(features, "cx16 ", sizeof(features));
469 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XTPR)
470 		strlcat(features, "xtpr ", sizeof(features));
471 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PDCM)
472 		strlcat(features, "pdcm ", sizeof(features));
473 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCID)
474 		strlcat(features, "pcid ", sizeof(features));
475 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DCA)
476 		strlcat(features, "dca ", sizeof(features));
477 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_1)
478 		strlcat(features, "sse4_1 ", sizeof(features));
479 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_2)
480 		strlcat(features, "sse4_2 ", sizeof(features));
481 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_X2APIC)
482 		strlcat(features, "x2apic ", sizeof(features));
483 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MOVBE)
484 		strlcat(features, "movbe ", sizeof(features));
485 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_POPCNT)
486 		strlcat(features, "popcnt ", sizeof(features));
487 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TSCDEADLINE)
488 		strlcat(features, "tscdeadline ", sizeof(features));
489 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AES)
490 		strlcat(features, "aes ", sizeof(features));
491 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XSAVE)
492 		strlcat(features, "xsave ", sizeof(features));
493 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_OSXSAVE)
494 		strlcat(features, "osxsave ", sizeof(features));
495 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AVX)
496 		strlcat(features, "avx ", sizeof(features));
497 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_F16C)
498 		strlcat(features, "f16c ", sizeof(features));
499 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_RDRND)
500 		strlcat(features, "rdrnd ", sizeof(features));
501 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR)
502 		strlcat(features, "hypervisor ", sizeof(features));
503 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_SYSCALL)
504 		strlcat(features, "syscall ", sizeof(features));
505 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_NX)
506 		strlcat(features, "nx ", sizeof(features));
507 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_MMXEXT)
508 		strlcat(features, "mmxext ", sizeof(features));
509 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_FFXSR)
510 		strlcat(features, "ffxsr ", sizeof(features));
511 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_PDPE1GB)
512 		strlcat(features, "pdpe1gb ", sizeof(features));
513 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_LONG)
514 		strlcat(features, "long ", sizeof(features));
515 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOWEXT)
516 		strlcat(features, "3dnowext ", sizeof(features));
517 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOW)
518 		strlcat(features, "3dnow ", sizeof(features));
519 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_DTS)
520 		strlcat(features, "dts ", sizeof(features));
521 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ITB)
522 		strlcat(features, "itb ", sizeof(features));
523 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ARAT)
524 		strlcat(features, "arat ", sizeof(features));
525 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PLN)
526 		strlcat(features, "pln ", sizeof(features));
527 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ECMD)
528 		strlcat(features, "ecmd ", sizeof(features));
529 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PTM)
530 		strlcat(features, "ptm ", sizeof(features));
531 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP)
532 		strlcat(features, "hwp ", sizeof(features));
533 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_NOTIFY)
534 		strlcat(features, "hwp_notify ", sizeof(features));
535 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_ACTWIN)
536 		strlcat(features, "hwp_actwin ", sizeof(features));
537 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_EPP)
538 		strlcat(features, "hwp_epp ", sizeof(features));
539 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_PLR)
540 		strlcat(features, "hwp_plr ", sizeof(features));
541 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HDC)
542 		strlcat(features, "hdc ", sizeof(features));
543 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_TBMT3)
544 		strlcat(features, "tbmt3 ", sizeof(features));
545 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_CAP)
546 		strlcat(features, "hwp_cap ", sizeof(features));
547 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_PECI)
548 		strlcat(features, "hwp_peci ", sizeof(features));
549 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_FLEX)
550 		strlcat(features, "hwp_flex ", sizeof(features));
551 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_FAST)
552 		strlcat(features, "hwp_fast ", sizeof(features));
553 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HW_FEEDBACK)
554 		strlcat(features, "hw_feedback ", sizeof(features));
555 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_HWP_IGNIDL)
556 		strlcat(features, "hwp_ignidl ", sizeof(features));
557 	if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_APERFMPERF)
558 		strlcat(features, "aperfmperf ", sizeof(features));
559 	if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_EPB)
560 		strlcat(features, "epb ", sizeof(features));
561 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_TSC_ADJUST)
562 		strlcat(features, "tsc_adjust ", sizeof(features));
563 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SGX)
564 		strlcat(features, "sgx ", sizeof(features));
565 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_BMI1)
566 		strlcat(features, "bmi1 ", sizeof(features));
567 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_HLE)
568 		strlcat(features, "hle ", sizeof(features));
569 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX2)
570 		strlcat(features, "avx2 ", sizeof(features));
571 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SMEP)
572 		strlcat(features, "smep ", sizeof(features));
573 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_BMI2)
574 		strlcat(features, "bmi2 ", sizeof(features));
575 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_ERMS)
576 		strlcat(features, "erms ", sizeof(features));
577 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_INVPCID)
578 		strlcat(features, "invpcid ", sizeof(features));
579 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_RTM)
580 		strlcat(features, "rtm ", sizeof(features));
581 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_CQM)
582 		strlcat(features, "cqm ", sizeof(features));
583 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_MPX)
584 		strlcat(features, "mpx ", sizeof(features));
585 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_RDT_A)
586 		strlcat(features, "rdt_a ", sizeof(features));
587 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512F)
588 		strlcat(features, "avx512f ", sizeof(features));
589 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512DQ)
590 		strlcat(features, "avx512dq ", sizeof(features));
591 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_RDSEED)
592 		strlcat(features, "rdseed ", sizeof(features));
593 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_ADX)
594 		strlcat(features, "adx ", sizeof(features));
595 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SMAP)
596 		strlcat(features, "smap ", sizeof(features));
597 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512IFMA)
598 		strlcat(features, "avx512ifma ", sizeof(features));
599 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_PCOMMIT)
600 		strlcat(features, "pcommit ", sizeof(features));
601 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_CLFLUSHOPT)
602 		strlcat(features, "cflushopt ", sizeof(features));
603 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_CLWB)
604 		strlcat(features, "clwb ", sizeof(features));
605 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_INTEL_PT)
606 		strlcat(features, "intel_pt ", sizeof(features));
607 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512PF)
608 		strlcat(features, "avx512pf ", sizeof(features));
609 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512ER)
610 		strlcat(features, "avx512er ", sizeof(features));
611 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512CD)
612 		strlcat(features, "avx512cd ", sizeof(features));
613 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_SHA_NI)
614 		strlcat(features, "sha_ni ", sizeof(features));
615 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512BW)
616 		strlcat(features, "avx512bw ", sizeof(features));
617 	if (cpu->arch.feature[FEATURE_7_EBX] & IA32_FEATURE_AVX512VI)
618 		strlcat(features, "avx512vi ", sizeof(features));
619 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512VMBI)
620 		strlcat(features, "avx512vmbi ", sizeof(features));
621 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_UMIP)
622 		strlcat(features, "umip ", sizeof(features));
623 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_PKU)
624 		strlcat(features, "pku ", sizeof(features));
625 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_OSPKE)
626 		strlcat(features, "ospke ", sizeof(features));
627 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512VMBI2)
628 		strlcat(features, "avx512vmbi2 ", sizeof(features));
629 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_GFNI)
630 		strlcat(features, "gfni ", sizeof(features));
631 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_VAES)
632 		strlcat(features, "vaes ", sizeof(features));
633 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_VPCLMULQDQ)
634 		strlcat(features, "vpclmulqdq ", sizeof(features));
635 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512_VNNI)
636 		strlcat(features, "avx512vnni ", sizeof(features));
637 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512_BITALG)
638 		strlcat(features, "avx512bitalg ", sizeof(features));
639 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_AVX512_VPOPCNTDQ)
640 		strlcat(features, "avx512vpopcntdq ", sizeof(features));
641 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_LA57)
642 		strlcat(features, "la57 ", sizeof(features));
643 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_RDPID)
644 		strlcat(features, "rdpid ", sizeof(features));
645 	if (cpu->arch.feature[FEATURE_7_ECX] & IA32_FEATURE_SGX_LC)
646 		strlcat(features, "sgx_lc ", sizeof(features));
647 	if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_HYBRID_CPU)
648 		strlcat(features, "hybrid ", sizeof(features));
649 	if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_IBRS)
650 		strlcat(features, "ibrs ", sizeof(features));
651 	if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_STIBP)
652 		strlcat(features, "stibp ", sizeof(features));
653 	if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_L1D_FLUSH)
654 		strlcat(features, "l1d_flush ", sizeof(features));
655 	if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_ARCH_CAPABILITIES)
656 		strlcat(features, "msr_arch ", sizeof(features));
657 	if (cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_SSBD)
658 		strlcat(features, "ssbd ", sizeof(features));
659 	if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_AMD_HW_PSTATE)
660 		strlcat(features, "hwpstate ", sizeof(features));
661 	if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_INVARIANT_TSC)
662 		strlcat(features, "constant_tsc ", sizeof(features));
663 	if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_CPB)
664 		strlcat(features, "cpb ", sizeof(features));
665 	if (cpu->arch.feature[FEATURE_EXT_7_EDX] & IA32_FEATURE_PROC_FEEDBACK)
666 		strlcat(features, "proc_feedback ", sizeof(features));
667 	if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XSAVEOPT)
668 		strlcat(features, "xsaveopt ", sizeof(features));
669 	if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XSAVEC)
670 		strlcat(features, "xsavec ", sizeof(features));
671 	if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XGETBV1)
672 		strlcat(features, "xgetbv1 ", sizeof(features));
673 	if (cpu->arch.feature[FEATURE_D_1_EAX] & IA32_FEATURE_XSAVES)
674 		strlcat(features, "xsaves ", sizeof(features));
675 	if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_CLZERO)
676 		strlcat(features, "clzero ", sizeof(features));
677 	if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_IBPB)
678 		strlcat(features, "ibpb ", sizeof(features));
679 	if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_AMD_SSBD)
680 		strlcat(features, "amd_ssbd ", sizeof(features));
681 	if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_VIRT_SSBD)
682 		strlcat(features, "virt_ssbd ", sizeof(features));
683 	if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_AMD_SSB_NO)
684 		strlcat(features, "amd_ssb_no ", sizeof(features));
685 	if (cpu->arch.feature[FEATURE_EXT_8_EBX] & IA32_FEATURE_CPPC)
686 		strlcat(features, "cppc ", sizeof(features));
687 	dprintf("CPU %d: features: %s\n", currentCPU, features);
688 }
689 #endif	// DUMP_FEATURE_STRING
690 
691 
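/*!	Computes the per-level masks and shifts used to split an (initial) APIC ID
	into SMT, core and package components. Both maxLogicalID and maxCoreID are
	expected to be powers of two; the callers round them up accordingly.
*/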
692 static void
693 compute_cpu_hierarchy_masks(int maxLogicalID, int maxCoreID)
694 {
695 	ASSERT(maxLogicalID >= maxCoreID);
696 	const int kMaxSMTID = maxLogicalID / maxCoreID;
697 
698 	sHierarchyMask[CPU_TOPOLOGY_SMT] = kMaxSMTID - 1;
699 	sHierarchyShift[CPU_TOPOLOGY_SMT] = 0;
700 
701 	sHierarchyMask[CPU_TOPOLOGY_CORE] = (maxCoreID - 1) * kMaxSMTID;
702 	sHierarchyShift[CPU_TOPOLOGY_CORE]
703 		= count_set_bits(sHierarchyMask[CPU_TOPOLOGY_SMT]);
704 
705 	const uint32 kSinglePackageMask = sHierarchyMask[CPU_TOPOLOGY_SMT]
706 		| sHierarchyMask[CPU_TOPOLOGY_CORE];
707 	sHierarchyMask[CPU_TOPOLOGY_PACKAGE] = ~kSinglePackageMask;
708 	sHierarchyShift[CPU_TOPOLOGY_PACKAGE] = count_set_bits(kSinglePackageMask);
709 }
710 
711 
712 static uint32
713 get_cpu_legacy_initial_apic_id(int /* currentCPU */)
714 {
715 	cpuid_info cpuid;
716 	get_current_cpuid(&cpuid, 1, 0);
717 	return cpuid.regs.ebx >> 24;
718 }
719 
720 
721 static inline status_t
722 detect_amd_cpu_topology(uint32 maxBasicLeaf, uint32 maxExtendedLeaf)
723 {
724 	sGetCPUTopologyID = get_cpu_legacy_initial_apic_id;
725 
726 	cpuid_info cpuid;
727 	get_current_cpuid(&cpuid, 1, 0);
728 	int maxLogicalID = next_power_of_2((cpuid.regs.ebx >> 16) & 0xff);
729 
730 	int maxCoreID = 1;
731 	if (maxExtendedLeaf >= 0x80000008) {
732 		get_current_cpuid(&cpuid, 0x80000008, 0);
733 		maxCoreID = (cpuid.regs.ecx >> 12) & 0xf;
734 		if (maxCoreID != 0)
735 			maxCoreID = 1 << maxCoreID;
736 		else
737 			maxCoreID = next_power_of_2((cpuid.regs.edx & 0xf) + 1);
738 	}
739 
740 	if (maxExtendedLeaf >= 0x80000001) {
741 		get_current_cpuid(&cpuid, 0x80000001, 0);
742 		if (x86_check_feature(IA32_FEATURE_AMD_EXT_CMPLEGACY,
743 				FEATURE_EXT_AMD_ECX))
744 			maxCoreID = maxLogicalID;
745 	}
746 
747 	compute_cpu_hierarchy_masks(maxLogicalID, maxCoreID);
748 
749 	return B_OK;
750 }
751 
752 
753 static void
754 detect_amd_cache_topology(uint32 maxExtendedLeaf)
755 {
756 	if (!x86_check_feature(IA32_FEATURE_AMD_EXT_TOPOLOGY, FEATURE_EXT_AMD_ECX))
757 		return;
758 
759 	if (maxExtendedLeaf < 0x8000001d)
760 		return;
761 
762 	uint8 hierarchyLevels[CPU_MAX_CACHE_LEVEL];
763 	int maxCacheLevel = 0;
764 
765 	int currentLevel = 0;
766 	int cacheType;
767 	do {
768 		cpuid_info cpuid;
769 		get_current_cpuid(&cpuid, 0x8000001d, currentLevel);
770 
771 		cacheType = cpuid.regs.eax & 0x1f;
772 		if (cacheType == 0)
773 			break;
774 
775 		int cacheLevel = (cpuid.regs.eax >> 5) & 0x7;
776 		int coresCount = next_power_of_2(((cpuid.regs.eax >> 14) & 0x3f) + 1);
777 		hierarchyLevels[cacheLevel - 1]
778 			= coresCount * (sHierarchyMask[CPU_TOPOLOGY_SMT] + 1);
779 		maxCacheLevel = std::max(maxCacheLevel, cacheLevel);
780 
781 		currentLevel++;
782 	} while (true);
783 
784 	for (int i = 0; i < maxCacheLevel; i++)
785 		sCacheSharingMask[i] = ~uint32(hierarchyLevels[i] - 1);
786 	gCPUCacheLevelCount = maxCacheLevel;
787 }
788 
789 
790 static uint32
791 get_intel_cpu_initial_x2apic_id(int /* currentCPU */)
792 {
793 	cpuid_info cpuid;
794 	get_current_cpuid(&cpuid, 11, 0);
795 	return cpuid.regs.edx;
796 }
797 
798 
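/*!	Detects the CPU topology via the extended topology enumeration leaves:
	CPUID 0x1f is preferred when it reports a non-zero logical processor
	count, with leaf 0xb as fallback. The x2APIC ID (EDX) is then decomposed
	using the per-level shift widths reported in EAX[4:0] of each sub-leaf.
*/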
799 static inline status_t
800 detect_intel_cpu_topology_x2apic(uint32 maxBasicLeaf)
801 {
802 
803 	uint32 leaf = 0;
804 	cpuid_info cpuid;
805 	if (maxBasicLeaf >= 0x1f) {
806 		get_current_cpuid(&cpuid, 0x1f, 0);
807 		if (cpuid.regs.ebx != 0)
808 			leaf = 0x1f;
809 	}
810 	if (maxBasicLeaf >= 0xb && leaf == 0) {
811 		get_current_cpuid(&cpuid, 0xb, 0);
812 		if (cpuid.regs.ebx != 0)
813 			leaf = 0xb;
814 	}
815 	if (leaf == 0)
816 		return B_UNSUPPORTED;
817 
818 	uint8 hierarchyLevels[CPU_TOPOLOGY_LEVELS] = { 0 };
819 
820 	int currentLevel = 0;
821 	unsigned int levelsSet = 0;
822 	do {
823 		cpuid_info cpuid;
824 		get_current_cpuid(&cpuid, leaf, currentLevel++);
825 		int levelType = (cpuid.regs.ecx >> 8) & 0xff;
826 		int levelValue = cpuid.regs.eax & 0x1f;
827 
828 		if (levelType == 0)
829 			break;
830 
831 		switch (levelType) {
832 			case 1:	// SMT
833 				hierarchyLevels[CPU_TOPOLOGY_SMT] = levelValue;
834 				levelsSet |= 1;
835 				break;
836 			case 2:	// core
837 				hierarchyLevels[CPU_TOPOLOGY_CORE] = levelValue;
838 				levelsSet |= 2;
839 				break;
840 		}
841 
842 	} while (levelsSet != 3);
843 
844 	sGetCPUTopologyID = get_intel_cpu_initial_x2apic_id;
845 
846 	for (int i = 1; i < CPU_TOPOLOGY_LEVELS; i++) {
847 		if ((levelsSet & (1u << i)) != 0)
848 			continue;
849 		hierarchyLevels[i] = hierarchyLevels[i - 1];
850 	}
851 
852 	for (int i = 0; i < CPU_TOPOLOGY_LEVELS; i++) {
853 		uint32 mask = ~uint32(0);
854 		if (i < CPU_TOPOLOGY_LEVELS - 1)
855 			mask = (1u << hierarchyLevels[i]) - 1;
856 		if (i > 0)
857 			mask &= ~sHierarchyMask[i - 1];
858 		sHierarchyMask[i] = mask;
859 		sHierarchyShift[i] = i > 0 ? hierarchyLevels[i - 1] : 0;
860 	}
861 
862 	return B_OK;
863 }
864 
865 
866 static inline status_t
867 detect_intel_cpu_topology_legacy(uint32 maxBasicLeaf)
868 {
869 	sGetCPUTopologyID = get_cpu_legacy_initial_apic_id;
870 
871 	cpuid_info cpuid;
872 
873 	get_current_cpuid(&cpuid, 1, 0);
874 	int maxLogicalID = next_power_of_2((cpuid.regs.ebx >> 16) & 0xff);
875 
876 	int maxCoreID = 1;
877 	if (maxBasicLeaf >= 4) {
878 		get_current_cpuid(&cpuid, 4, 0);
879 		maxCoreID = next_power_of_2((cpuid.regs.eax >> 26) + 1);
880 	}
881 
882 	compute_cpu_hierarchy_masks(maxLogicalID, maxCoreID);
883 
884 	return B_OK;
885 }
886 
887 
888 static void
889 detect_intel_cache_topology(uint32 maxBasicLeaf)
890 {
891 	if (maxBasicLeaf < 4)
892 		return;
893 
894 	uint8 hierarchyLevels[CPU_MAX_CACHE_LEVEL];
895 	int maxCacheLevel = 0;
896 
897 	int currentLevel = 0;
898 	int cacheType;
899 	do {
900 		cpuid_info cpuid;
901 		get_current_cpuid(&cpuid, 4, currentLevel);
902 
903 		cacheType = cpuid.regs.eax & 0x1f;
904 		if (cacheType == 0)
905 			break;
906 
907 		int cacheLevel = (cpuid.regs.eax >> 5) & 0x7;
908 		hierarchyLevels[cacheLevel - 1]
909 			= next_power_of_2(((cpuid.regs.eax >> 14) & 0x3f) + 1);
910 		maxCacheLevel = std::max(maxCacheLevel, cacheLevel);
911 
912 		currentLevel++;
913 	} while (true);
914 
915 	for (int i = 0; i < maxCacheLevel; i++)
916 		sCacheSharingMask[i] = ~uint32(hierarchyLevels[i] - 1);
917 
918 	gCPUCacheLevelCount = maxCacheLevel;
919 }
920 
921 
922 static uint32
923 get_simple_cpu_topology_id(int currentCPU)
924 {
925 	return currentCPU;
926 }
927 
928 
929 static inline int
930 get_topology_level_id(uint32 id, cpu_topology_level level)
931 {
932 	ASSERT(level < CPU_TOPOLOGY_LEVELS);
933 	return (id & sHierarchyMask[level]) >> sHierarchyShift[level];
934 }
935 
936 
937 static void
938 detect_cpu_topology(int currentCPU, cpu_ent* cpu, uint32 maxBasicLeaf,
939 	uint32 maxExtendedLeaf)
940 {
941 	if (currentCPU == 0) {
942 		memset(sCacheSharingMask, 0xff, sizeof(sCacheSharingMask));
943 
944 		status_t result = B_UNSUPPORTED;
945 		if (x86_check_feature(IA32_FEATURE_HTT, FEATURE_COMMON)) {
946 			if (cpu->arch.vendor == VENDOR_AMD
947 				|| cpu->arch.vendor == VENDOR_HYGON) {
948 				result = detect_amd_cpu_topology(maxBasicLeaf, maxExtendedLeaf);
949 
950 				if (result == B_OK)
951 					detect_amd_cache_topology(maxExtendedLeaf);
952 			}
953 
954 			if (cpu->arch.vendor == VENDOR_INTEL) {
955 				result = detect_intel_cpu_topology_x2apic(maxBasicLeaf);
956 				if (result != B_OK)
957 					result = detect_intel_cpu_topology_legacy(maxBasicLeaf);
958 
959 				if (result == B_OK)
960 					detect_intel_cache_topology(maxBasicLeaf);
961 			}
962 		}
963 
964 		if (result != B_OK) {
965 			dprintf("No CPU topology information available.\n");
966 
967 			sGetCPUTopologyID = get_simple_cpu_topology_id;
968 
969 			sHierarchyMask[CPU_TOPOLOGY_PACKAGE] = ~uint32(0);
970 		}
971 	}
972 
973 	ASSERT(sGetCPUTopologyID != NULL);
974 	int topologyID = sGetCPUTopologyID(currentCPU);
975 	cpu->topology_id[CPU_TOPOLOGY_SMT]
976 		= get_topology_level_id(topologyID, CPU_TOPOLOGY_SMT);
977 	cpu->topology_id[CPU_TOPOLOGY_CORE]
978 		= get_topology_level_id(topologyID, CPU_TOPOLOGY_CORE);
979 	cpu->topology_id[CPU_TOPOLOGY_PACKAGE]
980 		= get_topology_level_id(topologyID, CPU_TOPOLOGY_PACKAGE);
981 
982 	unsigned int i;
983 	for (i = 0; i < gCPUCacheLevelCount; i++)
984 		cpu->cache_id[i] = topologyID & sCacheSharingMask[i];
985 	for (; i < CPU_MAX_CACHE_LEVEL; i++)
986 		cpu->cache_id[i] = -1;
987 
988 #if DUMP_CPU_TOPOLOGY
989 	dprintf("CPU %d: apic id %d, package %d, core %d, smt %d\n", currentCPU,
990 		topologyID, cpu->topology_id[CPU_TOPOLOGY_PACKAGE],
991 		cpu->topology_id[CPU_TOPOLOGY_CORE],
992 		cpu->topology_id[CPU_TOPOLOGY_SMT]);
993 
994 	if (gCPUCacheLevelCount > 0) {
995 		char cacheLevels[256];
996 		unsigned int offset = 0;
997 		for (i = 0; i < gCPUCacheLevelCount; i++) {
998 			offset += snprintf(cacheLevels + offset,
999 					sizeof(cacheLevels) - offset,
1000 					" L%d id %d%s", i + 1, cpu->cache_id[i],
1001 					i < gCPUCacheLevelCount - 1 ? "," : "");
1002 
1003 			if (offset >= sizeof(cacheLevels))
1004 				break;
1005 		}
1006 
1007 		dprintf("CPU %d: cache sharing:%s\n", currentCPU, cacheLevels);
1008 	}
1009 #endif
1010 }
1011 
1012 
1013 static void
1014 detect_intel_patch_level(cpu_ent* cpu)
1015 {
1016 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR) {
1017 		cpu->arch.patch_level = 0;
1018 		return;
1019 	}
1020 
1021 	x86_write_msr(IA32_MSR_UCODE_REV, 0);
1022 	cpuid_info cpuid;
1023 	get_current_cpuid(&cpuid, 1, 0);
1024 
1025 	uint64 value = x86_read_msr(IA32_MSR_UCODE_REV);
1026 	cpu->arch.patch_level = value >> 32;
1027 }
1028 
1029 
1030 static void
1031 detect_amd_patch_level(cpu_ent* cpu)
1032 {
1033 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR) {
1034 		cpu->arch.patch_level = 0;
1035 		return;
1036 	}
1037 
1038 	uint64 value = x86_read_msr(IA32_MSR_UCODE_REV);
1039 	cpu->arch.patch_level = (uint32)value;
1040 }
1041 
1042 
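/*!	Scans a blob of concatenated Intel microcode updates and returns the first
	one matching the current processor signature and platform flags that has a
	valid checksum, is 16-byte aligned and is not older than patchLevel. The
	optional extended signature table of each update is searched as well.
*/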
1043 static struct intel_microcode_header*
1044 find_microcode_intel(addr_t data, size_t size, uint32 patchLevel)
1045 {
1046 	// 9.11.3 Processor Identification
1047 	cpuid_info cpuid;
1048 	get_current_cpuid(&cpuid, 1, 0);
1049 	uint32 signature = cpuid.regs.eax;
1050 	// 9.11.4 Platform Identification
1051 	uint64 platformBits = (x86_read_msr(IA32_MSR_PLATFORM_ID) >> 50) & 0x7;
1052 	uint64 mask = 1 << platformBits;
1053 
1054 	while (size > 0) {
1055 		if (size < sizeof(struct intel_microcode_header)) {
1056 			dprintf("find_microcode_intel update is too small for header\n");
1057 			break;
1058 		}
1059 		struct intel_microcode_header* header =
1060 			(struct intel_microcode_header*)data;
1061 
1062 		uint32 totalSize = header->total_size;
1063 		uint32 dataSize = header->data_size;
1064 		if (dataSize == 0) {
1065 			dataSize = 2000;
1066 			totalSize = sizeof(struct intel_microcode_header)
1067 				+ dataSize;
1068 		}
1069 		if (totalSize > size) {
1070 			dprintf("find_microcode_intel update is too small for data\n");
1071 			break;
1072 		}
1073 
1074 		uint32* dwords = (uint32*)data;
1075 		// prepare the next update
1076 		size -= totalSize;
1077 		data += totalSize;
1078 
1079 		if (header->loader_revision != 1) {
1080 			dprintf("find_microcode_intel incorrect loader version\n");
1081 			continue;
1082 		}
1083 		// 9.11.6 The microcode update data requires a 16-byte boundary
1084 		// alignment.
1085 		if (((addr_t)header % 16) != 0) {
1086 			dprintf("find_microcode_intel incorrect alignment\n");
1087 			continue;
1088 		}
1089 		uint32 sum = 0;
1090 		for (uint32 i = 0; i < totalSize / 4; i++) {
1091 			sum += dwords[i];
1092 		}
1093 		if (sum != 0) {
1094 			dprintf("find_microcode_intel incorrect checksum\n");
1095 			continue;
1096 		}
1097 		if (patchLevel > header->update_revision) {
1098 			dprintf("find_microcode_intel update_revision is lower\n");
1099 			continue;
1100 		}
1101 		if (signature == header->processor_signature
1102 			&& (mask & header->processor_flags) != 0) {
1103 			return header;
1104 		}
1105 		if (totalSize <= (sizeof(struct intel_microcode_header) + dataSize
1106 			+ sizeof(struct intel_microcode_extended_signature_header))) {
1107 			continue;
1108 		}
1109 		struct intel_microcode_extended_signature_header* extSigHeader =
1110 			(struct intel_microcode_extended_signature_header*)((addr_t)header
1111 				+ sizeof(struct intel_microcode_header) + dataSize);
1112 		struct intel_microcode_extended_signature* extended_signature =
1113 			(struct intel_microcode_extended_signature*)((addr_t)extSigHeader
1114 				+ sizeof(struct intel_microcode_extended_signature_header));
1115 		for (uint32 i = 0; i < extSigHeader->extended_signature_count; i++) {
1116 			if (signature == extended_signature[i].processor_signature
1117 				&& (mask & extended_signature[i].processor_flags) != 0)
1118 				return header;
1119 		}
1120 	}
1121 	return NULL;
1122 }
1123 
1124 
1125 static void
1126 load_microcode_intel(int currentCPU, cpu_ent* cpu)
1127 {
1128 	// serialize for HT cores
1129 	if (currentCPU != 0)
1130 		acquire_spinlock(&sUcodeUpdateLock);
1131 	detect_intel_patch_level(cpu);
1132 	uint32 revision = cpu->arch.patch_level;
1133 	struct intel_microcode_header* update = (struct intel_microcode_header*)sLoadedUcodeUpdate;
1134 	if (update == NULL) {
1135 		update = find_microcode_intel((addr_t)sUcodeData, sUcodeDataSize,
1136 			revision);
1137 	}
1138 	if (update == NULL) {
1139 		dprintf("CPU %d: no update found\n", currentCPU);
1140 	} else if (update->update_revision != revision) {
1141 		addr_t data = (addr_t)update + sizeof(struct intel_microcode_header);
1142 		wbinvd();
1143 		x86_write_msr(IA32_MSR_UCODE_WRITE, data);
1144 		detect_intel_patch_level(cpu);
1145 		if (revision == cpu->arch.patch_level) {
1146 			dprintf("CPU %d: update failed\n", currentCPU);
1147 		} else {
1148 			if (sLoadedUcodeUpdate == NULL)
1149 				sLoadedUcodeUpdate = update;
1150 			dprintf("CPU %d: updated from revision 0x%" B_PRIx32 " to 0x%" B_PRIx32
1151 				"\n", currentCPU, revision, cpu->arch.patch_level);
1152 		}
1153 	}
1154 	if (currentCPU != 0)
1155 		release_spinlock(&sUcodeUpdateLock);
1156 }
1157 
1158 
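/*!	Walks an AMD microcode container: the equivalence table maps the CPUID
	signature to an equivalence ID, which is then matched against the
	processor_rev_id of each firmware section. Chipset-specific and older
	patches are skipped.
*/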
1159 static struct amd_microcode_header*
1160 find_microcode_amd(addr_t data, size_t size, uint32 patchLevel)
1161 {
1162 	// 9.11.3 Processor Identification
1163 	cpuid_info cpuid;
1164 	get_current_cpuid(&cpuid, 1, 0);
1165 	uint32 signature = cpuid.regs.eax;
1166 
1167 	if (size < sizeof(struct amd_container_header)) {
1168 		dprintf("find_microcode_amd update is too small for header\n");
1169 		return NULL;
1170 	}
1171 	struct amd_container_header* container = (struct amd_container_header*)data;
1172 	if (container->magic != 0x414d44) {
1173 		dprintf("find_microcode_amd update invalid magic\n");
1174 		return NULL;
1175 	}
1176 
1177 	size -= sizeof(*container);
1178 	data += sizeof(*container);
1179 
1180 	struct amd_section_header* section =
1181 		(struct amd_section_header*)data;
1182 	if (section->type != 0 || section->size == 0) {
1183 		dprintf("find_microcode_amd update first section invalid\n");
1184 		return NULL;
1185 	}
1186 
1187 	size -= sizeof(*section);
1188 	data += sizeof(*section);
1189 
1190 	amd_equiv_cpu_entry* table = (amd_equiv_cpu_entry*)data;
1191 	size -= section->size;
1192 	data += section->size;
1193 
1194 	uint16 equiv_id = 0;
1195 	for (uint32 i = 0; table[i].installed_cpu != 0; i++) {
1196 		if (signature == table[i].equiv_cpu) {
1197 			equiv_id = table[i].equiv_cpu;
1198 			dprintf("find_microcode_amd found equiv cpu: %x\n", equiv_id);
1199 			break;
1200 		}
1201 	}
1202 	if (equiv_id == 0) {
1203 		dprintf("find_microcode_amd update cpu not found in equiv table\n");
1204 		return NULL;
1205 	}
1206 
1207 	while (size > sizeof(amd_section_header)) {
1208 		struct amd_section_header* section = (struct amd_section_header*)data;
1209 		size -= sizeof(*section);
1210 		data += sizeof(*section);
1211 
1212 		if (section->type != 1 || section->size > size
1213 			|| section->size < sizeof(amd_microcode_header)) {
1214 			dprintf("find_microcode_amd update firmware section invalid\n");
1215 			return NULL;
1216 		}
1217 		struct amd_microcode_header* header = (struct amd_microcode_header*)data;
1218 		size -= section->size;
1219 		data += section->size;
1220 
1221 		if (header->processor_rev_id != equiv_id) {
1222 			dprintf("find_microcode_amd update found rev_id %x\n", header->processor_rev_id);
1223 			continue;
1224 		}
1225 		if (patchLevel >= header->patch_id) {
1226 			dprintf("find_microcode_intel update_revision is lower\n");
1227 			continue;
1228 		}
1229 		if (header->nb_dev_id != 0 || header->sb_dev_id != 0) {
1230 			dprintf("find_microcode_amd update chipset specific firmware\n");
1231 			continue;
1232 		}
1233 		if (((addr_t)header % 16) != 0) {
1234 			dprintf("find_microcode_amd incorrect alignment\n");
1235 			continue;
1236 		}
1237 
1238 		return header;
1239 	}
1240 	dprintf("find_microcode_amd no fw update found for this cpu\n");
1241 	return NULL;
1242 }
1243 
1244 
1245 static void
1246 load_microcode_amd(int currentCPU, cpu_ent* cpu)
1247 {
1248 	// serialize for HT cores
1249 	if (currentCPU != 0)
1250 		acquire_spinlock(&sUcodeUpdateLock);
1251 	detect_amd_patch_level(cpu);
1252 	uint32 revision = cpu->arch.patch_level;
1253 	struct amd_microcode_header* update = (struct amd_microcode_header*)sLoadedUcodeUpdate;
1254 	if (update == NULL) {
1255 		update = find_microcode_amd((addr_t)sUcodeData, sUcodeDataSize,
1256 			revision);
1257 	}
1258 	if (update != NULL) {
1259 		addr_t data = (addr_t)update;
1260 		wbinvd();
1261 
1262 		x86_write_msr(MSR_K8_UCODE_UPDATE, data);
1263 
1264 		detect_amd_patch_level(cpu);
1265 		if (revision == cpu->arch.patch_level) {
1266 			dprintf("CPU %d: update failed\n", currentCPU);
1267 		} else {
1268 			if (sLoadedUcodeUpdate == NULL)
1269 				sLoadedUcodeUpdate = update;
1270 			dprintf("CPU %d: updated from revision 0x%" B_PRIx32 " to 0x%" B_PRIx32
1271 				"\n", currentCPU, revision, cpu->arch.patch_level);
1272 		}
1273 
1274 	} else {
1275 		dprintf("CPU %d: no update found\n", currentCPU);
1276 	}
1277 
1278 	if (currentCPU != 0)
1279 		release_spinlock(&sUcodeUpdateLock);
1280 }
1281 
1282 
1283 static void
1284 load_microcode(int currentCPU)
1285 {
1286 	if (sUcodeData == NULL)
1287 		return;
1288 	cpu_ent* cpu = get_cpu_struct();
1289 	if ((cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR) != 0)
1290 		return;
1291 	if (cpu->arch.vendor == VENDOR_INTEL)
1292 		load_microcode_intel(currentCPU, cpu);
1293 	else if (cpu->arch.vendor == VENDOR_AMD)
1294 		load_microcode_amd(currentCPU, cpu);
1295 }
1296 
1297 
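/*!	Returns the hybrid core type from CPUID leaf 0x1a (EAX bits 31-24),
	or 0 if the CPU does not report itself as hybrid.
*/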
1298 static uint8
1299 get_hybrid_cpu_type()
1300 {
1301 	cpu_ent* cpu = get_cpu_struct();
1302 	if ((cpu->arch.feature[FEATURE_7_EDX] & IA32_FEATURE_HYBRID_CPU) == 0)
1303 		return 0;
1304 
1305 #define X86_HYBRID_CPU_TYPE_ID_SHIFT       24
1306 	cpuid_info cpuid;
1307 	get_current_cpuid(&cpuid, 0x1a, 0);
1308 	return cpuid.regs.eax >> X86_HYBRID_CPU_TYPE_ID_SHIFT;
1309 }
1310 
1311 
1312 static const char*
1313 get_hybrid_cpu_type_string(uint8 type)
1314 {
1315 	switch (type) {
1316 		case 0x20:
1317 			return "Atom";
1318 		case 0x40:
1319 			return "Core";
1320 		default:
1321 			return "";
1322 	}
1323 }
1324 
1325 
1326 static void
1327 detect_cpu(int currentCPU, bool full = true)
1328 {
1329 	cpu_ent* cpu = get_cpu_struct();
1330 	char vendorString[17];
1331 	cpuid_info cpuid;
1332 
1333 	// clear out the cpu info data
1334 	cpu->arch.vendor = VENDOR_UNKNOWN;
1335 	cpu->arch.vendor_name = "UNKNOWN VENDOR";
1336 	cpu->arch.feature[FEATURE_COMMON] = 0;
1337 	cpu->arch.feature[FEATURE_EXT] = 0;
1338 	cpu->arch.feature[FEATURE_EXT_AMD] = 0;
1339 	cpu->arch.feature[FEATURE_7_EBX] = 0;
1340 	cpu->arch.feature[FEATURE_7_ECX] = 0;
1341 	cpu->arch.feature[FEATURE_7_EDX] = 0;
1342 	cpu->arch.feature[FEATURE_D_1_EAX] = 0;
1343 	cpu->arch.model_name[0] = 0;
1344 
1345 	// print some fun data
1346 	get_current_cpuid(&cpuid, 0, 0);
1347 	uint32 maxBasicLeaf = cpuid.eax_0.max_eax;
1348 
1349 	// build the vendor string
1350 	memset(vendorString, 0, sizeof(vendorString));
1351 	memcpy(vendorString, cpuid.eax_0.vendor_id, sizeof(cpuid.eax_0.vendor_id));
1352 
1353 	// get the family, model, stepping
1354 	get_current_cpuid(&cpuid, 1, 0);
1355 	cpu->arch.type = cpuid.eax_1.type;
1356 	cpu->arch.family = cpuid.eax_1.family;
1357 	cpu->arch.extended_family = cpuid.eax_1.extended_family;
1358 	cpu->arch.model = cpuid.eax_1.model;
1359 	cpu->arch.extended_model = cpuid.eax_1.extended_model;
1360 	cpu->arch.stepping = cpuid.eax_1.stepping;
1361 	if (full) {
1362 		dprintf("CPU %d: type %d family %d extended_family %d model %d "
1363 			"extended_model %d stepping %d, string '%s'\n",
1364 			currentCPU, cpu->arch.type, cpu->arch.family,
1365 			cpu->arch.extended_family, cpu->arch.model,
1366 			cpu->arch.extended_model, cpu->arch.stepping, vendorString);
1367 	}
1368 
1369 	// figure out what vendor we have here
1370 
1371 	for (int32 i = 0; i < VENDOR_NUM; i++) {
1372 		if (vendor_info[i].ident_string[0]
1373 			&& !strcmp(vendorString, vendor_info[i].ident_string[0])) {
1374 			cpu->arch.vendor = (x86_vendors)i;
1375 			cpu->arch.vendor_name = vendor_info[i].vendor;
1376 			break;
1377 		}
1378 		if (vendor_info[i].ident_string[1]
1379 			&& !strcmp(vendorString, vendor_info[i].ident_string[1])) {
1380 			cpu->arch.vendor = (x86_vendors)i;
1381 			cpu->arch.vendor_name = vendor_info[i].vendor;
1382 			break;
1383 		}
1384 	}
1385 
1386 	// see if we can get the model name
1387 	get_current_cpuid(&cpuid, 0x80000000, 0);
1388 	uint32 maxExtendedLeaf = cpuid.eax_0.max_eax;
1389 	if (maxExtendedLeaf >= 0x80000004) {
1390 		// build the model string (need to swap ecx/edx data before copying)
1391 		unsigned int temp;
1392 		memset(cpu->arch.model_name, 0, sizeof(cpu->arch.model_name));
1393 
1394 		get_current_cpuid(&cpuid, 0x80000002, 0);
1395 		temp = cpuid.regs.edx;
1396 		cpuid.regs.edx = cpuid.regs.ecx;
1397 		cpuid.regs.ecx = temp;
1398 		memcpy(cpu->arch.model_name, cpuid.as_chars, sizeof(cpuid.as_chars));
1399 
1400 		get_current_cpuid(&cpuid, 0x80000003, 0);
1401 		temp = cpuid.regs.edx;
1402 		cpuid.regs.edx = cpuid.regs.ecx;
1403 		cpuid.regs.ecx = temp;
1404 		memcpy(cpu->arch.model_name + 16, cpuid.as_chars,
1405 			sizeof(cpuid.as_chars));
1406 
1407 		get_current_cpuid(&cpuid, 0x80000004, 0);
1408 		temp = cpuid.regs.edx;
1409 		cpuid.regs.edx = cpuid.regs.ecx;
1410 		cpuid.regs.ecx = temp;
1411 		memcpy(cpu->arch.model_name + 32, cpuid.as_chars,
1412 			sizeof(cpuid.as_chars));
1413 
1414 		// some cpus return a right-justified string
1415 		int32 i = 0;
1416 		while (cpu->arch.model_name[i] == ' ')
1417 			i++;
1418 		if (i > 0) {
1419 			memmove(cpu->arch.model_name, &cpu->arch.model_name[i],
1420 				strlen(&cpu->arch.model_name[i]) + 1);
1421 		}
1422 
1423 		if (full) {
1424 			dprintf("CPU %d: vendor '%s' model name '%s'\n",
1425 				currentCPU, cpu->arch.vendor_name, cpu->arch.model_name);
1426 		}
1427 	} else {
1428 		strlcpy(cpu->arch.model_name, "unknown", sizeof(cpu->arch.model_name));
1429 	}
1430 
1431 	// load feature bits
1432 	get_current_cpuid(&cpuid, 1, 0);
1433 	cpu->arch.feature[FEATURE_COMMON] = cpuid.eax_1.features; // edx
1434 	cpu->arch.feature[FEATURE_EXT] = cpuid.eax_1.extended_features; // ecx
1435 
1436 	if (!full)
1437 		return;
1438 
1439 	if (maxExtendedLeaf >= 0x80000001) {
1440 		get_current_cpuid(&cpuid, 0x80000001, 0);
1441 		if (cpu->arch.vendor == VENDOR_AMD)
1442 			cpu->arch.feature[FEATURE_EXT_AMD_ECX] = cpuid.regs.ecx; // ecx
1443 		cpu->arch.feature[FEATURE_EXT_AMD] = cpuid.regs.edx; // edx
1444 		if (cpu->arch.vendor != VENDOR_AMD)
1445 			cpu->arch.feature[FEATURE_EXT_AMD] &= IA32_FEATURES_INTEL_EXT;
1446 	}
1447 
1448 	if (maxBasicLeaf >= 5) {
1449 		get_current_cpuid(&cpuid, 5, 0);
1450 		cpu->arch.feature[FEATURE_5_ECX] = cpuid.regs.ecx;
1451 	}
1452 
1453 	if (maxBasicLeaf >= 6) {
1454 		get_current_cpuid(&cpuid, 6, 0);
1455 		cpu->arch.feature[FEATURE_6_EAX] = cpuid.regs.eax;
1456 		cpu->arch.feature[FEATURE_6_ECX] = cpuid.regs.ecx;
1457 	}
1458 
1459 	if (maxBasicLeaf >= 7) {
1460 		get_current_cpuid(&cpuid, 7, 0);
1461 		cpu->arch.feature[FEATURE_7_EBX] = cpuid.regs.ebx;
1462 		cpu->arch.feature[FEATURE_7_ECX] = cpuid.regs.ecx;
1463 		cpu->arch.feature[FEATURE_7_EDX] = cpuid.regs.edx;
1464 	}
1465 
1466 	if (maxBasicLeaf >= 0xd) {
1467 		get_current_cpuid(&cpuid, 0xd, 1);
1468 		cpu->arch.feature[FEATURE_D_1_EAX] = cpuid.regs.eax;
1469 	}
1470 
1471 	if (maxExtendedLeaf >= 0x80000007) {
1472 		get_current_cpuid(&cpuid, 0x80000007, 0);
1473 		cpu->arch.feature[FEATURE_EXT_7_EDX] = cpuid.regs.edx;
1474 	}
1475 
1476 	if (maxExtendedLeaf >= 0x80000008) {
1477 		get_current_cpuid(&cpuid, 0x80000008, 0);
1478 		cpu->arch.feature[FEATURE_EXT_8_EBX] = cpuid.regs.ebx;
1479 	}
1480 
1481 	detect_cpu_topology(currentCPU, cpu, maxBasicLeaf, maxExtendedLeaf);
1482 
1483 	if (cpu->arch.vendor == VENDOR_INTEL)
1484 		detect_intel_patch_level(cpu);
1485 	else if (cpu->arch.vendor == VENDOR_AMD)
1486 		detect_amd_patch_level(cpu);
1487 
1488 	cpu->arch.hybrid_type = get_hybrid_cpu_type();
1489 
1490 #if DUMP_FEATURE_STRING
1491 	dump_feature_string(currentCPU, cpu);
1492 #endif
1493 #if DUMP_CPU_PATCHLEVEL_TYPE
1494 	dprintf("CPU %d: patch_level 0x%" B_PRIx32 "%s%s\n", currentCPU,
1495 		cpu->arch.patch_level,
1496 		cpu->arch.hybrid_type != 0 ? ", hybrid type ": "",
1497 		get_hybrid_cpu_type_string(cpu->arch.hybrid_type));
1498 #endif
1499 }
1500 
1501 
1502 bool
1503 x86_check_feature(uint32 feature, enum x86_feature_type type)
1504 {
1505 	cpu_ent* cpu = get_cpu_struct();
1506 
1507 #if 0
1508 	int i;
1509 	dprintf("x86_check_feature: feature 0x%x, type %d\n", feature, type);
1510 	for (i = 0; i < FEATURE_NUM; i++) {
1511 		dprintf("features %d: 0x%x\n", i, cpu->arch.feature[i]);
1512 	}
1513 #endif
1514 
1515 	return (cpu->arch.feature[type] & feature) != 0;
1516 }
1517 
1518 
1519 bool
1520 x86_use_pat()
1521 {
1522 	return sUsePAT;
1523 }
1524 
1525 
1526 void*
1527 x86_get_double_fault_stack(int32 cpu, size_t* _size)
1528 {
1529 	*_size = kDoubleFaultStackSize;
1530 	return (void*)(sDoubleFaultStacks + kDoubleFaultStackSize * cpu);
1531 }
1532 
1533 
1534 /*!	Returns the index of the current CPU. Can only be called from the double
1535 	fault handler.
1536 */
1537 int32
1538 x86_double_fault_get_cpu(void)
1539 {
1540 	addr_t stack = x86_get_stack_frame();
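	// The per-CPU double fault stacks are allocated back to back starting at
	// sDoubleFaultStacks, kDoubleFaultStackSize bytes each, so the stack
	// pointer's offset from the base divided by the stack size is the CPU
	// index. For example, a stack pointer somewhere in the third 4 KiB slot
	// yields (stack - sDoubleFaultStacks) / 4096 == 2, i.e. CPU 2.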
1541 	return (stack - sDoubleFaultStacks) / kDoubleFaultStackSize;
1542 }
1543 
1544 
1545 //	#pragma mark -
1546 
1547 
1548 status_t
1549 arch_cpu_preboot_init_percpu(kernel_args* args, int cpu)
1550 {
1551 	if (cpu == 0) {
1552 		// We can't allocate pages at this stage in the boot process, only virtual addresses.
1553 		sDoubleFaultStacks = vm_allocate_early(args,
1554 			kDoubleFaultStackSize * smp_get_num_cpus(), 0, 0, 0);
1555 	}
1556 
1557 	// On SMP systems we want to synchronize the CPUs' TSCs, so system_time()
1558 	// will return consistent values.
1559 	if (smp_get_num_cpus() > 1) {
1560 		// let the first CPU prepare the rendezvous point
1561 		if (cpu == 0)
1562 			sTSCSyncRendezvous = smp_get_num_cpus() - 1;
1563 
1564 		// One CPU after the other will drop out of this loop and be caught by
1565 		// the loop below, until the last CPU (0) gets there. Save for +/- a few
1566 		// cycles the CPUs should pass the second loop at the same time.
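		// Illustrative walkthrough with 4 CPUs: CPU 0 sets the rendezvous
		// value to 3, so CPU 3 leaves the first loop immediately and writes
		// 2, releasing CPU 2, and so on down to CPU 0, which finally writes
		// -1 and thereby releases all CPUs from the second loop at (nearly)
		// the same moment, right before they reset their TSCs below.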
1567 		while (sTSCSyncRendezvous != cpu) {
1568 		}
1569 
1570 		sTSCSyncRendezvous = cpu - 1;
1571 
1572 		while (sTSCSyncRendezvous != -1) {
1573 		}
1574 
1575 		// reset TSC to 0
1576 		x86_write_msr(IA32_MSR_TSC, 0);
1577 	}
1578 
1579 	x86_descriptors_preboot_init_percpu(args, cpu);
1580 
1581 	return B_OK;
1582 }
1583 
1584 
1585 static void
1586 halt_idle(void)
1587 {
1588 	asm("hlt");
1589 }
1590 
1591 
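/*!	Idle routine for AMD CPUs that support C1E but lack an always-running
	APIC timer (see detect_amdc1e_noarat() below). Presumably the concern is
	that entering C1E from HLT can stop the local APIC timer on such parts,
	so the SMI/C1E-on-compare-halt bits are cleared before halting.
*/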
1592 static void
1593 amdc1e_noarat_idle(void)
1594 {
1595 	uint64 msr = x86_read_msr(K8_MSR_IPM);
1596 	if (msr & K8_CMPHALT)
1597 		x86_write_msr(K8_MSR_IPM, msr & ~K8_CMPHALT);
1598 	halt_idle();
1599 }
1600 
1601 
1602 static bool
1603 detect_amdc1e_noarat()
1604 {
1605 	cpu_ent* cpu = get_cpu_struct();
1606 
1607 	if (cpu->arch.vendor != VENDOR_AMD)
1608 		return false;
1609 
1610 	// Family 0x12 and higher processors support ARAT
1611 	// Families lower than 0xf don't support C1E
1612 	// Family 0xf processors with model <= 0x40 don't support C1E
1613 	uint32 family = cpu->arch.family + cpu->arch.extended_family;
1614 	uint32 model = (cpu->arch.extended_model << 4) | cpu->arch.model;
1615 	return (family < 0x12 && family > 0xf) || (family == 0xf && model > 0x40);
1616 }
1617 
1618 
1619 static void
1620 init_tsc_with_cpuid(kernel_args* args, uint32* conversionFactor)
1621 {
1622 	cpu_ent* cpu = get_cpu_struct();
1623 	if (cpu->arch.vendor != VENDOR_INTEL)
1624 		return;
1625 
1626 	uint32 model = (cpu->arch.extended_model << 4) | cpu->arch.model;
1627 	cpuid_info cpuid;
1628 	get_current_cpuid(&cpuid, 0, 0);
1629 	uint32 maxBasicLeaf = cpuid.eax_0.max_eax;
1630 	if (maxBasicLeaf < 0x15)
1631 		return;
1632 
1633 	get_current_cpuid(&cpuid, 0x15, 0);
1634 	if (cpuid.regs.eax == 0 || cpuid.regs.ebx == 0)
1635 		return;
1636 	uint32 khz = cpuid.regs.ecx / 1000;
1637 	uint32 denominator = cpuid.regs.eax;
1638 	uint32 numerator = cpuid.regs.ebx;
1639 	if (khz == 0 && model == 0x5f) {
1640 		// CPUID leaf 0x16 isn't supported, hardcode the 25 MHz crystal clock
1641 		khz = 25000;
1642 	}
1643 
1644 	if (khz == 0 && maxBasicLeaf >= 0x16) {
1645 		// for these CPUs the base frequency is also the tsc frequency
1646 		get_current_cpuid(&cpuid, 0x16, 0);
1647 		khz = cpuid.regs.eax * 1000 * denominator / numerator;
1648 	}
1649 	if (khz == 0)
1650 		return;
1651 
1652 	dprintf("CPU: using TSC frequency from CPUID\n");
1653 	// compute for microseconds as follows (1000000 << 32) / (tsc freq in Hz),
1654 	// or (1000 << 32) / (tsc freq in kHz)
1655 	*conversionFactor = (1000ULL << 32) / (khz * numerator / denominator);
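	// Illustrative example with assumed CPUID 0x15 values: a 24 MHz crystal
	// (ECX = 24000000) and a ratio of EBX/EAX = 88/2 give a TSC frequency of
	// 24000 * 88 / 2 = 1056000 kHz, so the factor becomes
	// (1000 << 32) / 1056000 ~= 4067204; multiplying a TSC delta by this and
	// shifting right by 32 bits then yields microseconds.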
1656 	// overwrite the bootloader value
1657 	args->arch_args.system_time_cv_factor = *conversionFactor;
1658 }
1659 
1660 
1661 static void
1662 init_tsc_with_msr(kernel_args* args, uint32* conversionFactor)
1663 {
1664 	cpu_ent* cpuEnt = get_cpu_struct();
1665 	if (cpuEnt->arch.vendor != VENDOR_AMD)
1666 		return;
1667 
1668 	uint32 family = cpuEnt->arch.family + cpuEnt->arch.extended_family;
1669 	if (family < 0x10)
1670 		return;
1671 	uint64 value = x86_read_msr(MSR_F10H_HWCR);
1672 	if ((value & HWCR_TSCFREQSEL) == 0)
1673 		return;
1674 
1675 	value = x86_read_msr(MSR_F10H_PSTATEDEF(0));
1676 	if ((value & PSTATEDEF_EN) == 0)
1677 		return;
1678 	if (family != 0x17 && family != 0x19)
1679 		return;
1680 
1681 	uint64 khz = 200 * 1000;
1682 	uint32 denominator = (value >> 8) & 0x3f;
1683 	if (denominator < 0x8 || denominator > 0x2c)
1684 		return;
1685 	if (denominator > 0x1a && (denominator % 2) == 1)
1686 		return;
1687 	uint32 numerator = value & 0xff;
1688 	if (numerator < 0x10)
1689 		return;
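	// Per AMD's P-state definition for families 0x17/0x19 (checked above),
	// the core frequency is 200 MHz * CpuFid[7:0] / CpuDfsId[13:8]. As an
	// assumed example, CpuFid 0x78 (120) with CpuDfsId 0x8 gives
	// 200000 * 120 / 8 = 3000000 kHz, i.e. a 3 GHz TSC.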
1690 
1691 	dprintf("CPU: using TSC frequency from MSR %" B_PRIu64 "\n", khz * numerator / denominator);
1692 	// compute for microseconds as follows (1000000 << 32) / (tsc freq in Hz),
1693 	// or (1000 << 32) / (tsc freq in kHz)
1694 	*conversionFactor = (1000ULL << 32) / (khz * numerator / denominator);
1695 	// overwrite the bootloader value
1696 	args->arch_args.system_time_cv_factor = *conversionFactor;
1697 }
1698 
1699 
1700 static void
1701 init_tsc(kernel_args* args)
1702 {
1703 	// init the TSC -> system_time() conversion factors
1704 
1705 	// try to find the TSC frequency with CPUID
1706 	uint32 conversionFactor = args->arch_args.system_time_cv_factor;
1707 	init_tsc_with_cpuid(args, &conversionFactor);
1708 	init_tsc_with_msr(args, &conversionFactor);
1709 	uint64 conversionFactorNsecs = (uint64)conversionFactor * 1000;
1710 
1711 #ifdef __x86_64__
1712 	// The x86_64 system_time() implementation uses 64-bit multiplication and
1713 	// therefore shifting is not necessary for low frequencies (it's also not
1714 	// too likely that there'll be any x86_64 CPUs clocked under 1GHz).
1715 	__x86_setup_system_time((uint64)conversionFactor << 32,
1716 		conversionFactorNsecs);
1717 #else
1718 	if (conversionFactorNsecs >> 32 != 0) {
1719 		// the TSC frequency is < 1 GHz, which forces us to shift the factor
1720 		__x86_setup_system_time(conversionFactor, conversionFactorNsecs >> 16,
1721 			true);
1722 	} else {
1723 		// the TSC frequency is >= 1 GHz
1724 		__x86_setup_system_time(conversionFactor, conversionFactorNsecs, false);
1725 	}
1726 #endif
1727 }
1728 
1729 
1730 status_t
1731 arch_cpu_init_percpu(kernel_args* args, int cpu)
1732 {
1733 	detect_cpu(cpu, false);
1734 	load_microcode(cpu);
1735 	detect_cpu(cpu);
1736 
1737 	if (cpu == 0)
1738 		init_tsc(args);
1739 
1740 	if (!gCpuIdleFunc) {
1741 		if (detect_amdc1e_noarat())
1742 			gCpuIdleFunc = amdc1e_noarat_idle;
1743 		else
1744 			gCpuIdleFunc = halt_idle;
1745 	}
1746 
1747 	if (x86_check_feature(IA32_FEATURE_MCE, FEATURE_COMMON))
1748 		x86_write_cr4(x86_read_cr4() | IA32_CR4_MCE);
1749 
1750 	cpu_ent* cpuEnt = get_cpu_struct();
1751 	if (cpu == 0) {
1752 		bool supportsPAT = x86_check_feature(IA32_FEATURE_PAT, FEATURE_COMMON);
1753 
1754 		// Pentium II Errata A52 and Pentium III Errata E27 say the upper four
1755 		// entries of the PAT are not usable as the PAT bit is ignored for 4K
1756 		// PTEs. Pentium 4 Errata N46 says the PAT bit can be assumed 0 in some
1757 		// specific cases. To avoid issues, disable PAT on such CPUs.
1758 		bool brokenPAT = cpuEnt->arch.vendor == VENDOR_INTEL
1759 			&& cpuEnt->arch.extended_family == 0
1760 			&& cpuEnt->arch.extended_model == 0
1761 			&& ((cpuEnt->arch.family == 6 && cpuEnt->arch.model <= 13)
1762 				|| (cpuEnt->arch.family == 15 && cpuEnt->arch.model <= 6));
1763 
1764 		sUsePAT = supportsPAT && !brokenPAT
1765 			&& !get_safemode_boolean_early(args, B_SAFEMODE_DISABLE_PAT, false);
1766 
1767 		if (sUsePAT) {
1768 			dprintf("using PAT for memory type configuration\n");
1769 		} else {
1770 			dprintf("not using PAT for memory type configuration (%s)\n",
1771 				supportsPAT ? (brokenPAT ? "broken" : "disabled")
1772 					: "unsupported");
1773 		}
1774 	}
1775 
1776 	if (sUsePAT)
1777 		init_pat(cpu);
1778 
1779 #ifdef __x86_64__
1780 	// if RDTSCP or RDPID is available, write the CPU number into TSC_AUX
1781 	if (x86_check_feature(IA32_FEATURE_AMD_EXT_RDTSCP, FEATURE_EXT_AMD)
1782 		|| x86_check_feature(IA32_FEATURE_RDPID, FEATURE_7_ECX)) {
1783 		x86_write_msr(IA32_MSR_TSC_AUX, cpu);
1784 	}
1785 
1786 	// make LFENCE a dispatch serializing instruction on AMD 64bit
1787 	if (cpuEnt->arch.vendor == VENDOR_AMD) {
1788 		uint32 family = cpuEnt->arch.family + cpuEnt->arch.extended_family;
1789 		if (family >= 0x10 && family != 0x11) {
1790 			uint64 value = x86_read_msr(MSR_F10H_DE_CFG);
1791 			if ((value & DE_CFG_SERIALIZE_LFENCE) == 0)
1792 				x86_write_msr(MSR_F10H_DE_CFG, value | DE_CFG_SERIALIZE_LFENCE);
1793 		}
1794 	}
1795 #endif
1796 
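	// If hardware coordination feedback (APERF/MPERF) is available, snapshot
	// the counters so the effective frequency can later be estimated
	// (elsewhere) as roughly base frequency * delta(APERF) / delta(MPERF):
	// MPERF ticks at a fixed reference rate in C0 while APERF follows the
	// actual clock.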
1797 	if (x86_check_feature(IA32_FEATURE_APERFMPERF, FEATURE_6_ECX)) {
1798 		gCPU[cpu].arch.mperf_prev = x86_read_msr(IA32_MSR_MPERF);
1799 		gCPU[cpu].arch.aperf_prev = x86_read_msr(IA32_MSR_APERF);
1800 		gCPU[cpu].arch.frequency = 0;
1801 		gCPU[cpu].arch.perf_timestamp = 0;
1802 	}
1803 	return __x86_patch_errata_percpu(cpu);
1804 }
1805 
1806 
1807 status_t
1808 arch_cpu_init(kernel_args* args)
1809 {
1810 	if (args->ucode_data != NULL
1811 		&& args->ucode_data_size > 0) {
1812 		sUcodeData = args->ucode_data;
1813 		sUcodeDataSize = args->ucode_data_size;
1814 	} else {
1815 		dprintf("CPU: no microcode provided\n");
1816 	}
1817 
1818 	// Initialize descriptor tables.
1819 	x86_descriptors_init(args);
1820 
1821 	return B_OK;
1822 }
1823 
1824 
1825 #ifdef __x86_64__
1826 static void
1827 enable_smap(void* dummy, int cpu)
1828 {
1829 	x86_write_cr4(x86_read_cr4() | IA32_CR4_SMAP);
1830 }
1831 
1832 
1833 static void
1834 enable_smep(void* dummy, int cpu)
1835 {
1836 	x86_write_cr4(x86_read_cr4() | IA32_CR4_SMEP);
1837 }
1838 
1839 
1840 static void
1841 enable_osxsave(void* dummy, int cpu)
1842 {
1843 	x86_write_cr4(x86_read_cr4() | IA32_CR4_OSXSAVE);
1844 }
1845 
1846 
1847 static void
1848 enable_xsavemask(void* dummy, int cpu)
1849 {
1850 	xsetbv(0, gXsaveMask);
1851 }
1852 #endif
1853 
1854 
1855 status_t
1856 arch_cpu_init_post_vm(kernel_args* args)
1857 {
1858 	// allocate the area for the double fault stacks
1859 	area_id stacks = create_area("double fault stacks",
1860 		(void**)&sDoubleFaultStacks, B_EXACT_ADDRESS,
1861 		kDoubleFaultStackSize * smp_get_num_cpus(),
1862 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1863 	if (stacks < B_OK)
1864 		panic("failed to create double fault stacks area: %" B_PRId32, stacks);
1865 
1866 	X86PagingStructures* kernelPagingStructures
1867 		= static_cast<X86VMTranslationMap*>(
1868 			VMAddressSpace::Kernel()->TranslationMap())->PagingStructures();
1869 
1870 	// Set active translation map on each CPU.
1871 	for (uint32 i = 0; i < args->num_cpus; i++) {
1872 		gCPU[i].arch.active_paging_structures = kernelPagingStructures;
1873 		kernelPagingStructures->AddReference();
1874 	}
1875 
1876 	if (!apic_available())
1877 		x86_init_fpu();
1878 	// else fpu gets set up in smp code
1879 
1880 #ifdef __x86_64__
1881 	// if available, enable SMEP (Supervisor Mode Execution Prevention)
1882 	if (x86_check_feature(IA32_FEATURE_SMEP, FEATURE_7_EBX)) {
1883 		if (!get_safemode_boolean(B_SAFEMODE_DISABLE_SMEP_SMAP, false)) {
1884 			dprintf("enable SMEP\n");
1885 			call_all_cpus_sync(&enable_smep, NULL);
1886 		} else
1887 			dprintf("SMEP disabled per safemode setting\n");
1888 	}
1889 
1890 	// if available, enable SMAP (Supervisor Mode Access Prevention)
1891 	if (x86_check_feature(IA32_FEATURE_SMAP, FEATURE_7_EBX)) {
1892 		if (!get_safemode_boolean(B_SAFEMODE_DISABLE_SMEP_SMAP, false)) {
1893 			dprintf("enable SMAP\n");
1894 			call_all_cpus_sync(&enable_smap, NULL);
1895 
1896 			arch_altcodepatch_replace(ALTCODEPATCH_TAG_STAC, &_stac, 3);
1897 			arch_altcodepatch_replace(ALTCODEPATCH_TAG_CLAC, &_clac, 3);
1898 		} else
1899 			dprintf("SMAP disabled per safemode setting\n");
1900 	}
1901 
1902 	// if available, enable XSAVE (XSAVE and extended states)
1903 	gHasXsave = x86_check_feature(IA32_FEATURE_EXT_XSAVE, FEATURE_EXT);
1904 	if (gHasXsave) {
1905 		gHasXsavec = x86_check_feature(IA32_FEATURE_XSAVEC,
1906 			FEATURE_D_1_EAX);
1907 
1908 		call_all_cpus_sync(&enable_osxsave, NULL);
1909 		gXsaveMask = IA32_XCR0_X87 | IA32_XCR0_SSE;
1910 		cpuid_info cpuid;
1911 		get_current_cpuid(&cpuid, 0xd, 0);
1912 		gXsaveMask |= (cpuid.regs.eax & IA32_XCR0_AVX);
1913 		call_all_cpus_sync(&enable_xsavemask, NULL);
1914 		get_current_cpuid(&cpuid, 0xd, 0);
1915 		gFPUSaveLength = cpuid.regs.ebx;
1916 		if (gFPUSaveLength > sizeof(((struct arch_thread *)0)->fpu_state))
1917 			gFPUSaveLength = 832;
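		// gXsaveMask now holds x87 | SSE plus AVX when CPUID leaf 0xd
		// advertises it; with XCR0 set accordingly, CPUID.(0xd, 0):EBX
		// reports the save area size the enabled features need, clamped
		// above to the per-thread fpu_state buffer.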
1918 
1919 		arch_altcodepatch_replace(ALTCODEPATCH_TAG_XSAVE,
1920 			gHasXsavec ? &_xsavec : &_xsave, 4);
1921 		arch_altcodepatch_replace(ALTCODEPATCH_TAG_XRSTOR,
1922 			&_xrstor, 4);
1923 
1924 		dprintf("enable %s 0x%" B_PRIx64 " %" B_PRId64 "\n",
1925 			gHasXsavec ? "XSAVEC" : "XSAVE", gXsaveMask, gFPUSaveLength);
1926 	}
1927 #endif
1928 
1929 	return B_OK;
1930 }
1931 
1932 
1933 status_t
1934 arch_cpu_init_post_modules(kernel_args* args)
1935 {
1936 	// initialize CPU module
1937 
1938 	void* cookie = open_module_list("cpu");
1939 
1940 	while (true) {
1941 		char name[B_FILE_NAME_LENGTH];
1942 		size_t nameLength = sizeof(name);
1943 
1944 		if (read_next_module_name(cookie, name, &nameLength) != B_OK
1945 			|| get_module(name, (module_info**)&sCpuModule) == B_OK)
1946 			break;
1947 	}
1948 
1949 	close_module_list(cookie);
1950 
1951 	// initialize MTRRs if available
1952 	if (x86_count_mtrrs() > 0) {
1953 		sCpuRendezvous = sCpuRendezvous2 = 0;
1954 		call_all_cpus(&init_mtrrs, NULL);
1955 	}
1956 
1957 	size_t threadExitLen = (addr_t)x86_end_userspace_thread_exit
1958 		- (addr_t)x86_userspace_thread_exit;
1959 	addr_t threadExitPosition = fill_commpage_entry(
1960 		COMMPAGE_ENTRY_X86_THREAD_EXIT, (const void*)x86_userspace_thread_exit,
1961 		threadExitLen);
1962 
1963 	// add the functions to the commpage image
1964 	image_id image = get_commpage_image();
1965 
1966 	elf_add_memory_image_symbol(image, "commpage_thread_exit",
1967 		threadExitPosition, threadExitLen, B_SYMBOL_TYPE_TEXT);
1968 
1969 	return B_OK;
1970 }
1971 
1972 
1973 void
1974 arch_cpu_user_TLB_invalidate(void)
1975 {
1976 	x86_write_cr3(x86_read_cr3());
1977 }
1978 
1979 
1980 void
1981 arch_cpu_global_TLB_invalidate(void)
1982 {
1983 	uint32 flags = x86_read_cr4();
1984 
1985 	if (flags & IA32_CR4_GLOBAL_PAGES) {
1986 		// disable and reenable the global pages to flush all TLBs regardless
1987 		// of the global page bit
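		// (a plain CR3 reload, as done in the fallback below, leaves
		// TLB entries for global pages intact)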
1988 		x86_write_cr4(flags & ~IA32_CR4_GLOBAL_PAGES);
1989 		x86_write_cr4(flags | IA32_CR4_GLOBAL_PAGES);
1990 	} else {
1991 		cpu_status state = disable_interrupts();
1992 		arch_cpu_user_TLB_invalidate();
1993 		restore_interrupts(state);
1994 	}
1995 }
1996 
1997 
1998 void
1999 arch_cpu_invalidate_TLB_range(addr_t start, addr_t end)
2000 {
2001 	int32 num_pages = end / B_PAGE_SIZE - start / B_PAGE_SIZE;
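	// Flush every page containing addresses in [start, end] with one
	// INVLPG each; note that "num_pages-- >= 0" runs num_pages + 1 times,
	// so the page containing "end" is invalidated as well.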
2002 	while (num_pages-- >= 0) {
2003 		invalidate_TLB(start);
2004 		start += B_PAGE_SIZE;
2005 	}
2006 }
2007 
2008 
2009 void
2010 arch_cpu_invalidate_TLB_list(addr_t pages[], int num_pages)
2011 {
2012 	int i;
2013 	for (i = 0; i < num_pages; i++) {
2014 		invalidate_TLB(pages[i]);
2015 	}
2016 }
2017 
2018 
2019 status_t
2020 arch_cpu_shutdown(bool rebootSystem)
2021 {
2022 	if (acpi_shutdown(rebootSystem) == B_OK)
2023 		return B_OK;
2024 
2025 	if (!rebootSystem) {
2026 #ifndef __x86_64__
2027 		return apm_shutdown();
2028 #else
2029 		return B_NOT_SUPPORTED;
2030 #endif
2031 	}
2032 
2033 	cpu_status state = disable_interrupts();
2034 
2035 	// try to reset the system using the keyboard controller
2036 	out8(0xfe, 0x64);
2037 
2038 	// Give the controller some time to do its job (0.5s)
2039 	snooze(500000);
2040 
2041 	// if that didn't help, try it this way
2042 	x86_reboot();
2043 
2044 	restore_interrupts(state);
2045 	return B_ERROR;
2046 }
2047 
2048 
2049 void
2050 arch_cpu_sync_icache(void* address, size_t length)
2051 {
2052 	// instruction cache is always consistent on x86
2053 }
2054 
2055