xref: /haiku/src/system/kernel/arch/x86/arch_cpu.cpp (revision 776c58b2b56d8bcf33638a2ecb6c697f95a1cbf3)
1 /*
2  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
3  * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <cpu.h>
12 
13 #include <string.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 
17 #include <ACPI.h>
18 
19 #include <boot_device.h>
20 #include <commpage.h>
21 #include <debug.h>
22 #include <elf.h>
23 #include <smp.h>
24 #include <vm/vm.h>
25 #include <vm/vm_types.h>
26 #include <vm/VMAddressSpace.h>
27 
28 #include <arch_system_info.h>
29 #include <arch/x86/apic.h>
30 #include <boot/kernel_args.h>
31 
32 #include "paging/X86PagingStructures.h"
33 #include "paging/X86VMTranslationMap.h"
34 
35 
36 #define DUMP_FEATURE_STRING 1
37 
38 
39 /* cpu vendor info */
40 struct cpu_vendor_info {
41 	const char *vendor;
42 	const char *ident_string[2];
43 };
44 
45 static const struct cpu_vendor_info vendor_info[VENDOR_NUM] = {
46 	{ "Intel", { "GenuineIntel" } },
47 	{ "AMD", { "AuthenticAMD" } },
48 	{ "Cyrix", { "CyrixInstead" } },
49 	{ "UMC", { "UMC UMC UMC" } },
50 	{ "NexGen", { "NexGenDriven" } },
51 	{ "Centaur", { "CentaurHauls" } },
52 	{ "Rise", { "RiseRiseRise" } },
53 	{ "Transmeta", { "GenuineTMx86", "TransmetaCPU" } },
54 	{ "NSC", { "Geode by NSC" } },
55 };
56 
57 #define CR0_CACHE_DISABLE		(1UL << 30)
58 #define CR0_NOT_WRITE_THROUGH	(1UL << 29)
59 #define CR0_FPU_EMULATION		(1UL << 2)
60 #define CR0_MONITOR_FPU			(1UL << 1)
61 
62 #define CR4_OS_FXSR				(1UL << 9)
63 #define CR4_OS_XMM_EXCEPTION	(1UL << 10)
64 
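// bits in the AMD "Interrupt Pending and CMP-Halt" MSR (K8_MSR_IPM)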
65 #define K8_SMIONCMPHALT			(1ULL << 27)
66 #define K8_C1EONCMPHALT			(1ULL << 28)
67 
68 #define K8_CMPHALT				(K8_SMIONCMPHALT | K8_C1EONCMPHALT)
69 
70 /*
71  * A value of 0 favors highest performance, while 15 corresponds to maximum
72  * energy savings; 7 balances performance against energy savings. Refer to
73  * Section 14.3.4 of the "Intel 64 and IA-32 Architectures Software
74  * Developer's Manual", Volume 3, for details.
75  */
76 #define ENERGY_PERF_BIAS_PERFORMANCE	0
77 #define ENERGY_PERF_BIAS_BALANCE		7
78 #define ENERGY_PERF_BIAS_POWERSAVE		15
79 
80 struct set_mtrr_parameter {
81 	int32	index;
82 	uint64	base;
83 	uint64	length;
84 	uint8	type;
85 };
86 
87 struct set_mtrrs_parameter {
88 	const x86_mtrr_info*	infos;
89 	uint32					count;
90 	uint8					defaultType;
91 };
92 
93 
94 extern "C" void x86_reboot(void);
95 	// from arch.S
96 
97 void (*gCpuIdleFunc)(void);
98 void (*gX86SwapFPUFunc)(void* oldState, const void* newState) = x86_noop_swap;
99 bool gHasSSE = false;
100 
101 static uint32 sCpuRendezvous;
102 static uint32 sCpuRendezvous2;
103 static uint32 sCpuRendezvous3;
104 static vint32 sTSCSyncRendezvous;
105 
106 /* Per-CPU stacks for the double fault handler */
107 static uint8* sDoubleFaultStacks;
108 static const size_t kDoubleFaultStackSize = 4096;	// size per CPU
109 
110 static x86_cpu_module_info* sCpuModule;
111 
112 
113 extern "C" void memcpy_generic(void* dest, const void* source, size_t count);
114 extern int memcpy_generic_end;
115 extern "C" void memset_generic(void* dest, int value, size_t count);
116 extern int memset_generic_end;
117 
118 x86_optimized_functions gOptimizedFunctions = {
119 	memcpy_generic,
120 	&memcpy_generic_end,
121 	memset_generic,
122 	&memset_generic_end
123 };
124 
125 
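/*!	Shuts down or reboots the system via ACPI. Returns B_ERROR when called
	from the kernel debugger or with interrupts disabled, and B_NOT_SUPPORTED
	when no ACPI module is available.
*/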
126 static status_t
127 acpi_shutdown(bool rebootSystem)
128 {
129 	if (debug_debugger_running() || !are_interrupts_enabled())
130 		return B_ERROR;
131 
132 	acpi_module_info* acpi;
133 	if (get_module(B_ACPI_MODULE_NAME, (module_info**)&acpi) != B_OK)
134 		return B_NOT_SUPPORTED;
135 
136 	status_t status;
137 	if (rebootSystem) {
138 		status = acpi->reboot();
139 	} else {
140 		// Make sure we run on the boot CPU (apparently needed for some ACPI
141 		// implementations)
142 		_user_set_cpu_enabled(0, true);
143 		for (int32 cpu = 1; cpu < smp_get_num_cpus(); cpu++) {
144 			_user_set_cpu_enabled(cpu, false);
145 		}
146 		// TODO: must not be called from the idle thread!
147 		thread_yield(true);
148 
149 		status = acpi->prepare_sleep_state(ACPI_POWER_STATE_OFF, NULL, 0);
150 		if (status == B_OK) {
151 			//cpu_status state = disable_interrupts();
152 			status = acpi->enter_sleep_state(ACPI_POWER_STATE_OFF);
153 			//restore_interrupts(state);
154 		}
155 	}
156 
157 	put_module(B_ACPI_MODULE_NAME);
158 	return status;
159 }
160 
161 
162 /*!	Disable CPU caches, and invalidate them. */
163 static void
164 disable_caches()
165 {
166 	x86_write_cr0((x86_read_cr0() | CR0_CACHE_DISABLE)
167 		& ~CR0_NOT_WRITE_THROUGH);
168 	wbinvd();
169 	arch_cpu_global_TLB_invalidate();
170 }
171 
172 
173 /*!	Invalidate CPU caches, and enable them. */
174 static void
175 enable_caches()
176 {
177 	wbinvd();
178 	arch_cpu_global_TLB_invalidate();
179 	x86_write_cr0(x86_read_cr0()
180 		& ~(CR0_CACHE_DISABLE | CR0_NOT_WRITE_THROUGH));
181 }
182 
183 
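/*!	Runs on every CPU via call_all_cpus(): the CPUs rendezvous, disable their
	caches, program the given MTRR through the CPU module, re-enable the
	caches, and rendezvous again before returning.
*/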
184 static void
185 set_mtrr(void* _parameter, int cpu)
186 {
187 	struct set_mtrr_parameter* parameter
188 		= (struct set_mtrr_parameter*)_parameter;
189 
190 	// wait until all CPUs have arrived here
191 	smp_cpu_rendezvous(&sCpuRendezvous, cpu);
192 
193 	// One CPU has to reset sCpuRendezvous3 -- this prevents the CPU that
194 	// initiated the call_all_cpus() from clearing sCpuRendezvous2 for a
195 	// subsequent call before the last CPU has actually left the loop in
196 	// smp_cpu_rendezvous().
197 	if (cpu == 0)
198 		atomic_set((vint32*)&sCpuRendezvous3, 0);
199 
200 	disable_caches();
201 
202 	sCpuModule->set_mtrr(parameter->index, parameter->base, parameter->length,
203 		parameter->type);
204 
205 	enable_caches();
206 
207 	// wait until all CPUs have arrived here
208 	smp_cpu_rendezvous(&sCpuRendezvous2, cpu);
209 	smp_cpu_rendezvous(&sCpuRendezvous3, cpu);
210 }
211 
212 
213 static void
214 set_mtrrs(void* _parameter, int cpu)
215 {
216 	set_mtrrs_parameter* parameter = (set_mtrrs_parameter*)_parameter;
217 
218 	// wait until all CPUs have arrived here
219 	smp_cpu_rendezvous(&sCpuRendezvous, cpu);
220 
221 	// One CPU has to reset sCpuRendezvous3 -- this prevents the CPU that
222 	// initiated the call_all_cpus() from clearing sCpuRendezvous2 for a
223 	// subsequent call before the last CPU has actually left the loop in
224 	// smp_cpu_rendezvous().
225 	if (cpu == 0)
226 		atomic_set((vint32*)&sCpuRendezvous3, 0);
227 
228 	disable_caches();
229 
230 	sCpuModule->set_mtrrs(parameter->defaultType, parameter->infos,
231 		parameter->count);
232 
233 	enable_caches();
234 
235 	// wait until all CPUs have arrived here
236 	smp_cpu_rendezvous(&sCpuRendezvous2, cpu);
237 	smp_cpu_rendezvous(&sCpuRendezvous3, cpu);
238 }
239 
240 
241 static void
242 init_mtrrs(void* _unused, int cpu)
243 {
244 	// wait until all CPUs have arrived here
245 	smp_cpu_rendezvous(&sCpuRendezvous, cpu);
246 
247 	// One CPU has to reset sCpuRendezvous3 -- this prevents the CPU that
248 	// initiated the call_all_cpus() from clearing sCpuRendezvous2 for a
249 	// subsequent call before the last CPU has actually left the loop in
250 	// smp_cpu_rendezvous().
251 	if (cpu == 0)
252 		atomic_set((vint32*)&sCpuRendezvous3, 0);
253 
254 	disable_caches();
255 
256 	sCpuModule->init_mtrrs();
257 
258 	enable_caches();
259 
260 	// wait until all CPUs have arrived here
261 	smp_cpu_rendezvous(&sCpuRendezvous2, cpu);
262 	smp_cpu_rendezvous(&sCpuRendezvous3, cpu);
263 }
264 
265 
266 uint32
267 x86_count_mtrrs(void)
268 {
269 	if (sCpuModule == NULL)
270 		return 0;
271 
272 	return sCpuModule->count_mtrrs();
273 }
274 
275 
276 void
277 x86_set_mtrr(uint32 index, uint64 base, uint64 length, uint8 type)
278 {
279 	struct set_mtrr_parameter parameter;
280 	parameter.index = index;
281 	parameter.base = base;
282 	parameter.length = length;
283 	parameter.type = type;
284 
285 	sCpuRendezvous = sCpuRendezvous2 = 0;
286 	call_all_cpus(&set_mtrr, &parameter);
287 }
288 
289 
290 status_t
291 x86_get_mtrr(uint32 index, uint64* _base, uint64* _length, uint8* _type)
292 {
293 	// the MTRRs are identical on all CPUs, so it doesn't matter
294 	// on which CPU this runs
295 	return sCpuModule->get_mtrr(index, _base, _length, _type);
296 }
297 
298 
299 void
300 x86_set_mtrrs(uint8 defaultType, const x86_mtrr_info* infos, uint32 count)
301 {
302 	if (sCpuModule == NULL)
303 		return;
304 
305 	struct set_mtrrs_parameter parameter;
306 	parameter.defaultType = defaultType;
307 	parameter.infos = infos;
308 	parameter.count = count;
309 
310 	sCpuRendezvous = sCpuRendezvous2 = 0;
311 	call_all_cpus(&set_mtrrs, &parameter);
312 }
313 
314 
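/*!	Sets up FPU/SSE support for the current CPU: enables FXSAVE/FXRSTOR and
	XMM exceptions when SSE is available and selects the matching FPU state
	swap routine.
*/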
315 void
316 x86_init_fpu(void)
317 {
318 	// All x86_64 CPUs support SSE, so there is no need to check for it.
319 #ifndef __x86_64__
320 	if (!x86_check_feature(IA32_FEATURE_FPU, FEATURE_COMMON)) {
321 		// No FPU... time to install one in your 386?
322 		dprintf("%s: Warning: CPU has no reported FPU.\n", __func__);
323 		gX86SwapFPUFunc = x86_noop_swap;
324 		return;
325 	}
326 
327 	if (!x86_check_feature(IA32_FEATURE_SSE, FEATURE_COMMON)
328 		|| !x86_check_feature(IA32_FEATURE_FXSR, FEATURE_COMMON)) {
329 		dprintf("%s: CPU has no SSE... just enabling FPU.\n", __func__);
330 		// we don't have proper SSE support, just enable FPU
331 		x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
332 		gX86SwapFPUFunc = x86_fnsave_swap;
333 		return;
334 	}
335 #endif
336 
337 	dprintf("%s: CPU has SSE... enabling FXSR and XMM.\n", __func__);
338 
339 	// enable OS support for SSE
340 	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
341 	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
342 
343 	gX86SwapFPUFunc = x86_fxsave_swap;
344 	gHasSSE = true;
345 }
346 
347 
348 #if DUMP_FEATURE_STRING
349 static void
350 dump_feature_string(int currentCPU, cpu_ent* cpu)
351 {
352 	char features[384];
353 	features[0] = 0;
354 
355 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FPU)
356 		strlcat(features, "fpu ", sizeof(features));
357 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_VME)
358 		strlcat(features, "vme ", sizeof(features));
359 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DE)
360 		strlcat(features, "de ", sizeof(features));
361 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE)
362 		strlcat(features, "pse ", sizeof(features));
363 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TSC)
364 		strlcat(features, "tsc ", sizeof(features));
365 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MSR)
366 		strlcat(features, "msr ", sizeof(features));
367 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAE)
368 		strlcat(features, "pae ", sizeof(features));
369 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCE)
370 		strlcat(features, "mce ", sizeof(features));
371 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CX8)
372 		strlcat(features, "cx8 ", sizeof(features));
373 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_APIC)
374 		strlcat(features, "apic ", sizeof(features));
375 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SEP)
376 		strlcat(features, "sep ", sizeof(features));
377 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MTRR)
378 		strlcat(features, "mtrr ", sizeof(features));
379 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PGE)
380 		strlcat(features, "pge ", sizeof(features));
381 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MCA)
382 		strlcat(features, "mca ", sizeof(features));
383 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CMOV)
384 		strlcat(features, "cmov ", sizeof(features));
385 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PAT)
386 		strlcat(features, "pat ", sizeof(features));
387 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSE36)
388 		strlcat(features, "pse36 ", sizeof(features));
389 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PSN)
390 		strlcat(features, "psn ", sizeof(features));
391 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_CLFSH)
392 		strlcat(features, "clfsh ", sizeof(features));
393 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_DS)
394 		strlcat(features, "ds ", sizeof(features));
395 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_ACPI)
396 		strlcat(features, "acpi ", sizeof(features));
397 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_MMX)
398 		strlcat(features, "mmx ", sizeof(features));
399 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_FXSR)
400 		strlcat(features, "fxsr ", sizeof(features));
401 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE)
402 		strlcat(features, "sse ", sizeof(features));
403 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SSE2)
404 		strlcat(features, "sse2 ", sizeof(features));
405 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_SS)
406 		strlcat(features, "ss ", sizeof(features));
407 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_HTT)
408 		strlcat(features, "htt ", sizeof(features));
409 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_TM)
410 		strlcat(features, "tm ", sizeof(features));
411 	if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_PBE)
412 		strlcat(features, "pbe ", sizeof(features));
413 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE3)
414 		strlcat(features, "sse3 ", sizeof(features));
415 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCLMULQDQ)
416 		strlcat(features, "pclmulqdq ", sizeof(features));
417 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DTES64)
418 		strlcat(features, "dtes64 ", sizeof(features));
419 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MONITOR)
420 		strlcat(features, "monitor ", sizeof(features));
421 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DSCPL)
422 		strlcat(features, "dscpl ", sizeof(features));
423 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_VMX)
424 		strlcat(features, "vmx ", sizeof(features));
425 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SMX)
426 		strlcat(features, "smx ", sizeof(features));
427 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_EST)
428 		strlcat(features, "est ", sizeof(features));
429 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TM2)
430 		strlcat(features, "tm2 ", sizeof(features));
431 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSSE3)
432 		strlcat(features, "ssse3 ", sizeof(features));
433 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CNXTID)
434 		strlcat(features, "cnxtid ", sizeof(features));
435 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_FMA)
436 		strlcat(features, "fma ", sizeof(features));
437 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_CX16)
438 		strlcat(features, "cx16 ", sizeof(features));
439 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XTPR)
440 		strlcat(features, "xtpr ", sizeof(features));
441 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PDCM)
442 		strlcat(features, "pdcm ", sizeof(features));
443 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_PCID)
444 		strlcat(features, "pcid ", sizeof(features));
445 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_DCA)
446 		strlcat(features, "dca ", sizeof(features));
447 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_1)
448 		strlcat(features, "sse4_1 ", sizeof(features));
449 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_SSE4_2)
450 		strlcat(features, "sse4_2 ", sizeof(features));
451 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_X2APIC)
452 		strlcat(features, "x2apic ", sizeof(features));
453 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_MOVBE)
454 		strlcat(features, "movbe ", sizeof(features));
455 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_POPCNT)
456 		strlcat(features, "popcnt ", sizeof(features));
457 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_TSCDEADLINE)
458 		strlcat(features, "tscdeadline ", sizeof(features));
459 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AES)
460 		strlcat(features, "aes ", sizeof(features));
461 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_XSAVE)
462 		strlcat(features, "xsave ", sizeof(features));
463 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_OSXSAVE)
464 		strlcat(features, "osxsave ", sizeof(features));
465 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_AVX)
466 		strlcat(features, "avx ", sizeof(features));
467 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_F16C)
468 		strlcat(features, "f16c ", sizeof(features));
469 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_RDRND)
470 		strlcat(features, "rdrnd ", sizeof(features));
471 	if (cpu->arch.feature[FEATURE_EXT] & IA32_FEATURE_EXT_HYPERVISOR)
472 		strlcat(features, "hypervisor ", sizeof(features));
473 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_SYSCALL)
474 		strlcat(features, "syscall ", sizeof(features));
475 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_NX)
476 		strlcat(features, "nx ", sizeof(features));
477 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_MMXEXT)
478 		strlcat(features, "mmxext ", sizeof(features));
479 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_FFXSR)
480 		strlcat(features, "ffxsr ", sizeof(features));
481 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_LONG)
482 		strlcat(features, "long ", sizeof(features));
483 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOWEXT)
484 		strlcat(features, "3dnowext ", sizeof(features));
485 	if (cpu->arch.feature[FEATURE_EXT_AMD] & IA32_FEATURE_AMD_EXT_3DNOW)
486 		strlcat(features, "3dnow ", sizeof(features));
487 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_DTS)
488 		strlcat(features, "dts ", sizeof(features));
489 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ITB)
490 		strlcat(features, "itb ", sizeof(features));
491 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ARAT)
492 		strlcat(features, "arat ", sizeof(features));
493 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PLN)
494 		strlcat(features, "pln ", sizeof(features));
495 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_ECMD)
496 		strlcat(features, "ecmd ", sizeof(features));
497 	if (cpu->arch.feature[FEATURE_6_EAX] & IA32_FEATURE_PTM)
498 		strlcat(features, "ptm ", sizeof(features));
499 	if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_APERFMPERF)
500 		strlcat(features, "aperfmperf ", sizeof(features));
501 	if (cpu->arch.feature[FEATURE_6_ECX] & IA32_FEATURE_EPB)
502 		strlcat(features, "epb ", sizeof(features));
503 
504 	dprintf("CPU %d: features: %s\n", currentCPU, features);
505 }
506 #endif	// DUMP_FEATURE_STRING
507 
508 
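/*!	Fills in the arch-specific part of the current CPU's cpu_ent: vendor,
	family/model/stepping, model name, and the CPUID feature bits.
*/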
509 static void
510 detect_cpu(int currentCPU)
511 {
512 	cpu_ent* cpu = get_cpu_struct();
513 	char vendorString[17];
514 	cpuid_info cpuid;
515 
516 	// clear out the cpu info data
517 	cpu->arch.vendor = VENDOR_UNKNOWN;
518 	cpu->arch.vendor_name = "UNKNOWN VENDOR";
519 	cpu->arch.feature[FEATURE_COMMON] = 0;
520 	cpu->arch.feature[FEATURE_EXT] = 0;
521 	cpu->arch.feature[FEATURE_EXT_AMD] = 0;
522 	cpu->arch.model_name[0] = 0;
523 
524 	// get the basic CPUID information (leaf 0)
525 	get_current_cpuid(&cpuid, 0);
526 
527 	// build the vendor string
528 	memset(vendorString, 0, sizeof(vendorString));
529 	memcpy(vendorString, cpuid.eax_0.vendor_id, sizeof(cpuid.eax_0.vendor_id));
530 
531 	// get the family, model, stepping
532 	get_current_cpuid(&cpuid, 1);
533 	cpu->arch.type = cpuid.eax_1.type;
534 	cpu->arch.family = cpuid.eax_1.family;
535 	cpu->arch.extended_family = cpuid.eax_1.extended_family;
536 	cpu->arch.model = cpuid.eax_1.model;
537 	cpu->arch.extended_model = cpuid.eax_1.extended_model;
538 	cpu->arch.stepping = cpuid.eax_1.stepping;
539 	dprintf("CPU %d: type %d family %d extended_family %d model %d "
540 		"extended_model %d stepping %d, string '%s'\n",
541 		currentCPU, cpu->arch.type, cpu->arch.family,
542 		cpu->arch.extended_family, cpu->arch.model,
543 		cpu->arch.extended_model, cpu->arch.stepping, vendorString);
544 
545 	// figure out what vendor we have here
546 
547 	for (int32 i = 0; i < VENDOR_NUM; i++) {
548 		if (vendor_info[i].ident_string[0]
549 			&& !strcmp(vendorString, vendor_info[i].ident_string[0])) {
550 			cpu->arch.vendor = (x86_vendors)i;
551 			cpu->arch.vendor_name = vendor_info[i].vendor;
552 			break;
553 		}
554 		if (vendor_info[i].ident_string[1]
555 			&& !strcmp(vendorString, vendor_info[i].ident_string[1])) {
556 			cpu->arch.vendor = (x86_vendors)i;
557 			cpu->arch.vendor_name = vendor_info[i].vendor;
558 			break;
559 		}
560 	}
561 
562 	// see if we can get the model name
563 	get_current_cpuid(&cpuid, 0x80000000);
564 	if (cpuid.eax_0.max_eax >= 0x80000004) {
565 		// build the model string (need to swap ecx/edx data before copying)
566 		unsigned int temp;
567 		memset(cpu->arch.model_name, 0, sizeof(cpu->arch.model_name));
568 
569 		get_current_cpuid(&cpuid, 0x80000002);
570 		temp = cpuid.regs.edx;
571 		cpuid.regs.edx = cpuid.regs.ecx;
572 		cpuid.regs.ecx = temp;
573 		memcpy(cpu->arch.model_name, cpuid.as_chars, sizeof(cpuid.as_chars));
574 
575 		get_current_cpuid(&cpuid, 0x80000003);
576 		temp = cpuid.regs.edx;
577 		cpuid.regs.edx = cpuid.regs.ecx;
578 		cpuid.regs.ecx = temp;
579 		memcpy(cpu->arch.model_name + 16, cpuid.as_chars,
580 			sizeof(cpuid.as_chars));
581 
582 		get_current_cpuid(&cpuid, 0x80000004);
583 		temp = cpuid.regs.edx;
584 		cpuid.regs.edx = cpuid.regs.ecx;
585 		cpuid.regs.ecx = temp;
586 		memcpy(cpu->arch.model_name + 32, cpuid.as_chars,
587 			sizeof(cpuid.as_chars));
588 
589 		// some CPUs return a right-justified string
590 		int32 i = 0;
591 		while (cpu->arch.model_name[i] == ' ')
592 			i++;
593 		if (i > 0) {
594 			memmove(cpu->arch.model_name, &cpu->arch.model_name[i],
595 				strlen(&cpu->arch.model_name[i]) + 1);
596 		}
597 
598 		dprintf("CPU %d: vendor '%s' model name '%s'\n",
599 			currentCPU, cpu->arch.vendor_name, cpu->arch.model_name);
600 	} else {
601 		strlcpy(cpu->arch.model_name, "unknown", sizeof(cpu->arch.model_name));
602 	}
603 
604 	// load feature bits
605 	get_current_cpuid(&cpuid, 1);
606 	cpu->arch.feature[FEATURE_COMMON] = cpuid.eax_1.features; // edx
607 	cpu->arch.feature[FEATURE_EXT] = cpuid.eax_1.extended_features; // ecx
608 	if (cpu->arch.vendor == VENDOR_AMD) {
609 		get_current_cpuid(&cpuid, 0x80000001);
610 		cpu->arch.feature[FEATURE_EXT_AMD] = cpuid.regs.edx; // edx
611 	}
612 	get_current_cpuid(&cpuid, 6);
613 	cpu->arch.feature[FEATURE_6_EAX] = cpuid.regs.eax;
614 	cpu->arch.feature[FEATURE_6_ECX] = cpuid.regs.ecx;
615 
616 #if DUMP_FEATURE_STRING
617 	dump_feature_string(currentCPU, cpu);
618 #endif
619 }
620 
621 
622 bool
623 x86_check_feature(uint32 feature, enum x86_feature_type type)
624 {
625 	cpu_ent* cpu = get_cpu_struct();
626 
627 #if 0
628 	int i;
629 	dprintf("x86_check_feature: feature 0x%x, type %d\n", feature, type);
630 	for (i = 0; i < FEATURE_NUM; i++) {
631 		dprintf("features %d: 0x%x\n", i, cpu->arch.feature[i]);
632 	}
633 #endif
634 
635 	return (cpu->arch.feature[type] & feature) != 0;
636 }
637 
638 
639 void*
640 x86_get_double_fault_stack(int32 cpu, size_t* _size)
641 {
642 	*_size = kDoubleFaultStackSize;
643 	return sDoubleFaultStacks + kDoubleFaultStackSize * cpu;
644 }
645 
646 
647 /*!	Returns the index of the current CPU. Can only be called from the double
648 	fault handler.
649 */
650 int32
651 x86_double_fault_get_cpu(void)
652 {
653 	addr_t stack = x86_get_stack_frame();
654 	return (stack - (addr_t)sDoubleFaultStacks) / kDoubleFaultStackSize;
655 }
656 
657 
658 //	#pragma mark -
659 
660 
661 status_t
662 arch_cpu_preboot_init_percpu(kernel_args* args, int cpu)
663 {
664 	// On SMP systems we want to synchronize the CPUs' TSCs, so system_time()
665 	// will return consistent values.
666 	if (smp_get_num_cpus() > 1) {
667 		// let the first CPU prepare the rendezvous point
668 		if (cpu == 0)
669 			sTSCSyncRendezvous = smp_get_num_cpus() - 1;
670 
671 		// One CPU after the other will drop out of this loop and be caught by
672 		// the loop below, until the last CPU (0) gets there. Save for +/- a few
673 		// cycles the CPUs should pass the second loop at the same time.
674 		while (sTSCSyncRendezvous != cpu) {
675 		}
676 
677 		sTSCSyncRendezvous = cpu - 1;
678 
679 		while (sTSCSyncRendezvous != -1) {
680 		}
681 
682 		// reset TSC to 0
683 		x86_write_msr(IA32_MSR_TSC, 0);
684 	}
685 
686 	return B_OK;
687 }
688 
689 
690 static void
691 halt_idle(void)
692 {
693 	asm("hlt");
694 }
695 
696 
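/*!	Idle routine for AMD CPUs that can enter C1E but have no always-running
	APIC timer: clears the SMI/C1E "on compare-halt" bits before halting, so
	that the halt does not drop into C1E, where the APIC timer stops counting.
*/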
697 static void
698 amdc1e_noarat_idle(void)
699 {
700 	uint64 msr = x86_read_msr(K8_MSR_IPM);
701 	if (msr & K8_CMPHALT)
702 		x86_write_msr(K8_MSR_IPM, msr & ~K8_CMPHALT);
703 	halt_idle();
704 }
705 
706 
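/*!	Returns whether the current CPU is an AMD CPU that can enter C1E but has
	no always-running APIC timer, i.e. whether amdc1e_noarat_idle() should be
	used as the idle routine.
*/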
707 static bool
708 detect_amdc1e_noarat()
709 {
710 	cpu_ent* cpu = get_cpu_struct();
711 
712 	if (cpu->arch.vendor != VENDOR_AMD)
713 		return false;
714 
715 	// Family 0x12 and higher processors support ARAT.
716 	// Families lower than 0xf don't support C1E.
717 	// Family 0xf with models <= 0x40 doesn't support C1E either.
718 	uint32 family = cpu->arch.family + cpu->arch.extended_family;
719 	uint32 model = (cpu->arch.extended_model << 4) | cpu->arch.model;
720 	return (family < 0x12 && family > 0xf) || (family == 0xf && model > 0x40);
721 }
722 
723 
724 status_t
725 arch_cpu_init_percpu(kernel_args* args, int cpu)
726 {
727 	// Load descriptor tables for this CPU.
728 	x86_descriptors_init_percpu(args, cpu);
729 
730 	detect_cpu(cpu);
731 
732 	if (!gCpuIdleFunc) {
733 		if (detect_amdc1e_noarat())
734 			gCpuIdleFunc = amdc1e_noarat_idle;
735 		else
736 			gCpuIdleFunc = halt_idle;
737 	}
738 
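	// If the firmware left the energy/performance bias at "performance" (0),
	// it most likely never initialized it; switch to the balanced setting.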
739 	if (x86_check_feature(IA32_FEATURE_EPB, FEATURE_6_ECX)) {
740 		uint64 msr = x86_read_msr(IA32_MSR_ENERGY_PERF_BIAS);
741 		if ((msr & 0xf) == ENERGY_PERF_BIAS_PERFORMANCE) {
742 			msr &= ~0xf;
743 			msr |= ENERGY_PERF_BIAS_BALANCE;
744 			x86_write_msr(IA32_MSR_ENERGY_PERF_BIAS, msr);
745 		}
746 	}
747 
748 	return B_OK;
749 }
750 
751 
752 status_t
753 arch_cpu_init(kernel_args* args)
754 {
755 	// init the TSC -> system_time() conversion factors
756 
757 	uint32 conversionFactor = args->arch_args.system_time_cv_factor;
758 	uint64 conversionFactorNsecs = (uint64)conversionFactor * 1000;
759 
760 #ifdef __x86_64__
761 	// The x86_64 system_time() implementation uses 64-bit multiplication and
762 	// therefore shifting is not necessary for low frequencies (it's also not
763 	// too likely that there'll be any x86_64 CPUs clocked under 1GHz).
764 	__x86_setup_system_time((uint64)conversionFactor << 32,
765 		conversionFactorNsecs);
766 #else
767 	if (conversionFactorNsecs >> 32 != 0) {
768 		// the TSC frequency is < 1 GHz, which forces us to shift the factor
769 		__x86_setup_system_time(conversionFactor, conversionFactorNsecs >> 16,
770 			true);
771 	} else {
772 		// the TSC frequency is >= 1 GHz
773 		__x86_setup_system_time(conversionFactor, conversionFactorNsecs, false);
774 	}
775 #endif
776 
777 	// Initialize descriptor tables.
778 	x86_descriptors_init(args);
779 
780 	return B_OK;
781 }
782 
783 
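/*!	Post-VM initialization: allocates the per-CPU double fault stacks,
	completes the descriptor table setup, points every CPU at the kernel's
	paging structures, and sets up the FPU if no APIC is available.
*/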
784 status_t
785 arch_cpu_init_post_vm(kernel_args* args)
786 {
787 	uint32 i;
788 
789 	// allocate an area for the double fault stacks
790 	virtual_address_restrictions virtualRestrictions = {};
791 	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
792 	physical_address_restrictions physicalRestrictions = {};
793 	create_area_etc(B_SYSTEM_TEAM, "double fault stacks",
794 		kDoubleFaultStackSize * smp_get_num_cpus(), B_FULL_LOCK,
795 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, CREATE_AREA_DONT_WAIT, 0,
796 		&virtualRestrictions, &physicalRestrictions,
797 		(void**)&sDoubleFaultStacks);
798 
799 	// More descriptor table setup.
800 	x86_descriptors_init_post_vm(args);
801 
802 	X86PagingStructures* kernelPagingStructures
803 		= static_cast<X86VMTranslationMap*>(
804 			VMAddressSpace::Kernel()->TranslationMap())->PagingStructures();
805 
806 	// Set active translation map on each CPU.
807 	for (i = 0; i < args->num_cpus; i++) {
808 		gCPU[i].arch.active_paging_structures = kernelPagingStructures;
809 		kernelPagingStructures->AddReference();
810 	}
811 
812 	if (!apic_available())
813 		x86_init_fpu();
814 	// else fpu gets set up in smp code
815 
816 	return B_OK;
817 }
818 
819 
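/*!	Loads the optional CPU module, lets it program the MTRRs on all CPUs, and
	publishes the (possibly module-provided) optimized memcpy/memset
	implementations via the commpage.
*/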
820 status_t
821 arch_cpu_init_post_modules(kernel_args* args)
822 {
823 	// initialize CPU module
824 
825 	void* cookie = open_module_list("cpu");
826 
827 	while (true) {
828 		char name[B_FILE_NAME_LENGTH];
829 		size_t nameLength = sizeof(name);
830 
831 		if (read_next_module_name(cookie, name, &nameLength) != B_OK
832 			|| get_module(name, (module_info**)&sCpuModule) == B_OK)
833 			break;
834 	}
835 
836 	close_module_list(cookie);
837 
838 	// initialize MTRRs if available
839 	if (x86_count_mtrrs() > 0) {
840 		sCpuRendezvous = sCpuRendezvous2 = 0;
841 		call_all_cpus(&init_mtrrs, NULL);
842 	}
843 
844 	// get optimized functions from the CPU module
845 	if (sCpuModule != NULL && sCpuModule->get_optimized_functions != NULL) {
846 		x86_optimized_functions functions;
847 		memset(&functions, 0, sizeof(functions));
848 
849 		sCpuModule->get_optimized_functions(&functions);
850 
851 		if (functions.memcpy != NULL) {
852 			gOptimizedFunctions.memcpy = functions.memcpy;
853 			gOptimizedFunctions.memcpy_end = functions.memcpy_end;
854 		}
855 
856 		if (functions.memset != NULL) {
857 			gOptimizedFunctions.memset = functions.memset;
858 			gOptimizedFunctions.memset_end = functions.memset_end;
859 		}
860 	}
861 
862 	// put the optimized functions into the commpage
863 	size_t memcpyLen = (addr_t)gOptimizedFunctions.memcpy_end
864 		- (addr_t)gOptimizedFunctions.memcpy;
865 	fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMCPY,
866 		(const void*)gOptimizedFunctions.memcpy, memcpyLen);
867 	size_t memsetLen = (addr_t)gOptimizedFunctions.memset_end
868 		- (addr_t)gOptimizedFunctions.memset;
869 	fill_commpage_entry(COMMPAGE_ENTRY_X86_MEMSET,
870 		(const void*)gOptimizedFunctions.memset, memsetLen);
871 
872 	// add the functions to the commpage image
873 	image_id image = get_commpage_image();
874 	elf_add_memory_image_symbol(image, "commpage_memcpy",
875 		((addr_t*)USER_COMMPAGE_ADDR)[COMMPAGE_ENTRY_X86_MEMCPY], memcpyLen,
876 		B_SYMBOL_TYPE_TEXT);
877 	elf_add_memory_image_symbol(image, "commpage_memset",
878 		((addr_t*)USER_COMMPAGE_ADDR)[COMMPAGE_ENTRY_X86_MEMSET], memsetLen,
879 		B_SYMBOL_TYPE_TEXT);
880 
881 	return B_OK;
882 }
883 
884 
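/*!	Flushes all non-global TLB entries by reloading CR3. */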
885 void
886 arch_cpu_user_TLB_invalidate(void)
887 {
888 	x86_write_cr3(x86_read_cr3());
889 }
890 
891 
892 void
893 arch_cpu_global_TLB_invalidate(void)
894 {
895 	uint32 flags = x86_read_cr4();
896 
897 	if (flags & IA32_CR4_GLOBAL_PAGES) {
898 		// disable and reenable the global pages to flush all TLBs regardless
899 		// of the global page bit
900 		x86_write_cr4(flags & ~IA32_CR4_GLOBAL_PAGES);
901 		x86_write_cr4(flags | IA32_CR4_GLOBAL_PAGES);
902 	} else {
903 		cpu_status state = disable_interrupts();
904 		arch_cpu_user_TLB_invalidate();
905 		restore_interrupts(state);
906 	}
907 }
908 
909 
910 void
911 arch_cpu_invalidate_TLB_range(addr_t start, addr_t end)
912 {
913 	int32 num_pages = end / B_PAGE_SIZE - start / B_PAGE_SIZE;
914 	while (num_pages-- >= 0) {
915 		invalidate_TLB(start);
916 		start += B_PAGE_SIZE;
917 	}
918 }
919 
920 
921 void
922 arch_cpu_invalidate_TLB_list(addr_t pages[], int num_pages)
923 {
924 	int i;
925 	for (i = 0; i < num_pages; i++) {
926 		invalidate_TLB(pages[i]);
927 	}
928 }
929 
930 
931 status_t
932 arch_cpu_shutdown(bool rebootSystem)
933 {
934 	if (acpi_shutdown(rebootSystem) == B_OK)
935 		return B_OK;
936 
937 	if (!rebootSystem) {
938 #ifndef __x86_64__
939 		return apm_shutdown();
940 #else
941 		return B_NOT_SUPPORTED;
942 #endif
943 	}
944 
945 	cpu_status state = disable_interrupts();
946 
947 	// try to reset the system using the keyboard controller
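	// (command 0xfe sent to the i8042 command port 0x64 pulses the CPU's
	// reset line)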
948 	out8(0xfe, 0x64);
949 
950 	// give the controller some time to do its job (0.5s)
951 	snooze(500000);
952 
953 	// if that didn't help, try it this way
954 	x86_reboot();
955 
956 	restore_interrupts(state);
957 	return B_ERROR;
958 }
959 
960 
961 void
962 arch_cpu_idle(void)
963 {
964 	gCpuIdleFunc();
965 }
966 
967 
968 void
969 arch_cpu_sync_icache(void* address, size_t length)
970 {
971 	// instruction cache is always consistent on x86
972 }
973 
974 
975 void
976 arch_cpu_memory_read_barrier(void)
977 {
978 #ifdef __x86_64__
979 	asm volatile("lfence" : : : "memory");
980 #else
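	// no LFENCE before SSE2 -- a locked add to the stack serves as a full
	// memory barrier instead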
981 	asm volatile("lock; addl $0, 0(%%esp)" : : : "memory");
983 #endif
984 }
985 
986 
987 void
988 arch_cpu_memory_write_barrier(void)
989 {
990 #ifdef __x86_64__
991 	asm volatile("sfence" : : : "memory");
992 #else
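	// no SFENCE on older IA-32 CPUs -- a locked add to the stack serves as a
	// full memory barrier instead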
993 	asm volatile("lock; addl $0, 0(%%esp)" : : : "memory");
995 #endif
996 }
997 
998