xref: /haiku/src/system/boot/arch/x86/arch_cpu.cpp (revision dd8a03b78da057c78734e4d34c360f8ef4cab73d)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
4  * Distributed under the terms of the MIT License.
5  *
6  * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and
7  * licensed under the NewOS license.
8  */
9 
10 
11 #include <OS.h>
12 
13 #include <boot/arch/x86/arch_cpu.h>
14 #include <boot/kernel_args.h>
15 #include <boot/platform.h>
16 #include <boot/stage2.h>
17 #include <boot/stdio.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/x86/arch_cpu.h>
21 #include <arch_kernel.h>
22 #include <arch_system_info.h>
23 
24 #include <string.h>
25 
26 #include <x86intrin.h>
27 
28 
// Factor (scaled by 2^32) by which system_time() multiplies the raw TSC value
// to obtain microseconds. It is filled in by determine_cpu_conversion_factor()
// respectively calculate_cpu_conversion_factor().
uint32 gTimeConversionFactor;
30 
// PIT definitions
// Input clock of the timer in Hz.
#define TIMER_CLKNUM_HZ					(14318180 / 12)

// PIT IO Ports
// The counter port of a channel is PIT_CHANNEL_PORT_BASE + channel.
#define PIT_CHANNEL_PORT_BASE			0x40
#define PIT_CONTROL						0x43

// Channel selection
// The channel number is shifted into the top bits of the control byte.
#define PIT_SELECT_CHANNEL_SHIFT		6

// Access mode
#define PIT_ACCESS_LATCH_COUNTER		(0 << 4)
#define PIT_ACCESS_LOW_BYTE_ONLY		(1 << 4)
#define PIT_ACCESS_HIGH_BYTE_ONLY		(2 << 4)
#define PIT_ACCESS_LOW_THEN_HIGH_BYTE	(3 << 4)

// Operating modes
#define PIT_MODE_INTERRUPT_ON_0			(0 << 1)
#define PIT_MODE_HARDWARE_COUNTDOWN		(1 << 1)
#define PIT_MODE_RATE_GENERATOR			(2 << 1)
#define PIT_MODE_SQUARE_WAVE_GENERATOR	(3 << 1)
#define PIT_MODE_SOFTWARE_STROBE		(4 << 1)
#define PIT_MODE_HARDWARE_STROBE		(5 << 1)

// BCD/Binary mode
#define PIT_BINARY_MODE					0
#define PIT_BCD_MODE					1

// Channel 2 control (speaker)
#define PIT_CHANNEL_2_CONTROL			0x61
#define PIT_CHANNEL_2_GATE_HIGH			0x01
	// The mask is parenthesized so that it always expands as a single operand,
	// no matter which expression it is used in.
#define PIT_CHANNEL_2_SPEAKER_OFF_MASK	(~0x02)

// Maximum values
#define MAX_QUICK_SAMPLES				20
#define MAX_SLOW_SAMPLES				20
	// TODO: These are arbitrary. They are here to avoid spinning indefinitely
	// if the TSC just isn't stable and we can't get our desired error range.
69 
70 
#ifdef __SIZEOF_INT128__
// The compiler offers a native 128 bit integer type, so simply use that one.
typedef unsigned __int128 uint128;
#else
/*!	Software fallback for compilers without a native 128 bit integer type.
	The value is kept as two 64 bit halves. Only the operators below are
	implemented: comparison (<, <=), shifting, addition, subtraction,
	multiplication by a 32 bit value, division, and conversion to uint64.
*/
struct uint128 {
	// Intentionally not explicit: plain integers convert implicitly.
	uint128(uint64 low, uint64 high = 0)
		:
		fLow(low),
		fHigh(high)
	{
	}

	bool operator<(const uint128& other) const
	{
		// The upper halves decide, unless they are equal.
		if (fHigh != other.fHigh)
			return fHigh < other.fHigh;

		return fLow < other.fLow;
	}

	bool operator<=(const uint128& other) const
	{
		return !(other < *this);
	}

	uint128 operator<<(int count) const
	{
		if (count == 0)
			return *this;

		if (count >= 128)
			return 0;

		// The lower half moves entirely into the upper half.
		if (count >= 64)
			return uint128(0, fLow << (count - 64));

		// Bits shifted out of the lower half enter the upper half.
		const uint64 carriedBits = fLow >> (64 - count);
		return uint128(fLow << count, (fHigh << count) | carriedBits);
	}

	uint128 operator>>(int count) const
	{
		if (count == 0)
			return *this;

		if (count >= 128)
			return 0;

		// The upper half moves entirely into the lower half.
		if (count >= 64)
			return uint128(fHigh >> (count - 64), 0);

		// Bits shifted out of the upper half enter the lower half.
		const uint64 carriedBits = fHigh << (64 - count);
		return uint128((fLow >> count) | carriedBits, fHigh >> count);
	}

	uint128 operator+(const uint128& other) const
	{
		const uint64 sumLow = fLow + other.fLow;
		// The lower half wrapped around iff the sum got smaller.
		const uint64 carry = sumLow < fLow ? 1 : 0;
		return uint128(sumLow, fHigh + other.fHigh + carry);
	}

	uint128 operator-(const uint128& other) const
	{
		const uint64 differenceLow = fLow - other.fLow;
		// The lower half wrapped around iff the difference got larger.
		const uint64 borrow = differenceLow > fLow ? 1 : 0;
		return uint128(differenceLow, fHigh - other.fHigh - borrow);
	}

	uint128 operator*(uint32 other) const
	{
		// Multiply the two 32 bit halves of the lower 64 bits separately, so
		// that no intermediate product exceeds 64 bits.
		const uint64 upperProduct = (fLow >> 32) * other;
		const uint64 upperShifted = upperProduct << 32;
		const uint64 productLow = (fLow & 0xffffffff) * other + upperShifted;
		const uint64 carry = productLow < upperShifted ? 1 : 0;
		return uint128(productLow,
			fHigh * other + (upperProduct >> 32) + carry);
	}

	/*!	Shift-and-subtract division.
		\a other must not be zero: with a zero divisor and a non-zero dividend
		the alignment loop below never terminates.
	*/
	uint128 operator/(const uint128& other) const
	{
		// Shift the divisor left until it reaches the dividend or its top bit
		// is set, counting the positions.
		uint128 divisor = other;
		int bit = 0;
		while ((divisor.fHigh >> 63) == 0 && divisor < *this) {
			divisor = divisor << 1;
			bit++;
		}

		// Walk back down, subtracting wherever the shifted divisor still fits.
		uint128 quotient = 0;
		uint128 remainder = *this;
		while (bit >= 0) {
			if (divisor <= remainder) {
				quotient = quotient + (uint128(1) << bit);
				remainder = remainder - divisor;
			}

			divisor = divisor >> 1;
			bit--;
		}

		return quotient;
	}

	// Truncating conversion: only the lower 64 bits are returned.
	operator uint64() const
	{
		return fLow;
	}

private:
	uint64	fLow;
	uint64	fHigh;
};
#endif
174 
175 
/*!	Returns the current TSC value, read after executing CPUID. */
static inline uint64_t
rdtsc_fenced()
{
	// RDTSC itself is neither serializing nor does it drain the instruction
	// stream. RDTSCP would, but it is not available everywhere. Other OSes
	// seem to use "CPUID" rather than MFENCE/LFENCE for serializing here
	// during boot, so the same is done here.
	asm volatile ("cpuid" : : : "eax", "ebx", "ecx", "edx");

	const uint64_t timestamp = __rdtsc();
	return timestamp;
}
186 
187 
188 static inline void
calibration_loop(uint8 desiredHighByte,uint8 channel,uint64 & tscDelta,double & conversionFactor,uint16 & expired)189 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta,
190 	double& conversionFactor, uint16& expired)
191 {
192 	uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT;
193 	out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0
194 		| PIT_BINARY_MODE, PIT_CONTROL);
195 
196 	// Fill in count of 0xffff, low then high byte
197 	uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel;
198 	out8(0xff, channelPort);
199 	out8(0xff, channelPort);
200 
201 	// Read the count back once to delay the start. This ensures that we've
202 	// waited long enough for the counter to actually start counting down, as
203 	// this only happens on the next clock cycle after reload.
204 	in8(channelPort);
205 	in8(channelPort);
206 
207 	// We're expecting the PIT to be at the starting position (high byte 0xff)
208 	// as we just programmed it, but if it isn't we wait for it to wrap.
209 	uint8 startLow;
210 	uint8 startHigh;
211 	do {
212 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
213 		startLow = in8(channelPort);
214 		startHigh = in8(channelPort);
215 	} while (startHigh != 255);
216 
217 	// Read in the first TSC value
218 	uint64 startTSC = rdtsc_fenced();
219 
220 	// Wait for the PIT to count down to our desired value
221 	uint8 endLow;
222 	uint8 endHigh;
223 	do {
224 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
225 		endLow = in8(channelPort);
226 		endHigh = in8(channelPort);
227 	} while (endHigh > desiredHighByte);
228 
229 	// And read the second TSC value
230 	uint64 endTSC = rdtsc_fenced();
231 
232 	tscDelta = endTSC - startTSC;
233 	expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow);
234 	conversionFactor = (double)tscDelta / (double)expired;
235 }
236 
237 
238 static void
calculate_cpu_conversion_factor(uint8 channel)239 calculate_cpu_conversion_factor(uint8 channel)
240 {
241 	// When using channel 2, enable the input and disable the speaker.
242 	if (channel == 2) {
243 		uint8 control = in8(PIT_CHANNEL_2_CONTROL);
244 		control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK;
245 		control |= PIT_CHANNEL_2_GATE_HIGH;
246 		out8(control, PIT_CHANNEL_2_CONTROL);
247 	}
248 
249 	uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow;
250 	double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow;
251 	uint16 expired;
252 
253 	uint32 quickSampleCount = 1;
254 	uint32 slowSampleCount = 1;
255 
256 quick_sample:
257 	calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick,
258 		expired);
259 
260 slower_sample:
261 	calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower,
262 		expired);
263 
264 	double deviation = conversionFactorQuick / conversionFactorSlower;
265 	if (deviation < 0.99 || deviation > 1.01) {
266 		// We might have been hit by a SMI or were otherwise stalled
267 		if (quickSampleCount++ < MAX_QUICK_SAMPLES)
268 			goto quick_sample;
269 	}
270 
271 	// Slow sample
272 	calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow,
273 		expired);
274 
275 	deviation = conversionFactorSlower / conversionFactorSlow;
276 	if (deviation < 0.99 || deviation > 1.01) {
277 		// We might have been hit by a SMI or were otherwise stalled
278 		if (slowSampleCount++ < MAX_SLOW_SAMPLES)
279 			goto slower_sample;
280 	}
281 
282 	// Scale the TSC delta to timer units
283 	tscDeltaSlow *= TIMER_CLKNUM_HZ;
284 
285 	uint64 clockSpeed = tscDeltaSlow / expired;
286 	gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32)
287 		/ uint128(tscDeltaSlow);
288 
289 #ifdef TRACE_CPU
290 	if (clockSpeed > 1000000000LL) {
291 		dprintf("CPU at %lld.%03Ld GHz\n", clockSpeed / 1000000000LL,
292 			(clockSpeed % 1000000000LL) / 1000000LL);
293 	} else {
294 		dprintf("CPU at %lld.%03Ld MHz\n", clockSpeed / 1000000LL,
295 			(clockSpeed % 1000000LL) / 1000LL);
296 	}
297 #endif
298 
299 	gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
300 	gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
301 	//dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed);
302 
303 	if (quickSampleCount > 1) {
304 		dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n",
305 			quickSampleCount);
306 	}
307 
308 	if (slowSampleCount > 1) {
309 		dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n",
310 			slowSampleCount);
311 	}
312 
313 	if (channel == 2) {
314 		// Set the gate low again
315 		out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH,
316 			PIT_CHANNEL_2_CONTROL);
317 	}
318 }
319 
320 
321 void
determine_cpu_conversion_factor(uint8 channel)322 determine_cpu_conversion_factor(uint8 channel)
323 {
324 	// Before using the calibration loop, check if we are on a hypervisor.
325 	cpuid_info info;
326 	if (get_current_cpuid(&info, 1, 0) == B_OK
327 			&& (info.regs.ecx & IA32_FEATURE_EXT_HYPERVISOR) != 0) {
328 		get_current_cpuid(&info, 0x40000000, 0);
329 		const uint32 maxVMM = info.regs.eax;
330 		if (maxVMM >= 0x40000010) {
331 			get_current_cpuid(&info, 0x40000010, 0);
332 
333 			uint64 clockSpeed = uint64(info.regs.eax) * 1000;
334 			gTimeConversionFactor = (uint64(1000) << 32) / info.regs.eax;
335 
336 			gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
337 			gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
338 
339 			dprintf("TSC frequency read from hypervisor CPUID leaf\n");
340 			return;
341 		}
342 	}
343 
344 	calculate_cpu_conversion_factor(channel);
345 }
346 
347 
348 void
ucode_load(BootVolume & volume)349 ucode_load(BootVolume& volume)
350 {
351 	cpuid_info info;
352 	if (get_current_cpuid(&info, 0, 0) != B_OK)
353 		return;
354 
355 	bool isIntel = strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) == 0;
356 	bool isAmd = strncmp(info.eax_0.vendor_id, "AuthenticAMD", 12) == 0;
357 
358 	if (!isIntel && !isAmd)
359 		return;
360 
361 	if (get_current_cpuid(&info, 1, 0) != B_OK)
362 		return;
363 
364 	char path[128];
365 	int family = info.eax_1.family;
366 	int model = info.eax_1.model;
367 	if (family == 0x6 || family == 0xf) {
368 		family += info.eax_1.extended_family;
369 		model += (info.eax_1.extended_model << 4);
370 	}
371 	if (isIntel) {
372 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/intel-ucode/"
373 			"%02x-%02x-%02x", family, model, info.eax_1.stepping);
374 	} else if (family < 0x15) {
375 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
376 			"microcode_amd.bin");
377 	} else {
378 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
379 			"microcode_amd_fam%02xh.bin", family);
380 	}
381 	dprintf("ucode_load: %s\n", path);
382 
383 	int fd = open_from(volume.RootDirectory(), path, O_RDONLY);
384 	if (fd < B_OK) {
385 		dprintf("ucode_load: couldn't find microcode\n");
386 		return;
387 	}
388 	struct stat stat;
389 	if (fstat(fd, &stat) < 0) {
390 		dprintf("ucode_load: couldn't stat microcode file\n");
391 		close(fd);
392 		return;
393 	}
394 
395 	ssize_t length = stat.st_size;
396 
397 	// 16-byte alignment required
398 	void *buffer = kernel_args_malloc(length, 16);
399 	if (buffer != NULL) {
400 		if (read(fd, buffer, length) != length) {
401 			dprintf("ucode_load: couldn't read microcode file\n");
402 			kernel_args_free(buffer);
403 		} else {
404 			gKernelArgs.ucode_data = buffer;
405 			gKernelArgs.ucode_data_size = length;
406 			dprintf("ucode_load: microcode file read in memory\n");
407 		}
408 	}
409 
410 	close(fd);
411 }
412 
413 
414 extern "C" bigtime_t
system_time()415 system_time()
416 {
417 	uint64 tsc = rdtsc_fenced();
418 	uint64 lo = (uint32)tsc;
419 	uint64 hi = tsc >> 32;
420 	return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor;
421 }
422 
423 
424 extern "C" void
spin(bigtime_t microseconds)425 spin(bigtime_t microseconds)
426 {
427 	bigtime_t time = system_time();
428 
429 	while ((system_time() - time) < microseconds)
430 		asm volatile ("pause;");
431 }
432 
433 
434 extern "C" status_t
boot_arch_cpu_init()435 boot_arch_cpu_init()
436 {
437     // Nothing really to init on x86
438     return B_OK;
439 }
440 
441 
442 extern "C" void
arch_ucode_load(BootVolume & volume)443 arch_ucode_load(BootVolume& volume)
444 {
445     ucode_load(volume);
446 }
447