xref: /haiku/src/system/boot/arch/x86/arch_cpu.cpp (revision e1c4049fed1047bdb957b0529e1921e97ef94770)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
4  * Distributed under the terms of the MIT License.
5  *
6  * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and
7  * licensed under the NewOS license.
8  */
9 
10 
11 #include <OS.h>
12 
13 #include <boot/arch/x86/arch_cpu.h>
14 #include <boot/kernel_args.h>
15 #include <boot/platform.h>
16 #include <boot/stage2.h>
17 #include <boot/stdio.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/x86/arch_cpu.h>
21 #include <arch_kernel.h>
22 #include <arch_system_info.h>
23 
24 #include <string.h>
25 
26 
// Scale factor applied to the TSC by system_time(): the tick count is
// multiplied by this value and the product is shifted right by 32 bits.
// It is computed by determine_cpu_conversion_factor() (either from the
// hypervisor CPUID leaf or from the PIT calibration) and is also handed to
// the kernel as arch_args.system_time_cv_factor.
uint32 gTimeConversionFactor;
28 
// PIT definitions
// Input clock of the PIT in Hz, as used to scale the measured TSC delta.
#define TIMER_CLKNUM_HZ					(14318180 / 12)

// PIT IO Ports
// The data port of channel N is PIT_CHANNEL_PORT_BASE + N.
#define PIT_CHANNEL_PORT_BASE			0x40
#define PIT_CONTROL						0x43

// Channel selection
// The channel number is shifted into the top bits of the control word.
#define PIT_SELECT_CHANNEL_SHIFT		6

// Access mode
#define PIT_ACCESS_LATCH_COUNTER		(0 << 4)
#define PIT_ACCESS_LOW_BYTE_ONLY		(1 << 4)
#define PIT_ACCESS_HIGH_BYTE_ONLY		(2 << 4)
#define PIT_ACCESS_LOW_THEN_HIGH_BYTE	(3 << 4)

// Operating modes
#define PIT_MODE_INTERRUPT_ON_0			(0 << 1)
#define PIT_MODE_HARDWARE_COUNTDOWN		(1 << 1)
#define PIT_MODE_RATE_GENERATOR			(2 << 1)
#define PIT_MODE_SQUARE_WAVE_GENERATOR	(3 << 1)
#define PIT_MODE_SOFTWARE_STROBE		(4 << 1)
#define PIT_MODE_HARDWARE_STROBE		(5 << 1)

// BCD/Binary mode
#define PIT_BINARY_MODE					0
#define PIT_BCD_MODE					1

// Channel 2 control (speaker)
#define PIT_CHANNEL_2_CONTROL			0x61
#define PIT_CHANNEL_2_GATE_HIGH			0x01
// Parenthesized so that the expansion is always treated as a single operand,
// whatever expression the macro ends up in.
#define PIT_CHANNEL_2_SPEAKER_OFF_MASK	(~0x02)

// Maximum values
#define MAX_QUICK_SAMPLES				20
#define MAX_SLOW_SAMPLES				20
	// TODO: These are arbitrary. They are here to avoid spinning indefinitely
	// if the TSC just isn't stable and we can't get our desired error range.
67 
68 
#ifdef __SIZEOF_INT128__
// The compiler has a native 128 bit integer type, so simply use it.
typedef unsigned __int128 uint128;
#else
// Software fallback for an unsigned 128 bit integer, kept as two 64 bit
// halves. Only the operators defined below are available.
struct uint128 {
	uint128(uint64 low, uint64 high = 0)
		:
		fLow(low),
		fHigh(high)
	{
	}

	// The upper halves decide the order, the lower halves only break ties.
	bool operator<(const uint128& other) const
	{
		if (fHigh != other.fHigh)
			return fHigh < other.fHigh;

		return fLow < other.fLow;
	}

	bool operator<=(const uint128& other) const
	{
		return !(other < *this);
	}

	// Logical shift left. Shifting by 128 bits or more yields 0.
	uint128 operator<<(int count) const
	{
		if (count == 0)
			return *this;

		if (count >= 128)
			return uint128(0);

		if (count >= 64)
			return uint128(0, fLow << (count - 64));

		// Bits leaving the lower half enter the upper half from below.
		const uint64 carriedUp = fLow >> (64 - count);
		return uint128(fLow << count, (fHigh << count) | carriedUp);
	}

	// Logical shift right. Shifting by 128 bits or more yields 0.
	uint128 operator>>(int count) const
	{
		if (count == 0)
			return *this;

		if (count >= 128)
			return uint128(0);

		if (count >= 64)
			return uint128(fHigh >> (count - 64), 0);

		// Bits leaving the upper half enter the lower half from above.
		const uint64 carriedDown = fHigh << (64 - count);
		return uint128((fLow >> count) | carriedDown, fHigh >> count);
	}

	uint128 operator+(const uint128& other) const
	{
		const uint64 sumLow = fLow + other.fLow;
		// The lower half wrapped around if the sum got smaller.
		const uint64 carry = sumLow < fLow ? 1 : 0;
		return uint128(sumLow, fHigh + other.fHigh + carry);
	}

	uint128 operator-(const uint128& other) const
	{
		const uint64 differenceLow = fLow - other.fLow;
		// The lower half wrapped around if the difference got larger.
		const uint64 borrow = differenceLow > fLow ? 1 : 0;
		return uint128(differenceLow, fHigh - other.fHigh - borrow);
	}

	// Multiplies by a 32 bit factor, handling the lower half in two 32 bit
	// pieces so that no intermediate product exceeds 64 bits.
	uint128 operator*(uint32 other) const
	{
		const uint64 productMid = (fLow >> 32) * other;
		const uint64 productMidShifted = productMid << 32;
		const uint64 productLow = (fLow & 0xffffffff) * other
			+ productMidShifted;
		const uint64 carry = productLow < productMidShifted ? 1 : 0;
		return uint128(productLow,
			fHigh * other + (productMid >> 32) + carry);
	}

	// Shift-and-subtract long division; the remainder is discarded.
	// A divider of 0 is not handled: for a non-zero dividend the alignment
	// loop below never terminates.
	uint128 operator/(const uint128& other) const
	{
		// Align the divider with the dividend, stopping before its top bit
		// would be shifted out.
		int shift = 0;
		uint128 alignedDivider = other;
		while ((alignedDivider.fHigh >> 63) == 0 && alignedDivider < *this) {
			alignedDivider = alignedDivider << 1;
			shift++;
		}

		uint128 quotient(0);
		uint128 remainder = *this;
		while (shift >= 0) {
			if (alignedDivider <= remainder) {
				quotient = quotient + (uint128(1) << shift);
				remainder = remainder - alignedDivider;
			}

			shift--;
			alignedDivider = alignedDivider >> 1;
		}

		return quotient;
	}

	// Truncates to the lower 64 bits.
	operator uint64() const
	{
		return fLow;
	}

private:
	uint64	fLow;
	uint64	fHigh;
};
#endif
172 
173 
// Returns the current 64 bit time stamp counter value, read after a
// serializing CPUID so that earlier instructions have completed first.
static inline uint64_t
rdtsc_fenced()
{
	// RDTSC is not serializing, nor does it drain the instruction stream.
	// RDTSCP does, but is not available everywhere. Other OSes seem to use
	// "CPUID" rather than MFENCE/LFENCE for serializing here during boot.
	//
	// CPUID takes its leaf in EAX and its subleaf in ECX and overwrites all
	// four registers. Request leaf 0 explicitly instead of executing it with
	// whatever happens to be in the registers, and describe the registers as
	// operands so the compiler knows exactly what is read and written.
	uint32_t eax = 0;
	uint32_t ebx;
	uint32_t ecx = 0;
	uint32_t edx;
	__asm__ volatile ("cpuid"
		: "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx)
		:
		: "memory");
	(void)eax;
	(void)ebx;
	(void)ecx;
	(void)edx;

	// RDTSC returns the counter split over EDX:EAX. The "=A" constraint only
	// describes that register pair on 32 bit targets; on x86_64 it picks a
	// single 64 bit register, which loses the upper half of the counter.
	// Reading both halves separately is correct for both targets.
	uint32_t low;
	uint32_t high;
	__asm__ volatile ("rdtsc" : "=a"(low), "=d"(high));

	return ((uint64_t)high << 32) | low;
}
188 
189 
190 static inline void
191 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta,
192 	double& conversionFactor, uint16& expired)
193 {
194 	uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT;
195 	out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0
196 		| PIT_BINARY_MODE, PIT_CONTROL);
197 
198 	// Fill in count of 0xffff, low then high byte
199 	uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel;
200 	out8(0xff, channelPort);
201 	out8(0xff, channelPort);
202 
203 	// Read the count back once to delay the start. This ensures that we've
204 	// waited long enough for the counter to actually start counting down, as
205 	// this only happens on the next clock cycle after reload.
206 	in8(channelPort);
207 	in8(channelPort);
208 
209 	// We're expecting the PIT to be at the starting position (high byte 0xff)
210 	// as we just programmed it, but if it isn't we wait for it to wrap.
211 	uint8 startLow;
212 	uint8 startHigh;
213 	do {
214 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
215 		startLow = in8(channelPort);
216 		startHigh = in8(channelPort);
217 	} while (startHigh != 255);
218 
219 	// Read in the first TSC value
220 	uint64 startTSC = rdtsc_fenced();
221 
222 	// Wait for the PIT to count down to our desired value
223 	uint8 endLow;
224 	uint8 endHigh;
225 	do {
226 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
227 		endLow = in8(channelPort);
228 		endHigh = in8(channelPort);
229 	} while (endHigh > desiredHighByte);
230 
231 	// And read the second TSC value
232 	uint64 endTSC = rdtsc_fenced();
233 
234 	tscDelta = endTSC - startTSC;
235 	expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow);
236 	conversionFactor = (double)tscDelta / (double)expired;
237 }
238 
239 
240 static void
241 calculate_cpu_conversion_factor(uint8 channel)
242 {
243 	// When using channel 2, enable the input and disable the speaker.
244 	if (channel == 2) {
245 		uint8 control = in8(PIT_CHANNEL_2_CONTROL);
246 		control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK;
247 		control |= PIT_CHANNEL_2_GATE_HIGH;
248 		out8(control, PIT_CHANNEL_2_CONTROL);
249 	}
250 
251 	uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow;
252 	double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow;
253 	uint16 expired;
254 
255 	uint32 quickSampleCount = 1;
256 	uint32 slowSampleCount = 1;
257 
258 quick_sample:
259 	calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick,
260 		expired);
261 
262 slower_sample:
263 	calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower,
264 		expired);
265 
266 	double deviation = conversionFactorQuick / conversionFactorSlower;
267 	if (deviation < 0.99 || deviation > 1.01) {
268 		// We might have been hit by a SMI or were otherwise stalled
269 		if (quickSampleCount++ < MAX_QUICK_SAMPLES)
270 			goto quick_sample;
271 	}
272 
273 	// Slow sample
274 	calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow,
275 		expired);
276 
277 	deviation = conversionFactorSlower / conversionFactorSlow;
278 	if (deviation < 0.99 || deviation > 1.01) {
279 		// We might have been hit by a SMI or were otherwise stalled
280 		if (slowSampleCount++ < MAX_SLOW_SAMPLES)
281 			goto slower_sample;
282 	}
283 
284 	// Scale the TSC delta to timer units
285 	tscDeltaSlow *= TIMER_CLKNUM_HZ;
286 
287 	uint64 clockSpeed = tscDeltaSlow / expired;
288 	gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32)
289 		/ uint128(tscDeltaSlow);
290 
291 #ifdef TRACE_CPU
292 	if (clockSpeed > 1000000000LL) {
293 		dprintf("CPU at %lld.%03Ld GHz\n", clockSpeed / 1000000000LL,
294 			(clockSpeed % 1000000000LL) / 1000000LL);
295 	} else {
296 		dprintf("CPU at %lld.%03Ld MHz\n", clockSpeed / 1000000LL,
297 			(clockSpeed % 1000000LL) / 1000LL);
298 	}
299 #endif
300 
301 	gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
302 	gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
303 	//dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed);
304 
305 	if (quickSampleCount > 1) {
306 		dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n",
307 			quickSampleCount);
308 	}
309 
310 	if (slowSampleCount > 1) {
311 		dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n",
312 			slowSampleCount);
313 	}
314 
315 	if (channel == 2) {
316 		// Set the gate low again
317 		out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH,
318 			PIT_CHANNEL_2_CONTROL);
319 	}
320 }
321 
322 
323 void
324 determine_cpu_conversion_factor(uint8 channel)
325 {
326 	// Before using the calibration loop, check if we are on a hypervisor.
327 	cpuid_info info;
328 	if (get_current_cpuid(&info, 1, 0) == B_OK
329 			&& (info.regs.ecx & IA32_FEATURE_EXT_HYPERVISOR) != 0) {
330 		get_current_cpuid(&info, 0x40000000, 0);
331 		const uint32 maxVMM = info.regs.eax;
332 		if (maxVMM >= 0x40000010) {
333 			get_current_cpuid(&info, 0x40000010, 0);
334 
335 			uint64 clockSpeed = uint64(info.regs.eax) * 1000;
336 			gTimeConversionFactor = (uint64(1000) << 32) / info.regs.eax;
337 
338 			gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
339 			gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
340 
341 			dprintf("TSC frequency read from hypervisor CPUID leaf\n");
342 			return;
343 		}
344 	}
345 
346 	calculate_cpu_conversion_factor(channel);
347 }
348 
349 
350 void
351 ucode_load(BootVolume& volume)
352 {
353 	cpuid_info info;
354 	if (get_current_cpuid(&info, 0, 0) != B_OK)
355 		return;
356 
357 	bool isIntel = strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) == 0;
358 	bool isAmd = strncmp(info.eax_0.vendor_id, "AuthenticAMD", 12) == 0;
359 
360 	if (!isIntel && !isAmd)
361 		return;
362 
363 	if (get_current_cpuid(&info, 1, 0) != B_OK)
364 		return;
365 
366 	char path[128];
367 	int family = info.eax_1.family;
368 	int model = info.eax_1.model;
369 	if (family == 0x6 || family == 0xf) {
370 		family += info.eax_1.extended_family;
371 		model += (info.eax_1.extended_model << 4);
372 	}
373 	if (isIntel) {
374 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/intel-ucode/"
375 			"%02x-%02x-%02x", family, model, info.eax_1.stepping);
376 	} else if (family < 0x15) {
377 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
378 			"microcode_amd.bin");
379 	} else {
380 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
381 			"microcode_amd_fam%02xh.bin", family);
382 	}
383 	dprintf("ucode_load: %s\n", path);
384 
385 	int fd = open_from(volume.RootDirectory(), path, O_RDONLY);
386 	if (fd < B_OK) {
387 		dprintf("ucode_load: couldn't find microcode\n");
388 		return;
389 	}
390 	struct stat stat;
391 	if (fstat(fd, &stat) < 0) {
392 		dprintf("ucode_load: couldn't stat microcode file\n");
393 		close(fd);
394 		return;
395 	}
396 
397 	ssize_t length = stat.st_size;
398 
399 	// 16-byte alignment required
400 	void *buffer = kernel_args_malloc(length, 16);
401 	if (buffer != NULL) {
402 		if (read(fd, buffer, length) != length) {
403 			dprintf("ucode_load: couldn't read microcode file\n");
404 			kernel_args_free(buffer);
405 		} else {
406 			gKernelArgs.ucode_data = buffer;
407 			gKernelArgs.ucode_data_size = length;
408 			dprintf("ucode_load: microcode file read in memory\n");
409 		}
410 	}
411 
412 	close(fd);
413 }
414 
415 
416 extern "C" bigtime_t
417 system_time()
418 {
419 	uint64 tsc = rdtsc_fenced();
420 	uint64 lo = (uint32)tsc;
421 	uint64 hi = tsc >> 32;
422 	return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor;
423 }
424 
425 
426 extern "C" void
427 spin(bigtime_t microseconds)
428 {
429 	bigtime_t time = system_time();
430 
431 	while ((system_time() - time) < microseconds)
432 		asm volatile ("pause;");
433 }
434 
435 
436 extern "C" status_t
437 boot_arch_cpu_init()
438 {
439     // Nothing really to init on x86
440     return B_OK;
441 }
442 
443 
444 extern "C" void
445 arch_ucode_load(BootVolume& volume)
446 {
447     ucode_load(volume);
448 }
449