xref: /haiku/src/system/boot/arch/x86/arch_cpu.cpp (revision 125b262675217084e0c59014b4a98f724f1c4fb3)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
4  * Distributed under the terms of the MIT License.
5  *
6  * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and
7  * licensed under the NewOS license.
8  */
9 
10 
11 #include <OS.h>
12 
13 #include <boot/arch/x86/arch_cpu.h>
14 #include <boot/kernel_args.h>
15 #include <boot/platform.h>
16 #include <boot/stage2.h>
17 #include <boot/stdio.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/x86/arch_cpu.h>
21 #include <arch_kernel.h>
22 #include <arch_system_info.h>
23 
24 #include <string.h>
25 
26 #if __GNUC__ > 2
27 #include <x86intrin.h>
28 #else
29 static inline uint64_t __rdtsc()
30 {
31 	uint64 tsc;
32 
33 	asm volatile ("rdtsc\n" : "=A"(tsc));
34 
35 	return tsc;
36 }
37 #endif
38 
39 
// Microseconds per TSC tick, scaled by 2^32 (fixed point). Computed by
// calculate_cpu_conversion_factor() and consumed by system_time().
uint32 gTimeConversionFactor;
41 
// PIT definitions
// Timer tick rate in Hz; the integer division yields 1193181.
#define TIMER_CLKNUM_HZ					(14318180 / 12)

// PIT IO Ports
// The data port of channel N is PIT_CHANNEL_PORT_BASE + N.
#define PIT_CHANNEL_PORT_BASE			0x40
#define PIT_CONTROL						0x43

// The values below are OR'ed together to form the byte written to
// PIT_CONTROL: channel in bits 7-6, access mode in bits 5-4, operating mode
// in bits 3-1 and the BCD flag in bit 0.

// Channel selection
#define PIT_SELECT_CHANNEL_SHIFT		6

// Access mode
#define PIT_ACCESS_LATCH_COUNTER		(0 << 4)
#define PIT_ACCESS_LOW_BYTE_ONLY		(1 << 4)
#define PIT_ACCESS_HIGH_BYTE_ONLY		(2 << 4)
#define PIT_ACCESS_LOW_THEN_HIGH_BYTE	(3 << 4)

// Operating modes
#define PIT_MODE_INTERRUPT_ON_0			(0 << 1)
#define PIT_MODE_HARDWARE_COUNTDOWN		(1 << 1)
#define PIT_MODE_RATE_GENERATOR			(2 << 1)
#define PIT_MODE_SQUARE_WAVE_GENERATOR	(3 << 1)
#define PIT_MODE_SOFTWARE_STROBE		(4 << 1)
#define PIT_MODE_HARDWARE_STROBE		(5 << 1)

// BCD/Binary mode
#define PIT_BINARY_MODE					0
#define PIT_BCD_MODE					1

// Channel 2 control (speaker)
#define PIT_CHANNEL_2_CONTROL			0x61
#define PIT_CHANNEL_2_GATE_HIGH			0x01
// Parenthesized so the mask stays a single operand wherever it is expanded.
#define PIT_CHANNEL_2_SPEAKER_OFF_MASK	(~0x02)

// Maximum values
#define MAX_QUICK_SAMPLES				20
#define MAX_SLOW_SAMPLES				20
	// TODO: These are arbitrary. They are here to avoid spinning indefinitely
	// if the TSC just isn't stable and we can't get our desired error range.
80 
81 
#ifdef __SIZEOF_INT128__
// The compiler provides a native 128 bit integer; use it directly.
typedef unsigned __int128 uint128;
#else
// Software replacement for an unsigned 128 bit integer, built from two 64 bit
// halves. It implements only the operations
// calculate_cpu_conversion_factor() needs: comparison, shifts, addition,
// subtraction, multiplication by a 32 bit value, division and truncation to
// 64 bits.
struct uint128 {
	// Intentionally implicit, so that plain integers (e.g. "return 0") convert.
	uint128(uint64 low, uint64 high = 0)
		:
		low(low),
		high(high)
	{
	}

	bool operator<(const uint128& other) const
	{
		return high < other.high || (high == other.high && low < other.low);
	}

	bool operator<=(const uint128& other) const
	{
		return !(other < *this);
	}

	// Logical shift left. \a count must not be negative; counts of 128 or more
	// yield 0.
	uint128 operator<<(int count) const
	{
		// Handled separately: the general case below would shift by 64.
		if (count == 0)
			return *this;

		if (count >= 128)
			return 0;

		if (count >= 64)
			return uint128(0, low << (count - 64));

		return uint128(low << count, (high << count) | (low >> (64 - count)));
	}

	// Logical shift right. \a count must not be negative; counts of 128 or
	// more yield 0.
	uint128 operator>>(int count) const
	{
		// Handled separately: the general case below would shift by 64.
		if (count == 0)
			return *this;

		if (count >= 128)
			return 0;

		if (count >= 64)
			return uint128(high >> (count - 64), 0);

		return uint128((low >> count) | (high << (64 - count)), high >> count);
	}

	// Addition modulo 2^128.
	uint128 operator+(const uint128& other) const
	{
		uint64 resultLow = low + other.low;
		// The low half wrapped around iff the sum is smaller than an operand.
		return uint128(resultLow,
			high + other.high + (resultLow < low ? 1 : 0));
	}

	// Subtraction modulo 2^128.
	uint128 operator-(const uint128& other) const
	{
		uint64 resultLow = low - other.low;
		// The low half borrowed iff the difference exceeds the minuend.
		return uint128(resultLow,
			high - other.high - (resultLow > low ? 1 : 0));
	}

	// Multiplication by a 32 bit factor, modulo 2^128.
	uint128 operator*(uint32 other) const
	{
		// Split the low half into 32 bit pieces so that each partial product
		// fits into 64 bits.
		uint64 resultMid = (low >> 32) * other;
		uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32);
		return uint128(resultLow,
			high * other + (resultMid >> 32)
				+ (resultLow < resultMid << 32 ? 1 : 0));
	}

	// Integer division (shift-and-subtract). Dividing by zero yields the
	// largest representable value instead of looping forever.
	uint128 operator/(const uint128& other) const
	{
		// With a zero divider the normalization loop below could never
		// terminate (shifting zero left never makes it reach the dividend).
		if (other.high == 0 && other.low == 0)
			return uint128(~uint64(0), ~uint64(0));

		// Shift the divider left until it is no longer smaller than the
		// dividend, or until its top bit is set.
		int shift = 0;
		uint128 shiftedDivider = other;
		while (shiftedDivider.high >> 63 == 0 && shiftedDivider < *this) {
			shiftedDivider = shiftedDivider << 1;
			shift++;
		}

		// Walk back down, producing one quotient bit per position.
		uint128 result = 0;
		uint128 temp = *this;
		for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) {
			if (shiftedDivider <= temp) {
				result = result + (uint128(1) << shift);
				temp = temp - shiftedDivider;
			}
		}

		return result;
	}

	// Truncates to the low 64 bits.
	operator uint64() const
	{
		return low;
	}

private:
	uint64	low;
	uint64	high;
};
#endif
185 
186 
187 static inline void
188 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta,
189 	double& conversionFactor, uint16& expired)
190 {
191 	uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT;
192 	out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0
193 		| PIT_BINARY_MODE, PIT_CONTROL);
194 
195 	// Fill in count of 0xffff, low then high byte
196 	uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel;
197 	out8(0xff, channelPort);
198 	out8(0xff, channelPort);
199 
200 	// Read the count back once to delay the start. This ensures that we've
201 	// waited long enough for the counter to actually start counting down, as
202 	// this only happens on the next clock cycle after reload.
203 	in8(channelPort);
204 	in8(channelPort);
205 
206 	// We're expecting the PIT to be at the starting position (high byte 0xff)
207 	// as we just programmed it, but if it isn't we wait for it to wrap.
208 	uint8 startLow;
209 	uint8 startHigh;
210 	do {
211 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
212 		startLow = in8(channelPort);
213 		startHigh = in8(channelPort);
214 	} while (startHigh != 255);
215 
216 	// Read in the first TSC value
217 	uint64 startTSC = __rdtsc();
218 
219 	// Wait for the PIT to count down to our desired value
220 	uint8 endLow;
221 	uint8 endHigh;
222 	do {
223 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
224 		endLow = in8(channelPort);
225 		endHigh = in8(channelPort);
226 	} while (endHigh > desiredHighByte);
227 
228 	// And read the second TSC value
229 	uint64 endTSC = __rdtsc();
230 
231 	tscDelta = endTSC - startTSC;
232 	expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow);
233 	conversionFactor = (double)tscDelta / (double)expired;
234 }
235 
236 
237 void
238 calculate_cpu_conversion_factor(uint8 channel)
239 {
240 	// When using channel 2, enable the input and disable the speaker.
241 	if (channel == 2) {
242 		uint8 control = in8(PIT_CHANNEL_2_CONTROL);
243 		control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK;
244 		control |= PIT_CHANNEL_2_GATE_HIGH;
245 		out8(control, PIT_CHANNEL_2_CONTROL);
246 	}
247 
248 	uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow;
249 	double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow;
250 	uint16 expired;
251 
252 	uint32 quickSampleCount = 1;
253 	uint32 slowSampleCount = 1;
254 
255 quick_sample:
256 	calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick,
257 		expired);
258 
259 slower_sample:
260 	calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower,
261 		expired);
262 
263 	double deviation = conversionFactorQuick / conversionFactorSlower;
264 	if (deviation < 0.99 || deviation > 1.01) {
265 		// We might have been hit by a SMI or were otherwise stalled
266 		if (quickSampleCount++ < MAX_QUICK_SAMPLES)
267 			goto quick_sample;
268 	}
269 
270 	// Slow sample
271 	calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow,
272 		expired);
273 
274 	deviation = conversionFactorSlower / conversionFactorSlow;
275 	if (deviation < 0.99 || deviation > 1.01) {
276 		// We might have been hit by a SMI or were otherwise stalled
277 		if (slowSampleCount++ < MAX_SLOW_SAMPLES)
278 			goto slower_sample;
279 	}
280 
281 	// Scale the TSC delta to timer units
282 	tscDeltaSlow *= TIMER_CLKNUM_HZ;
283 
284 	uint64 clockSpeed = tscDeltaSlow / expired;
285 	gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32)
286 		/ uint128(tscDeltaSlow);
287 
288 #ifdef TRACE_CPU
289 	if (clockSpeed > 1000000000LL) {
290 		dprintf("CPU at %Ld.%03Ld GHz\n", clockSpeed / 1000000000LL,
291 			(clockSpeed % 1000000000LL) / 1000000LL);
292 	} else {
293 		dprintf("CPU at %Ld.%03Ld MHz\n", clockSpeed / 1000000LL,
294 			(clockSpeed % 1000000LL) / 1000LL);
295 	}
296 #endif
297 
298 	gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
299 	gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
300 	//dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed);
301 
302 	if (quickSampleCount > 1) {
303 		dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n",
304 			quickSampleCount);
305 	}
306 
307 	if (slowSampleCount > 1) {
308 		dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n",
309 			slowSampleCount);
310 	}
311 
312 	if (channel == 2) {
313 		// Set the gate low again
314 		out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH,
315 			PIT_CHANNEL_2_CONTROL);
316 	}
317 }
318 
319 
320 void
321 ucode_load(BootVolume& volume)
322 {
323 	cpuid_info info;
324 	if (get_current_cpuid(&info, 0, 0) != B_OK)
325 		return;
326 
327 	bool isIntel = strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) == 0;
328 	bool isAmd = strncmp(info.eax_0.vendor_id, "AuthenticAMD", 12) == 0;
329 
330 	if (!isIntel && !isAmd)
331 		return;
332 
333 	if (get_current_cpuid(&info, 1, 0) != B_OK)
334 		return;
335 
336 	char path[128];
337 	int family = info.eax_1.family;
338 	int model = info.eax_1.model;
339 	if (family == 0x6 || family == 0xf) {
340 		family += info.eax_1.extended_family;
341 		model += (info.eax_1.extended_model << 4);
342 	}
343 	if (isIntel) {
344 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/intel-ucode/"
345 			"%02x-%02x-%02x", family, model, info.eax_1.stepping);
346 	} else if (family < 0x15) {
347 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
348 			"microcode_amd.bin");
349 	} else {
350 		snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
351 			"microcode_amd_fam%02xh.bin", family);
352 	}
353 	dprintf("ucode_load: %s\n", path);
354 
355 	int fd = open_from(volume.RootDirectory(), path, O_RDONLY);
356 	if (fd < B_OK) {
357 		dprintf("ucode_load: couldn't find microcode\n");
358 		return;
359 	}
360 	struct stat stat;
361 	if (fstat(fd, &stat) < 0) {
362 		dprintf("ucode_load: couldn't stat microcode file\n");
363 		close(fd);
364 		return;
365 	}
366 
367 	ssize_t length = stat.st_size;
368 
369 	// 16-byte alignment required
370 	void *buffer = kernel_args_malloc(length, 16);
371 	if (buffer != NULL) {
372 		if (read(fd, buffer, length) != length) {
373 			dprintf("ucode_load: couldn't read microcode file\n");
374 			kernel_args_free(buffer);
375 		} else {
376 			gKernelArgs.ucode_data = buffer;
377 			gKernelArgs.ucode_data_size = length;
378 			dprintf("ucode_load: microcode file read in memory\n");
379 		}
380 	}
381 
382 	close(fd);
383 }
384 
385 
386 extern "C" bigtime_t
387 system_time()
388 {
389 	uint64 tsc = __rdtsc();
390 	uint64 lo = (uint32)tsc;
391 	uint64 hi = tsc >> 32;
392 	return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor;
393 }
394 
395 
396 extern "C" void
397 spin(bigtime_t microseconds)
398 {
399 	bigtime_t time = system_time();
400 
401 	while ((system_time() - time) < microseconds)
402 		asm volatile ("pause;");
403 }
404 
405 
406 extern "C" status_t
407 boot_arch_cpu_init()
408 {
409     // Nothing really to init on x86
410     return B_OK;
411 }
412 
413 
414 extern "C" void
415 arch_ucode_load(BootVolume& volume)
416 {
417     ucode_load(volume);
418 }
419