xref: /haiku/src/system/boot/arch/x86/arch_cpu.cpp (revision 4c8e85b316c35a9161f5a1c50ad70bc91c83a76f)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
4  * Distributed under the terms of the MIT License.
5  *
6  * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and
7  * licensed under the NewOS license.
8  */
9 
10 
11 #include <OS.h>
12 
13 #include <boot/arch/x86/arch_cpu.h>
14 #include <boot/kernel_args.h>
15 #include <boot/platform.h>
16 #include <boot/stage2.h>
17 #include <boot/stdio.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/x86/arch_cpu.h>
21 #include <arch_kernel.h>
22 #include <arch_system_info.h>
23 
24 #include <string.h>
25 
26 #if __GNUC__ > 2
27 #include <x86intrin.h>
28 #else
29 static inline uint64_t __rdtsc()
30 {
31 	uint64 tsc;
32 
33 	asm volatile ("rdtsc\n" : "=A"(tsc));
34 
35 	return tsc;
36 }
37 #endif
38 
39 
// Factor (scaled by 2^32) that system_time() multiplies the TSC value with to
// get microseconds. It is computed by calculate_cpu_conversion_factor() and
// also handed to the kernel via gKernelArgs.arch_args.system_time_cv_factor.
uint32 gTimeConversionFactor;
41 
// PIT definitions
// Frequency of the PIT input clock in Hz; used to scale TSC deltas to timer
// units.
#define TIMER_CLKNUM_HZ					(14318180 / 12)

// PIT IO Ports
// The data port of channel N is PIT_CHANNEL_PORT_BASE + N.
#define PIT_CHANNEL_PORT_BASE			0x40
#define PIT_CONTROL						0x43

// Channel selection
// The channel number is shifted into the control byte by this amount.
#define PIT_SELECT_CHANNEL_SHIFT		6

// Access mode
#define PIT_ACCESS_LATCH_COUNTER		(0 << 4)
#define PIT_ACCESS_LOW_BYTE_ONLY		(1 << 4)
#define PIT_ACCESS_HIGH_BYTE_ONLY		(2 << 4)
#define PIT_ACCESS_LOW_THEN_HIGH_BYTE	(3 << 4)

// Operating modes
#define PIT_MODE_INTERRUPT_ON_0			(0 << 1)
#define PIT_MODE_HARDWARE_COUNTDOWN		(1 << 1)
#define PIT_MODE_RATE_GENERATOR			(2 << 1)
#define PIT_MODE_SQUARE_WAVE_GENERATOR	(3 << 1)
#define PIT_MODE_SOFTWARE_STROBE		(4 << 1)
#define PIT_MODE_HARDWARE_STROBE		(5 << 1)

// BCD/Binary mode
#define PIT_BINARY_MODE					0
#define PIT_BCD_MODE					1

// Channel 2 control (speaker)
#define PIT_CHANNEL_2_CONTROL			0x61
#define PIT_CHANNEL_2_GATE_HIGH			0x01
// Parenthesized so that the mask expands safely inside any expression.
#define PIT_CHANNEL_2_SPEAKER_OFF_MASK	(~0x02)

// Maximum values
// Upper bounds for the number of calibration retries.
#define MAX_QUICK_SAMPLES				20
#define MAX_SLOW_SAMPLES				20
	// TODO: These are arbitrary. They are here to avoid spinning indefinitely
	// if the TSC just isn't stable and we can't get our desired error range.
80 
81 
82 struct uint128 {
83 	uint128(uint64 low, uint64 high = 0)
84 		:
85 		low(low),
86 		high(high)
87 	{
88 	}
89 
90 	bool operator<(const uint128& other) const
91 	{
92 		return high < other.high || (high == other.high && low < other.low);
93 	}
94 
95 	bool operator<=(const uint128& other) const
96 	{
97 		return !(other < *this);
98 	}
99 
100 	uint128 operator<<(int count) const
101 	{
102 		if (count == 0)
103 			return *this;
104 
105 		if (count >= 128)
106 			return 0;
107 
108 		if (count >= 64)
109 			return uint128(0, low << (count - 64));
110 
111 		return uint128(low << count, (high << count) | (low >> (64 - count)));
112 	}
113 
114 	uint128 operator>>(int count) const
115 	{
116 		if (count == 0)
117 			return *this;
118 
119 		if (count >= 128)
120 			return 0;
121 
122 		if (count >= 64)
123 			return uint128(high >> (count - 64), 0);
124 
125 		return uint128((low >> count) | (high << (64 - count)), high >> count);
126 	}
127 
128 	uint128 operator+(const uint128& other) const
129 	{
130 		uint64 resultLow = low + other.low;
131 		return uint128(resultLow,
132 			high + other.high + (resultLow < low ? 1 : 0));
133 	}
134 
135 	uint128 operator-(const uint128& other) const
136 	{
137 		uint64 resultLow = low - other.low;
138 		return uint128(resultLow,
139 			high - other.high - (resultLow > low ? 1 : 0));
140 	}
141 
142 	uint128 operator*(uint32 other) const
143 	{
144 		uint64 resultMid = (low >> 32) * other;
145 		uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32);
146 		return uint128(resultLow,
147 			high * other + (resultMid >> 32)
148 				+ (resultLow < resultMid << 32 ? 1 : 0));
149 	}
150 
151 	uint128 operator/(const uint128& other) const
152 	{
153 		int shift = 0;
154 		uint128 shiftedDivider = other;
155 		while (shiftedDivider.high >> 63 == 0 && shiftedDivider < *this) {
156 			shiftedDivider = shiftedDivider << 1;
157 			shift++;
158 		}
159 
160 		uint128 result = 0;
161 		uint128 temp = *this;
162 		for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) {
163 			if (shiftedDivider <= temp) {
164 				result = result + (uint128(1) << shift);
165 				temp = temp - shiftedDivider;
166 			}
167 		}
168 
169 		return result;
170 	}
171 
172 	operator uint64() const
173 	{
174 		return low;
175 	}
176 
177 private:
178 	uint64	low;
179 	uint64	high;
180 };
181 
182 
183 static inline void
184 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta,
185 	double& conversionFactor, uint16& expired)
186 {
187 	uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT;
188 	out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0
189 		| PIT_BINARY_MODE, PIT_CONTROL);
190 
191 	// Fill in count of 0xffff, low then high byte
192 	uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel;
193 	out8(0xff, channelPort);
194 	out8(0xff, channelPort);
195 
196 	// Read the count back once to delay the start. This ensures that we've
197 	// waited long enough for the counter to actually start counting down, as
198 	// this only happens on the next clock cycle after reload.
199 	in8(channelPort);
200 	in8(channelPort);
201 
202 	// We're expecting the PIT to be at the starting position (high byte 0xff)
203 	// as we just programmed it, but if it isn't we wait for it to wrap.
204 	uint8 startLow;
205 	uint8 startHigh;
206 	do {
207 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
208 		startLow = in8(channelPort);
209 		startHigh = in8(channelPort);
210 	} while (startHigh != 255);
211 
212 	// Read in the first TSC value
213 	uint64 startTSC = __rdtsc();
214 
215 	// Wait for the PIT to count down to our desired value
216 	uint8 endLow;
217 	uint8 endHigh;
218 	do {
219 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
220 		endLow = in8(channelPort);
221 		endHigh = in8(channelPort);
222 	} while (endHigh > desiredHighByte);
223 
224 	// And read the second TSC value
225 	uint64 endTSC = __rdtsc();
226 
227 	tscDelta = endTSC - startTSC;
228 	expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow);
229 	conversionFactor = (double)tscDelta / (double)expired;
230 }
231 
232 
233 void
234 calculate_cpu_conversion_factor(uint8 channel)
235 {
236 	// When using channel 2, enable the input and disable the speaker.
237 	if (channel == 2) {
238 		uint8 control = in8(PIT_CHANNEL_2_CONTROL);
239 		control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK;
240 		control |= PIT_CHANNEL_2_GATE_HIGH;
241 		out8(control, PIT_CHANNEL_2_CONTROL);
242 	}
243 
244 	uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow;
245 	double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow;
246 	uint16 expired;
247 
248 	uint32 quickSampleCount = 1;
249 	uint32 slowSampleCount = 1;
250 
251 quick_sample:
252 	calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick,
253 		expired);
254 
255 slower_sample:
256 	calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower,
257 		expired);
258 
259 	double deviation = conversionFactorQuick / conversionFactorSlower;
260 	if (deviation < 0.99 || deviation > 1.01) {
261 		// We might have been hit by a SMI or were otherwise stalled
262 		if (quickSampleCount++ < MAX_QUICK_SAMPLES)
263 			goto quick_sample;
264 	}
265 
266 	// Slow sample
267 	calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow,
268 		expired);
269 
270 	deviation = conversionFactorSlower / conversionFactorSlow;
271 	if (deviation < 0.99 || deviation > 1.01) {
272 		// We might have been hit by a SMI or were otherwise stalled
273 		if (slowSampleCount++ < MAX_SLOW_SAMPLES)
274 			goto slower_sample;
275 	}
276 
277 	// Scale the TSC delta to timer units
278 	tscDeltaSlow *= TIMER_CLKNUM_HZ;
279 
280 	uint64 clockSpeed = tscDeltaSlow / expired;
281 	gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32)
282 		/ uint128(tscDeltaSlow);
283 
284 #ifdef TRACE_CPU
285 	if (clockSpeed > 1000000000LL) {
286 		dprintf("CPU at %Ld.%03Ld GHz\n", clockSpeed / 1000000000LL,
287 			(clockSpeed % 1000000000LL) / 1000000LL);
288 	} else {
289 		dprintf("CPU at %Ld.%03Ld MHz\n", clockSpeed / 1000000LL,
290 			(clockSpeed % 1000000LL) / 1000LL);
291 	}
292 #endif
293 
294 	gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
295 	gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
296 	//dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed);
297 
298 	if (quickSampleCount > 1) {
299 		dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n",
300 			quickSampleCount);
301 	}
302 
303 	if (slowSampleCount > 1) {
304 		dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n",
305 			slowSampleCount);
306 	}
307 
308 	if (channel == 2) {
309 		// Set the gate low again
310 		out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH,
311 			PIT_CHANNEL_2_CONTROL);
312 	}
313 }
314 
315 extern int open_maybe_packaged(BootVolume& volume, const char* path,
316 	int openMode);
317 
318 void
319 ucode_load(BootVolume& volume)
320 {
321 	cpuid_info info;
322 	if (get_current_cpuid(&info, 0, 0) != B_OK
323 		|| strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) != 0)
324 		return;
325 
326 	if (get_current_cpuid(&info, 1, 0) != B_OK)
327 		return;
328 
329 	char path[128];
330 	int family = info.eax_1.family;
331 	int model = info.eax_1.model;
332 	if (family == 0x6 || family == 0xf) {
333 		family += info.eax_1.extended_family;
334 		model += (info.eax_1.extended_model << 4);
335 	}
336 	snprintf(path, sizeof(path), "system/data/firmware/intel-ucode/"
337 		"%02x-%02x-%02x", family, model, info.eax_1.stepping);
338 	dprintf("ucode_load: %s\n", path);
339 
340 	int fd = open_maybe_packaged(volume, path, O_RDONLY);
341 	if (fd < B_OK) {
342 		dprintf("ucode_load: couldn't find microcode\n");
343 		return;
344 	}
345 	struct stat stat;
346 	if (fstat(fd, &stat) < 0) {
347 		dprintf("ucode_load: couldn't stat microcode file\n");
348 		close(fd);
349 		return;
350 	}
351 
352 	ssize_t length = stat.st_size;
353 
354 	// 16-byte alignment required
355 	void *buffer = kernel_args_malloc(length, 16);
356 	if (buffer != NULL) {
357 		if (read(fd, buffer, length) != length) {
358 			dprintf("ucode_load: couldn't read microcode file\n");
359 			kernel_args_free(buffer);
360 		} else {
361 			gKernelArgs.ucode_data = buffer;
362 			gKernelArgs.ucode_data_size = length;
363 			dprintf("ucode_load: microcode file read in memory\n");
364 		}
365 	}
366 
367 	close(fd);
368 }
369 
370 
371 extern "C" bigtime_t
372 system_time()
373 {
374 	uint64 tsc = __rdtsc();
375 	uint64 lo = (uint32)tsc;
376 	uint64 hi = tsc >> 32;
377 	return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor;
378 }
379 
380 
381 extern "C" void
382 spin(bigtime_t microseconds)
383 {
384 	bigtime_t time = system_time();
385 
386 	while ((system_time() - time) < microseconds)
387 		asm volatile ("pause;");
388 }
389 
390 
391 extern "C" status_t
392 boot_arch_cpu_init()
393 {
394     // Nothing really to init on x86
395     return B_OK;
396 }
397 
398 
399 extern "C" void
400 arch_ucode_load(BootVolume& volume)
401 {
402     ucode_load(volume);
403 }
404