xref: /haiku/src/system/boot/platform/bios_ia32/cpu.cpp (revision 3b07762c548ec4016dea480d1061577cd15ec614)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
4  * Distributed under the terms of the MIT License.
5  *
6  * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and
7  * licensed under the NewOS license.
8  */
9 
10 
11 #include "cpu.h"
12 
13 #include <OS.h>
14 #include <boot/platform.h>
15 #include <boot/stdio.h>
16 #include <boot/kernel_args.h>
17 #include <boot/stage2.h>
18 #include <arch/cpu.h>
19 #include <arch_kernel.h>
20 #include <arch_system_info.h>
21 
22 #include <string.h>
23 
24 
// Uncomment to enable debug tracing in this file.
//#define TRACE_CPU
#ifdef TRACE_CPU
	// Usage: TRACE(("format %d\n", value));
	// The double parentheses hand the complete argument list to dprintf().
#	define TRACE(x) dprintf x
#else
	// Expands to exactly one statement that still needs the trailing ';', so
	// that "if (...) TRACE((...)); else ..." keeps parsing when tracing is
	// disabled. The argument is not evaluated.
#	define TRACE(x) do { } while (0)
#endif
31 
32 
33 extern "C" uint64 rdtsc();
34 
35 uint32 gTimeConversionFactor;
36 
// PIT definitions

// Input clock of the PIT in Hz; used to scale the measured TSC delta to
// ticks per second.
#define TIMER_CLKNUM_HZ					(14318180 / 12)

// PIT IO Ports
// The data port of channel N is PIT_CHANNEL_PORT_BASE + N.
#define PIT_CHANNEL_PORT_BASE			0x40
#define PIT_CONTROL						0x43

// Channel selection (shift for the channel number in the control byte)
#define PIT_SELECT_CHANNEL_SHIFT		6

// Access mode (control byte, shifted to bit 4)
#define PIT_ACCESS_LATCH_COUNTER		(0 << 4)
#define PIT_ACCESS_LOW_BYTE_ONLY		(1 << 4)
#define PIT_ACCESS_HIGH_BYTE_ONLY		(2 << 4)
#define PIT_ACCESS_LOW_THEN_HIGH_BYTE	(3 << 4)

// Operating modes (control byte, shifted to bit 1)
#define PIT_MODE_INTERRUPT_ON_0			(0 << 1)
#define PIT_MODE_HARDWARE_COUNTDOWN		(1 << 1)
#define PIT_MODE_RATE_GENERATOR			(2 << 1)
#define PIT_MODE_SQUARE_WAVE_GENERATOR	(3 << 1)
#define PIT_MODE_SOFTWARE_STROBE		(4 << 1)
#define PIT_MODE_HARDWARE_STROBE		(5 << 1)

// BCD/Binary mode (control byte, bit 0)
#define PIT_BINARY_MODE					0
#define PIT_BCD_MODE					1

// Channel 2 control (speaker)
#define PIT_CHANNEL_2_CONTROL			0x61
#define PIT_CHANNEL_2_GATE_HIGH			0x01
	// The mask is parenthesized so that it expands as one operand in any
	// expression it is used in.
#define PIT_CHANNEL_2_SPEAKER_OFF_MASK	(~0x02)


// Maximum values
#define MAX_QUICK_SAMPLES				20
#define MAX_SLOW_SAMPLES				20
	// TODO: These are arbitrary. They are here to avoid spinning indefinitely
	// if the TSC just isn't stable and we can't get our desired error range.


// EFLAGS bit that is probed to find out whether the cpuid instruction exists
#define CPUID_EFLAGS	(1UL << 21)
// Bit in the cpuid function 1 feature flags that indicates RDTSC support
#define RDTSC_FEATURE	(1UL << 4)
80 
81 
// Minimal unsigned 128 bit integer, stored as two 64 bit halves. It offers
// just the operations the conversion factor computation needs: comparison,
// shifts, addition, subtraction, multiplication by a 32 bit value, and
// division. All arithmetic wraps around modulo 2^128.
struct uint128 {
	// Implicitly constructible from a 64 bit value (high half defaults to 0).
	uint128(uint64 low, uint64 high = 0)
		:
		low(low),
		high(high)
	{
	}

	bool operator<(const uint128& other) const
	{
		return high < other.high || (high == other.high && low < other.low);
	}

	bool operator<=(const uint128& other) const
	{
		return !(other < *this);
	}

	// Logical shift left. Counts of 128 or more yield 0. Negative counts are
	// not handled and must not be passed.
	uint128 operator<<(int count) const
	{
		// Handled separately, since "low >> (64 - count)" below would be a
		// shift by the full width for count == 0.
		if (count == 0)
			return *this;

		if (count >= 128)
			return 0;

		if (count >= 64)
			return uint128(0, low << (count - 64));

		return uint128(low << count, (high << count) | (low >> (64 - count)));
	}

	// Logical shift right. Counts of 128 or more yield 0. Negative counts are
	// not handled and must not be passed.
	uint128 operator>>(int count) const
	{
		// See operator<<() for why 0 is special-cased.
		if (count == 0)
			return *this;

		if (count >= 128)
			return 0;

		if (count >= 64)
			return uint128(high >> (count - 64), 0);

		return uint128((low >> count) | (high << (64 - count)), high >> count);
	}

	uint128 operator+(const uint128& other) const
	{
		uint64 resultLow = low + other.low;
		// The low half wrapped around iff the sum is smaller than an operand.
		return uint128(resultLow,
			high + other.high + (resultLow < low ? 1 : 0));
	}

	uint128 operator-(const uint128& other) const
	{
		uint64 resultLow = low - other.low;
		// The low half borrowed iff the difference is greater than the
		// minuend.
		return uint128(resultLow,
			high - other.high - (resultLow > low ? 1 : 0));
	}

	uint128 operator*(uint32 other) const
	{
		// Multiply the two 32 bit halves of "low" separately, so that neither
		// partial product can exceed 64 bits.
		uint64 resultMid = (low >> 32) * other;
		uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32);
		return uint128(resultLow,
			high * other + (resultMid >> 32)
				+ (resultLow < resultMid << 32 ? 1 : 0));
	}

	// Shift-and-subtract division; the result is truncated towards zero.
	// Dividing by zero returns the largest representable value (all bits
	// set) instead of spinning forever.
	uint128 operator/(const uint128& other) const
	{
		// A zero divider stays zero however often it is shifted, so the
		// alignment loop below would never terminate for it.
		if (other.low == 0 && other.high == 0)
			return uint128(~uint64(0), ~uint64(0));

		// Align the divider with the dividend: shift it left until it is no
		// longer smaller than the dividend (or its top bit is set).
		int shift = 0;
		uint128 shiftedDivider = other;
		while ((shiftedDivider.high >> 63) == 0 && shiftedDivider < *this) {
			shiftedDivider = shiftedDivider << 1;
			shift++;
		}

		// Walk back down, subtracting the divider wherever it fits and
		// setting the corresponding bit in the quotient.
		uint128 result = 0;
		uint128 temp = *this;
		for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) {
			if (shiftedDivider <= temp) {
				result = result + (uint128(1) << shift);
				temp = temp - shiftedDivider;
			}
		}

		return result;
	}

	// Truncating conversion: returns only the low 64 bits.
	operator uint64() const
	{
		return low;
	}

private:
	uint64	low;
	uint64	high;
};
181 
182 
183 static inline void
184 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta,
185 	double& conversionFactor, uint16& expired)
186 {
187 	uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT;
188 	out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0
189 		| PIT_BINARY_MODE, PIT_CONTROL);
190 
191 	// Fill in count of 0xffff, low then high byte
192 	uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel;
193 	out8(0xff, channelPort);
194 	out8(0xff, channelPort);
195 
196 	// Read the count back once to delay the start. This ensures that we've
197 	// waited long enough for the counter to actually start counting down, as
198 	// this only happens on the next clock cycle after reload.
199 	in8(channelPort);
200 	in8(channelPort);
201 
202 	// We're expecting the PIT to be at the starting position (high byte 0xff)
203 	// as we just programmed it, but if it isn't we wait for it to wrap.
204 	uint8 startLow;
205 	uint8 startHigh;
206 	do {
207 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
208 		startLow = in8(channelPort);
209 		startHigh = in8(channelPort);
210 	} while (startHigh != 255);
211 
212 	// Read in the first TSC value
213 	uint64 startTSC = rdtsc();
214 
215 	// Wait for the PIT to count down to our desired value
216 	uint8 endLow;
217 	uint8 endHigh;
218 	do {
219 		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
220 		endLow = in8(channelPort);
221 		endHigh = in8(channelPort);
222 	} while (endHigh > desiredHighByte);
223 
224 	// And read the second TSC value
225 	uint64 endTSC = rdtsc();
226 
227 	tscDelta = endTSC - startTSC;
228 	expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow);
229 	conversionFactor = (double)tscDelta / (double)expired;
230 }
231 
232 
233 static void
234 calculate_cpu_conversion_factor()
235 {
236 	uint8 channel = 0;
237 
238 	// When using channel 2, enable the input and disable the speaker.
239 	if (channel == 2) {
240 		uint8 control = in8(PIT_CHANNEL_2_CONTROL);
241 		control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK;
242 		control |= PIT_CHANNEL_2_GATE_HIGH;
243 		out8(control, PIT_CHANNEL_2_CONTROL);
244 	}
245 
246 	uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow;
247 	double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow;
248 	uint16 expired;
249 
250 	uint32 quickSampleCount = 1;
251 	uint32 slowSampleCount = 1;
252 
253 quick_sample:
254 	calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick,
255 		expired);
256 
257 slower_sample:
258 	calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower,
259 		expired);
260 
261 	double deviation = conversionFactorQuick / conversionFactorSlower;
262 	if (deviation < 0.99 || deviation > 1.01) {
263 		// We might have been hit by a SMI or were otherwise stalled
264 		if (quickSampleCount++ < MAX_QUICK_SAMPLES)
265 			goto quick_sample;
266 	}
267 
268 	// Slow sample
269 	calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow,
270 		expired);
271 
272 	deviation = conversionFactorSlower / conversionFactorSlow;
273 	if (deviation < 0.99 || deviation > 1.01) {
274 		// We might have been hit by a SMI or were otherwise stalled
275 		if (slowSampleCount++ < MAX_SLOW_SAMPLES)
276 			goto slower_sample;
277 	}
278 
279 	// Scale the TSC delta to timer units
280 	tscDeltaSlow *= TIMER_CLKNUM_HZ;
281 
282 	uint64 clockSpeed = tscDeltaSlow / expired;
283 	gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32)
284 		/ uint128(tscDeltaSlow);
285 
286 #ifdef TRACE_CPU
287 	if (clockSpeed > 1000000000LL) {
288 		dprintf("CPU at %Ld.%03Ld GHz\n", clockSpeed / 1000000000LL,
289 			(clockSpeed % 1000000000LL) / 1000000LL);
290 	} else {
291 		dprintf("CPU at %Ld.%03Ld MHz\n", clockSpeed / 1000000LL,
292 			(clockSpeed % 1000000LL) / 1000LL);
293 	}
294 #endif
295 
296 	gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
297 	gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
298 	//dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed);
299 
300 	if (quickSampleCount > 1) {
301 		dprintf("needed %lu quick samples for TSC calibration\n",
302 			quickSampleCount);
303 	}
304 
305 	if (slowSampleCount > 1) {
306 		dprintf("needed %lu slow samples for TSC calibration\n",
307 			slowSampleCount);
308 	}
309 
310 	if (channel == 2) {
311 		// Set the gate low again
312 		out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH,
313 			PIT_CHANNEL_2_CONTROL);
314 	}
315 }
316 
317 
318 static status_t
319 check_cpu_features()
320 {
321 	// check the eflags register to see if the cpuid instruction exists
322 	if ((get_eflags() & CPUID_EFLAGS) == 0) {
323 		// it's not set yet, but maybe we can set it manually
324 		set_eflags(get_eflags() | CPUID_EFLAGS);
325 		if ((get_eflags() & CPUID_EFLAGS) == 0)
326 			return B_ERROR;
327 	}
328 
329 	cpuid_info info;
330 	if (get_current_cpuid(&info, 1, 0) != B_OK)
331 		return B_ERROR;
332 
333 	if ((info.eax_1.features & RDTSC_FEATURE) == 0) {
334 		// we currently require RDTSC
335 		return B_ERROR;
336 	}
337 
338 	return B_OK;
339 }
340 
341 
342 //	#pragma mark -
343 
344 
345 extern "C" void
346 spin(bigtime_t microseconds)
347 {
348 	bigtime_t time = system_time();
349 
350 	while ((system_time() - time) < microseconds)
351 		asm volatile ("pause;");
352 }
353 
354 
355 extern "C" void
356 cpu_init()
357 {
358 	if (check_cpu_features() != B_OK)
359 		panic("You need a Pentium or higher in order to boot!\n");
360 
361 	calculate_cpu_conversion_factor();
362 
363 	gKernelArgs.num_cpus = 1;
364 		// this will eventually be corrected later on
365 }
366 
367