1 /*
2 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
4 * Distributed under the terms of the MIT License.
5 *
6 * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and
7 * licensed under the NewOS license.
8 */
9
10
11 #include <OS.h>
12
13 #include <boot/arch/x86/arch_cpu.h>
14 #include <boot/kernel_args.h>
15 #include <boot/platform.h>
16 #include <boot/stage2.h>
17 #include <boot/stdio.h>
18
19 #include <arch/cpu.h>
20 #include <arch/x86/arch_cpu.h>
21 #include <arch_kernel.h>
22 #include <arch_system_info.h>
23
24 #include <string.h>
25
26 #include <x86intrin.h>
27
28
// Scale factor applied to raw TSC values by system_time(): the tick count is
// multiplied by this factor and the product is shifted right by 32 bits.
// It is filled in by the conversion factor routines in this file.
uint32 gTimeConversionFactor;
30
// PIT definitions
// Tick rate used to scale PIT counts to seconds (1193181 Hz after the
// integer division).
#define TIMER_CLKNUM_HZ (14318180 / 12)

// PIT IO Ports
// The data port of channel N is PIT_CHANNEL_PORT_BASE + N.
#define PIT_CHANNEL_PORT_BASE 0x40
#define PIT_CONTROL 0x43

// Channel selection
// The channel number is shifted into place by this amount in the control
// byte.
#define PIT_SELECT_CHANNEL_SHIFT 6

// Access mode
#define PIT_ACCESS_LATCH_COUNTER (0 << 4)
#define PIT_ACCESS_LOW_BYTE_ONLY (1 << 4)
#define PIT_ACCESS_HIGH_BYTE_ONLY (2 << 4)
#define PIT_ACCESS_LOW_THEN_HIGH_BYTE (3 << 4)

// Operating modes
#define PIT_MODE_INTERRUPT_ON_0 (0 << 1)
#define PIT_MODE_HARDWARE_COUNTDOWN (1 << 1)
#define PIT_MODE_RATE_GENERATOR (2 << 1)
#define PIT_MODE_SQUARE_WAVE_GENERATOR (3 << 1)
#define PIT_MODE_SOFTWARE_STROBE (4 << 1)
#define PIT_MODE_HARDWARE_STROBE (5 << 1)

// BCD/Binary mode
#define PIT_BINARY_MODE 0
#define PIT_BCD_MODE 1

// Channel 2 control (speaker)
#define PIT_CHANNEL_2_CONTROL 0x61
#define PIT_CHANNEL_2_GATE_HIGH 0x01
// Parenthesized so the expansion stays a single operand wherever it is used.
#define PIT_CHANNEL_2_SPEAKER_OFF_MASK (~0x02)

// Maximum values
#define MAX_QUICK_SAMPLES 20
#define MAX_SLOW_SAMPLES 20
	// TODO: These are arbitrary. They are here to avoid spinning indefinitely
	// if the TSC just isn't stable and we can't get our desired error range.
69
70
71 #ifdef __SIZEOF_INT128__
72 typedef unsigned __int128 uint128;
73 #else
74 struct uint128 {
uint128uint12875 uint128(uint64 low, uint64 high = 0)
76 :
77 low(low),
78 high(high)
79 {
80 }
81
operator <uint12882 bool operator<(const uint128& other) const
83 {
84 return high < other.high || (high == other.high && low < other.low);
85 }
86
operator <=uint12887 bool operator<=(const uint128& other) const
88 {
89 return !(other < *this);
90 }
91
operator <<uint12892 uint128 operator<<(int count) const
93 {
94 if (count == 0)
95 return *this;
96
97 if (count >= 128)
98 return 0;
99
100 if (count >= 64)
101 return uint128(0, low << (count - 64));
102
103 return uint128(low << count, (high << count) | (low >> (64 - count)));
104 }
105
operator >>uint128106 uint128 operator>>(int count) const
107 {
108 if (count == 0)
109 return *this;
110
111 if (count >= 128)
112 return 0;
113
114 if (count >= 64)
115 return uint128(high >> (count - 64), 0);
116
117 return uint128((low >> count) | (high << (64 - count)), high >> count);
118 }
119
operator +uint128120 uint128 operator+(const uint128& other) const
121 {
122 uint64 resultLow = low + other.low;
123 return uint128(resultLow,
124 high + other.high + (resultLow < low ? 1 : 0));
125 }
126
operator -uint128127 uint128 operator-(const uint128& other) const
128 {
129 uint64 resultLow = low - other.low;
130 return uint128(resultLow,
131 high - other.high - (resultLow > low ? 1 : 0));
132 }
133
operator *uint128134 uint128 operator*(uint32 other) const
135 {
136 uint64 resultMid = (low >> 32) * other;
137 uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32);
138 return uint128(resultLow,
139 high * other + (resultMid >> 32)
140 + (resultLow < resultMid << 32 ? 1 : 0));
141 }
142
operator /uint128143 uint128 operator/(const uint128& other) const
144 {
145 int shift = 0;
146 uint128 shiftedDivider = other;
147 while (shiftedDivider.high >> 63 == 0 && shiftedDivider < *this) {
148 shiftedDivider = shiftedDivider << 1;
149 shift++;
150 }
151
152 uint128 result = 0;
153 uint128 temp = *this;
154 for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) {
155 if (shiftedDivider <= temp) {
156 result = result + (uint128(1) << shift);
157 temp = temp - shiftedDivider;
158 }
159 }
160
161 return result;
162 }
163
operator uint64uint128164 operator uint64() const
165 {
166 return low;
167 }
168
169 private:
170 uint64 low;
171 uint64 high;
172 };
173 #endif
174
175
// Returns the current TSC value, read after executing CPUID.
static inline uint64_t
rdtsc_fenced()
{
	// RDTSC neither serializes nor drains the instruction stream. RDTSCP
	// would, but it isn't available everywhere. Other OSes seem to use
	// "CPUID" rather than MFENCE/LFENCE for serializing during boot, so we
	// do the same.
	asm volatile ("cpuid" : : : "eax", "ebx", "ecx", "edx");

	const uint64_t timestamp = __rdtsc();
	return timestamp;
}
186
187
// Takes one calibration sample: programs the given PIT channel with a count
// of 0xffff and measures how many TSC ticks pass while the latched counter's
// high byte goes from 255 down to at most desiredHighByte.
//
// desiredHighByte: high byte of the PIT count at which the sample ends; a
//		smaller value makes the sample longer.
// channel: PIT channel to program and poll.
// tscDelta: set to the difference of the two TSC readings.
// conversionFactor: set to tscDelta / expired.
// expired: set to the difference between the latched start and end counts,
//		truncated to 16 bits.
static inline void
calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta,
	double& conversionFactor, uint16& expired)
{
	// Select the channel, 16 bit access, "interrupt on 0" mode, binary count.
	uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT;
	out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0
		| PIT_BINARY_MODE, PIT_CONTROL);

	// Fill in count of 0xffff, low then high byte
	uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel;
	out8(0xff, channelPort);
	out8(0xff, channelPort);

	// Read the count back once to delay the start. This ensures that we've
	// waited long enough for the counter to actually start counting down, as
	// this only happens on the next clock cycle after reload.
	in8(channelPort);
	in8(channelPort);

	// We're expecting the PIT to be at the starting position (high byte 0xff)
	// as we just programmed it, but if it isn't we wait for it to wrap.
	uint8 startLow;
	uint8 startHigh;
	do {
		// Latch the current count, then read it low byte first.
		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
		startLow = in8(channelPort);
		startHigh = in8(channelPort);
	} while (startHigh != 255);

	// Read in the first TSC value
	uint64 startTSC = rdtsc_fenced();

	// Wait for the PIT to count down to our desired value
	uint8 endLow;
	uint8 endHigh;
	do {
		out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL);
		endLow = in8(channelPort);
		endHigh = in8(channelPort);
	} while (endHigh > desiredHighByte);

	// And read the second TSC value
	uint64 endTSC = rdtsc_fenced();

	tscDelta = endTSC - startTSC;
	// The counter counts down, so the start value is the larger one.
	expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow);
	conversionFactor = (double)tscDelta / (double)expired;
}
236
237
238 static void
calculate_cpu_conversion_factor(uint8 channel)239 calculate_cpu_conversion_factor(uint8 channel)
240 {
241 // When using channel 2, enable the input and disable the speaker.
242 if (channel == 2) {
243 uint8 control = in8(PIT_CHANNEL_2_CONTROL);
244 control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK;
245 control |= PIT_CHANNEL_2_GATE_HIGH;
246 out8(control, PIT_CHANNEL_2_CONTROL);
247 }
248
249 uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow;
250 double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow;
251 uint16 expired;
252
253 uint32 quickSampleCount = 1;
254 uint32 slowSampleCount = 1;
255
256 quick_sample:
257 calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick,
258 expired);
259
260 slower_sample:
261 calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower,
262 expired);
263
264 double deviation = conversionFactorQuick / conversionFactorSlower;
265 if (deviation < 0.99 || deviation > 1.01) {
266 // We might have been hit by a SMI or were otherwise stalled
267 if (quickSampleCount++ < MAX_QUICK_SAMPLES)
268 goto quick_sample;
269 }
270
271 // Slow sample
272 calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow,
273 expired);
274
275 deviation = conversionFactorSlower / conversionFactorSlow;
276 if (deviation < 0.99 || deviation > 1.01) {
277 // We might have been hit by a SMI or were otherwise stalled
278 if (slowSampleCount++ < MAX_SLOW_SAMPLES)
279 goto slower_sample;
280 }
281
282 // Scale the TSC delta to timer units
283 tscDeltaSlow *= TIMER_CLKNUM_HZ;
284
285 uint64 clockSpeed = tscDeltaSlow / expired;
286 gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32)
287 / uint128(tscDeltaSlow);
288
289 #ifdef TRACE_CPU
290 if (clockSpeed > 1000000000LL) {
291 dprintf("CPU at %lld.%03Ld GHz\n", clockSpeed / 1000000000LL,
292 (clockSpeed % 1000000000LL) / 1000000LL);
293 } else {
294 dprintf("CPU at %lld.%03Ld MHz\n", clockSpeed / 1000000LL,
295 (clockSpeed % 1000000LL) / 1000LL);
296 }
297 #endif
298
299 gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
300 gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
301 //dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed);
302
303 if (quickSampleCount > 1) {
304 dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n",
305 quickSampleCount);
306 }
307
308 if (slowSampleCount > 1) {
309 dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n",
310 slowSampleCount);
311 }
312
313 if (channel == 2) {
314 // Set the gate low again
315 out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH,
316 PIT_CHANNEL_2_CONTROL);
317 }
318 }
319
320
321 void
determine_cpu_conversion_factor(uint8 channel)322 determine_cpu_conversion_factor(uint8 channel)
323 {
324 // Before using the calibration loop, check if we are on a hypervisor.
325 cpuid_info info;
326 if (get_current_cpuid(&info, 1, 0) == B_OK
327 && (info.regs.ecx & IA32_FEATURE_EXT_HYPERVISOR) != 0) {
328 get_current_cpuid(&info, 0x40000000, 0);
329 const uint32 maxVMM = info.regs.eax;
330 if (maxVMM >= 0x40000010) {
331 get_current_cpuid(&info, 0x40000010, 0);
332
333 uint64 clockSpeed = uint64(info.regs.eax) * 1000;
334 gTimeConversionFactor = (uint64(1000) << 32) / info.regs.eax;
335
336 gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
337 gKernelArgs.arch_args.cpu_clock_speed = clockSpeed;
338
339 dprintf("TSC frequency read from hypervisor CPUID leaf\n");
340 return;
341 }
342 }
343
344 calculate_cpu_conversion_factor(channel);
345 }
346
347
348 void
ucode_load(BootVolume & volume)349 ucode_load(BootVolume& volume)
350 {
351 cpuid_info info;
352 if (get_current_cpuid(&info, 0, 0) != B_OK)
353 return;
354
355 bool isIntel = strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) == 0;
356 bool isAmd = strncmp(info.eax_0.vendor_id, "AuthenticAMD", 12) == 0;
357
358 if (!isIntel && !isAmd)
359 return;
360
361 if (get_current_cpuid(&info, 1, 0) != B_OK)
362 return;
363
364 char path[128];
365 int family = info.eax_1.family;
366 int model = info.eax_1.model;
367 if (family == 0x6 || family == 0xf) {
368 family += info.eax_1.extended_family;
369 model += (info.eax_1.extended_model << 4);
370 }
371 if (isIntel) {
372 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/intel-ucode/"
373 "%02x-%02x-%02x", family, model, info.eax_1.stepping);
374 } else if (family < 0x15) {
375 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
376 "microcode_amd.bin");
377 } else {
378 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/"
379 "microcode_amd_fam%02xh.bin", family);
380 }
381 dprintf("ucode_load: %s\n", path);
382
383 int fd = open_from(volume.RootDirectory(), path, O_RDONLY);
384 if (fd < B_OK) {
385 dprintf("ucode_load: couldn't find microcode\n");
386 return;
387 }
388 struct stat stat;
389 if (fstat(fd, &stat) < 0) {
390 dprintf("ucode_load: couldn't stat microcode file\n");
391 close(fd);
392 return;
393 }
394
395 ssize_t length = stat.st_size;
396
397 // 16-byte alignment required
398 void *buffer = kernel_args_malloc(length, 16);
399 if (buffer != NULL) {
400 if (read(fd, buffer, length) != length) {
401 dprintf("ucode_load: couldn't read microcode file\n");
402 kernel_args_free(buffer);
403 } else {
404 gKernelArgs.ucode_data = buffer;
405 gKernelArgs.ucode_data_size = length;
406 dprintf("ucode_load: microcode file read in memory\n");
407 }
408 }
409
410 close(fd);
411 }
412
413
414 extern "C" bigtime_t
system_time()415 system_time()
416 {
417 uint64 tsc = rdtsc_fenced();
418 uint64 lo = (uint32)tsc;
419 uint64 hi = tsc >> 32;
420 return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor;
421 }
422
423
424 extern "C" void
spin(bigtime_t microseconds)425 spin(bigtime_t microseconds)
426 {
427 bigtime_t time = system_time();
428
429 while ((system_time() - time) < microseconds)
430 asm volatile ("pause;");
431 }
432
433
// Architecture specific CPU initialization hook of the boot loader.
// Does nothing on x86 and always returns B_OK.
extern "C" status_t
boot_arch_cpu_init()
{
	// Nothing really to init on x86
	return B_OK;
}
440
441
// C linkage entry point for loading CPU microcode; simply forwards to
// ucode_load() with the given boot volume.
extern "C" void
arch_ucode_load(BootVolume& volume)
{
	ucode_load(volume);
}
447