1 /* 2 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 4 * Distributed under the terms of the MIT License. 5 * 6 * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and 7 * licensed under the NewOS license. 8 */ 9 10 11 #include <OS.h> 12 13 #include <boot/arch/x86/arch_cpu.h> 14 #include <boot/kernel_args.h> 15 #include <boot/platform.h> 16 #include <boot/stage2.h> 17 #include <boot/stdio.h> 18 19 #include <arch/cpu.h> 20 #include <arch/x86/arch_cpu.h> 21 #include <arch_kernel.h> 22 #include <arch_system_info.h> 23 24 #include <string.h> 25 26 #include <x86intrin.h> 27 28 29 uint32 gTimeConversionFactor; 30 31 // PIT definitions 32 #define TIMER_CLKNUM_HZ (14318180 / 12) 33 34 // PIT IO Ports 35 #define PIT_CHANNEL_PORT_BASE 0x40 36 #define PIT_CONTROL 0x43 37 38 // Channel selection 39 #define PIT_SELECT_CHANNEL_SHIFT 6 40 41 // Access mode 42 #define PIT_ACCESS_LATCH_COUNTER (0 << 4) 43 #define PIT_ACCESS_LOW_BYTE_ONLY (1 << 4) 44 #define PIT_ACCESS_HIGH_BYTE_ONLY (2 << 4) 45 #define PIT_ACCESS_LOW_THEN_HIGH_BYTE (3 << 4) 46 47 // Operating modes 48 #define PIT_MODE_INTERRUPT_ON_0 (0 << 1) 49 #define PIT_MODE_HARDWARE_COUNTDOWN (1 << 1) 50 #define PIT_MODE_RATE_GENERATOR (2 << 1) 51 #define PIT_MODE_SQUARE_WAVE_GENERATOR (3 << 1) 52 #define PIT_MODE_SOFTWARE_STROBE (4 << 1) 53 #define PIT_MODE_HARDWARE_STROBE (5 << 1) 54 55 // BCD/Binary mode 56 #define PIT_BINARY_MODE 0 57 #define PIT_BCD_MODE 1 58 59 // Channel 2 control (speaker) 60 #define PIT_CHANNEL_2_CONTROL 0x61 61 #define PIT_CHANNEL_2_GATE_HIGH 0x01 62 #define PIT_CHANNEL_2_SPEAKER_OFF_MASK ~0x02 63 64 // Maximum values 65 #define MAX_QUICK_SAMPLES 20 66 #define MAX_SLOW_SAMPLES 20 67 // TODO: These are arbitrary. They are here to avoid spinning indefinitely 68 // if the TSC just isn't stable and we can't get our desired error range. 69 70 71 #ifdef __SIZEOF_INT128__ 72 typedef unsigned __int128 uint128; 73 #else 74 struct uint128 { 75 uint128(uint64 low, uint64 high = 0) 76 : 77 low(low), 78 high(high) 79 { 80 } 81 82 bool operator<(const uint128& other) const 83 { 84 return high < other.high || (high == other.high && low < other.low); 85 } 86 87 bool operator<=(const uint128& other) const 88 { 89 return !(other < *this); 90 } 91 92 uint128 operator<<(int count) const 93 { 94 if (count == 0) 95 return *this; 96 97 if (count >= 128) 98 return 0; 99 100 if (count >= 64) 101 return uint128(0, low << (count - 64)); 102 103 return uint128(low << count, (high << count) | (low >> (64 - count))); 104 } 105 106 uint128 operator>>(int count) const 107 { 108 if (count == 0) 109 return *this; 110 111 if (count >= 128) 112 return 0; 113 114 if (count >= 64) 115 return uint128(high >> (count - 64), 0); 116 117 return uint128((low >> count) | (high << (64 - count)), high >> count); 118 } 119 120 uint128 operator+(const uint128& other) const 121 { 122 uint64 resultLow = low + other.low; 123 return uint128(resultLow, 124 high + other.high + (resultLow < low ? 1 : 0)); 125 } 126 127 uint128 operator-(const uint128& other) const 128 { 129 uint64 resultLow = low - other.low; 130 return uint128(resultLow, 131 high - other.high - (resultLow > low ? 1 : 0)); 132 } 133 134 uint128 operator*(uint32 other) const 135 { 136 uint64 resultMid = (low >> 32) * other; 137 uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32); 138 return uint128(resultLow, 139 high * other + (resultMid >> 32) 140 + (resultLow < resultMid << 32 ? 1 : 0)); 141 } 142 143 uint128 operator/(const uint128& other) const 144 { 145 int shift = 0; 146 uint128 shiftedDivider = other; 147 while (shiftedDivider.high >> 63 == 0 && shiftedDivider < *this) { 148 shiftedDivider = shiftedDivider << 1; 149 shift++; 150 } 151 152 uint128 result = 0; 153 uint128 temp = *this; 154 for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) { 155 if (shiftedDivider <= temp) { 156 result = result + (uint128(1) << shift); 157 temp = temp - shiftedDivider; 158 } 159 } 160 161 return result; 162 } 163 164 operator uint64() const 165 { 166 return low; 167 } 168 169 private: 170 uint64 low; 171 uint64 high; 172 }; 173 #endif 174 175 176 static inline uint64_t 177 rdtsc_fenced() 178 { 179 // RDTSC is not serializing, nor does it drain the instruction stream. 180 // RDTSCP does, but is not available everywhere. Other OSes seem to use 181 // "CPUID" rather than MFENCE/LFENCE for serializing here during boot. 182 asm volatile ("cpuid" : : : "eax", "ebx", "ecx", "edx"); 183 184 return __rdtsc(); 185 } 186 187 188 static inline void 189 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta, 190 double& conversionFactor, uint16& expired) 191 { 192 uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT; 193 out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0 194 | PIT_BINARY_MODE, PIT_CONTROL); 195 196 // Fill in count of 0xffff, low then high byte 197 uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel; 198 out8(0xff, channelPort); 199 out8(0xff, channelPort); 200 201 // Read the count back once to delay the start. This ensures that we've 202 // waited long enough for the counter to actually start counting down, as 203 // this only happens on the next clock cycle after reload. 204 in8(channelPort); 205 in8(channelPort); 206 207 // We're expecting the PIT to be at the starting position (high byte 0xff) 208 // as we just programmed it, but if it isn't we wait for it to wrap. 209 uint8 startLow; 210 uint8 startHigh; 211 do { 212 out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL); 213 startLow = in8(channelPort); 214 startHigh = in8(channelPort); 215 } while (startHigh != 255); 216 217 // Read in the first TSC value 218 uint64 startTSC = rdtsc_fenced(); 219 220 // Wait for the PIT to count down to our desired value 221 uint8 endLow; 222 uint8 endHigh; 223 do { 224 out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL); 225 endLow = in8(channelPort); 226 endHigh = in8(channelPort); 227 } while (endHigh > desiredHighByte); 228 229 // And read the second TSC value 230 uint64 endTSC = rdtsc_fenced(); 231 232 tscDelta = endTSC - startTSC; 233 expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow); 234 conversionFactor = (double)tscDelta / (double)expired; 235 } 236 237 238 static void 239 calculate_cpu_conversion_factor(uint8 channel) 240 { 241 // When using channel 2, enable the input and disable the speaker. 242 if (channel == 2) { 243 uint8 control = in8(PIT_CHANNEL_2_CONTROL); 244 control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK; 245 control |= PIT_CHANNEL_2_GATE_HIGH; 246 out8(control, PIT_CHANNEL_2_CONTROL); 247 } 248 249 uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow; 250 double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow; 251 uint16 expired; 252 253 uint32 quickSampleCount = 1; 254 uint32 slowSampleCount = 1; 255 256 quick_sample: 257 calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick, 258 expired); 259 260 slower_sample: 261 calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower, 262 expired); 263 264 double deviation = conversionFactorQuick / conversionFactorSlower; 265 if (deviation < 0.99 || deviation > 1.01) { 266 // We might have been hit by a SMI or were otherwise stalled 267 if (quickSampleCount++ < MAX_QUICK_SAMPLES) 268 goto quick_sample; 269 } 270 271 // Slow sample 272 calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow, 273 expired); 274 275 deviation = conversionFactorSlower / conversionFactorSlow; 276 if (deviation < 0.99 || deviation > 1.01) { 277 // We might have been hit by a SMI or were otherwise stalled 278 if (slowSampleCount++ < MAX_SLOW_SAMPLES) 279 goto slower_sample; 280 } 281 282 // Scale the TSC delta to timer units 283 tscDeltaSlow *= TIMER_CLKNUM_HZ; 284 285 uint64 clockSpeed = tscDeltaSlow / expired; 286 gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32) 287 / uint128(tscDeltaSlow); 288 289 #ifdef TRACE_CPU 290 if (clockSpeed > 1000000000LL) { 291 dprintf("CPU at %lld.%03Ld GHz\n", clockSpeed / 1000000000LL, 292 (clockSpeed % 1000000000LL) / 1000000LL); 293 } else { 294 dprintf("CPU at %lld.%03Ld MHz\n", clockSpeed / 1000000LL, 295 (clockSpeed % 1000000LL) / 1000LL); 296 } 297 #endif 298 299 gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor; 300 gKernelArgs.arch_args.cpu_clock_speed = clockSpeed; 301 //dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed); 302 303 if (quickSampleCount > 1) { 304 dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n", 305 quickSampleCount); 306 } 307 308 if (slowSampleCount > 1) { 309 dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n", 310 slowSampleCount); 311 } 312 313 if (channel == 2) { 314 // Set the gate low again 315 out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH, 316 PIT_CHANNEL_2_CONTROL); 317 } 318 } 319 320 321 void 322 determine_cpu_conversion_factor(uint8 channel) 323 { 324 // Before using the calibration loop, check if we are on a hypervisor. 325 cpuid_info info; 326 if (get_current_cpuid(&info, 1, 0) == B_OK 327 && (info.regs.ecx & IA32_FEATURE_EXT_HYPERVISOR) != 0) { 328 get_current_cpuid(&info, 0x40000000, 0); 329 const uint32 maxVMM = info.regs.eax; 330 if (maxVMM >= 0x40000010) { 331 get_current_cpuid(&info, 0x40000010, 0); 332 333 uint64 clockSpeed = uint64(info.regs.eax) * 1000; 334 gTimeConversionFactor = (uint64(1000) << 32) / info.regs.eax; 335 336 gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor; 337 gKernelArgs.arch_args.cpu_clock_speed = clockSpeed; 338 339 dprintf("TSC frequency read from hypervisor CPUID leaf\n"); 340 return; 341 } 342 } 343 344 calculate_cpu_conversion_factor(channel); 345 } 346 347 348 void 349 ucode_load(BootVolume& volume) 350 { 351 cpuid_info info; 352 if (get_current_cpuid(&info, 0, 0) != B_OK) 353 return; 354 355 bool isIntel = strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) == 0; 356 bool isAmd = strncmp(info.eax_0.vendor_id, "AuthenticAMD", 12) == 0; 357 358 if (!isIntel && !isAmd) 359 return; 360 361 if (get_current_cpuid(&info, 1, 0) != B_OK) 362 return; 363 364 char path[128]; 365 int family = info.eax_1.family; 366 int model = info.eax_1.model; 367 if (family == 0x6 || family == 0xf) { 368 family += info.eax_1.extended_family; 369 model += (info.eax_1.extended_model << 4); 370 } 371 if (isIntel) { 372 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/intel-ucode/" 373 "%02x-%02x-%02x", family, model, info.eax_1.stepping); 374 } else if (family < 0x15) { 375 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/" 376 "microcode_amd.bin"); 377 } else { 378 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/" 379 "microcode_amd_fam%02xh.bin", family); 380 } 381 dprintf("ucode_load: %s\n", path); 382 383 int fd = open_from(volume.RootDirectory(), path, O_RDONLY); 384 if (fd < B_OK) { 385 dprintf("ucode_load: couldn't find microcode\n"); 386 return; 387 } 388 struct stat stat; 389 if (fstat(fd, &stat) < 0) { 390 dprintf("ucode_load: couldn't stat microcode file\n"); 391 close(fd); 392 return; 393 } 394 395 ssize_t length = stat.st_size; 396 397 // 16-byte alignment required 398 void *buffer = kernel_args_malloc(length, 16); 399 if (buffer != NULL) { 400 if (read(fd, buffer, length) != length) { 401 dprintf("ucode_load: couldn't read microcode file\n"); 402 kernel_args_free(buffer); 403 } else { 404 gKernelArgs.ucode_data = buffer; 405 gKernelArgs.ucode_data_size = length; 406 dprintf("ucode_load: microcode file read in memory\n"); 407 } 408 } 409 410 close(fd); 411 } 412 413 414 extern "C" bigtime_t 415 system_time() 416 { 417 uint64 tsc = rdtsc_fenced(); 418 uint64 lo = (uint32)tsc; 419 uint64 hi = tsc >> 32; 420 return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor; 421 } 422 423 424 extern "C" void 425 spin(bigtime_t microseconds) 426 { 427 bigtime_t time = system_time(); 428 429 while ((system_time() - time) < microseconds) 430 asm volatile ("pause;"); 431 } 432 433 434 extern "C" status_t 435 boot_arch_cpu_init() 436 { 437 // Nothing really to init on x86 438 return B_OK; 439 } 440 441 442 extern "C" void 443 arch_ucode_load(BootVolume& volume) 444 { 445 ucode_load(volume); 446 } 447