1 /* 2 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 4 * Distributed under the terms of the MIT License. 5 * 6 * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and 7 * licensed under the NewOS license. 8 */ 9 10 11 #include <OS.h> 12 13 #include <boot/arch/x86/arch_cpu.h> 14 #include <boot/kernel_args.h> 15 #include <boot/platform.h> 16 #include <boot/stage2.h> 17 #include <boot/stdio.h> 18 19 #include <arch/cpu.h> 20 #include <arch/x86/arch_cpu.h> 21 #include <arch_kernel.h> 22 #include <arch_system_info.h> 23 24 #include <string.h> 25 26 27 uint32 gTimeConversionFactor; 28 29 // PIT definitions 30 #define TIMER_CLKNUM_HZ (14318180 / 12) 31 32 // PIT IO Ports 33 #define PIT_CHANNEL_PORT_BASE 0x40 34 #define PIT_CONTROL 0x43 35 36 // Channel selection 37 #define PIT_SELECT_CHANNEL_SHIFT 6 38 39 // Access mode 40 #define PIT_ACCESS_LATCH_COUNTER (0 << 4) 41 #define PIT_ACCESS_LOW_BYTE_ONLY (1 << 4) 42 #define PIT_ACCESS_HIGH_BYTE_ONLY (2 << 4) 43 #define PIT_ACCESS_LOW_THEN_HIGH_BYTE (3 << 4) 44 45 // Operating modes 46 #define PIT_MODE_INTERRUPT_ON_0 (0 << 1) 47 #define PIT_MODE_HARDWARE_COUNTDOWN (1 << 1) 48 #define PIT_MODE_RATE_GENERATOR (2 << 1) 49 #define PIT_MODE_SQUARE_WAVE_GENERATOR (3 << 1) 50 #define PIT_MODE_SOFTWARE_STROBE (4 << 1) 51 #define PIT_MODE_HARDWARE_STROBE (5 << 1) 52 53 // BCD/Binary mode 54 #define PIT_BINARY_MODE 0 55 #define PIT_BCD_MODE 1 56 57 // Channel 2 control (speaker) 58 #define PIT_CHANNEL_2_CONTROL 0x61 59 #define PIT_CHANNEL_2_GATE_HIGH 0x01 60 #define PIT_CHANNEL_2_SPEAKER_OFF_MASK ~0x02 61 62 // Maximum values 63 #define MAX_QUICK_SAMPLES 20 64 #define MAX_SLOW_SAMPLES 20 65 // TODO: These are arbitrary. They are here to avoid spinning indefinitely 66 // if the TSC just isn't stable and we can't get our desired error range. 67 68 69 #ifdef __SIZEOF_INT128__ 70 typedef unsigned __int128 uint128; 71 #else 72 struct uint128 { 73 uint128(uint64 low, uint64 high = 0) 74 : 75 low(low), 76 high(high) 77 { 78 } 79 80 bool operator<(const uint128& other) const 81 { 82 return high < other.high || (high == other.high && low < other.low); 83 } 84 85 bool operator<=(const uint128& other) const 86 { 87 return !(other < *this); 88 } 89 90 uint128 operator<<(int count) const 91 { 92 if (count == 0) 93 return *this; 94 95 if (count >= 128) 96 return 0; 97 98 if (count >= 64) 99 return uint128(0, low << (count - 64)); 100 101 return uint128(low << count, (high << count) | (low >> (64 - count))); 102 } 103 104 uint128 operator>>(int count) const 105 { 106 if (count == 0) 107 return *this; 108 109 if (count >= 128) 110 return 0; 111 112 if (count >= 64) 113 return uint128(high >> (count - 64), 0); 114 115 return uint128((low >> count) | (high << (64 - count)), high >> count); 116 } 117 118 uint128 operator+(const uint128& other) const 119 { 120 uint64 resultLow = low + other.low; 121 return uint128(resultLow, 122 high + other.high + (resultLow < low ? 1 : 0)); 123 } 124 125 uint128 operator-(const uint128& other) const 126 { 127 uint64 resultLow = low - other.low; 128 return uint128(resultLow, 129 high - other.high - (resultLow > low ? 1 : 0)); 130 } 131 132 uint128 operator*(uint32 other) const 133 { 134 uint64 resultMid = (low >> 32) * other; 135 uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32); 136 return uint128(resultLow, 137 high * other + (resultMid >> 32) 138 + (resultLow < resultMid << 32 ? 1 : 0)); 139 } 140 141 uint128 operator/(const uint128& other) const 142 { 143 int shift = 0; 144 uint128 shiftedDivider = other; 145 while (shiftedDivider.high >> 63 == 0 && shiftedDivider < *this) { 146 shiftedDivider = shiftedDivider << 1; 147 shift++; 148 } 149 150 uint128 result = 0; 151 uint128 temp = *this; 152 for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) { 153 if (shiftedDivider <= temp) { 154 result = result + (uint128(1) << shift); 155 temp = temp - shiftedDivider; 156 } 157 } 158 159 return result; 160 } 161 162 operator uint64() const 163 { 164 return low; 165 } 166 167 private: 168 uint64 low; 169 uint64 high; 170 }; 171 #endif 172 173 174 static inline uint64_t 175 rdtsc_fenced() 176 { 177 uint64 tsc; 178 179 // RDTSC is not serializing, nor does it drain the instruction stream. 180 // RDTSCP does, but is not available everywhere. Other OSes seem to use 181 // "CPUID" rather than MFENCE/LFENCE for serializing here during boot. 182 asm volatile ("cpuid" : : : "eax", "ebx", "ecx", "edx"); 183 184 asm volatile ("rdtsc" : "=A"(tsc)); 185 186 return tsc; 187 } 188 189 190 static inline void 191 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta, 192 double& conversionFactor, uint16& expired) 193 { 194 uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT; 195 out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0 196 | PIT_BINARY_MODE, PIT_CONTROL); 197 198 // Fill in count of 0xffff, low then high byte 199 uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel; 200 out8(0xff, channelPort); 201 out8(0xff, channelPort); 202 203 // Read the count back once to delay the start. This ensures that we've 204 // waited long enough for the counter to actually start counting down, as 205 // this only happens on the next clock cycle after reload. 206 in8(channelPort); 207 in8(channelPort); 208 209 // We're expecting the PIT to be at the starting position (high byte 0xff) 210 // as we just programmed it, but if it isn't we wait for it to wrap. 211 uint8 startLow; 212 uint8 startHigh; 213 do { 214 out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL); 215 startLow = in8(channelPort); 216 startHigh = in8(channelPort); 217 } while (startHigh != 255); 218 219 // Read in the first TSC value 220 uint64 startTSC = rdtsc_fenced(); 221 222 // Wait for the PIT to count down to our desired value 223 uint8 endLow; 224 uint8 endHigh; 225 do { 226 out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL); 227 endLow = in8(channelPort); 228 endHigh = in8(channelPort); 229 } while (endHigh > desiredHighByte); 230 231 // And read the second TSC value 232 uint64 endTSC = rdtsc_fenced(); 233 234 tscDelta = endTSC - startTSC; 235 expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow); 236 conversionFactor = (double)tscDelta / (double)expired; 237 } 238 239 240 static void 241 calculate_cpu_conversion_factor(uint8 channel) 242 { 243 // When using channel 2, enable the input and disable the speaker. 244 if (channel == 2) { 245 uint8 control = in8(PIT_CHANNEL_2_CONTROL); 246 control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK; 247 control |= PIT_CHANNEL_2_GATE_HIGH; 248 out8(control, PIT_CHANNEL_2_CONTROL); 249 } 250 251 uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow; 252 double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow; 253 uint16 expired; 254 255 uint32 quickSampleCount = 1; 256 uint32 slowSampleCount = 1; 257 258 quick_sample: 259 calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick, 260 expired); 261 262 slower_sample: 263 calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower, 264 expired); 265 266 double deviation = conversionFactorQuick / conversionFactorSlower; 267 if (deviation < 0.99 || deviation > 1.01) { 268 // We might have been hit by a SMI or were otherwise stalled 269 if (quickSampleCount++ < MAX_QUICK_SAMPLES) 270 goto quick_sample; 271 } 272 273 // Slow sample 274 calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow, 275 expired); 276 277 deviation = conversionFactorSlower / conversionFactorSlow; 278 if (deviation < 0.99 || deviation > 1.01) { 279 // We might have been hit by a SMI or were otherwise stalled 280 if (slowSampleCount++ < MAX_SLOW_SAMPLES) 281 goto slower_sample; 282 } 283 284 // Scale the TSC delta to timer units 285 tscDeltaSlow *= TIMER_CLKNUM_HZ; 286 287 uint64 clockSpeed = tscDeltaSlow / expired; 288 gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32) 289 / uint128(tscDeltaSlow); 290 291 #ifdef TRACE_CPU 292 if (clockSpeed > 1000000000LL) { 293 dprintf("CPU at %lld.%03Ld GHz\n", clockSpeed / 1000000000LL, 294 (clockSpeed % 1000000000LL) / 1000000LL); 295 } else { 296 dprintf("CPU at %lld.%03Ld MHz\n", clockSpeed / 1000000LL, 297 (clockSpeed % 1000000LL) / 1000LL); 298 } 299 #endif 300 301 gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor; 302 gKernelArgs.arch_args.cpu_clock_speed = clockSpeed; 303 //dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed); 304 305 if (quickSampleCount > 1) { 306 dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n", 307 quickSampleCount); 308 } 309 310 if (slowSampleCount > 1) { 311 dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n", 312 slowSampleCount); 313 } 314 315 if (channel == 2) { 316 // Set the gate low again 317 out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH, 318 PIT_CHANNEL_2_CONTROL); 319 } 320 } 321 322 323 void 324 determine_cpu_conversion_factor(uint8 channel) 325 { 326 // Before using the calibration loop, check if we are on a hypervisor. 327 cpuid_info info; 328 if (get_current_cpuid(&info, 1, 0) == B_OK 329 && (info.regs.ecx & IA32_FEATURE_EXT_HYPERVISOR) != 0) { 330 get_current_cpuid(&info, 0x40000000, 0); 331 const uint32 maxVMM = info.regs.eax; 332 if (maxVMM >= 0x40000010) { 333 get_current_cpuid(&info, 0x40000010, 0); 334 335 uint64 clockSpeed = uint64(info.regs.eax) * 1000; 336 gTimeConversionFactor = (uint64(1000) << 32) / info.regs.eax; 337 338 gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor; 339 gKernelArgs.arch_args.cpu_clock_speed = clockSpeed; 340 341 dprintf("TSC frequency read from hypervisor CPUID leaf\n"); 342 return; 343 } 344 } 345 346 calculate_cpu_conversion_factor(channel); 347 } 348 349 350 void 351 ucode_load(BootVolume& volume) 352 { 353 cpuid_info info; 354 if (get_current_cpuid(&info, 0, 0) != B_OK) 355 return; 356 357 bool isIntel = strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) == 0; 358 bool isAmd = strncmp(info.eax_0.vendor_id, "AuthenticAMD", 12) == 0; 359 360 if (!isIntel && !isAmd) 361 return; 362 363 if (get_current_cpuid(&info, 1, 0) != B_OK) 364 return; 365 366 char path[128]; 367 int family = info.eax_1.family; 368 int model = info.eax_1.model; 369 if (family == 0x6 || family == 0xf) { 370 family += info.eax_1.extended_family; 371 model += (info.eax_1.extended_model << 4); 372 } 373 if (isIntel) { 374 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/intel-ucode/" 375 "%02x-%02x-%02x", family, model, info.eax_1.stepping); 376 } else if (family < 0x15) { 377 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/" 378 "microcode_amd.bin"); 379 } else { 380 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/" 381 "microcode_amd_fam%02xh.bin", family); 382 } 383 dprintf("ucode_load: %s\n", path); 384 385 int fd = open_from(volume.RootDirectory(), path, O_RDONLY); 386 if (fd < B_OK) { 387 dprintf("ucode_load: couldn't find microcode\n"); 388 return; 389 } 390 struct stat stat; 391 if (fstat(fd, &stat) < 0) { 392 dprintf("ucode_load: couldn't stat microcode file\n"); 393 close(fd); 394 return; 395 } 396 397 ssize_t length = stat.st_size; 398 399 // 16-byte alignment required 400 void *buffer = kernel_args_malloc(length, 16); 401 if (buffer != NULL) { 402 if (read(fd, buffer, length) != length) { 403 dprintf("ucode_load: couldn't read microcode file\n"); 404 kernel_args_free(buffer); 405 } else { 406 gKernelArgs.ucode_data = buffer; 407 gKernelArgs.ucode_data_size = length; 408 dprintf("ucode_load: microcode file read in memory\n"); 409 } 410 } 411 412 close(fd); 413 } 414 415 416 extern "C" bigtime_t 417 system_time() 418 { 419 uint64 tsc = rdtsc_fenced(); 420 uint64 lo = (uint32)tsc; 421 uint64 hi = tsc >> 32; 422 return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor; 423 } 424 425 426 extern "C" void 427 spin(bigtime_t microseconds) 428 { 429 bigtime_t time = system_time(); 430 431 while ((system_time() - time) < microseconds) 432 asm volatile ("pause;"); 433 } 434 435 436 extern "C" status_t 437 boot_arch_cpu_init() 438 { 439 // Nothing really to init on x86 440 return B_OK; 441 } 442 443 444 extern "C" void 445 arch_ucode_load(BootVolume& volume) 446 { 447 ucode_load(volume); 448 } 449