1 /* 2 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 4 * Distributed under the terms of the MIT License. 5 * 6 * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and 7 * licensed under the NewOS license. 8 */ 9 10 11 #include <OS.h> 12 13 #include <boot/arch/x86/arch_cpu.h> 14 #include <boot/kernel_args.h> 15 #include <boot/platform.h> 16 #include <boot/stage2.h> 17 #include <boot/stdio.h> 18 19 #include <arch/cpu.h> 20 #include <arch/x86/arch_cpu.h> 21 #include <arch_kernel.h> 22 #include <arch_system_info.h> 23 24 #include <string.h> 25 26 #if __GNUC__ > 2 27 #include <x86intrin.h> 28 #else 29 static inline uint64_t __rdtsc() 30 { 31 uint64 tsc; 32 33 asm volatile ("rdtsc\n" : "=A"(tsc)); 34 35 return tsc; 36 } 37 #endif 38 39 40 uint32 gTimeConversionFactor; 41 42 // PIT definitions 43 #define TIMER_CLKNUM_HZ (14318180 / 12) 44 45 // PIT IO Ports 46 #define PIT_CHANNEL_PORT_BASE 0x40 47 #define PIT_CONTROL 0x43 48 49 // Channel selection 50 #define PIT_SELECT_CHANNEL_SHIFT 6 51 52 // Access mode 53 #define PIT_ACCESS_LATCH_COUNTER (0 << 4) 54 #define PIT_ACCESS_LOW_BYTE_ONLY (1 << 4) 55 #define PIT_ACCESS_HIGH_BYTE_ONLY (2 << 4) 56 #define PIT_ACCESS_LOW_THEN_HIGH_BYTE (3 << 4) 57 58 // Operating modes 59 #define PIT_MODE_INTERRUPT_ON_0 (0 << 1) 60 #define PIT_MODE_HARDWARE_COUNTDOWN (1 << 1) 61 #define PIT_MODE_RATE_GENERATOR (2 << 1) 62 #define PIT_MODE_SQUARE_WAVE_GENERATOR (3 << 1) 63 #define PIT_MODE_SOFTWARE_STROBE (4 << 1) 64 #define PIT_MODE_HARDWARE_STROBE (5 << 1) 65 66 // BCD/Binary mode 67 #define PIT_BINARY_MODE 0 68 #define PIT_BCD_MODE 1 69 70 // Channel 2 control (speaker) 71 #define PIT_CHANNEL_2_CONTROL 0x61 72 #define PIT_CHANNEL_2_GATE_HIGH 0x01 73 #define PIT_CHANNEL_2_SPEAKER_OFF_MASK ~0x02 74 75 // Maximum values 76 #define MAX_QUICK_SAMPLES 20 77 #define MAX_SLOW_SAMPLES 20 78 // TODO: These are arbitrary. They are here to avoid spinning indefinitely 79 // if the TSC just isn't stable and we can't get our desired error range. 80 81 82 #ifdef __SIZEOF_INT128__ 83 typedef unsigned __int128 uint128; 84 #else 85 struct uint128 { 86 uint128(uint64 low, uint64 high = 0) 87 : 88 low(low), 89 high(high) 90 { 91 } 92 93 bool operator<(const uint128& other) const 94 { 95 return high < other.high || (high == other.high && low < other.low); 96 } 97 98 bool operator<=(const uint128& other) const 99 { 100 return !(other < *this); 101 } 102 103 uint128 operator<<(int count) const 104 { 105 if (count == 0) 106 return *this; 107 108 if (count >= 128) 109 return 0; 110 111 if (count >= 64) 112 return uint128(0, low << (count - 64)); 113 114 return uint128(low << count, (high << count) | (low >> (64 - count))); 115 } 116 117 uint128 operator>>(int count) const 118 { 119 if (count == 0) 120 return *this; 121 122 if (count >= 128) 123 return 0; 124 125 if (count >= 64) 126 return uint128(high >> (count - 64), 0); 127 128 return uint128((low >> count) | (high << (64 - count)), high >> count); 129 } 130 131 uint128 operator+(const uint128& other) const 132 { 133 uint64 resultLow = low + other.low; 134 return uint128(resultLow, 135 high + other.high + (resultLow < low ? 1 : 0)); 136 } 137 138 uint128 operator-(const uint128& other) const 139 { 140 uint64 resultLow = low - other.low; 141 return uint128(resultLow, 142 high - other.high - (resultLow > low ? 1 : 0)); 143 } 144 145 uint128 operator*(uint32 other) const 146 { 147 uint64 resultMid = (low >> 32) * other; 148 uint64 resultLow = (low & 0xffffffff) * other + (resultMid << 32); 149 return uint128(resultLow, 150 high * other + (resultMid >> 32) 151 + (resultLow < resultMid << 32 ? 1 : 0)); 152 } 153 154 uint128 operator/(const uint128& other) const 155 { 156 int shift = 0; 157 uint128 shiftedDivider = other; 158 while (shiftedDivider.high >> 63 == 0 && shiftedDivider < *this) { 159 shiftedDivider = shiftedDivider << 1; 160 shift++; 161 } 162 163 uint128 result = 0; 164 uint128 temp = *this; 165 for (; shift >= 0; shift--, shiftedDivider = shiftedDivider >> 1) { 166 if (shiftedDivider <= temp) { 167 result = result + (uint128(1) << shift); 168 temp = temp - shiftedDivider; 169 } 170 } 171 172 return result; 173 } 174 175 operator uint64() const 176 { 177 return low; 178 } 179 180 private: 181 uint64 low; 182 uint64 high; 183 }; 184 #endif 185 186 187 static inline void 188 calibration_loop(uint8 desiredHighByte, uint8 channel, uint64& tscDelta, 189 double& conversionFactor, uint16& expired) 190 { 191 uint8 select = channel << PIT_SELECT_CHANNEL_SHIFT; 192 out8(select | PIT_ACCESS_LOW_THEN_HIGH_BYTE | PIT_MODE_INTERRUPT_ON_0 193 | PIT_BINARY_MODE, PIT_CONTROL); 194 195 // Fill in count of 0xffff, low then high byte 196 uint8 channelPort = PIT_CHANNEL_PORT_BASE + channel; 197 out8(0xff, channelPort); 198 out8(0xff, channelPort); 199 200 // Read the count back once to delay the start. This ensures that we've 201 // waited long enough for the counter to actually start counting down, as 202 // this only happens on the next clock cycle after reload. 203 in8(channelPort); 204 in8(channelPort); 205 206 // We're expecting the PIT to be at the starting position (high byte 0xff) 207 // as we just programmed it, but if it isn't we wait for it to wrap. 208 uint8 startLow; 209 uint8 startHigh; 210 do { 211 out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL); 212 startLow = in8(channelPort); 213 startHigh = in8(channelPort); 214 } while (startHigh != 255); 215 216 // Read in the first TSC value 217 uint64 startTSC = __rdtsc(); 218 219 // Wait for the PIT to count down to our desired value 220 uint8 endLow; 221 uint8 endHigh; 222 do { 223 out8(select | PIT_ACCESS_LATCH_COUNTER, PIT_CONTROL); 224 endLow = in8(channelPort); 225 endHigh = in8(channelPort); 226 } while (endHigh > desiredHighByte); 227 228 // And read the second TSC value 229 uint64 endTSC = __rdtsc(); 230 231 tscDelta = endTSC - startTSC; 232 expired = ((startHigh << 8) | startLow) - ((endHigh << 8) | endLow); 233 conversionFactor = (double)tscDelta / (double)expired; 234 } 235 236 237 void 238 calculate_cpu_conversion_factor(uint8 channel) 239 { 240 // When using channel 2, enable the input and disable the speaker. 241 if (channel == 2) { 242 uint8 control = in8(PIT_CHANNEL_2_CONTROL); 243 control &= PIT_CHANNEL_2_SPEAKER_OFF_MASK; 244 control |= PIT_CHANNEL_2_GATE_HIGH; 245 out8(control, PIT_CHANNEL_2_CONTROL); 246 } 247 248 uint64 tscDeltaQuick, tscDeltaSlower, tscDeltaSlow; 249 double conversionFactorQuick, conversionFactorSlower, conversionFactorSlow; 250 uint16 expired; 251 252 uint32 quickSampleCount = 1; 253 uint32 slowSampleCount = 1; 254 255 quick_sample: 256 calibration_loop(224, channel, tscDeltaQuick, conversionFactorQuick, 257 expired); 258 259 slower_sample: 260 calibration_loop(192, channel, tscDeltaSlower, conversionFactorSlower, 261 expired); 262 263 double deviation = conversionFactorQuick / conversionFactorSlower; 264 if (deviation < 0.99 || deviation > 1.01) { 265 // We might have been hit by a SMI or were otherwise stalled 266 if (quickSampleCount++ < MAX_QUICK_SAMPLES) 267 goto quick_sample; 268 } 269 270 // Slow sample 271 calibration_loop(128, channel, tscDeltaSlow, conversionFactorSlow, 272 expired); 273 274 deviation = conversionFactorSlower / conversionFactorSlow; 275 if (deviation < 0.99 || deviation > 1.01) { 276 // We might have been hit by a SMI or were otherwise stalled 277 if (slowSampleCount++ < MAX_SLOW_SAMPLES) 278 goto slower_sample; 279 } 280 281 // Scale the TSC delta to timer units 282 tscDeltaSlow *= TIMER_CLKNUM_HZ; 283 284 uint64 clockSpeed = tscDeltaSlow / expired; 285 gTimeConversionFactor = ((uint128(expired) * uint32(1000000)) << 32) 286 / uint128(tscDeltaSlow); 287 288 #ifdef TRACE_CPU 289 if (clockSpeed > 1000000000LL) { 290 dprintf("CPU at %Ld.%03Ld GHz\n", clockSpeed / 1000000000LL, 291 (clockSpeed % 1000000000LL) / 1000000LL); 292 } else { 293 dprintf("CPU at %Ld.%03Ld MHz\n", clockSpeed / 1000000LL, 294 (clockSpeed % 1000000LL) / 1000LL); 295 } 296 #endif 297 298 gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor; 299 gKernelArgs.arch_args.cpu_clock_speed = clockSpeed; 300 //dprintf("factors: %lu %llu\n", gTimeConversionFactor, clockSpeed); 301 302 if (quickSampleCount > 1) { 303 dprintf("needed %" B_PRIu32 " quick samples for TSC calibration\n", 304 quickSampleCount); 305 } 306 307 if (slowSampleCount > 1) { 308 dprintf("needed %" B_PRIu32 " slow samples for TSC calibration\n", 309 slowSampleCount); 310 } 311 312 if (channel == 2) { 313 // Set the gate low again 314 out8(in8(PIT_CHANNEL_2_CONTROL) & ~PIT_CHANNEL_2_GATE_HIGH, 315 PIT_CHANNEL_2_CONTROL); 316 } 317 } 318 319 320 void 321 ucode_load(BootVolume& volume) 322 { 323 cpuid_info info; 324 if (get_current_cpuid(&info, 0, 0) != B_OK) 325 return; 326 327 bool isIntel = strncmp(info.eax_0.vendor_id, "GenuineIntel", 12) == 0; 328 bool isAmd = strncmp(info.eax_0.vendor_id, "AuthenticAMD", 12) == 0; 329 330 if (!isIntel && !isAmd) 331 return; 332 333 if (get_current_cpuid(&info, 1, 0) != B_OK) 334 return; 335 336 char path[128]; 337 int family = info.eax_1.family; 338 int model = info.eax_1.model; 339 if (family == 0x6 || family == 0xf) { 340 family += info.eax_1.extended_family; 341 model += (info.eax_1.extended_model << 4); 342 } 343 if (isIntel) { 344 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/intel-ucode/" 345 "%02x-%02x-%02x", family, model, info.eax_1.stepping); 346 } else if (family < 0x15) { 347 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/" 348 "microcode_amd.bin"); 349 } else { 350 snprintf(path, sizeof(path), "system/non-packaged/data/firmware/amd-ucode/" 351 "microcode_amd_fam%02xh.bin", family); 352 } 353 dprintf("ucode_load: %s\n", path); 354 355 int fd = open_from(volume.RootDirectory(), path, O_RDONLY); 356 if (fd < B_OK) { 357 dprintf("ucode_load: couldn't find microcode\n"); 358 return; 359 } 360 struct stat stat; 361 if (fstat(fd, &stat) < 0) { 362 dprintf("ucode_load: couldn't stat microcode file\n"); 363 close(fd); 364 return; 365 } 366 367 ssize_t length = stat.st_size; 368 369 // 16-byte alignment required 370 void *buffer = kernel_args_malloc(length, 16); 371 if (buffer != NULL) { 372 if (read(fd, buffer, length) != length) { 373 dprintf("ucode_load: couldn't read microcode file\n"); 374 kernel_args_free(buffer); 375 } else { 376 gKernelArgs.ucode_data = buffer; 377 gKernelArgs.ucode_data_size = length; 378 dprintf("ucode_load: microcode file read in memory\n"); 379 } 380 } 381 382 close(fd); 383 } 384 385 386 extern "C" bigtime_t 387 system_time() 388 { 389 uint64 tsc = __rdtsc(); 390 uint64 lo = (uint32)tsc; 391 uint64 hi = tsc >> 32; 392 return ((lo * gTimeConversionFactor) >> 32) + hi * gTimeConversionFactor; 393 } 394 395 396 extern "C" void 397 spin(bigtime_t microseconds) 398 { 399 bigtime_t time = system_time(); 400 401 while ((system_time() - time) < microseconds) 402 asm volatile ("pause;"); 403 } 404 405 406 extern "C" status_t 407 boot_arch_cpu_init() 408 { 409 // Nothing really to init on x86 410 return B_OK; 411 } 412 413 414 extern "C" void 415 arch_ucode_load(BootVolume& volume) 416 { 417 ucode_load(volume); 418 } 419