1*5af32e75SAxel Dörfler /* 2*5af32e75SAxel Dörfler * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 3*5af32e75SAxel Dörfler * Distributed under the terms of the MIT License. 4*5af32e75SAxel Dörfler * 5*5af32e75SAxel Dörfler * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and 6*5af32e75SAxel Dörfler * licensed under the NewOS license. 7*5af32e75SAxel Dörfler */ 8*5af32e75SAxel Dörfler 9*5af32e75SAxel Dörfler 10*5af32e75SAxel Dörfler #include "cpu.h" 11*5af32e75SAxel Dörfler 12*5af32e75SAxel Dörfler #include <OS.h> 13*5af32e75SAxel Dörfler #include <boot/platform.h> 14*5af32e75SAxel Dörfler #include <boot/stdio.h> 15*5af32e75SAxel Dörfler #include <boot/kernel_args.h> 16*5af32e75SAxel Dörfler #include <boot/stage2.h> 17*5af32e75SAxel Dörfler #include <arch/cpu.h> 18*5af32e75SAxel Dörfler #include <arch_kernel.h> 19*5af32e75SAxel Dörfler 20*5af32e75SAxel Dörfler #include <string.h> 21*5af32e75SAxel Dörfler 22*5af32e75SAxel Dörfler 23*5af32e75SAxel Dörfler //#define TRACE_CPU 24*5af32e75SAxel Dörfler #ifdef TRACE_CPU 25*5af32e75SAxel Dörfler # define TRACE(x) dprintf x 26*5af32e75SAxel Dörfler #else 27*5af32e75SAxel Dörfler # define TRACE(x) ; 28*5af32e75SAxel Dörfler #endif 29*5af32e75SAxel Dörfler 30*5af32e75SAxel Dörfler 31*5af32e75SAxel Dörfler extern "C" uint64 rdtsc(); 32*5af32e75SAxel Dörfler 33*5af32e75SAxel Dörfler uint32 gTimeConversionFactor; 34*5af32e75SAxel Dörfler 35*5af32e75SAxel Dörfler #define TIMER_CLKNUM_HZ (14318180/12) 36*5af32e75SAxel Dörfler 37*5af32e75SAxel Dörfler 38*5af32e75SAxel Dörfler static void 39*5af32e75SAxel Dörfler calculate_cpu_conversion_factor() 40*5af32e75SAxel Dörfler { 41*5af32e75SAxel Dörfler uint32 s_low, s_high; 42*5af32e75SAxel Dörfler uint32 low, high; 43*5af32e75SAxel Dörfler uint32 expired; 44*5af32e75SAxel Dörfler uint64 t1, t2; 45*5af32e75SAxel Dörfler uint64 p1, p2, p3; 46*5af32e75SAxel Dörfler double r1, r2, r3; 47*5af32e75SAxel Dörfler 48*5af32e75SAxel Dörfler out8(0x34, 0x43); /* program the timer to count down mode */ 49*5af32e75SAxel Dörfler out8(0xff, 0x40); /* low and then high */ 50*5af32e75SAxel Dörfler out8(0xff, 0x40); 51*5af32e75SAxel Dörfler 52*5af32e75SAxel Dörfler /* quick sample */ 53*5af32e75SAxel Dörfler quick_sample: 54*5af32e75SAxel Dörfler do { 55*5af32e75SAxel Dörfler out8(0x00, 0x43); /* latch counter value */ 56*5af32e75SAxel Dörfler s_low = in8(0x40); 57*5af32e75SAxel Dörfler s_high = in8(0x40); 58*5af32e75SAxel Dörfler } while(s_high != 255); 59*5af32e75SAxel Dörfler t1 = rdtsc(); 60*5af32e75SAxel Dörfler do { 61*5af32e75SAxel Dörfler out8(0x00, 0x43); /* latch counter value */ 62*5af32e75SAxel Dörfler low = in8(0x40); 63*5af32e75SAxel Dörfler high = in8(0x40); 64*5af32e75SAxel Dörfler } while (high > 224); 65*5af32e75SAxel Dörfler t2 = rdtsc(); 66*5af32e75SAxel Dörfler 67*5af32e75SAxel Dörfler p1 = t2-t1; 68*5af32e75SAxel Dörfler r1 = (double)(p1) / (double)(((s_high << 8) | s_low) - ((high << 8) | low)); 69*5af32e75SAxel Dörfler 70*5af32e75SAxel Dörfler /* not so quick sample */ 71*5af32e75SAxel Dörfler not_so_quick_sample: 72*5af32e75SAxel Dörfler do { 73*5af32e75SAxel Dörfler out8(0x00, 0x43); /* latch counter value */ 74*5af32e75SAxel Dörfler s_low = in8(0x40); 75*5af32e75SAxel Dörfler s_high = in8(0x40); 76*5af32e75SAxel Dörfler } while (s_high!= 255); 77*5af32e75SAxel Dörfler t1 = rdtsc(); 78*5af32e75SAxel Dörfler do { 79*5af32e75SAxel Dörfler out8(0x00, 0x43); /* latch counter value */ 80*5af32e75SAxel Dörfler low = in8(0x40); 81*5af32e75SAxel Dörfler high = in8(0x40); 82*5af32e75SAxel Dörfler } while (high> 192); 83*5af32e75SAxel Dörfler t2 = rdtsc(); 84*5af32e75SAxel Dörfler p2 = t2-t1; 85*5af32e75SAxel Dörfler r2 = (double)(p2) / (double)(((s_high << 8) | s_low) - ((high << 8) | low)); 86*5af32e75SAxel Dörfler if ((r1/r2) > 1.01) { 87*5af32e75SAxel Dörfler //dprintf("Tuning loop(1)\n"); 88*5af32e75SAxel Dörfler goto quick_sample; 89*5af32e75SAxel Dörfler } 90*5af32e75SAxel Dörfler if ((r1/r2) < 0.99) { 91*5af32e75SAxel Dörfler //dprintf("Tuning loop(1)\n"); 92*5af32e75SAxel Dörfler goto quick_sample; 93*5af32e75SAxel Dörfler } 94*5af32e75SAxel Dörfler 95*5af32e75SAxel Dörfler /* slow sample */ 96*5af32e75SAxel Dörfler do { 97*5af32e75SAxel Dörfler out8(0x00, 0x43); /* latch counter value */ 98*5af32e75SAxel Dörfler s_low = in8(0x40); 99*5af32e75SAxel Dörfler s_high = in8(0x40); 100*5af32e75SAxel Dörfler } while (s_high!= 255); 101*5af32e75SAxel Dörfler t1 = rdtsc(); 102*5af32e75SAxel Dörfler do { 103*5af32e75SAxel Dörfler out8(0x00, 0x43); /* latch counter value */ 104*5af32e75SAxel Dörfler low = in8(0x40); 105*5af32e75SAxel Dörfler high = in8(0x40); 106*5af32e75SAxel Dörfler } while (high > 128); 107*5af32e75SAxel Dörfler t2 = rdtsc(); 108*5af32e75SAxel Dörfler 109*5af32e75SAxel Dörfler p3 = t2-t1; 110*5af32e75SAxel Dörfler r3 = (double)(p3) / (double)(((s_high << 8) | s_low) - ((high << 8) | low)); 111*5af32e75SAxel Dörfler if ((r2/r3) > 1.01) { 112*5af32e75SAxel Dörfler TRACE(("Tuning loop(2)\n")); 113*5af32e75SAxel Dörfler goto not_so_quick_sample; 114*5af32e75SAxel Dörfler } 115*5af32e75SAxel Dörfler if ((r2/r3) < 0.99) { 116*5af32e75SAxel Dörfler TRACE(("Tuning loop(2)\n")); 117*5af32e75SAxel Dörfler goto not_so_quick_sample; 118*5af32e75SAxel Dörfler } 119*5af32e75SAxel Dörfler 120*5af32e75SAxel Dörfler expired = ((s_high << 8) | s_low) - ((high << 8) | low); 121*5af32e75SAxel Dörfler p3 *= TIMER_CLKNUM_HZ; 122*5af32e75SAxel Dörfler 123*5af32e75SAxel Dörfler /* 124*5af32e75SAxel Dörfler * cv_factor contains time in usecs per CPU cycle * 2^32 125*5af32e75SAxel Dörfler * 126*5af32e75SAxel Dörfler * The code below is a bit fancy. Originally Michael Noistering 127*5af32e75SAxel Dörfler * had it like: 128*5af32e75SAxel Dörfler * 129*5af32e75SAxel Dörfler * cv_factor = ((uint64)1000000<<32) * expired / p3; 130*5af32e75SAxel Dörfler * 131*5af32e75SAxel Dörfler * whic is perfect, but unfortunately 1000000ULL<<32*expired 132*5af32e75SAxel Dörfler * may overflow in fast cpus with the long sampling period 133*5af32e75SAxel Dörfler * i put there for being as accurate as possible under 134*5af32e75SAxel Dörfler * vmware. 135*5af32e75SAxel Dörfler * 136*5af32e75SAxel Dörfler * The below calculation is based in that we are trying 137*5af32e75SAxel Dörfler * to calculate: 138*5af32e75SAxel Dörfler * 139*5af32e75SAxel Dörfler * (C*expired)/p3 -> (C*(x0<<k + x1))/p3 -> 140*5af32e75SAxel Dörfler * (C*(x0<<k))/p3 + (C*x1)/p3 141*5af32e75SAxel Dörfler * 142*5af32e75SAxel Dörfler * Now the term (C*(x0<<k))/p3 is rewritten as: 143*5af32e75SAxel Dörfler * 144*5af32e75SAxel Dörfler * (C*(x0<<k))/p3 -> ((C*x0)/p3)<<k + reminder 145*5af32e75SAxel Dörfler * 146*5af32e75SAxel Dörfler * where reminder is: 147*5af32e75SAxel Dörfler * 148*5af32e75SAxel Dörfler * floor((1<<k)*decimalPart((C*x0)/p3)) 149*5af32e75SAxel Dörfler * 150*5af32e75SAxel Dörfler * which is approximated as: 151*5af32e75SAxel Dörfler * 152*5af32e75SAxel Dörfler * floor((1<<k)*decimalPart(((C*x0)%p3)/p3)) -> 153*5af32e75SAxel Dörfler * (((C*x0)%p3)<<k)/p3 154*5af32e75SAxel Dörfler * 155*5af32e75SAxel Dörfler * So the final expression is: 156*5af32e75SAxel Dörfler * 157*5af32e75SAxel Dörfler * ((C*x0)/p3)<<k + (((C*x0)%p3)<<k)/p3 + (C*x1)/p3 158*5af32e75SAxel Dörfler */ 159*5af32e75SAxel Dörfler /* 160*5af32e75SAxel Dörfler * To get the highest accuracy with this method 161*5af32e75SAxel Dörfler * x0 should have the 12 most significant bits of expired 162*5af32e75SAxel Dörfler * to minimize the error upon <<k. 163*5af32e75SAxel Dörfler */ 164*5af32e75SAxel Dörfler /* 165*5af32e75SAxel Dörfler * Of course, you are not expected to understand any of this. 166*5af32e75SAxel Dörfler */ 167*5af32e75SAxel Dörfler { 168*5af32e75SAxel Dörfler unsigned i; 169*5af32e75SAxel Dörfler unsigned k; 170*5af32e75SAxel Dörfler uint64 C; 171*5af32e75SAxel Dörfler uint64 x0; 172*5af32e75SAxel Dörfler uint64 x1; 173*5af32e75SAxel Dörfler uint64 a, b, c; 174*5af32e75SAxel Dörfler 175*5af32e75SAxel Dörfler /* first calculate k*/ 176*5af32e75SAxel Dörfler k = 0; 177*5af32e75SAxel Dörfler for (i = 12; i < 16; i++) { 178*5af32e75SAxel Dörfler if (expired & (1<<i)) 179*5af32e75SAxel Dörfler k = i - 11; 180*5af32e75SAxel Dörfler } 181*5af32e75SAxel Dörfler 182*5af32e75SAxel Dörfler C = 1000000ULL << 32; 183*5af32e75SAxel Dörfler x0 = expired >> k; 184*5af32e75SAxel Dörfler x1 = expired & ((1 << k) - 1); 185*5af32e75SAxel Dörfler 186*5af32e75SAxel Dörfler a = ((C * x0) / p3) << k; 187*5af32e75SAxel Dörfler b = (((C * x0) % p3) << k) / p3; 188*5af32e75SAxel Dörfler c = (C * x1) / p3; 189*5af32e75SAxel Dörfler #if 0 190*5af32e75SAxel Dörfler dprintf("a=%Ld\n", a); 191*5af32e75SAxel Dörfler dprintf("b=%Ld\n", b); 192*5af32e75SAxel Dörfler dprintf("c=%Ld\n", c); 193*5af32e75SAxel Dörfler dprintf("%d %Ld\n", expired, p3); 194*5af32e75SAxel Dörfler #endif 195*5af32e75SAxel Dörfler gTimeConversionFactor = a + b + c; 196*5af32e75SAxel Dörfler #if 0 197*5af32e75SAxel Dörfler dprintf("cvf=%Ld\n", cv_factor); 198*5af32e75SAxel Dörfler #endif 199*5af32e75SAxel Dörfler } 200*5af32e75SAxel Dörfler 201*5af32e75SAxel Dörfler #ifdef TRACE_CPU 202*5af32e75SAxel Dörfler if (p3 / expired / 1000000000LL) 203*5af32e75SAxel Dörfler dprintf("CPU at %Ld.%03Ld GHz\n", p3/expired/1000000000LL, ((p3/expired)%1000000000LL)/1000000LL); 204*5af32e75SAxel Dörfler else 205*5af32e75SAxel Dörfler dprintf("CPU at %Ld.%03Ld MHz\n", p3/expired/1000000LL, ((p3/expired)%1000000LL)/1000LL); 206*5af32e75SAxel Dörfler #endif 207*5af32e75SAxel Dörfler 208*5af32e75SAxel Dörfler gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor; 209*5af32e75SAxel Dörfler gKernelArgs.arch_args.cpu_clock_speed = p3/expired; 210*5af32e75SAxel Dörfler } 211*5af32e75SAxel Dörfler 212*5af32e75SAxel Dörfler 213*5af32e75SAxel Dörfler static status_t 214*5af32e75SAxel Dörfler check_cpu_features() 215*5af32e75SAxel Dörfler { 216*5af32e75SAxel Dörfler // ToDo: for now 217*5af32e75SAxel Dörfler return B_OK; 218*5af32e75SAxel Dörfler } 219*5af32e75SAxel Dörfler 220*5af32e75SAxel Dörfler 221*5af32e75SAxel Dörfler // #pragma mark - 222*5af32e75SAxel Dörfler 223*5af32e75SAxel Dörfler 224*5af32e75SAxel Dörfler extern "C" void 225*5af32e75SAxel Dörfler spin(bigtime_t microseconds) 226*5af32e75SAxel Dörfler { 227*5af32e75SAxel Dörfler bigtime_t time = system_time(); 228*5af32e75SAxel Dörfler 229*5af32e75SAxel Dörfler while((system_time() - time) < microseconds) 230*5af32e75SAxel Dörfler ; 231*5af32e75SAxel Dörfler } 232*5af32e75SAxel Dörfler 233*5af32e75SAxel Dörfler 234*5af32e75SAxel Dörfler extern "C" void 235*5af32e75SAxel Dörfler cpu_init() 236*5af32e75SAxel Dörfler { 237*5af32e75SAxel Dörfler if (check_cpu_features() != B_OK) 238*5af32e75SAxel Dörfler panic("You need a Pentium or higher in order to boot!\n"); 239*5af32e75SAxel Dörfler 240*5af32e75SAxel Dörfler calculate_cpu_conversion_factor(); 241*5af32e75SAxel Dörfler 242*5af32e75SAxel Dörfler gKernelArgs.num_cpus = 1; 243*5af32e75SAxel Dörfler // this will eventually be corrected later on 244*5af32e75SAxel Dörfler } 245*5af32e75SAxel Dörfler 246