xref: /haiku/src/system/boot/platform/bios_ia32/cpu.cpp (revision 5af32e752606778be5dd7379f319fe43cb3f6b8c)
/*
 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * calculate_cpu_conversion_factor() was written by Travis Geiselbrecht and
 * licensed under the NewOS license.
 */
8*5af32e75SAxel Dörfler 
9*5af32e75SAxel Dörfler 
10*5af32e75SAxel Dörfler #include "cpu.h"
11*5af32e75SAxel Dörfler 
12*5af32e75SAxel Dörfler #include <OS.h>
13*5af32e75SAxel Dörfler #include <boot/platform.h>
14*5af32e75SAxel Dörfler #include <boot/stdio.h>
15*5af32e75SAxel Dörfler #include <boot/kernel_args.h>
16*5af32e75SAxel Dörfler #include <boot/stage2.h>
17*5af32e75SAxel Dörfler #include <arch/cpu.h>
18*5af32e75SAxel Dörfler #include <arch_kernel.h>
19*5af32e75SAxel Dörfler 
20*5af32e75SAxel Dörfler #include <string.h>
21*5af32e75SAxel Dörfler 
22*5af32e75SAxel Dörfler 
// Uncomment to route TRACE() output to dprintf().
//#define TRACE_CPU
#ifdef TRACE_CPU
#	define TRACE(x) dprintf x
#else
	// Expand to a genuine empty statement (instead of a bare ';') so that
	// "TRACE((...));" is always exactly one statement and stays safe in
	// unbraced if/else bodies.
#	define TRACE(x) do { } while (0)
#endif
29*5af32e75SAxel Dörfler 
30*5af32e75SAxel Dörfler 
31*5af32e75SAxel Dörfler extern "C" uint64 rdtsc();
32*5af32e75SAxel Dörfler 
33*5af32e75SAxel Dörfler uint32 gTimeConversionFactor;
34*5af32e75SAxel Dörfler 
35*5af32e75SAxel Dörfler #define TIMER_CLKNUM_HZ (14318180/12)
36*5af32e75SAxel Dörfler 
37*5af32e75SAxel Dörfler 
38*5af32e75SAxel Dörfler static void
39*5af32e75SAxel Dörfler calculate_cpu_conversion_factor()
40*5af32e75SAxel Dörfler {
41*5af32e75SAxel Dörfler 	uint32 s_low, s_high;
42*5af32e75SAxel Dörfler 	uint32 low, high;
43*5af32e75SAxel Dörfler 	uint32 expired;
44*5af32e75SAxel Dörfler 	uint64 t1, t2;
45*5af32e75SAxel Dörfler 	uint64 p1, p2, p3;
46*5af32e75SAxel Dörfler 	double r1, r2, r3;
47*5af32e75SAxel Dörfler 
48*5af32e75SAxel Dörfler 	out8(0x34, 0x43);	/* program the timer to count down mode */
49*5af32e75SAxel Dörfler 	out8(0xff, 0x40);	/* low and then high */
50*5af32e75SAxel Dörfler 	out8(0xff, 0x40);
51*5af32e75SAxel Dörfler 
52*5af32e75SAxel Dörfler 	/* quick sample */
53*5af32e75SAxel Dörfler quick_sample:
54*5af32e75SAxel Dörfler 	do {
55*5af32e75SAxel Dörfler 		out8(0x00, 0x43); /* latch counter value */
56*5af32e75SAxel Dörfler 		s_low = in8(0x40);
57*5af32e75SAxel Dörfler 		s_high = in8(0x40);
58*5af32e75SAxel Dörfler 	} while(s_high != 255);
59*5af32e75SAxel Dörfler 	t1 = rdtsc();
60*5af32e75SAxel Dörfler 	do {
61*5af32e75SAxel Dörfler 		out8(0x00, 0x43); /* latch counter value */
62*5af32e75SAxel Dörfler 		low = in8(0x40);
63*5af32e75SAxel Dörfler 		high = in8(0x40);
64*5af32e75SAxel Dörfler 	} while (high > 224);
65*5af32e75SAxel Dörfler 	t2 = rdtsc();
66*5af32e75SAxel Dörfler 
67*5af32e75SAxel Dörfler 	p1 = t2-t1;
68*5af32e75SAxel Dörfler 	r1 = (double)(p1) / (double)(((s_high << 8) | s_low) - ((high << 8) | low));
69*5af32e75SAxel Dörfler 
70*5af32e75SAxel Dörfler 	/* not so quick sample */
71*5af32e75SAxel Dörfler not_so_quick_sample:
72*5af32e75SAxel Dörfler 	do {
73*5af32e75SAxel Dörfler 		out8(0x00, 0x43); /* latch counter value */
74*5af32e75SAxel Dörfler 		s_low = in8(0x40);
75*5af32e75SAxel Dörfler 		s_high = in8(0x40);
76*5af32e75SAxel Dörfler 	} while (s_high!= 255);
77*5af32e75SAxel Dörfler 	t1 = rdtsc();
78*5af32e75SAxel Dörfler 	do {
79*5af32e75SAxel Dörfler 		out8(0x00, 0x43); /* latch counter value */
80*5af32e75SAxel Dörfler 		low = in8(0x40);
81*5af32e75SAxel Dörfler 		high = in8(0x40);
82*5af32e75SAxel Dörfler 	} while (high> 192);
83*5af32e75SAxel Dörfler 	t2 = rdtsc();
84*5af32e75SAxel Dörfler 	p2 = t2-t1;
85*5af32e75SAxel Dörfler 	r2 = (double)(p2) / (double)(((s_high << 8) | s_low) - ((high << 8) | low));
86*5af32e75SAxel Dörfler 	if ((r1/r2) > 1.01) {
87*5af32e75SAxel Dörfler 		//dprintf("Tuning loop(1)\n");
88*5af32e75SAxel Dörfler 		goto quick_sample;
89*5af32e75SAxel Dörfler 	}
90*5af32e75SAxel Dörfler 	if ((r1/r2) < 0.99) {
91*5af32e75SAxel Dörfler 		//dprintf("Tuning loop(1)\n");
92*5af32e75SAxel Dörfler 		goto quick_sample;
93*5af32e75SAxel Dörfler 	}
94*5af32e75SAxel Dörfler 
95*5af32e75SAxel Dörfler 	/* slow sample */
96*5af32e75SAxel Dörfler 	do {
97*5af32e75SAxel Dörfler 		out8(0x00, 0x43); /* latch counter value */
98*5af32e75SAxel Dörfler 		s_low = in8(0x40);
99*5af32e75SAxel Dörfler 		s_high = in8(0x40);
100*5af32e75SAxel Dörfler 	} while (s_high!= 255);
101*5af32e75SAxel Dörfler 	t1 = rdtsc();
102*5af32e75SAxel Dörfler 	do {
103*5af32e75SAxel Dörfler 		out8(0x00, 0x43); /* latch counter value */
104*5af32e75SAxel Dörfler 		low = in8(0x40);
105*5af32e75SAxel Dörfler 		high = in8(0x40);
106*5af32e75SAxel Dörfler 	} while (high > 128);
107*5af32e75SAxel Dörfler 	t2 = rdtsc();
108*5af32e75SAxel Dörfler 
109*5af32e75SAxel Dörfler 	p3 = t2-t1;
110*5af32e75SAxel Dörfler 	r3 = (double)(p3) / (double)(((s_high << 8) | s_low) - ((high << 8) | low));
111*5af32e75SAxel Dörfler 	if ((r2/r3) > 1.01) {
112*5af32e75SAxel Dörfler 		TRACE(("Tuning loop(2)\n"));
113*5af32e75SAxel Dörfler 		goto not_so_quick_sample;
114*5af32e75SAxel Dörfler 	}
115*5af32e75SAxel Dörfler 	if ((r2/r3) < 0.99) {
116*5af32e75SAxel Dörfler 		TRACE(("Tuning loop(2)\n"));
117*5af32e75SAxel Dörfler 		goto not_so_quick_sample;
118*5af32e75SAxel Dörfler 	}
119*5af32e75SAxel Dörfler 
120*5af32e75SAxel Dörfler 	expired = ((s_high << 8) | s_low) - ((high << 8) | low);
121*5af32e75SAxel Dörfler 	p3 *= TIMER_CLKNUM_HZ;
122*5af32e75SAxel Dörfler 
123*5af32e75SAxel Dörfler 	/*
124*5af32e75SAxel Dörfler 	 * cv_factor contains time in usecs per CPU cycle * 2^32
125*5af32e75SAxel Dörfler 	 *
126*5af32e75SAxel Dörfler 	 * The code below is a bit fancy. Originally Michael Noistering
127*5af32e75SAxel Dörfler 	 * had it like:
128*5af32e75SAxel Dörfler 	 *
129*5af32e75SAxel Dörfler 	 *     cv_factor = ((uint64)1000000<<32) * expired / p3;
130*5af32e75SAxel Dörfler 	 *
131*5af32e75SAxel Dörfler 	 * whic is perfect, but unfortunately 1000000ULL<<32*expired
132*5af32e75SAxel Dörfler 	 * may overflow in fast cpus with the long sampling period
133*5af32e75SAxel Dörfler 	 * i put there for being as accurate as possible under
134*5af32e75SAxel Dörfler 	 * vmware.
135*5af32e75SAxel Dörfler 	 *
136*5af32e75SAxel Dörfler 	 * The below calculation is based in that we are trying
137*5af32e75SAxel Dörfler 	 * to calculate:
138*5af32e75SAxel Dörfler 	 *
139*5af32e75SAxel Dörfler 	 *     (C*expired)/p3 -> (C*(x0<<k + x1))/p3 ->
140*5af32e75SAxel Dörfler 	 *     (C*(x0<<k))/p3 + (C*x1)/p3
141*5af32e75SAxel Dörfler 	 *
142*5af32e75SAxel Dörfler 	 * Now the term (C*(x0<<k))/p3 is rewritten as:
143*5af32e75SAxel Dörfler 	 *
144*5af32e75SAxel Dörfler 	 *     (C*(x0<<k))/p3 -> ((C*x0)/p3)<<k + reminder
145*5af32e75SAxel Dörfler 	 *
146*5af32e75SAxel Dörfler 	 * where reminder is:
147*5af32e75SAxel Dörfler 	 *
148*5af32e75SAxel Dörfler 	 *     floor((1<<k)*decimalPart((C*x0)/p3))
149*5af32e75SAxel Dörfler 	 *
150*5af32e75SAxel Dörfler 	 * which is approximated as:
151*5af32e75SAxel Dörfler 	 *
152*5af32e75SAxel Dörfler 	 *     floor((1<<k)*decimalPart(((C*x0)%p3)/p3)) ->
153*5af32e75SAxel Dörfler 	 *     (((C*x0)%p3)<<k)/p3
154*5af32e75SAxel Dörfler 	 *
155*5af32e75SAxel Dörfler 	 * So the final expression is:
156*5af32e75SAxel Dörfler 	 *
157*5af32e75SAxel Dörfler 	 *     ((C*x0)/p3)<<k + (((C*x0)%p3)<<k)/p3 + (C*x1)/p3
158*5af32e75SAxel Dörfler 	 */
159*5af32e75SAxel Dörfler 	 /*
160*5af32e75SAxel Dörfler 	 * To get the highest accuracy with this method
161*5af32e75SAxel Dörfler 	 * x0 should have the 12 most significant bits of expired
162*5af32e75SAxel Dörfler 	 * to minimize the error upon <<k.
163*5af32e75SAxel Dörfler 	 */
164*5af32e75SAxel Dörfler 	 /*
165*5af32e75SAxel Dörfler 	 * Of course, you are not expected to understand any of this.
166*5af32e75SAxel Dörfler 	 */
167*5af32e75SAxel Dörfler 	{
168*5af32e75SAxel Dörfler 		unsigned i;
169*5af32e75SAxel Dörfler 		unsigned k;
170*5af32e75SAxel Dörfler 		uint64 C;
171*5af32e75SAxel Dörfler 		uint64 x0;
172*5af32e75SAxel Dörfler 		uint64 x1;
173*5af32e75SAxel Dörfler 		uint64 a, b, c;
174*5af32e75SAxel Dörfler 
175*5af32e75SAxel Dörfler 		/* first calculate k*/
176*5af32e75SAxel Dörfler 		k = 0;
177*5af32e75SAxel Dörfler 		for (i = 12; i < 16; i++) {
178*5af32e75SAxel Dörfler 			if (expired & (1<<i))
179*5af32e75SAxel Dörfler 				k = i - 11;
180*5af32e75SAxel Dörfler 		}
181*5af32e75SAxel Dörfler 
182*5af32e75SAxel Dörfler 		C = 1000000ULL << 32;
183*5af32e75SAxel Dörfler 		x0 = expired >> k;
184*5af32e75SAxel Dörfler 		x1 = expired & ((1 << k) - 1);
185*5af32e75SAxel Dörfler 
186*5af32e75SAxel Dörfler 		a = ((C * x0) / p3) << k;
187*5af32e75SAxel Dörfler 		b = (((C * x0) % p3) << k) / p3;
188*5af32e75SAxel Dörfler 		c = (C * x1) / p3;
189*5af32e75SAxel Dörfler #if 0
190*5af32e75SAxel Dörfler 		dprintf("a=%Ld\n", a);
191*5af32e75SAxel Dörfler 		dprintf("b=%Ld\n", b);
192*5af32e75SAxel Dörfler 		dprintf("c=%Ld\n", c);
193*5af32e75SAxel Dörfler 		dprintf("%d %Ld\n", expired, p3);
194*5af32e75SAxel Dörfler #endif
195*5af32e75SAxel Dörfler 		gTimeConversionFactor = a + b + c;
196*5af32e75SAxel Dörfler #if 0
197*5af32e75SAxel Dörfler 		dprintf("cvf=%Ld\n", cv_factor);
198*5af32e75SAxel Dörfler #endif
199*5af32e75SAxel Dörfler 	}
200*5af32e75SAxel Dörfler 
201*5af32e75SAxel Dörfler #ifdef TRACE_CPU
202*5af32e75SAxel Dörfler 	if (p3 / expired / 1000000000LL)
203*5af32e75SAxel Dörfler 		dprintf("CPU at %Ld.%03Ld GHz\n", p3/expired/1000000000LL, ((p3/expired)%1000000000LL)/1000000LL);
204*5af32e75SAxel Dörfler 	else
205*5af32e75SAxel Dörfler 		dprintf("CPU at %Ld.%03Ld MHz\n", p3/expired/1000000LL, ((p3/expired)%1000000LL)/1000LL);
206*5af32e75SAxel Dörfler #endif
207*5af32e75SAxel Dörfler 
208*5af32e75SAxel Dörfler 	gKernelArgs.arch_args.system_time_cv_factor = gTimeConversionFactor;
209*5af32e75SAxel Dörfler 	gKernelArgs.arch_args.cpu_clock_speed = p3/expired;
210*5af32e75SAxel Dörfler }
211*5af32e75SAxel Dörfler 
212*5af32e75SAxel Dörfler 
213*5af32e75SAxel Dörfler static status_t
214*5af32e75SAxel Dörfler check_cpu_features()
215*5af32e75SAxel Dörfler {
216*5af32e75SAxel Dörfler 	// ToDo: for now
217*5af32e75SAxel Dörfler 	return B_OK;
218*5af32e75SAxel Dörfler }
219*5af32e75SAxel Dörfler 
220*5af32e75SAxel Dörfler 
221*5af32e75SAxel Dörfler //	#pragma mark -
222*5af32e75SAxel Dörfler 
223*5af32e75SAxel Dörfler 
224*5af32e75SAxel Dörfler extern "C" void
225*5af32e75SAxel Dörfler spin(bigtime_t microseconds)
226*5af32e75SAxel Dörfler {
227*5af32e75SAxel Dörfler 	bigtime_t time = system_time();
228*5af32e75SAxel Dörfler 
229*5af32e75SAxel Dörfler 	while((system_time() - time) < microseconds)
230*5af32e75SAxel Dörfler 		;
231*5af32e75SAxel Dörfler }
232*5af32e75SAxel Dörfler 
233*5af32e75SAxel Dörfler 
234*5af32e75SAxel Dörfler extern "C" void
235*5af32e75SAxel Dörfler cpu_init()
236*5af32e75SAxel Dörfler {
237*5af32e75SAxel Dörfler 	if (check_cpu_features() != B_OK)
238*5af32e75SAxel Dörfler 		panic("You need a Pentium or higher in order to boot!\n");
239*5af32e75SAxel Dörfler 
240*5af32e75SAxel Dörfler 	calculate_cpu_conversion_factor();
241*5af32e75SAxel Dörfler 
242*5af32e75SAxel Dörfler 	gKernelArgs.num_cpus = 1;
243*5af32e75SAxel Dörfler 		// this will eventually be corrected later on
244*5af32e75SAxel Dörfler }
245*5af32e75SAxel Dörfler 
246