1 /*
2 Copyright (c) 2002-2004, Thomas Kurschel
3
4
5 Part of Radeon accelerant
6
7 Takes care of PLL
8 */
9
10
11 #include "radeon_accelerant.h"
12
13 #include "pll_regs.h"
14 #include "pll_access.h"
15 #include "utils.h"
16 #include <stdlib.h>
17 #include "set_mode.h"
18
19
// Poll the reference divider register of the selected PLL until its
// RADEON_PPLL_ATOMIC_UPDATE_R bit reads as zero.
//	ai       - accelerant info providing register base and ASIC type
//	crtc_idx - 0 selects the first PLL, any other value the second one
// We should really wait forever, but
// 1. this is unsafe
// 2. some r300 loop forever (reported by XFree86)
// so the function gives up silently after 10000 polls.
static void Radeon_PLLWaitForReadUpdateComplete(
	accelerator_info *ai, int crtc_idx )
{
	const int ref_div_reg =
		crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV;
	int polls_left = 10000;

	while( polls_left-- > 0 ) {
		if( (Radeon_INPLL( ai->regs, ai->si->asic, ref_div_reg )
				& RADEON_PPLL_ATOMIC_UPDATE_R) == 0 )
			break;
	}
}
34
// Request an atomic update of the selected PLL: first wait for a pending
// read-update to finish, then set RADEON_PPLL_ATOMIC_UPDATE_W in the
// reference divider register, leaving all other bits of it untouched.
//	ai       - accelerant info providing register base and ASIC type
//	crtc_idx - 0 selects the first PLL, any other value the second one
static void Radeon_PLLWriteUpdate(
	accelerator_info *ai, int crtc_idx )
{
	const int ref_div_reg =
		crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV;

	Radeon_PLLWaitForReadUpdateComplete( ai, crtc_idx );

	Radeon_OUTPLLP( ai->regs, ai->si->asic, ref_div_reg,
		RADEON_PPLL_ATOMIC_UPDATE_W, ~RADEON_PPLL_ATOMIC_UPDATE_W );
}
45
46 // calculate PLL dividers
47 // pll - info about PLL
48 // freq - whished frequency in Hz
49 // fixed_post_div - if != 0, fixed divider to be used
50 // dividers - filled with proper dividers
Radeon_CalcPLLDividers(const pll_info * pll,uint32 freq,uint fixed_post_div,pll_dividers * dividers)51 void Radeon_CalcPLLDividers(
52 const pll_info *pll, uint32 freq, uint fixed_post_div, pll_dividers *dividers )
53 {
54 // the PLL gets the reference
55 // pll_in = ref_freq / ref_div
56 // this must be within pll_in_min..pll_in_max
57 // the VCO of the PLL has the frequency
58 // vco = pll_in * feedback_div * extra_feedback_div
59 // = ref_freq / ref_div * feedback_div * extra_feedback_div
60 // where pre_feedback_div is hard-wired
61 // this must be within vco_min..vco_max
62 // the pixel clock is calculated as
63 // pll_out = vco / post_div / extra_post_div
64 // = ref_freq * feedback_div * extra_feedback_div / (ref_div * post_div * extra_post_div)
65 // where extra_post_div _may_ be choosable between 1 and 2
66
67 // synonyms are:
68 // ref_div = M
69 // feedback_div = N
70 // post_div = P
71
72 int
73 min_post_div_idx, max_post_div_idx,
74 post_div_idx, extra_post_div_idx,
75 best_post_div_idx, best_extra_post_div_idx;
76
77 uint32
78 best_ref_div, best_feedback_div, best_freq;
79 int32
80 best_error, best_vco_dev;
81
82 best_error = 999999999;
83
84 // make compiler happy
85 best_post_div_idx = 0;
86 best_extra_post_div_idx = 0;
87 best_ref_div = 1;
88 best_feedback_div = 1;
89 best_freq = 1;
90 best_vco_dev = 1;
91
92 if( fixed_post_div == 0 ) {
93 min_post_div_idx = 0;
94 for(
95 max_post_div_idx = 0;
96 pll->post_divs[max_post_div_idx].divider != 0;
97 ++max_post_div_idx )
98 ;
99 --max_post_div_idx;
100 } else {
101 for(
102 min_post_div_idx = 0;
103 pll->post_divs[min_post_div_idx].divider != fixed_post_div;
104 ++min_post_div_idx )
105 ;
106
107 max_post_div_idx = min_post_div_idx;
108
109 //SHOW_FLOW( 2, "idx of fixed post divider: %d", min_post_div_idx );
110 }
111
112 // post dividers are quite restrictive, so they provide little search space only
113 for( extra_post_div_idx = 0; pll->extra_post_divs[extra_post_div_idx].divider != 0; ++extra_post_div_idx ) {
114 for( post_div_idx = min_post_div_idx; post_div_idx <= max_post_div_idx; ++post_div_idx ) {
115 uint32 ref_div;
116 uint32 post_div =
117 pll->post_divs[post_div_idx].divider
118 * pll->extra_post_divs[extra_post_div_idx].divider;
119
120 // post devider determines VCO frequency, so determine and verify it;
121 // freq is in Hz, everything else is in 10 kHz units
122 // we use 10 kHz units as long as possible to avoid uint32 overflows
123 uint32 vco = (freq / 10000) * post_div;
124
125 //SHOW_FLOW( 2, "post_div=%d, vco=%d", post_div, vco );
126
127 if( vco < pll->vco_min || vco > pll->vco_max )
128 continue;
129
130 //SHOW_FLOW0( 2, "jau" );
131
132 // we can either iterate through feedback or reference dividers;
133 // usually, there are fewer possible reference dividers, so I picked them
134 for( ref_div = pll->min_ref_div; ref_div <= pll->max_ref_div; ++ref_div ) {
135 uint32 feedback_div, cur_freq;
136 int32 error, vco_dev;
137
138 // this implies the frequency of the lock unit
139 uint32 pll_in = pll->ref_freq / ref_div;
140
141 if( pll_in < pll->pll_in_min || pll_in > pll->pll_in_max )
142 continue;
143
144 // well, only one variable is left
145 // timing is almost certainly valid, time to use Hz units
146 feedback_div = RoundDiv64(
147 (int64)freq * ref_div * post_div,
148 pll->ref_freq * 10000 * pll->extra_feedback_div);
149
150 if( feedback_div < pll->min_feedback_div ||
151 feedback_div > pll->max_feedback_div )
152 continue;
153
154 // let's see what we've got
155 cur_freq = RoundDiv64(
156 (int64)pll->ref_freq * 10000 * feedback_div * pll->extra_feedback_div,
157 ref_div * post_div );
158
159 // absolute error in terms of output clock
160 error = abs( (int32)cur_freq - (int32)freq );
161 // deviation from perfect VCO clock
162 vco_dev = abs( (int32)vco - (int32)(pll->best_vco) );
163
164 // if there is no optimal VCO frequency, choose setting with less error;
165 // if there is an optimal VCO frequency, choose new settings if
166 // - error is reduced significantly (100 Hz or more), or
167 // - output frequency is almost the same (less then 100 Hz difference) but
168 // VCO frequency is closer to best frequency
169 if( (pll->best_vco == 0 && error < best_error) ||
170 (pll->best_vco != 0 &&
171 (error < best_error - 100 ||
172 (abs( error - best_error ) < 100 && vco_dev < best_vco_dev ))))
173 {
174 //SHOW_FLOW( 2, "got freq=%d, best_freq=%d", freq, cur_freq );
175 best_post_div_idx = post_div_idx;
176 best_extra_post_div_idx = extra_post_div_idx;
177 best_ref_div = ref_div;
178 best_feedback_div = feedback_div;
179 best_freq = cur_freq;
180 best_error = error;
181 best_vco_dev = vco_dev;
182 }
183 }
184 }
185 }
186
187 dividers->post_code = pll->post_divs[best_post_div_idx].code;
188 dividers->post = pll->post_divs[best_post_div_idx].divider;
189 dividers->extra_post_code = pll->post_divs[best_extra_post_div_idx].code;
190 dividers->extra_post = pll->post_divs[best_extra_post_div_idx].divider;
191 dividers->ref = best_ref_div;
192 dividers->feedback = best_feedback_div;
193 dividers->freq = best_freq;
194
195 /*SHOW_FLOW( 2, "post_code=%d, post=%d, extra_post_code=%d, extra_post=%d, ref=%d, feedback=%d, freq=%d",
196 dividers->post_code, dividers->post, dividers->extra_post_code,
197 dividers->extra_post, dividers->ref, dividers->feedback, dividers->freq );*/
198 }
199
200
201 // with a TV timing given, find a corresponding CRT timing.
202 // both timing must meet at the end of a frame, but as the PLL has a
203 // limited frequency granularity, you don't really get a CRT timing
204 // with precisely the same frame rate; the solution is to tweak the CRT
205 // image a bit by making it wider/taller/smaller until the frame rate
206 // drift is under a given threshold;
207 // we follow two aims:
208 // - primary, keep frame rate in sync
209 // - secondary, only tweak as much as unavoidable
Radeon_MatchCRTPLL(const pll_info * pll,uint32 tv_v_total,uint32 tv_h_total,uint32 tv_frame_size_adjust,uint32 freq,const display_mode * mode,uint32 max_v_tweak,uint32 max_h_tweak,uint32 max_frame_rate_drift,uint32 fixed_post_div,pll_dividers * dividers,display_mode * tweaked_mode)210 void Radeon_MatchCRTPLL(
211 const pll_info *pll,
212 uint32 tv_v_total, uint32 tv_h_total, uint32 tv_frame_size_adjust, uint32 freq,
213 const display_mode *mode, uint32 max_v_tweak, uint32 max_h_tweak,
214 uint32 max_frame_rate_drift, uint32 fixed_post_div,
215 pll_dividers *dividers,
216 display_mode *tweaked_mode )
217 {
218 uint32 v_tweak;
219 int32 v_tweak_dir;
220 uint32 pix_per_tv_frame;
221
222 SHOW_FLOW( 2, "fixed post divider: %d", fixed_post_div );
223
224 // number of TV pixels per frame
225 pix_per_tv_frame = tv_v_total * tv_h_total + tv_frame_size_adjust;
226
227 // starting with original data we tweak total horizontal and vertical size
228 // more and more until we find a proper CRT clock frequency
229 for( v_tweak = 0; v_tweak <= max_v_tweak; ++v_tweak ) {
230 for( v_tweak_dir = -1; v_tweak_dir <= 1; v_tweak_dir += 2 ) {
231 uint32 h_tweak;
232 int32 h_tweak_dir;
233
234 uint32 v_total = mode->timing.v_total + v_tweak * v_tweak_dir;
235
236 for( h_tweak = 0; h_tweak <= max_h_tweak; ++h_tweak ) {
237 for( h_tweak_dir = -1; h_tweak_dir <= 1; h_tweak_dir += 2 ) {
238 uint32 pix_per_crt_frame, frame_rate_drift;
239 uint32 crt_freq;
240 uint32 abs_crt_error;
241
242 uint32 h_total = mode->timing.h_total + h_tweak * h_tweak_dir;
243
244 // number of CRT pixels per frame
245 pix_per_crt_frame = v_total * h_total;
246
247 // frame rate must be:
248 // frame_rate = freq / pix_per_tv_half_frame
249 // because of interlace, we must use half frames
250 // pix_per_tv_half_frame = pix_per_tv_frame / 2
251 // to get a CRT image with the same frame rate, we get
252 // crt_freq = frame_rate * pix_per_crt_frame
253 // = freq / (pix_per_tv_frame / 2) * pix_per_crt_frame
254 // formula is reordered as usual to improve accuracy
255 crt_freq = (uint64)freq * pix_per_crt_frame * 2 / pix_per_tv_frame;
256
257 Radeon_CalcPLLDividers( pll, crt_freq, fixed_post_div, dividers );
258
259 // get absolute CRT clock error per second
260 abs_crt_error = abs( (int32)(dividers->freq) - (int32)crt_freq );
261
262 //SHOW_INFO( 2, "whished=%d, is=%d", crt_freq, dividers->freq );
263
264 // convert it to relative CRT clock error:
265 // rel_error = abs_crt_error / crt_freq
266 // now to absolute TV clock error per second:
267 // abs_tv_error = rel_error * tv_freq
268 // and finally to TV clock error per frame:
269 // frame_rate_drift = abs_tv_error / frame_rate
270 // = abs_crt_error / crt_freq * tv_freq / frame_rate
271 // this can be simplified by using:
272 // tv_freq = pix_per_tv_frame * frame_rate
273 // so we get:
274 // frame_rate_drift = abs_crt_error / crt_freq * pix_per_tv_frame * frame_rate / frame_rate
275 // = abs_crt_error / crt_freq * pix_per_tv_frame
276 frame_rate_drift = (uint64)abs_crt_error * pix_per_tv_frame / freq;
277
278 // if drift is within threshold, we take this setting and stop
279 // searching (later iteration will increasingly tweak screen size,
280 // and we don't really want that)
281 if( frame_rate_drift <= max_frame_rate_drift ) {
282 SHOW_INFO( 2, "frame_rate_drift=%d, crt_freq=%d, v_total=%d, h_total=%d",
283 frame_rate_drift, crt_freq, v_total, h_total );
284
285 tweaked_mode->timing.pixel_clock = crt_freq;
286 tweaked_mode->timing.v_total = v_total;
287 tweaked_mode->timing.h_total = h_total;
288 return;
289 }
290 }
291 }
292 }
293 }
294 }
295
296
297 // table to map divider to register value
298 static pll_divider_map post_divs[] = {
299 { 1, 0 },
300 { 2, 1 },
301 { 4, 2 },
302 { 8, 3 },
303 { 3, 4 },
304 // { 16, 5 }, // at least for pll2 of M6, this value is reserved
305 { 6, 6 },
306 { 12, 7 },
307 { 0, 0 }
308 };
309
310
311 // normal PLLs have no extra post divider
312 static pll_divider_map extra_post_divs[] = {
313 { 1, 1 },
314 { 0, 0 }
315 };
316
317
318 // extra post-divider provided by Rage Theatre
319 static pll_divider_map external_extra_post_divs[] = {
320 { 1, 0 },
321 { 2, 1 },
322 { 0, 0 }
323 };
324
325
326 // post-dividers of Rage Theatre
327 static pll_divider_map tv_post_divs[] = {
328 { 1, 1 },
329 { 2, 2 },
330 { 3, 3 },
331 { 4, 4 },
332 { 5, 5 },
333 { 6, 6 },
334 { 7, 7 },
335 { 8, 8 },
336 { 9, 9 },
337 { 10, 10 },
338 { 11, 11 },
339 { 12, 12 },
340 { 13, 13 },
341 { 14, 14 },
342 { 15, 15 },
343 { 0, 0 }
344 };
345
346
347 // get PLL parameters of TV PLL
Radeon_GetTVPLLConfiguration(const general_pll_info * general_pll,pll_info * pll,bool internal_encoder)348 void Radeon_GetTVPLLConfiguration( const general_pll_info *general_pll, pll_info *pll,
349 bool internal_encoder )
350 {
351 pll->post_divs = tv_post_divs;
352 pll->extra_post_divs = internal_encoder ? extra_post_divs : external_extra_post_divs;
353 pll->ref_freq = general_pll->ref_freq;
354 pll->vco_min = 10000;
355 pll->vco_max = 25000;
356 // I'm not sure about the upper limit
357 pll->min_ref_div = 4;
358 pll->max_ref_div = 0x3ff;
359 // in the original code, they set it to 330kHz if PAL is requested and
360 // quartz is 27 MHz, but I don't see how these circumstances can effect the
361 // mimimal PLL input frequency
362 pll->pll_in_min = 20;//40;
363 // in the original code, they don't define an upper limit
364 pll->pll_in_max = 100;
365 pll->extra_feedback_div = 1;
366 pll->min_feedback_div = 4;
367 pll->max_feedback_div = 0x7ff;
368 pll->best_vco = 21000;
369 }
370
371
372 // get PLL parameters of CRT PLL used in conjunction with TV-out
Radeon_GetTVCRTPLLConfiguration(const general_pll_info * general_pll,pll_info * pll,bool internal_tv_encoder)373 void Radeon_GetTVCRTPLLConfiguration( const general_pll_info *general_pll, pll_info *pll,
374 bool internal_tv_encoder )
375 {
376 pll->post_divs = post_divs;
377 pll->extra_post_divs = extra_post_divs;
378 pll->ref_freq = general_pll->ref_freq;
379
380 // in sample code, these limits are set in a strange way;
381 // as a first shot, I use the BIOS provided limits
382 /*pll->vco_min = general_pll->min_pll_freq;
383 pll->vco_max = general_pll->max_pll_freq;*/
384
385 // in sample code, they use a variable post divider during calculation, but
386 // use a fixed post divider for programming - the variable post divider is
387 // multiplied to the feedback divider;
388 // because of the fixed post divider (3), the VCO always runs far out of
389 // its stable frequency range, so we have hack the limits
390 pll->vco_min = 4000;
391 pll->vco_max = general_pll->max_pll_freq;
392
393 // in sample code, lower limit is 4, but in register spec they say everything but 0/1
394 pll->min_ref_div = 2;
395 pll->max_ref_div = 0x3ff;
396 pll->pll_in_min = 20;
397 pll->pll_in_max = 100;
398 pll->extra_feedback_div = 1;
399 pll->min_feedback_div = 4;
400 pll->max_feedback_div = 0x7ff;
401 pll->best_vco = internal_tv_encoder ? 17500 : 21000;
402 }
403
404
405 // calc PLL dividers for CRT
406 // mode->timing.pixel_clock must be in Hz because required accuracy in TV-Out mode
Radeon_CalcCRTPLLDividers(const general_pll_info * general_pll,const display_mode * mode,pll_dividers * dividers)407 void Radeon_CalcCRTPLLDividers(
408 const general_pll_info *general_pll, const display_mode *mode, pll_dividers *dividers )
409 {
410 pll_info pll;
411
412 pll.post_divs = post_divs;
413 pll.extra_post_divs = extra_post_divs;
414 pll.ref_freq = general_pll->ref_freq;
415 pll.vco_min = general_pll->min_pll_freq;
416 pll.vco_max = general_pll->max_pll_freq;
417 pll.min_ref_div = 2;
418 pll.max_ref_div = 0x3ff;
419 pll.pll_in_min = 40;
420 pll.pll_in_max = 100;
421 pll.extra_feedback_div = 1;
422 pll.min_feedback_div = 4;
423 pll.max_feedback_div = 0x7ff;
424 pll.best_vco = 0;
425
426 SHOW_FLOW( 2, "freq=%ld", mode->timing.pixel_clock );
427
428 Radeon_CalcPLLDividers( &pll, mode->timing.pixel_clock, 0, dividers );
429 }
430
431
432 // calculate PLL registers
433 // mode->timing.pixel_clock must be in Hz because required accuracy in TV-Out mode
434 // (old: freq is in 10kHz)
Radeon_CalcPLLRegisters(const display_mode * mode,const pll_dividers * dividers,pll_regs * values)435 void Radeon_CalcPLLRegisters(
436 const display_mode *mode, const pll_dividers *dividers, pll_regs *values )
437 {
438 values->dot_clock_freq = dividers->freq;
439 values->feedback_div = dividers->feedback;
440 values->post_div = dividers->post;
441 values->pll_output_freq = dividers->freq * dividers->post;
442
443 values->ppll_ref_div = dividers->ref;
444 values->ppll_div_3 = (dividers->feedback | (dividers->post_code << 16));
445 // this is mad: the PLL controls the horizontal length in sub-byte precision!
446 values->htotal_cntl = mode->timing.h_total & 7;
447
448 SHOW_FLOW( 2, "dot_clock_freq=%ld, pll_output_freq=%ld, ref_div=%d, feedback_div=%d, post_div=%d",
449 values->dot_clock_freq, values->pll_output_freq,
450 values->ppll_ref_div, values->feedback_div, values->post_div );
451 }
452
453 // write values into PLL registers
Radeon_ProgramPLL(accelerator_info * ai,int crtc_idx,pll_regs * values)454 void Radeon_ProgramPLL(
455 accelerator_info *ai, int crtc_idx, pll_regs *values )
456 {
457 vuint8 *regs = ai->regs;
458 radeon_type asic = ai->si->asic;
459
460 SHOW_FLOW0( 2, "" );
461
462 // use some other PLL for pixel clock source to not fiddling with PLL
463 // while somebody is using it
464 Radeon_OUTPLLP( regs, asic, crtc_idx == 0 ? RADEON_VCLK_ECP_CNTL : RADEON_PIXCLKS_CNTL,
465 RADEON_VCLK_SRC_CPU_CLK, ~RADEON_VCLK_SRC_SEL_MASK );
466
467 Radeon_OUTPLLP( regs, asic,
468 crtc_idx == 0 ? RADEON_PPLL_CNTL : RADEON_P2PLL_CNTL,
469 RADEON_PPLL_RESET
470 | RADEON_PPLL_ATOMIC_UPDATE_EN
471 | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN,
472 ~(RADEON_PPLL_RESET
473 | RADEON_PPLL_ATOMIC_UPDATE_EN
474 | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN) );
475
476 // select divider 3 (well, only required for first PLL)
477 OUTREGP( regs, RADEON_CLOCK_CNTL_INDEX,
478 RADEON_PLL_DIV_SEL_DIV3,
479 ~RADEON_PLL_DIV_SEL_MASK );
480
481 RADEONPllErrataAfterIndex(regs, asic);
482
483 if( ai->si->new_pll && crtc_idx == 0 ) {
484 // starting with r300, the reference divider of the first PLL was
485 // moved to another bit position; at the old location, you only
486 // find the "BIOS suggested divider"; no clue why they did that
487 Radeon_OUTPLLP( regs, asic,
488 RADEON_PPLL_REF_DIV,
489 values->ppll_ref_div << RADEON_PPLL_REF_DIV_ACC_SHIFT,
490 ~RADEON_PPLL_REF_DIV_ACC_MASK );
491 } else {
492 Radeon_OUTPLLP( regs, asic,
493 crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV,
494 values->ppll_ref_div,
495 ~RADEON_PPLL_REF_DIV_MASK );
496 }
497
498 Radeon_OUTPLLP( regs, asic,
499 crtc_idx == 0 ? RADEON_PPLL_DIV_3 : RADEON_P2PLL_DIV_0,
500 values->ppll_div_3,
501 ~RADEON_PPLL_FB3_DIV_MASK );
502
503 Radeon_OUTPLLP( regs, asic,
504 crtc_idx == 0 ? RADEON_PPLL_DIV_3 : RADEON_P2PLL_DIV_0,
505 values->ppll_div_3,
506 ~RADEON_PPLL_POST3_DIV_MASK );
507
508 Radeon_PLLWriteUpdate( ai, crtc_idx );
509 Radeon_PLLWaitForReadUpdateComplete( ai, crtc_idx );
510
511 Radeon_OUTPLL( regs, asic,
512 crtc_idx == 0 ? RADEON_HTOTAL_CNTL : RADEON_HTOTAL2_CNTL,
513 values->htotal_cntl );
514
515 Radeon_OUTPLLP( regs, asic,
516 crtc_idx == 0 ? RADEON_PPLL_CNTL : RADEON_P2PLL_CNTL, 0,
517 ~(RADEON_PPLL_RESET
518 | RADEON_PPLL_SLEEP
519 | RADEON_PPLL_ATOMIC_UPDATE_EN
520 | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN) );
521
522 // there is no way to check whether PLL has settled, so wait a bit
523 snooze( 5000 );
524
525 // use PLL for pixel clock again
526 Radeon_OUTPLLP( regs, asic,
527 crtc_idx == 0 ? RADEON_VCLK_ECP_CNTL : RADEON_PIXCLKS_CNTL,
528 RADEON_VCLK_SRC_PPLL_CLK, ~RADEON_VCLK_SRC_SEL_MASK );
529 }
530