1 /*
2 Copyright (c) 2002-2004, Thomas Kurschel
3
4 Part of Radeon accelerant
5
6 Hardware access routines for overlays
7 */
8
9 #include "GlobalData.h"
10 #include "radeon_interface.h"
11 #include "mmio.h"
12 #include "overlay_regs.h"
13 #include "pll_regs.h"
14 #include "capture_regs.h"
15 #include "utils.h"
16 #include "pll_access.h"
17 #include <math.h>
18 #include <string.h>
19 #include "CP.h"
20
21
// forward declaration; the function itself is defined further down in
// this file
void Radeon_TempHideOverlay( accelerator_info *ai );

// standard (linear) gamma
// One entry per overlay gamma register. Radeon_InitOverlay walks this table
// and writes (slope << 16) | offset to "reg"; entries flagged with
// "r200_or_above" are skipped if the ASIC is older than rt_r200.
static struct {
	uint16 reg;				// gamma register to be programmed
	bool r200_or_above;		// true: only written if asic >= rt_r200
	uint32 slope;			// ends up in the upper 16 bits of the register value
	uint32 offset;			// or'ed into the lower bits of the register value
} std_gamma[] = {
	{ RADEON_OV0_GAMMA_0_F, false, 0x100, 0x0000 },
	{ RADEON_OV0_GAMMA_10_1F, false, 0x100, 0x0020 },
	{ RADEON_OV0_GAMMA_20_3F, false, 0x100, 0x0040 },
	{ RADEON_OV0_GAMMA_40_7F, false, 0x100, 0x0080 },
	{ RADEON_OV0_GAMMA_80_BF, true, 0x100, 0x0100 },
	{ RADEON_OV0_GAMMA_C0_FF, true, 0x100, 0x0100 },
	{ RADEON_OV0_GAMMA_100_13F, true, 0x100, 0x0200 },
	{ RADEON_OV0_GAMMA_140_17F, true, 0x100, 0x0200 },
	{ RADEON_OV0_GAMMA_180_1BF, true, 0x100, 0x0300 },
	{ RADEON_OV0_GAMMA_1C0_1FF, true, 0x100, 0x0300 },
	{ RADEON_OV0_GAMMA_200_23F, true, 0x100, 0x0400 },
	{ RADEON_OV0_GAMMA_240_27F, true, 0x100, 0x0400 },
	{ RADEON_OV0_GAMMA_280_2BF, true, 0x100, 0x0500 },
	{ RADEON_OV0_GAMMA_2C0_2FF, true, 0x100, 0x0500 },
	{ RADEON_OV0_GAMMA_300_33F, true, 0x100, 0x0600 },
	{ RADEON_OV0_GAMMA_340_37F, true, 0x100, 0x0600 },
	{ RADEON_OV0_GAMMA_380_3BF, false, 0x100, 0x0700 },
	{ RADEON_OV0_GAMMA_3C0_3FF, false, 0x100, 0x0700 }
};
50
51
52 // setup overlay unit before first use
Radeon_InitOverlay(accelerator_info * ai,int crtc_idx)53 void Radeon_InitOverlay(
54 accelerator_info *ai, int crtc_idx )
55 {
56 vuint8 *regs = ai->regs;
57 shared_info *si = ai->si;
58 uint i;
59 uint32 ecp_div;
60
61 SHOW_FLOW0( 0, "" );
62
63 // make sure we really write this value as the "toggle" bit
64 // contained in it (which is zero initially) is edge-sensitive!
65 // for capturing, we need to select "software" video port
66 si->overlay_mgr.auto_flip_reg = RADEON_OV0_VID_PORT_SELECT_SOFTWARE;
67
68 OUTREG( regs, RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOFT_RESET );
69 OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
70 OUTREG( regs, RADEON_OV0_FILTER_CNTL, // use fixed filter coefficients
71 RADEON_OV0_HC_COEF_ON_HORZ_Y |
72 RADEON_OV0_HC_COEF_ON_HORZ_UV |
73 RADEON_OV0_HC_COEF_ON_VERT_Y |
74 RADEON_OV0_HC_COEF_ON_VERT_UV );
75 OUTREG( regs, RADEON_OV0_KEY_CNTL, RADEON_GRAPHIC_KEY_FN_EQ |
76 RADEON_VIDEO_KEY_FN_FALSE |
77 RADEON_CMP_MIX_OR );
78 OUTREG( regs, RADEON_OV0_TEST, 0 );
79 // OUTREG( regs, RADEON_FCP_CNTL, RADEON_FCP_CNTL_GND ); // disable capture clock
80 // OUTREG( regs, RADEON_CAP0_TRIG_CNTL, 0 ); // disable capturing
81 OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );
82 // tell deinterlacer to always show recent field
83 OUTREG( regs, RADEON_OV0_DEINTERLACE_PATTERN,
84 0xaaaaa | (9 << RADEON_OV0_DEINT_PAT_LEN_M1_SHIFT) );
85
86 // set gamma
87 for( i = 0; i < sizeof( std_gamma ) / sizeof( std_gamma[0] ); ++i ) {
88 if( !std_gamma[i].r200_or_above || si->asic >= rt_r200 ) {
89 OUTREG( regs, std_gamma[i].reg,
90 (std_gamma[i].slope << 16) | std_gamma[i].offset );
91 }
92 }
93
94 // overlay unit can only handle up to 175 MHz, if pixel clock is higher,
95 // only every second pixel is handled
96 if( si->crtc[crtc_idx].mode.timing.pixel_clock < 175000 )
97 ecp_div = 0;
98 else
99 ecp_div = 1;
100
101 Radeon_OUTPLLP( regs, si->asic, RADEON_VCLK_ECP_CNTL,
102 ecp_div << RADEON_ECP_DIV_SHIFT, ~RADEON_ECP_DIV_MASK );
103
104 // Force the overlay clock on for integrated chips
105 if ((si->asic == rt_rs100) ||
106 (si->asic == rt_rs200) ||
107 (si->asic == rt_rs300)) {
108 Radeon_OUTPLL( regs, si->asic, RADEON_VCLK_ECP_CNTL,
109 (Radeon_INPLL( regs, si->asic, RADEON_VCLK_ECP_CNTL) | (1<<18)));
110 }
111
112 si->active_overlay.crtc_idx = si->pending_overlay.crtc_idx;
113
114 // invalidate active colour space
115 si->active_overlay.ob.space = -1;
116
117 // invalidate position/scaling
118 si->active_overlay.ob.width = -1;
119 }
120
// colour space transformation matrix
// (coefficients that map one input component to one RGB output channel;
// as visible in Radeon_SetTransform, RefRCb and RefBCr are never read there)
typedef struct space_transform
{
	float RefLuma;	// scaling of luma to use full RGB range
	float RefRCb;	// b/u -> r
	float RefRY;	// g/y -> r
	float RefRCr;	// r/v -> r
	float RefGCb;	// b/u -> g (by analogy with the red row)
	float RefGY;	// g/y -> g
	float RefGCr;	// r/v -> g
	float RefBCb;	// b/u -> b
	float RefBY;	// g/y -> b
	float RefBCr;	// r/v -> b
} space_transform;


// Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
// (index 0 = BT.601, index 1 = BT.709; selected via the "ref" argument
// of Radeon_SetTransform)
space_transform trans_yuv[2] =
{
	{ 1.1678, 0.0, 1, 1.6007, -0.3929, 1, -0.8154, 2.0232, 1, 0.0 }, /* BT.601 */
	{ 1.1678, 0.0, 1, 1.7980, -0.2139, 1, -0.5345, 2.1186, 1, 0.0 } /* BT.709 */
};


// RGB is a pass through
// (used by Radeon_SetTransform for all non-YUV colour spaces)
space_transform trans_rgb =
	{ 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 };
148
149
150 // set overlay colour space transformation matrix
Radeon_SetTransform(accelerator_info * ai,float bright,float cont,float sat,float hue,float red_intensity,float green_intensity,float blue_intensity,uint ref)151 static void Radeon_SetTransform(
152 accelerator_info *ai,
153 float bright,
154 float cont,
155 float sat,
156 float hue,
157 float red_intensity,
158 float green_intensity,
159 float blue_intensity,
160 uint ref)
161 {
162 vuint8 *regs = ai->regs;
163 shared_info *si = ai->si;
164 float OvHueSin, OvHueCos;
165 float CAdjOff;
166 float CAdjRY, CAdjGY, CAdjBY;
167 float CAdjRCb, CAdjRCr;
168 float CAdjGCb, CAdjGCr;
169 float CAdjBCb, CAdjBCr;
170 float RedAdj,GreenAdj,BlueAdj;
171 float OvROff, OvGOff, OvBOff;
172 float OvRY, OvGY, OvBY;
173 float OvRCb, OvRCr;
174 float OvGCb, OvGCr;
175 float OvBCb, OvBCr;
176 float Loff;
177 float Coff;
178
179 uint32 dwOvROff, dwOvGOff, dwOvBOff;
180 uint32 dwOvRY, dwOvGY, dwOvBY;
181 uint32 dwOvRCb, dwOvRCr;
182 uint32 dwOvGCb, dwOvGCr;
183 uint32 dwOvBCb, dwOvBCr;
184
185 space_transform *trans;
186
187 SHOW_FLOW0( 0, "" );
188
189 // get proper conversion formula
190 switch( si->pending_overlay.ob.space ) {
191 case B_YCbCr422:
192 case B_YUV12:
193 Loff = 16 * 4; // internal representation is 10 Bits
194 Coff = 128 * 4;
195
196 if (ref >= 2)
197 ref = 0;
198
199 trans = &trans_yuv[ref];
200 break;
201
202 case B_RGB15:
203 case B_RGB16:
204 case B_RGB32:
205 default:
206 Loff = 0;
207 Coff = 0;
208 trans = &trans_rgb;
209 }
210
211 OvHueSin = sin(hue);
212 OvHueCos = cos(hue);
213
214 // get matrix values to convert overlay colour space to RGB
215 // applying colour adjustment, saturation and luma scaling
216 // (saturation doesn't work with RGB input, perhaps it did with some
217 // maths; this is left to the reader :)
218 CAdjRY = cont * trans->RefLuma * trans->RefRY;
219 CAdjGY = cont * trans->RefLuma * trans->RefGY;
220 CAdjBY = cont * trans->RefLuma * trans->RefBY;
221
222 CAdjRCb = sat * -OvHueSin * trans->RefRCr;
223 CAdjRCr = sat * OvHueCos * trans->RefRCr;
224 CAdjGCb = sat * (OvHueCos * trans->RefGCb - OvHueSin * trans->RefGCr);
225 CAdjGCr = sat * (OvHueSin * trans->RefGCb + OvHueCos * trans->RefGCr);
226 CAdjBCb = sat * OvHueCos * trans->RefBCb;
227 CAdjBCr = sat * OvHueSin * trans->RefBCb;
228
229 // adjust black level
230 CAdjOff = cont * trans[ref].RefLuma * bright * 1023.0;
231 RedAdj = cont * trans[ref].RefLuma * red_intensity * 1023.0;
232 GreenAdj = cont * trans[ref].RefLuma * green_intensity * 1023.0;
233 BlueAdj = cont * trans[ref].RefLuma * blue_intensity * 1023.0;
234
235 OvRY = CAdjRY;
236 OvGY = CAdjGY;
237 OvBY = CAdjBY;
238 OvRCb = CAdjRCb;
239 OvRCr = CAdjRCr;
240 OvGCb = CAdjGCb;
241 OvGCr = CAdjGCr;
242 OvBCb = CAdjBCb;
243 OvBCr = CAdjBCr;
244 // apply offsets
245 OvROff = RedAdj + CAdjOff - CAdjRY * Loff - (OvRCb + OvRCr) * Coff;
246 OvGOff = GreenAdj + CAdjOff - CAdjGY * Loff - (OvGCb + OvGCr) * Coff;
247 OvBOff = BlueAdj + CAdjOff - CAdjBY * Loff - (OvBCb + OvBCr) * Coff;
248
249 dwOvROff = ((int32)(OvROff * 2.0)) & 0x1fff;
250 dwOvGOff = ((int32)(OvGOff * 2.0)) & 0x1fff;
251 dwOvBOff = ((int32)(OvBOff * 2.0)) & 0x1fff;
252
253 dwOvRY = (((int32)(OvRY * 2048.0))&0x7fff)<<17;
254 dwOvGY = (((int32)(OvGY * 2048.0))&0x7fff)<<17;
255 dwOvBY = (((int32)(OvBY * 2048.0))&0x7fff)<<17;
256 dwOvRCb = (((int32)(OvRCb * 2048.0))&0x7fff)<<1;
257 dwOvRCr = (((int32)(OvRCr * 2048.0))&0x7fff)<<17;
258 dwOvGCb = (((int32)(OvGCb * 2048.0))&0x7fff)<<1;
259 dwOvGCr = (((int32)(OvGCr * 2048.0))&0x7fff)<<17;
260 dwOvBCb = (((int32)(OvBCb * 2048.0))&0x7fff)<<1;
261 dwOvBCr = (((int32)(OvBCr * 2048.0))&0x7fff)<<17;
262
263 OUTREG( regs, RADEON_OV0_LIN_TRANS_A, dwOvRCb | dwOvRY );
264 OUTREG( regs, RADEON_OV0_LIN_TRANS_B, dwOvROff | dwOvRCr );
265 OUTREG( regs, RADEON_OV0_LIN_TRANS_C, dwOvGCb | dwOvGY );
266 OUTREG( regs, RADEON_OV0_LIN_TRANS_D, dwOvGOff | dwOvGCr );
267 OUTREG( regs, RADEON_OV0_LIN_TRANS_E, dwOvBCb | dwOvBY );
268 OUTREG( regs, RADEON_OV0_LIN_TRANS_F, dwOvBOff | dwOvBCr );
269
270 si->active_overlay.ob.space = si->pending_overlay.ob.space;
271 }
272
273
274 // convert Be colour key to rgb value
colourKey2RGB32(uint32 space,uint8 red,uint8 green,uint8 blue)275 static uint32 colourKey2RGB32(
276 uint32 space, uint8 red, uint8 green, uint8 blue )
277 {
278 uint32 res;
279
280 SHOW_FLOW0( 3, "" );
281
282 // the way Be defines colour keys may be convinient to some driver developers,
283 // but it's not well defined - took me some time to find out the format used
284 // and still I have no idea how alpha is defined; Rudolf told me that alpha is
285 // never used
286 switch( space ) {
287 case B_RGB15:
288 res =
289 ((uint32)(red >> 0) << (16+3)) |
290 ((uint32)(green >> 0) << (8+3)) |
291 ((blue >> 0) << 3);
292 break;
293 case B_RGB16:
294 res =
295 ((uint32)(red >> 0) << (16+3)) |
296 ((uint32)(green >> 0) << (8+2)) |
297 ((blue >> 0) << 3);
298 break;
299 case B_RGB32:
300 case B_CMAP8:
301 res = ((uint32)(red) << 16) | ((uint32)(green) << 8) | blue;
302 break;
303 default:
304 res = 0;
305 }
306
307 SHOW_FLOW( 3, "key=%lx", res );
308 return res;
309 }
310
311
312 // set colour key of overlay
Radeon_SetColourKey(accelerator_info * ai,const overlay_window * ow)313 static void Radeon_SetColourKey(
314 accelerator_info *ai, const overlay_window *ow )
315 {
316 virtual_card *vc = ai->vc;
317 vuint8 *regs = ai->regs;
318 uint32 rgb32, mask32, min32, max32;
319
320 /*SHOW_FLOW( 0, "value=%02x %02x %02x, mask=%02x %02x %02x",
321 ow->red.value, ow->green.value, ow->blue.value,
322 ow->red.mask, ow->green.mask, ow->blue.mask );*/
323
324 // Radeons don't support value and mask as colour key but colour range
325 rgb32 = colourKey2RGB32( vc->mode.space,
326 ow->red.value, ow->green.value, ow->blue.value );
327 mask32 = colourKey2RGB32( vc->mode.space,
328 ow->red.mask, ow->green.mask, ow->blue.mask );
329
330 // ~mask32 are all unimportant (usually low order) bits
331 // oring this to the colour should give us the highest valid colour value
332 // (add would be more precise but may lead to overflows)
333 min32 = rgb32;
334 max32 = rgb32 | ~mask32;
335
336 OUTREG( regs, RADEON_OV0_GRAPHICS_KEY_CLR_LOW, min32 );
337 OUTREG( regs, RADEON_OV0_GRAPHICS_KEY_CLR_HIGH, max32 );
338 OUTREG( regs, RADEON_OV0_KEY_CNTL,
339 RADEON_GRAPHIC_KEY_FN_EQ |
340 RADEON_VIDEO_KEY_FN_FALSE |
341 RADEON_CMP_MIX_OR );
342 }
343
// one entry of a horizontal scaling/filter table; getHScaleFactor picks
// the first entry of a table whose max_scale is not exceeded and whose
// filter requirements can be met
typedef struct {
	uint max_scale;					// maximum src_width/dest_width,
									// i.e. source increment per screen pixel
									// (compared against h_inc, same fixed point format)
	uint8 group_size; 				// size of one filter group in pixels
	uint8 p1_step_by, p23_step_by;	// > 0: log(source pixel increment)+1, 2-tap filter
									// = 0: source pixel increment = 1, 4-tap filter
} hscale_factor;


// scaling/filter tables depending on overlay colour space:
// magnifying pixels is no problem, but minifying can lead to overload,
// so we have to skip pixels and/or use 2-tap filters
// (columns: max_scale, group_size, p1_step_by, p23_step_by)
static hscale_factor scale_RGB16[] = {
	{ (2 << 12), 2, 1, 1 },
	{ (4 << 12), 2, 2, 2 },
	{ (8 << 12), 2, 3, 3 },
	{ (16 << 12), 2, 4, 4 },
	{ (32 << 12), 2, 5, 5 }
};

static hscale_factor scale_RGB32[] = {
	{ (2 << 12) / 3, 2, 0, 0 },
	{ (4 << 12) / 3, 4, 1, 1 },
	{ (8 << 12) / 3, 4, 2, 2 },
	{ (4 << 12), 4, 2, 3 },
	{ (16 << 12) / 3, 4, 3, 3 },
	{ (8 << 12), 4, 3, 4 },
	{ (32 << 12) / 3, 4, 4, 4 },
	{ (16 << 12), 4, 5, 5 }
};

static hscale_factor scale_YUV[] = {
	{ (16 << 12) / 16, 2, 0, 0 },
	{ (16 << 12) / 12, 2, 0, 1 },	// mode 4, 1, 0 (as used by YUV12) is impossible
	{ (16 << 12) / 8, 4, 1, 1 },
	{ (16 << 12) / 6, 4, 1, 2 },
	{ (16 << 12) / 4, 4, 2, 2 },
	{ (16 << 12) / 3, 4, 2, 3 },
	{ (16 << 12) / 2, 4, 3, 3 },
	{ (16 << 12) / 1, 4, 4, 4 }
};

static hscale_factor scale_YUV12[] = {
	{ (16 << 12) / 16, 2, 0, 0 },
	{ (16 << 12) / 12, 4, 1, 0 },
	{ (16 << 12) / 12, 2, 0, 1 },
	{ (16 << 12) / 8, 4, 1, 1 },
	{ (16 << 12) / 6, 4, 1, 2 },
	{ (16 << 12) / 4, 4, 2, 2 },
	{ (16 << 12) / 3, 4, 2, 3 },
	{ (16 << 12) / 2, 4, 3, 3 },
	{ (int)((16 << 12) / 1.5), 4, 3, 4 },
	{ (int)((16 << 12) / 1.0), 4, 4, 4 },
	{ (int)((16 << 12) / 0.75), 4, 4, 5 },
	{ (int)((16 << 12) / 0.5), 4, 5, 5 }
};

// smallest of three values; arguments are evaluated more than once, so
// only use it with expressions free of side effects
#define min3( a, b, c ) (min( (a), min( (b), (c) )))

// for YUV9, max_scale is the smallest of three separate limits
static hscale_factor scale_YUV9[] = {
	{ min3( (16 << 12) / 12, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 2, 0, 0 },
	{ min3( (16 << 12) / 8, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 4, 1, 0 },
	{ min3( (16 << 12) / 10, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 2, 0, 1 },
	{ min3( (16 << 12) / 6, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 4, 1, 1 },
	{ min3( (16 << 12) / 5, (3 << 12) * 1, (2 << 12) * 4 * 2 ), 4, 1, 2 },
	{ min3( (16 << 12) / 3, (3 << 12) * 2, (2 << 12) * 4 * 2 ), 4, 2, 2 },
	{ min3( (int)((16 << 12) / 2.5), (3 << 12) * 1, (2 << 12) * 4 * 4 ), 4, 2, 3 },	// probably, it should be (3 << 12) * 2
	{ min3( (int)((16 << 12) / 1.5), (3 << 12) * 4, (2 << 12) * 4 * 4 ), 4, 3, 3 },
	{ min3( (int)((16 << 12) / 0.75), (3 << 12) * 8, (2 << 12) * 4 * 8 ), 4, 4, 4 },
	{ min3( (int)((16 << 12) / 0.625), (3 << 12) * 8, (2 << 12) * 4 * 16 ), 4, 4, 5 },
	{ min3( (int)((16 << 12) / 0.375), (3 << 12) * 16, (2 << 12) * 4 * 16 ), 4, 5, 5 }
};
416
417
// parameters of an overlay colour space
typedef struct {
	uint8 bpp_shift;				// log2( bytes per pixel (main plane) )
	uint8 bpuv_shift;				// log2( bytes per pixel (uv-plane) );
									// if there is one plane only: bpp=bpuv
	uint8 num_planes;				// number of planes
	uint8 h_uv_sub_sample_shift;	// log2( horizontal pixels per uv pair )
	uint8 v_uv_sub_sample_shift;	// log2( vertical pixels per uv pair )
	hscale_factor *factors;			// scaling/filter table
	uint8 num_factors;				// number of entries in "factors"
} space_params;

// colour space parameters, indexed by the "ati_space" value of an overlay
// buffer node; entries without a scaling table (factors == NULL) make
// getHScaleFactor return NULL
static space_params space_params_table[16] = {
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 1, 1, 1, 0, 0, scale_RGB16, B_COUNT_OF( scale_RGB16 ) },	// RGB15
	{ 1, 1, 1, 0, 0, scale_RGB16, B_COUNT_OF( scale_RGB16 ) },	// RGB16
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 2, 2, 1, 0, 0, scale_RGB32, B_COUNT_OF( scale_RGB32 ) },	// RGB32
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 3, 2, 2, scale_YUV9, B_COUNT_OF( scale_YUV9 ) },	// YUV9
	{ 0, 0, 3, 1, 1, scale_YUV12, B_COUNT_OF( scale_YUV12 ) },	// YUV12, three-plane
	{ 1, 1, 1, 1, 0, scale_YUV, B_COUNT_OF( scale_YUV ) },	// VYUY422
	{ 1, 1, 1, 1, 0, scale_YUV, B_COUNT_OF( scale_YUV ) },	// YVYU422
	{ 0, 1, 2, 1, 1, scale_YUV12, B_COUNT_OF( scale_YUV12 ) },	// YUV12, two-plane
	{ 0, 1, 2, 1, 1, NULL, 0 },	// ???
	{ 0, 0, 0, 0, 0, NULL, 0 }	// reserved
};
448
449 // get appropriate scaling/filter parameters
getHScaleFactor(accelerator_info * ai,space_params * params,uint32 src_left,uint32 src_right,uint32 * h_inc)450 static hscale_factor *getHScaleFactor(
451 accelerator_info *ai,
452 space_params *params,
453 uint32 src_left, uint32 src_right, uint32 *h_inc )
454 {
455 uint words_per_p1_line, words_per_p23_line, max_words_per_line;
456 bool p1_4tap_allowed, p23_4tap_allowed;
457 uint i;
458 uint num_factors;
459 hscale_factor *factors;
460
461 SHOW_FLOW0( 3, "" );
462
463 // check whether fifo is large enough to feed vertical 4-tap-filter
464
465 words_per_p1_line =
466 ceilShiftDiv( (src_right - 1) << params->bpp_shift, 4 ) -
467 ((src_left << params->bpp_shift) >> 4) + 1;
468 words_per_p23_line =
469 ceilShiftDiv( (src_right - 1) << params->bpuv_shift, 4 ) -
470 ((src_left << params->bpuv_shift) >> 4) + 1;
471
472 // overlay scaler line length differs for different revisions
473 // this needs to be maintained by hand
474 if (ai->si->asic == rt_r200 || ai->si->asic >= rt_r300)
475 max_words_per_line = 1920 / 16;
476 else
477 max_words_per_line = 1536 / 16;
478
479 switch (params->num_planes) {
480 case 3:
481 p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2;
482 p23_4tap_allowed = words_per_p23_line < max_words_per_line / 4;
483 break;
484 case 2:
485 p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2;
486 p23_4tap_allowed = words_per_p23_line < max_words_per_line / 2;
487 break;
488 case 1:
489 default:
490 p1_4tap_allowed = p23_4tap_allowed = words_per_p1_line < max_words_per_line;
491 break;
492 }
493
494 SHOW_FLOW( 3, "p1_4tap_allowed=%d, p23_4t_allowed=%d",
495 (int)p1_4tap_allowed, (int)p23_4tap_allowed );
496
497 // search for proper scaling/filter entry
498 factors = params->factors;
499 num_factors = params->num_factors;
500
501 if (factors == NULL || num_factors == 0)
502 return NULL;
503
504 for (i = 0; i < num_factors; ++i, ++factors) {
505 if (*h_inc <= factors->max_scale
506 && (factors->p1_step_by > 0 || p1_4tap_allowed)
507 && (factors->p23_step_by > 0 || p23_4tap_allowed))
508 break;
509 }
510
511 if (i == num_factors) {
512 // overlay is asked to be scaled down more than allowed,
513 // so use least scaling factor supported
514 --factors;
515 *h_inc = factors->max_scale;
516 }
517
518 SHOW_FLOW( 3, "group_size=%d, p1_step_by=%d, p23_step_by=%d",
519 factors->group_size, factors->p1_step_by, factors->p23_step_by );
520
521 return factors;
522 }
523
524
525 #define I2FF( a, shift ) ((uint32)((a) * (1 << (shift))))
526
527
528 // show overlay on screen
Radeon_ShowOverlay(accelerator_info * ai,int crtc_idx)529 static status_t Radeon_ShowOverlay(
530 accelerator_info *ai, int crtc_idx )
531 {
532 virtual_card *vc = ai->vc;
533 shared_info *si = ai->si;
534 vuint8 *regs = ai->regs;
535 overlay_info *overlay = &si->pending_overlay;
536 overlay_buffer_node *node = overlay->on;
537 crtc_info *crtc = &si->crtc[crtc_idx];
538
539 uint32 ecp_div;
540 uint32 v_inc, h_inc;
541 uint32 src_v_inc, src_h_inc;
542 uint32 src_left, src_top, src_right, src_bottom;
543 int32 dest_left, dest_top, dest_right, dest_bottom;
544 uint32 offset;
545 uint32 tmp;
546 uint32 p1_h_accum_init, p23_h_accum_init, p1_v_accum_init, p23_v_accum_init;
547 uint32 p1_active_lines, p23_active_lines;
548 hscale_factor *factors;
549 space_params *params;
550
551 uint32 p1_h_inc, p23_h_inc;
552 uint32 p1_x_start, p1_x_end;
553 uint32 p23_x_start, p23_x_end;
554
555 uint scale_ctrl;
556
557 /*uint32 buffer[20*2];
558 uint idx = 0;*/
559
560 SHOW_FLOW0( 0, "" );
561
562 Radeon_SetColourKey( ai, &overlay->ow );
563
564 // overlay unit can only handle up to 175 MHz; if pixel clock is higher,
565 // only every second pixel is handled
566 // (this devider is gets written into PLL by InitOverlay,
567 // so we don't need to do it ourself)
568 if( crtc->mode.timing.pixel_clock < 175000 )
569 ecp_div = 0;
570 else
571 ecp_div = 1;
572
573
574 // scaling is independant of clipping, get this first
575 {
576 uint32 src_width, src_height;
577
578 src_width = overlay->ov.width;
579 src_height = overlay->ov.height;
580
581 // this is for graphics card
582 v_inc = (src_height << 20) / overlay->ow.height;
583 h_inc = (src_width << (12 + ecp_div)) / overlay->ow.width;
584
585
586 // this is for us
587 src_v_inc = (src_height << 16) / overlay->ow.height;
588 src_h_inc = (src_width << 16) / overlay->ow.width;
589 }
590
591 // calculate unclipped position/size
592 // TBD: I assume that overlay_window.offset_xyz is only a hint where
593 // no overlay is visible; another interpretation were to zoom
594 // the overlay so it fits into remaining space
595 src_left = (overlay->ov.h_start << 16) + overlay->ow.offset_left * src_h_inc;
596 src_top = (overlay->ov.v_start << 16) + overlay->ow.offset_top * src_v_inc;
597 src_right = ((overlay->ov.h_start + overlay->ov.width) << 16) -
598 overlay->ow.offset_right * src_h_inc;
599 src_bottom = ((overlay->ov.v_start + overlay->ov.height) << 16) -
600 overlay->ow.offset_top * src_v_inc;
601 dest_left = overlay->ow.h_start + overlay->ow.offset_left;
602 dest_top = overlay->ow.v_start + overlay->ow.offset_top;
603 dest_right = overlay->ow.h_start + overlay->ow.width - overlay->ow.offset_right;
604 dest_bottom = overlay->ow.v_start + overlay->ow.height - overlay->ow.offset_bottom;
605
606 SHOW_FLOW( 3, "ow: h=%d, v=%d, width=%d, height=%d",
607 overlay->ow.h_start, overlay->ow.v_start,
608 overlay->ow.width, overlay->ow.height );
609
610 SHOW_FLOW( 3, "offset_left=%d, offset_right=%d, offset_top=%d, offset_bottom=%d",
611 overlay->ow.offset_left, overlay->ow.offset_right,
612 overlay->ow.offset_top, overlay->ow.offset_bottom );
613
614
615 // apply virtual screen
616 dest_left -= vc->mode.h_display_start + crtc->rel_x;
617 dest_top -= vc->mode.v_display_start + crtc->rel_y;
618 dest_right -= vc->mode.h_display_start + crtc->rel_x;
619 dest_bottom -= vc->mode.v_display_start + crtc->rel_y;
620
621 // clip to visible area
622 if( dest_left < 0 ) {
623 src_left += -dest_left * src_h_inc;
624 dest_left = 0;
625 }
626 if( dest_top < 0 ) {
627 src_top += -dest_top * src_v_inc;
628 dest_top = 0;
629 }
630
631 SHOW_FLOW( 3, "mode: w=%d, h=%d",
632 crtc->mode.timing.h_display, crtc->mode.timing.v_display );
633
634 if( dest_right > crtc->mode.timing.h_display )
635 dest_right = crtc->mode.timing.h_display;
636 if( dest_bottom > crtc->mode.timing.v_display )
637 dest_bottom = crtc->mode.timing.v_display;
638
639 SHOW_FLOW( 3, "src=(%d, %d, %d, %d)",
640 src_left, src_top, src_right, src_bottom );
641 SHOW_FLOW( 3, "dest=(%d, %d, %d, %d)",
642 dest_left, dest_top, dest_right, dest_bottom );
643
644
645 // especially with multi-screen modes the overlay may not be on screen at all
646 if( dest_left >= dest_right || dest_top >= dest_bottom ||
647 src_left >= src_right || src_top >= src_bottom )
648 {
649 Radeon_TempHideOverlay( ai );
650 goto done;
651 }
652
653
654 // let's calculate all those nice register values
655 SHOW_FLOW( 3, "ati_space=%d", node->ati_space );
656 params = &space_params_table[node->ati_space];
657
658 // choose proper scaler
659 {
660 factors = getHScaleFactor( ai, params, src_left >> 16, src_right >> 16, &h_inc );
661 if( factors == NULL )
662 return B_ERROR;
663
664 p1_h_inc = factors->p1_step_by > 0 ?
665 h_inc >> (factors->p1_step_by - 1) : h_inc;
666 p23_h_inc =
667 (factors->p23_step_by > 0 ? h_inc >> (factors->p23_step_by - 1) : h_inc)
668 >> params->h_uv_sub_sample_shift;
669
670 SHOW_FLOW( 3, "p1_h_inc=%x, p23_h_inc=%x", p1_h_inc, p23_h_inc );
671 }
672
673 // get register value for start/end position of overlay image (pixel-precise only)
674 {
675 uint32 p1_step_size, p23_step_size;
676 uint32 p1_left, p1_right, p1_width;
677 uint32 p23_left, p23_right, p23_width;
678
679 p1_left = src_left >> 16;
680 p1_right = src_right >> 16;
681 p1_width = p1_right - p1_left;
682
683 p1_step_size = factors->p1_step_by > 0 ? (1 << (factors->p1_step_by - 1)) : 1;
684 p1_x_start = p1_left % (16 >> params->bpp_shift);
685 p1_x_end = ((p1_x_start + p1_width - 1) / p1_step_size) * p1_step_size;
686
687 SHOW_FLOW( 3, "p1_x_start=%d, p1_x_end=%d", p1_x_start, p1_x_end );
688
689 p23_left = (src_left >> 16) >> params->h_uv_sub_sample_shift;
690 p23_right = (src_right >> 16) >> params->h_uv_sub_sample_shift;
691 p23_width = p23_right - p23_left;
692
693 p23_step_size = factors->p23_step_by > 0 ? (1 << (factors->p23_step_by - 1)) : 1;
694 // if resolution of Y and U/V differs but YUV are stored in one
695 // plane then UV alignment depends on Y data, therefore the hack
696 // (you are welcome to replace this with some cleaner code ;)
697 p23_x_start = p23_left %
698 ((16 >> params->bpuv_shift) /
699 (node->ati_space == 11 || node->ati_space == 12 ? 2 : 1));
700 p23_x_end = (int)((p23_x_start + p23_width - 1) / p23_step_size) * p23_step_size;
701
702 SHOW_FLOW( 3, "p23_x_start=%d, p23_x_end=%d", p23_x_start, p23_x_end );
703
704 // get memory location of first word to be read by scaler
705 // (save relative offset for fast update)
706 si->active_overlay.rel_offset = (src_top >> 16) * node->buffer.bytes_per_row +
707 ((p1_left << params->bpp_shift) & ~0xf);
708 offset = node->mem_offset + si->active_overlay.rel_offset;
709
710 SHOW_FLOW( 3, "rel_offset=%x", si->active_overlay.rel_offset );
711 }
712
713 // get active lines for scaler
714 // (we could add additional blank lines for DVD letter box mode,
715 // but this is not supported by API; additionally, this only makes
716 // sense if want to put subtitles onto the black border, which is
717 // supported neither)
718 {
719 uint16 int_top, int_bottom;
720
721 int_top = src_top >> 16;
722 int_bottom = (src_bottom >> 16);
723
724 p1_active_lines = int_bottom - int_top - 1;
725 p23_active_lines =
726 ceilShiftDiv( int_bottom - 1, params->v_uv_sub_sample_shift ) -
727 (int_top >> params->v_uv_sub_sample_shift);
728
729 SHOW_FLOW( 3, "p1_active_lines=%d, p23_active_lines=%d",
730 p1_active_lines, p23_active_lines );
731 }
732
733 // if picture is stretched for flat panel, we need to scale all
734 // vertical values accordingly
735 // TBD: there is no description at all concerning this, so v_accum_init may
736 // need to be initialized based on original value
737 {
738 if( (crtc->active_displays & (dd_lvds | dd_dvi)) != 0 ) {
739 uint64 v_ratio;
740
741 // convert 32.32 format to 16.16 format; else we
742 // cannot multiply two fixed point values without
743 // overflow
744 v_ratio = si->flatpanels[crtc->flatpanel_port].v_ratio >> (FIX_SHIFT - 16);
745
746 v_inc = (v_inc * v_ratio) >> 16;
747 }
748
749 SHOW_FLOW( 3, "v_inc=%x", v_inc );
750 }
751
752 // get initial horizontal scaler values, taking care of precharge
753 // don't ask questions about formulas - take them as is
754 // (TBD: home-brewed sub-pixel source clipping may be wrong,
755 // especially for uv-planes)
756 {
757 uint32 p23_group_size;
758
759 tmp = ((src_left & 0xffff) >> 11) + (
760 (
761 I2FF( p1_x_start % factors->group_size, 12 ) +
762 I2FF( 2.5, 12 ) +
763 p1_h_inc / 2 +
764 I2FF( 0.5, 12-5 ) // rounding
765 ) >> (12 - 5)); // scaled by 1 << 5
766
767 SHOW_FLOW( 3, "p1_h_accum_init=%x", tmp );
768
769 p1_h_accum_init =
770 ((tmp << 15) & RADEON_OV0_P1_H_ACCUM_INIT_MASK) |
771 ((tmp << 23) & RADEON_OV0_P1_PRESHIFT_MASK);
772
773
774 p23_group_size = 2;
775
776 tmp = ((src_left & 0xffff) >> 11) + (
777 (
778 I2FF( p23_x_start % p23_group_size, 12 ) +
779 I2FF( 2.5, 12 ) +
780 p23_h_inc / 2 +
781 I2FF( 0.5, 12-5 ) // rounding
782 ) >> (12 - 5)); // scaled by 1 << 5
783
784 SHOW_FLOW( 3, "p23_h_accum_init=%x", tmp );
785
786 p23_h_accum_init =
787 ((tmp << 15) & RADEON_OV0_P23_H_ACCUM_INIT_MASK) |
788 ((tmp << 23) & RADEON_OV0_P23_PRESHIFT_MASK);
789 }
790
791 // get initial vertical scaler values, taking care of precharge
792 {
793 uint extra_full_line;
794
795 extra_full_line = factors->p1_step_by == 0 ? 1 : 0;
796
797 tmp = ((src_top & 0x0000ffff) >> 11) + (
798 (min(
799 I2FF( 1.5, 20 ) + I2FF( extra_full_line, 20 ) + v_inc / 2,
800 I2FF( 2.5, 20 ) + 2 * I2FF( extra_full_line, 20 )
801 ) + I2FF( 0.5, 20-5 )) // rounding
802 >> (20 - 5)); // scaled by 1 << 5
803
804 SHOW_FLOW( 3, "p1_v_accum_init=%x", tmp );
805
806 p1_v_accum_init =
807 ((tmp << 15) & RADEON_OV0_P1_V_ACCUM_INIT_MASK) | 0x00000001;
808
809
810 extra_full_line = factors->p23_step_by == 0 ? 1 : 0;
811
812 if( params->v_uv_sub_sample_shift > 0 ) {
813 tmp = ((src_top & 0x0000ffff) >> 11) + (
814 (min(
815 I2FF( 1.5, 20 ) +
816 I2FF( extra_full_line, 20 ) +
817 ((v_inc / 2) >> params->v_uv_sub_sample_shift),
818 I2FF( 2.5, 20 ) +
819 2 * I2FF( extra_full_line, 20 )
820 ) + I2FF( 0.5, 20-5 )) // rounding
821 >> (20 - 5)); // scaled by 1 << 5
822 } else {
823 tmp = ((src_top & 0x0000ffff) >> 11) + (
824 (
825 I2FF( 2.5, 20 ) +
826 2 * I2FF( extra_full_line, 20 ) +
827 I2FF( 0.5, 20-5 ) // rounding
828 ) >> (20 - 5)); // scaled by 1 << 5
829 }
830
831 SHOW_FLOW( 3, "p23_v_accum_init=%x", tmp );
832
833 p23_v_accum_init =
834 ((tmp << 15) & RADEON_OV0_P23_V_ACCUM_INIT_MASK) | 0x00000001;
835 }
836
837 // show me what you've got!
838 // we could lock double buffering of overlay unit during update
839 // (new values are copied during vertical blank, so if we've updated
840 // only some of them, you get a whole frame of mismatched values)
841 // but during tests I couldn't get the artifacts go away, so
842 // we use the dangerous way which has the pro to not require any
843 // waiting
844
845 // let's try to lock overlay unit
846 // we had to wait now until the lock takes effect, but this is
847 // impossible with CCE; perhaps we have to convert this code to
848 // direct register access; did that - let's see what happens...
849 OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, RADEON_REG_LD_CTL_LOCK );
850
851 // wait until register access is locked
852 while( (INREG( regs, RADEON_OV0_REG_LOAD_CNTL)
853 & RADEON_REG_LD_CTL_LOCK_READBACK) == 0 )
854 ;
855
856 OUTREG( regs, RADEON_OV0_VID_BUF0_BASE_ADRS, offset );
857 OUTREG( regs, RADEON_OV0_VID_BUF_PITCH0_VALUE, node->buffer.bytes_per_row );
858 OUTREG( regs, RADEON_OV0_H_INC, p1_h_inc | (p23_h_inc << 16) );
859 OUTREG( regs, RADEON_OV0_STEP_BY, factors->p1_step_by | (factors->p23_step_by << 8) );
860 OUTREG( regs, RADEON_OV0_V_INC, v_inc );
861
862 OUTREG( regs,
863 crtc->crtc_idx == 0 ? RADEON_OV0_Y_X_START : RADEON_OV1_Y_X_START,
864 (dest_left) | (dest_top << 16) );
865 OUTREG( regs,
866 crtc->crtc_idx == 0 ? RADEON_OV0_Y_X_END : RADEON_OV1_Y_X_END,
867 (dest_right - 1) | ((dest_bottom - 1) << 16) );
868
869 OUTREG( regs, RADEON_OV0_P1_BLANK_LINES_AT_TOP,
870 RADEON_P1_BLNK_LN_AT_TOP_M1_MASK | (p1_active_lines << 16) );
871 OUTREG( regs, RADEON_OV0_P1_X_START_END, p1_x_end | (p1_x_start << 16) );
872 OUTREG( regs, RADEON_OV0_P1_H_ACCUM_INIT, p1_h_accum_init );
873 OUTREG( regs, RADEON_OV0_P1_V_ACCUM_INIT, p1_v_accum_init );
874
875 OUTREG( regs, RADEON_OV0_P23_BLANK_LINES_AT_TOP,
876 RADEON_P23_BLNK_LN_AT_TOP_M1_MASK | (p23_active_lines << 16) );
877 OUTREG( regs, RADEON_OV0_P2_X_START_END,
878 p23_x_end | (p23_x_start << 16) );
879 OUTREG( regs, RADEON_OV0_P3_X_START_END,
880 p23_x_end | (p23_x_start << 16) );
881 OUTREG( regs, RADEON_OV0_P23_H_ACCUM_INIT, p23_h_accum_init );
882 OUTREG( regs, RADEON_OV0_P23_V_ACCUM_INIT, p23_v_accum_init );
883
884 OUTREG( regs, RADEON_OV0_TEST, node->test_reg );
885
886 scale_ctrl = RADEON_SCALER_ENABLE |
887 RADEON_SCALER_DOUBLE_BUFFER |
888 (node->ati_space << 8) |
889 /* RADEON_SCALER_ADAPTIVE_DEINT | */
890 RADEON_SCALER_BURST_PER_PLANE |
891 (crtc->crtc_idx == 0 ? 0 : RADEON_SCALER_CRTC_SEL );
892
893 switch (node->ati_space << 8) {
894 case RADEON_SCALER_SOURCE_15BPP: // RGB15
895 case RADEON_SCALER_SOURCE_16BPP:
896 case RADEON_SCALER_SOURCE_32BPP:
897 OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl |
898 RADEON_SCALER_LIN_TRANS_BYPASS);
899 break;
900 case RADEON_SCALER_SOURCE_VYUY422: // VYUY422
901 case RADEON_SCALER_SOURCE_YVYU422: // YVYU422
902 OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl);
903 break;
904 default:
905 SHOW_FLOW(4, "What overlay format is this??? %d", node->ati_space);
906 OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl |
907 (( ai->si->asic >= rt_r200) ? R200_SCALER_TEMPORAL_DEINT : 0));
908
909 }
910
911 si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
912
913 OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL,
914 si->overlay_mgr.auto_flip_reg );
915
916 OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );
917
918 done:
919 ai->si->active_overlay.on = ai->si->pending_overlay.on;
920 ai->si->active_overlay.ow = ai->si->pending_overlay.ow;
921 ai->si->active_overlay.ov = ai->si->pending_overlay.ov;
922 ai->si->active_overlay.ob = ai->si->pending_overlay.ob;
923 ai->si->active_overlay.h_display_start = vc->mode.h_display_start;
924 ai->si->active_overlay.v_display_start = vc->mode.v_display_start;
925
926 return B_OK;
927 }
928
929
930 // hide overlay, but not permanently
Radeon_TempHideOverlay(accelerator_info * ai)931 void Radeon_TempHideOverlay(
932 accelerator_info *ai )
933 {
934 SHOW_FLOW0( 3, "" );
935
936 OUTREG( ai->regs, RADEON_OV0_SCALE_CNTL, 0 );
937 }
938
939
940 // hide overlay (can be called even if there is none visible)
Radeon_HideOverlay(accelerator_info * ai)941 void Radeon_HideOverlay(
942 accelerator_info *ai )
943 {
944 shared_info *si = ai->si;
945
946 Radeon_TempHideOverlay( ai );
947
948 // remember that there is no overlay to be shown
949 si->active_overlay.on = NULL;
950 si->active_overlay.prev_on = NULL;
951 si->pending_overlay.on = NULL;
952
953 // invalidate active head so it will be setup again once
954 // a new overlay is shown
955 si->active_overlay.crtc_idx = -1;
956 }
957
958
959 // show new overlay buffer with same parameters as last one
Radeon_ReplaceOverlayBuffer(accelerator_info * ai)960 static void Radeon_ReplaceOverlayBuffer(
961 accelerator_info *ai )
962 {
963 #if 0
964 shared_info *si = ai->si;
965 vuint8 *regs = ai->regs;
966 uint32 offset;
967 int /*old_buf, */new_buf;
968
969 offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;
970
971 /*old_buf = si->overlay_mgr.auto_flip_reg & RADEON_OV0_SOFT_BUF_NUM_MASK;
972 new_buf = old_buf == 0 ? 3 : 0;
973 si->overlay_mgr.auto_flip_reg &= ~RADEON_OV0_SOFT_BUF_NUM_MASK;
974 si->overlay_mgr.auto_flip_reg |= new_buf;*/
975 new_buf = 0;
976
977 // lock overlay registers
978 /* OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, RADEON_REG_LD_CTL_LOCK );
979
980 // wait until register access is locked
981 while( (INREG( regs, RADEON_OV0_REG_LOAD_CNTL)
982 & RADEON_REG_LD_CTL_LOCK_READBACK) == 0 )
983 ;*/
984
985 // setup new buffer
986 /*OUTREG( regs,
987 new_buf == 0 ? RADEON_OV0_VID_BUF_PITCH0_VALUE : RADEON_OV0_VID_BUF_PITCH1_VALUE,
988 si->pending_overlay.on->buffer.bytes_per_row );*/
989 OUTREG( regs,
990 new_buf == 0 ? RADEON_OV0_VID_BUF0_BASE_ADRS : RADEON_OV0_VID_BUF3_BASE_ADRS,
991 offset | (new_buf == 0 ? 0 : RADEON_VIF_BUF0_PITCH_SEL));
992
993 // make changes visible
994 si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
995
996 OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
997
998 // unlock overlay registers
999 // OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );
1000
1001 ai->si->active_overlay.on = ai->si->pending_overlay.on;
1002 #else
1003 shared_info *si = ai->si;
1004 uint32 offset;
1005
1006 if ( ai->si->acc_dma )
1007 {
1008 START_IB();
1009
1010 offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;
1011
1012 WRITE_IB_REG( RADEON_OV0_VID_BUF0_BASE_ADRS, offset);
1013
1014 si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
1015 WRITE_IB_REG( RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
1016
1017 SUBMIT_IB();
1018 } else {
1019 Radeon_WaitForFifo( ai, 2 );
1020 offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;
1021
1022 OUTREG( ai->regs, RADEON_OV0_VID_BUF0_BASE_ADRS, offset);
1023
1024 si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
1025 OUTREG( ai->regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
1026 }
1027 ai->si->active_overlay.on = ai->si->pending_overlay.on;
1028 #endif
1029 }
1030
1031
1032 // get number of pixels of overlay shown on virtual port
getIntersectArea(accelerator_info * ai,overlay_window * ow,crtc_info * crtc)1033 static int getIntersectArea(
1034 accelerator_info *ai, overlay_window *ow, crtc_info *crtc )
1035 {
1036 virtual_card *vc = ai->vc;
1037 int left, top, right, bottom;
1038
1039 left = ow->h_start - (vc->mode.h_display_start + crtc->rel_x);
1040 top = ow->v_start - (vc->mode.v_display_start + crtc->rel_y);
1041 right = left + ow->width;
1042 bottom = top + ow->height;
1043
1044 if( left < 0 )
1045 left = 0;
1046 if( top < 0 )
1047 top = 0;
1048 if( right > crtc->mode.timing.h_display )
1049 right = crtc->mode.timing.h_display;
1050 if( bottom > crtc->mode.timing.v_display )
1051 bottom = crtc->mode.timing.v_display;
1052
1053 if( right < left || bottom < top )
1054 return 0;
1055
1056 return (right - left) * (bottom - top);
1057 }
1058
1059
1060 // update overlay, to be called whenever something in terms of
1061 // overlay have or can have been changed
Radeon_UpdateOverlay(accelerator_info * ai)1062 status_t Radeon_UpdateOverlay(
1063 accelerator_info *ai )
1064 {
1065 virtual_card *vc = ai->vc;
1066 shared_info *si = ai->si;
1067 int crtc_idx;
1068
1069 float brightness = 0.0f;
1070 float contrast = 1.0f;
1071 float saturation = 1.0f;
1072 float hue = 0.0f;
1073 int32 ref = 0;
1074
1075 SHOW_FLOW0( 3, "" );
1076
1077 // don't mess around with overlay of someone else
1078 if ( !vc->uses_overlay )
1079 return B_OK;
1080
1081 // make sure there really is an overlay
1082 if ( si->pending_overlay.on == NULL )
1083 return B_OK;
1084
1085 // verify that the overlay is still valid
1086 if ((uintptr_t)si->pending_overlay.ot != si->overlay_mgr.token )
1087 return B_BAD_VALUE;
1088
1089 if ( vc->different_heads > 1 ) {
1090 int area0, area1;
1091
1092 // determine on which port most of the overlay is shown
1093 area0 = getIntersectArea( ai, &si->pending_overlay.ow, &si->crtc[0] );
1094 area1 = getIntersectArea( ai, &si->pending_overlay.ow, &si->crtc[0] );
1095
1096 SHOW_FLOW( 3, "area0=%d, area1=%d", area0, area1 );
1097
1098 if (area0 >= area1 )
1099 crtc_idx = 0;
1100 else
1101 crtc_idx = 1;
1102
1103 } else if ( vc->independant_heads > 1 ) {
1104 // both ports show the same, use "swap displays" to decide
1105 // where to show the overlay (to be improved as this flag isn't
1106 // really designed for that)
1107 if ( vc->swap_displays )
1108 crtc_idx = 1;
1109 else
1110 crtc_idx = 0;
1111
1112 } else {
1113
1114 // one crtc used only - pick the one that we use
1115 crtc_idx = vc->used_crtc[0] ? 0 : 1;
1116 }
1117
1118 si->pending_overlay.crtc_idx = crtc_idx;
1119
1120 // only update registers that have been changed to minimize work
1121 if( si->active_overlay.crtc_idx != si->pending_overlay.crtc_idx ) {
1122 Radeon_InitOverlay( ai, crtc_idx );
1123 }
1124
1125 if( si->active_overlay.ob.space != si->pending_overlay.ob.space ) {
1126 Radeon_SetTransform( ai, brightness, contrast, saturation, hue, 0, 0, 0, ref );
1127 }
1128
1129 if( memcmp( &si->active_overlay.ow, &si->pending_overlay.ow, sizeof( si->active_overlay.ow )) != 0 ||
1130 memcmp( &si->active_overlay.ov, &si->pending_overlay.ov, sizeof( si->active_overlay.ov )) != 0 ||
1131 si->active_overlay.h_display_start != vc->mode.h_display_start ||
1132 si->active_overlay.v_display_start != vc->mode.v_display_start ||
1133 si->active_overlay.ob.width != si->pending_overlay.ob.width ||
1134 si->active_overlay.ob.height != si->pending_overlay.ob.height ||
1135 si->active_overlay.ob.bytes_per_row != si->pending_overlay.ob.bytes_per_row )
1136 Radeon_ShowOverlay( ai, crtc_idx );
1137
1138 else if( si->active_overlay.on != si->pending_overlay.on )
1139 Radeon_ReplaceOverlayBuffer( ai );
1140
1141 SHOW_FLOW0( 3, "success" );
1142
1143 return B_OK;
1144 }
1145