xref: /haiku/src/add-ons/accelerants/radeon/overlay.c (revision 67bce78b48ed6d01b5a8eef89f5694c372b7e0a1)
1 /*
2 	Copyright (c) 2002, Thomas Kurschel
3 
4 
5 	Part of Radeon accelerant
6 
7 	Hardware access routines for overlays
8 */
9 
10 #include "GlobalData.h"
11 #include "radeon_interface.h"
12 #include "mmio.h"
13 #include "overlay_regs.h"
14 #include "pll_regs.h"
15 #include "capture_regs.h"
16 #include "cp_regs.h"
17 #include "utils.h"
18 #include <math.h>
19 #include <string.h>
20 
21 void Radeon_InitOverlay( accelerator_info *ai, virtual_port *overlay_port );
22 status_t Radeon_ShowOverlay( accelerator_info *ai, virtual_port *overlay_port );
23 void Radeon_ReplaceOverlayBuffer( accelerator_info *ai );
24 
25 
26 void Radeon_TempHideOverlay( accelerator_info *ai );
27 
28 // standard (linear) gamma
29 static struct {
30     uint16 reg;
31     bool r200_or_above;
32     uint32 slope;
33     uint32 offset;
34 } std_gamma[] = {
35     { RADEON_OV0_GAMMA_0_F, false, 0x100, 0x0000 },
36     { RADEON_OV0_GAMMA_10_1F, false, 0x100, 0x0020 },
37     { RADEON_OV0_GAMMA_20_3F, false, 0x100, 0x0040 },
38     { RADEON_OV0_GAMMA_40_7F, false, 0x100, 0x0080 },
39     { RADEON_OV0_GAMMA_80_BF, true, 0x100, 0x0100 },
40     { RADEON_OV0_GAMMA_C0_FF, true, 0x100, 0x0100 },
41     { RADEON_OV0_GAMMA_100_13F, true, 0x100, 0x0200 },
42     { RADEON_OV0_GAMMA_140_17F, true, 0x100, 0x0200 },
43     { RADEON_OV0_GAMMA_180_1BF, true, 0x100, 0x0300 },
44     { RADEON_OV0_GAMMA_1C0_1FF, true, 0x100, 0x0300 },
45     { RADEON_OV0_GAMMA_200_23F, true, 0x100, 0x0400 },
46     { RADEON_OV0_GAMMA_240_27F, true, 0x100, 0x0400 },
47     { RADEON_OV0_GAMMA_280_2BF, true, 0x100, 0x0500 },
48     { RADEON_OV0_GAMMA_2C0_2FF, true, 0x100, 0x0500 },
49     { RADEON_OV0_GAMMA_300_33F, true, 0x100, 0x0600 },
50     { RADEON_OV0_GAMMA_340_37F, true, 0x100, 0x0600 },
51     { RADEON_OV0_GAMMA_380_3BF, false, 0x100, 0x0700 },
52     { RADEON_OV0_GAMMA_3C0_3FF, false, 0x100, 0x0700 }
53 };
54 
55 
56 // setup overlay unit before first use
57 void Radeon_InitOverlay( accelerator_info *ai, virtual_port *overlay_port )
58 {
59 	vuint8 *regs = ai->regs;
60 	shared_info *si = ai->si;
61 	uint i;
62 	uint32 ecp_div;
63 
64 	SHOW_FLOW( 3, "physical_port=%d", overlay_port->physical_port );
65 
66 	Radeon_WaitForIdle( ai );
67 
68 	// make sure we really write this value as the "toggle" bit
69 	// contained in it (which is zero initially) is edge-sensitive!
70 	// for capturing, we need to select "software" video port
71 	si->overlay_mgr.auto_flip_reg = RADEON_OV0_VID_PORT_SELECT_SOFTWARE;
72 
73 	OUTREG( regs, RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOFT_RESET );
74 	OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
75 	OUTREG( regs, RADEON_OV0_FILTER_CNTL, 			// use fixed filter coefficients
76 		RADEON_OV0_HC_COEF_ON_HORZ_Y |
77 		RADEON_OV0_HC_COEF_ON_HORZ_UV |
78 		RADEON_OV0_HC_COEF_ON_VERT_Y |
79 		RADEON_OV0_HC_COEF_ON_VERT_UV );
80 	OUTREG( regs, RADEON_OV0_KEY_CNTL, RADEON_GRAPHIC_KEY_FN_EQ |
81 		RADEON_VIDEO_KEY_FN_FALSE |
82 		RADEON_CMP_MIX_OR );
83 	OUTREG( regs, RADEON_OV0_TEST, 0 );
84 //	OUTREG( regs, RADEON_FCP_CNTL, RADEON_FCP_CNTL_GND );	// disable capture clock
85 //	OUTREG( regs, RADEON_CAP0_TRIG_CNTL, 0 );				// disable capturing
86 	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );
87 	// tell deinterlacer to always show recent field
88 	OUTREG( regs, RADEON_OV0_DEINTERLACE_PATTERN,
89 		0xaaaaa | (9 << RADEON_OV0_DEINT_PAT_LEN_M1_SHIFT) );
90 
91 	// set gamma
92 	for( i = 0; i < sizeof( std_gamma ) / sizeof( std_gamma[0] ); ++i ) {
93 		if( !std_gamma[i].r200_or_above || si->asic >= rt_r200 ) {
94 			OUTREG( regs, std_gamma[i].reg,
95 				(std_gamma[i].slope << 16) | std_gamma[i].offset );
96 		}
97 	}
98 
99 	// overlay unit can only handle up to 175 MHz, if pixel clock is higher,
100 	// only every second pixel is handled
101 	if( overlay_port->mode.timing.pixel_clock < 175000 )
102 		ecp_div = 0;
103 	else
104 		ecp_div = 1;
105 
106 	Radeon_OUTPLLP( ai, RADEON_VCLK_ECP_CNTL,
107 		ecp_div << RADEON_ECP_DIV_SHIFT, ~RADEON_ECP_DIV_MASK );
108 
109 	si->active_overlay.port = si->pending_overlay.port;
110 
111 	// invalidate active colour space
112 	si->active_overlay.ob.space = -1;
113 
114 	// invalidate position/scaling
115 	si->active_overlay.ob.width = -1;
116 }
117 
// colour space transformation matrix
// (coefficients that map an overlay colour triple onto RGB)
typedef struct space_transform
{
	float	RefLuma;	// scaling of luma to use full RGB range

	// contributions to red
	float	RefRCb;		// b/u -> r
	float	RefRY;		// g/y -> r
	float	RefRCr;		// r/v -> r

	// contributions to green
	float	RefGCb;
	float	RefGY;
	float	RefGCr;

	// contributions to blue
	float	RefBCb;
	float	RefBY;
	float	RefBCr;
} space_transform;


// Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
space_transform trans_yuv[2] =
{
	{	/* BT.601 */
		.RefLuma = 1.1678,
		.RefRCb = 0.0,		.RefRY = 1,	.RefRCr = 1.6007,
		.RefGCb = -0.3929,	.RefGY = 1,	.RefGCr = -0.8154,
		.RefBCb = 2.0232,	.RefBY = 1,	.RefBCr = 0.0
	},
	{	/* BT.709 */
		.RefLuma = 1.1678,
		.RefRCb = 0.0,		.RefRY = 1,	.RefRCr = 1.7980,
		.RefGCb = -0.2139,	.RefGY = 1,	.RefGCr = -0.5345,
		.RefBCb = 2.1186,	.RefBY = 1,	.RefBCr = 0.0
	}
};


// RGB is a pass through
space_transform trans_rgb =
{
	.RefLuma = 1,
	.RefRCb = 0,	.RefRY = 0,	.RefRCr = 1,
	.RefGCb = 0,	.RefGY = 1,	.RefGCr = 0,
	.RefBCb = 1,	.RefBY = 0,	.RefBCr = 0
};
145 
146 
147 // set overlay colour space transformation matrix
148 static void Radeon_SetTransform( accelerator_info *ai,
149 	float	    bright,
150 	float	    cont,
151 	float	    sat,
152 	float	    hue,
153 	float	    red_intensity,
154 	float	    green_intensity,
155 	float	    blue_intensity,
156 	uint	    ref)
157 {
158 	vuint8 *regs = ai->regs;
159 	shared_info *si = ai->si;
160 	float	    OvHueSin, OvHueCos;
161 	float	    CAdjOff;
162 	float		CAdjRY, CAdjGY, CAdjBY;
163 	float	    CAdjRCb, CAdjRCr;
164 	float	    CAdjGCb, CAdjGCr;
165 	float	    CAdjBCb, CAdjBCr;
166 	float	    RedAdj,GreenAdj,BlueAdj;
167 	float	    OvROff, OvGOff, OvBOff;
168 	float		OvRY, OvGY, OvBY;
169 	float	    OvRCb, OvRCr;
170 	float	    OvGCb, OvGCr;
171 	float	    OvBCb, OvBCr;
172 	float	    Loff;
173 	float	    Coff;
174 
175 	uint32	    dwOvROff, dwOvGOff, dwOvBOff;
176 	uint32		dwOvRY, dwOvGY, dwOvBY;
177 	uint32	    dwOvRCb, dwOvRCr;
178 	uint32	    dwOvGCb, dwOvGCr;
179 	uint32	    dwOvBCb, dwOvBCr;
180 
181 	space_transform	*trans;
182 
183 	SHOW_FLOW0( 3, "" );
184 
185 	// get proper conversion formula
186 	switch( si->pending_overlay.ob.space ) {
187 	case B_YCbCr422:
188 	case B_YUV12:
189 		Loff = 16 * 4;		// internal representation is 10 Bits
190 		Coff = 128 * 4;
191 
192 		if (ref >= 2)
193 			ref = 0;
194 
195 		trans = &trans_yuv[ref];
196 		break;
197 
198 	case B_RGB15:
199 	case B_RGB16:
200 	case B_RGB32:
201 	default:
202 		Loff = 0;
203 		Coff = 0;
204 		trans = &trans_rgb;
205 	}
206 
207 	OvHueSin = sin(hue);
208 	OvHueCos = cos(hue);
209 
210 	// get matrix values to convert overlay colour space to RGB
211 	// applying colour adjustment, saturation and luma scaling
212 	// (saturation doesn't work with RGB input, perhaps it did with some
213 	//  maths; this is left to the reader :)
214 	CAdjRY = cont * trans->RefLuma * trans->RefRY;
215 	CAdjGY = cont * trans->RefLuma * trans->RefGY;
216 	CAdjBY = cont * trans->RefLuma * trans->RefBY;
217 
218 	CAdjRCb = sat * -OvHueSin * trans->RefRCr;
219 	CAdjRCr = sat * OvHueCos * trans->RefRCr;
220 	CAdjGCb = sat * (OvHueCos * trans->RefGCb - OvHueSin * trans->RefGCr);
221 	CAdjGCr = sat * (OvHueSin * trans->RefGCb + OvHueCos * trans->RefGCr);
222 	CAdjBCb = sat * OvHueCos * trans->RefBCb;
223 	CAdjBCr = sat * OvHueSin * trans->RefBCb;
224 
225 	// adjust black level
226 	CAdjOff = cont * trans[ref].RefLuma * bright * 1023.0;
227 	RedAdj = cont * trans[ref].RefLuma * red_intensity * 1023.0;
228 	GreenAdj = cont * trans[ref].RefLuma * green_intensity * 1023.0;
229 	BlueAdj = cont * trans[ref].RefLuma * blue_intensity * 1023.0;
230 
231 	OvRY = CAdjRY;
232 	OvGY = CAdjGY;
233 	OvBY = CAdjBY;
234 	OvRCb = CAdjRCb;
235 	OvRCr = CAdjRCr;
236 	OvGCb = CAdjGCb;
237 	OvGCr = CAdjGCr;
238 	OvBCb = CAdjBCb;
239 	OvBCr = CAdjBCr;
240 	// apply offsets
241 	OvROff = RedAdj + CAdjOff -	CAdjRY * Loff - (OvRCb + OvRCr) * Coff;
242 	OvGOff = GreenAdj + CAdjOff - CAdjGY * Loff - (OvGCb + OvGCr) * Coff;
243 	OvBOff = BlueAdj + CAdjOff - CAdjBY * Loff - (OvBCb + OvBCr) * Coff;
244 
245 	dwOvROff = ((int32)(OvROff * 2.0)) & 0x1fff;
246 	dwOvGOff = ((int32)(OvGOff * 2.0)) & 0x1fff;
247 	dwOvBOff = ((int32)(OvBOff * 2.0)) & 0x1fff;
248 
249 	dwOvRY = (((int32)(OvRY * 2048.0))&0x7fff)<<17;
250 	dwOvGY = (((int32)(OvGY * 2048.0))&0x7fff)<<17;
251 	dwOvBY = (((int32)(OvBY * 2048.0))&0x7fff)<<17;
252 	dwOvRCb = (((int32)(OvRCb * 2048.0))&0x7fff)<<1;
253 	dwOvRCr = (((int32)(OvRCr * 2048.0))&0x7fff)<<17;
254 	dwOvGCb = (((int32)(OvGCb * 2048.0))&0x7fff)<<1;
255 	dwOvGCr = (((int32)(OvGCr * 2048.0))&0x7fff)<<17;
256 	dwOvBCb = (((int32)(OvBCb * 2048.0))&0x7fff)<<1;
257 	dwOvBCr = (((int32)(OvBCr * 2048.0))&0x7fff)<<17;
258 
259 	OUTREG( regs, RADEON_OV0_LIN_TRANS_A, dwOvRCb | dwOvRY );
260 	OUTREG( regs, RADEON_OV0_LIN_TRANS_B, dwOvROff | dwOvRCr );
261 	OUTREG( regs, RADEON_OV0_LIN_TRANS_C, dwOvGCb | dwOvGY );
262 	OUTREG( regs, RADEON_OV0_LIN_TRANS_D, dwOvGOff | dwOvGCr );
263 	OUTREG( regs, RADEON_OV0_LIN_TRANS_E, dwOvBCb | dwOvBY );
264 	OUTREG( regs, RADEON_OV0_LIN_TRANS_F, dwOvBOff | dwOvBCr );
265 
266 	si->active_overlay.ob.space = si->pending_overlay.ob.space;
267 }
268 
269 
270 // convert Be colour key to rgb value
271 static uint32 colourKey2RGB32( uint32 space, uint8 red, uint8 green, uint8 blue )
272 {
273 	uint32 res;
274 
275 	SHOW_FLOW0( 3, "" );
276 
277 	// the way Be defines colour keys may be convinient to some driver developers,
278 	// but it's not well defined - took me some time to find out the format used
279 	// and still I have no idea how alpha is defined; Rudolf told me that alpha is
280 	// never used
281 	switch( space ) {
282 	case B_RGB15:
283 		res =
284 			((uint32)(red >> 0) << (16+3)) |
285 			((uint32)(green >> 0) << (8+3)) |
286 			((blue >> 0) << 3);
287 		break;
288 	case B_RGB16:
289 		res =
290 			((uint32)(red >> 0) << (16+3)) |
291 			((uint32)(green >> 0) << (8+2)) |
292 			((blue >> 0) << 3);
293 		break;
294 	case B_RGB32:
295 	case B_CMAP8:
296 		res = ((uint32)(red) << 16) | ((uint32)(green) << 8) | blue;
297 		break;
298 	default:
299 		res = 0;
300 	}
301 
302 	SHOW_FLOW( 3, "key=%lx", res );
303 	return res;
304 }
305 
306 
307 // set colour key of overlay
308 void Radeon_SetColourKey( accelerator_info *ai, const overlay_window *ow )
309 {
310 	virtual_card *vc = ai->vc;
311 	uint32 rgb32;
312 	uint32 buffer[3*2];
313 	uint idx = 0;
314 
315 	SHOW_FLOW0( 3, "" );
316 
317 	rgb32 = colourKey2RGB32( vc->mode.space,
318 		ow->red.value, ow->green.value, ow->blue.value );
319 
320 	buffer[idx++] = CP_PACKET0( RADEON_OV0_GRAPHICS_KEY_CLR_LOW, 0 );
321 	buffer[idx++] = rgb32;
322 	buffer[idx++] = CP_PACKET0( RADEON_OV0_GRAPHICS_KEY_CLR_HIGH, 0 );
323 	buffer[idx++] = rgb32;
324     buffer[idx++] = CP_PACKET0( RADEON_OV0_KEY_CNTL, 0 );
325 	buffer[idx++] = RADEON_GRAPHIC_KEY_FN_EQ |
326 		RADEON_VIDEO_KEY_FN_FALSE |
327 		RADEON_CMP_MIX_OR;
328 
329 	Radeon_SendCP( ai, buffer, idx );
330 }
331 
332 typedef struct {
333 	uint max_scale;					// maximum src_width/dest_width,
334 									// i.e. source increment per screen pixel
335 	uint8 group_size; 				// size of one filter group in pixels
336 	uint8 p1_step_by, p23_step_by;	// > 0: log(source pixel increment)+1, 2-tap filter
337 									// = 0: source pixel increment = 1, 4-tap filter
338 } hscale_factor;
339 
// number of elements of an array; "a" must be a real array, not a pointer
// (the argument is parenthesized so any array expression can be passed)
#define count_of( a ) (sizeof( a ) / sizeof( (a)[0] ))
341 
342 // scaling/filter tables depending on overlay colour space:
343 // magnifying pixels is no problem, but minifying can lead to overload,
344 // so we have to skip pixels and/or use 2-tap filters
345 static hscale_factor scale_RGB16[] = {
346 	{ (2 << 12), 		2, 1, 1 },
347 	{ (4 << 12), 		2, 2, 2 },
348 	{ (8 << 12), 		2, 3, 3 },
349 	{ (16 << 12), 		2, 4, 4 },
350 	{ (32 << 12), 		2, 5, 5 }
351 };
352 
353 static hscale_factor scale_RGB32[] = {
354 	{ (2 << 12) / 3,	2, 0, 0 },
355 	{ (4 << 12) / 3,	4, 1, 1 },
356 	{ (8 << 12) / 3,	4, 2, 2 },
357 	{ (4 << 12), 		4, 2, 3 },
358 	{ (16 << 12) / 3,	4, 3, 3 },
359 	{ (8 << 12), 		4, 3, 4 },
360 	{ (32 << 12) / 3,	4, 4, 4 },
361 	{ (16 << 12),		4, 5, 5 }
362 };
363 
364 static hscale_factor scale_YUV[] = {
365 	{ (16 << 12) / 16,	2, 0, 0 },
366 	{ (16 << 12) / 12,	2, 0, 1 },	// mode 4, 1, 0 (as used by YUV12) is impossible
367 	{ (16 << 12) / 8,	4, 1, 1 },
368 	{ (16 << 12) / 6,	4, 1, 2 },
369 	{ (16 << 12) / 4,	4, 2, 2 },
370 	{ (16 << 12) / 3,	4, 2, 3 },
371 	{ (16 << 12) / 2,	4, 3, 3 },
372 	{ (16 << 12) / 1,	4, 4, 4 }
373 };
374 
375 static hscale_factor scale_YUV12[] = {
376 	{ (16 << 12) / 16,			2, 0, 0 },
377 	{ (16 << 12) / 12,			4, 1, 0 },
378 	{ (16 << 12) / 12,			2, 0, 1 },
379 	{ (16 << 12) / 8,			4, 1, 1 },
380 	{ (16 << 12) / 6,			4, 1, 2 },
381 	{ (16 << 12) / 4,			4, 2, 2 },
382 	{ (16 << 12) / 3,			4, 2, 3 },
383 	{ (16 << 12) / 2,			4, 3, 3 },
384 	{ (int)((16 << 12) / 1.5),	4, 3, 4 },
385 	{ (int)((16 << 12) / 1.0),	4, 4, 4 },
386 	{ (int)((16 << 12) / 0.75),	4, 4, 5 },
387 	{ (int)((16 << 12) / 0.5),	4, 5, 5 }
388 };
389 
// smallest of three values (built on top of the two-argument min)
#define min3( a, b, c ) (min( min( (a), (b) ), (c) ))
391 
392 static hscale_factor scale_YUV9[] = {
393 	{ min3( (16 << 12) / 12,	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	2, 0, 0 },
394 	{ min3( (16 << 12) / 8, 	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	4, 1, 0 },
395 	{ min3( (16 << 12) / 10,	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	2, 0, 1 },
396 	{ min3( (16 << 12) / 6, 	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	4, 1, 1 },
397 	{ min3( (16 << 12) / 5, 	(3 << 12) * 1,	(2 << 12) * 4 * 2 ),	4, 1, 2 },
398 	{ min3( (16 << 12) / 3, 	(3 << 12) * 2,	(2 << 12) * 4 * 2 ),	4, 2, 2 },
399 	{ min3( (int)((16 << 12) / 2.5), 	(3 << 12) * 1,	(2 << 12) * 4 * 4 ),	4, 2, 3 },	// probably, it should be (3 << 12) * 2
400 	{ min3( (int)((16 << 12) / 1.5), 	(3 << 12) * 4,	(2 << 12) * 4 * 4 ),	4, 3, 3 },
401 	{ min3( (int)((16 << 12) / 0.75), 	(3 << 12) * 8,	(2 << 12) * 4 * 8 ),	4, 4, 4 },
402 	{ min3( (int)((16 << 12) / 0.625), 	(3 << 12) * 8,	(2 << 12) * 4 * 16 ),	4, 4, 5 },
403 	{ min3( (int)((16 << 12) / 0.375), 	(3 << 12) * 16,	(2 << 12) * 4 * 16 ),	4, 5, 5 }
404 };
405 
406 
407 // parameters of an overlay colour space
408 typedef struct {
409 	uint8 bpp_shift;				// log2( bytes per pixel (main plain) )
410 	uint8 bpuv_shift;				// log2( bytes per pixel (uv-plane) );
411 									// if there is one plane only: bpp=bpuv
412 	uint8 num_planes;				// number of planes
413 	uint8 h_uv_sub_sample_shift;	// log2( horizontal pixels per uv pair )
414 	uint8 v_uv_sub_sample_shift;	// log2( vertical pixels per uv pair )
415 	hscale_factor *factors;			// scaling/filter table
416 	uint8 num_factors;
417 } space_params;
418 
419 static space_params space_params_table[16] = {
420 	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
421 	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
422 	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
423 	{ 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) },	// RGB15
424 	{ 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) },	// RGB16
425 	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
426 	{ 2, 2, 1, 0, 0, scale_RGB32, count_of( scale_RGB32 ) },	// RGB32
427 	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
428 	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
429 	{ 0, 0, 3, 2, 2, scale_YUV9, count_of( scale_YUV9 ) },		// YUV9
430 	{ 0, 0, 3, 1, 1, scale_YUV12, count_of( scale_YUV12 ) },	// YUV12, three-plane
431 	{ 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) },		// VYUY422
432 	{ 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) },		// YVYU422
433 	{ 0, 1, 2, 1, 1, scale_YUV12, count_of( scale_YUV12 ) },	// YUV12, two-plane
434 	{ 0, 1, 2, 1, 1, NULL, 0 },	// ???
435 	{ 0, 0, 0, 0, 0, NULL, 0 }	// reserved
436 };
437 
438 // get appropriate scaling/filter parameters
439 static hscale_factor *getHScaleFactor( space_params *params,
440 	uint32 src_left, uint32 src_right, uint32 *h_inc )
441 {
442 	uint words_per_p1_line, words_per_p23_line, max_words_per_line;
443 	bool p1_4tap_allowed, p23_4tap_allowed;
444 	uint i;
445 	uint num_factors;
446 	hscale_factor *factors;
447 
448 	SHOW_FLOW0( 3, "" );
449 
450 	// check whether fifo is large enough to feed vertical 4-tap-filter
451 
452 	words_per_p1_line =
453 		ceilShiftDiv( (src_right - 1) << params->bpp_shift, 4 ) -
454 		((src_left << params->bpp_shift) >> 4) + 1;
455 	words_per_p23_line =
456 		ceilShiftDiv( (src_right - 1) << params->bpuv_shift, 4 ) -
457 		((src_left << params->bpuv_shift) >> 4) + 1;
458 
459 	// overlay buffer for one line; this value is probably
460 	// higher on newer Radeons (or smaller on older Radeons?)
461 	max_words_per_line = 96;
462 
463 	switch( params->num_planes ) {
464 	case 3:
465 		p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2;
466 		p23_4tap_allowed = words_per_p23_line < max_words_per_line / 4;
467 		break;
468 	case 2:
469 		p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2;
470 		p23_4tap_allowed = words_per_p23_line < max_words_per_line / 2;
471 		break;
472 	case 1:
473 	default:
474 		p1_4tap_allowed = p23_4tap_allowed = words_per_p1_line < max_words_per_line;
475 		break;
476 	}
477 
478 	SHOW_FLOW( 3, "p1_4tap_allowed=%d, p23_4t_allowed=%d",
479 		(int)p1_4tap_allowed, (int)p23_4tap_allowed );
480 
481 	// search for proper scaling/filter entry
482 	factors = params->factors;
483 	num_factors = params->num_factors;
484 
485 	if( factors == NULL || num_factors == 0 )
486 		return NULL;
487 
488 	for( i = 0; i < num_factors; ++i, ++factors ) {
489 		if( *h_inc <= factors->max_scale &&
490 			(factors->p1_step_by > 0 || p1_4tap_allowed) &&
491 			(factors->p23_step_by > 0 || p23_4tap_allowed))
492 			break;
493 	}
494 
495 	if( i == num_factors ) {
496 		// overlay is asked to be scaled down more than allowed,
497 		// so use least scaling factor supported
498 		--factors;
499 		*h_inc = factors->max_scale;
500 	}
501 
502 	SHOW_FLOW( 3, "group_size=%d, p1_step_by=%d, p23_step_by=%d",
503 		factors->group_size, factors->p1_step_by, factors->p23_step_by );
504 
505 	return factors;
506 }
507 
508 
// convert a number to fixed point with "shift" fractional bits
#define I2FF( a, shift ) \
	((uint32)( (a) * (1 << (shift)) ))
510 
511 
512 // show overlay on screen
513 status_t Radeon_ShowOverlay( accelerator_info *ai, virtual_port *overlay_port )
514 {
515 	virtual_card *vc = ai->vc;
516 //	vuint8 *regs = ai->regs;
517 	shared_info *si = ai->si;
518 	overlay_info *overlay = &si->pending_overlay;
519 	overlay_buffer_node *node = overlay->on;
520 
521 	uint32 ecp_div;
522 //	uint32 step_by;
523 	uint32 v_inc, h_inc;
524 	uint32 src_v_inc, src_h_inc;
525 	uint32 src_left, src_top, src_right, src_bottom;
526 	int32 dest_left, dest_top, dest_right, dest_bottom;
527 	uint32 offset;
528 	uint32 tmp;
529 	uint32 p1_h_accum_init, p23_h_accum_init, p1_v_accum_init, p23_v_accum_init;
530 	uint32 p1_active_lines, p23_active_lines;
531 	hscale_factor *factors;
532 	space_params *params;
533 
534 	uint32 p1_h_inc, p23_h_inc;
535 	uint32 p1_x_start, p1_x_end;
536 	uint32 p23_x_start, p23_x_end;
537 
538 	uint32 buffer[20*2];
539 	uint idx = 0;
540 
541 	SHOW_FLOW0( 3, "" );
542 
543 	Radeon_SetColourKey( ai, &overlay->ow );
544 
545 	// overlay unit can only handle up to 175 MHz; if pixel clock is higher,
546 	// only every second pixel is handled
547 	// (this devider is gets written into PLL by OverlayInit,
548 	//  so we don't need to do it ourself)
549 	if( overlay_port->mode.timing.pixel_clock < 175000 )
550 		ecp_div = 0;
551 	else
552 		ecp_div = 1;
553 
554 
555 	// scaling is independant of clipping, get this first
556 	{
557 		uint32 src_width, src_height;
558 
559 		src_width = overlay->ov.width;
560 		src_height = overlay->ov.height;
561 
562 		// this is for graphics card
563 		v_inc = (src_height << 20) / overlay->ow.height;
564 		h_inc = (src_width << (12 + ecp_div)) / overlay->ow.width;
565 
566 
567 		// this is for us
568 		src_v_inc = (src_height << 16) / overlay->ow.height;
569 		src_h_inc = (src_width << 16) / overlay->ow.width;
570 	}
571 
572 	// calculate unclipped position/size
573 	// TBD: I assume that overlay_window.offset_xyz is only a hint where
574 	//      no overlay is visible; another interpretation were to zoom
575 	//      the overlay so it fits into remaining space
576 	src_left = (overlay->ov.h_start << 16) + overlay->ow.offset_left * src_h_inc;
577 	src_top = (overlay->ov.v_start << 16) + overlay->ow.offset_top * src_v_inc;
578 	src_right = ((overlay->ov.h_start + overlay->ov.width) << 16) -
579 		overlay->ow.offset_right * src_h_inc;
580 	src_bottom = ((overlay->ov.v_start + overlay->ov.height) << 16) -
581 		overlay->ow.offset_top * src_v_inc;
582 	dest_left = overlay->ow.h_start + overlay->ow.offset_left;
583 	dest_top = overlay->ow.v_start + overlay->ow.offset_top;
584 	dest_right = overlay->ow.h_start + overlay->ow.width - overlay->ow.offset_right;
585 	dest_bottom = overlay->ow.v_start + overlay->ow.height - overlay->ow.offset_bottom;
586 
587 	SHOW_FLOW( 3, "ow: h=%d, v=%d, width=%d, height=%d",
588 		overlay->ow.h_start, overlay->ow.v_start,
589 		overlay->ow.width, overlay->ow.height );
590 
591 	SHOW_FLOW( 3, "offset_left=%d, offset_right=%d, offset_top=%d, offset_bottom=%d",
592 		overlay->ow.offset_left, overlay->ow.offset_right,
593 		overlay->ow.offset_top, overlay->ow.offset_bottom );
594 
595 
596 	// apply virtual screen
597 	dest_left -= vc->mode.h_display_start + overlay_port->rel_x;
598 	dest_top -= vc->mode.v_display_start + overlay_port->rel_y;
599 	dest_right -= vc->mode.h_display_start + overlay_port->rel_x;
600 	dest_bottom -= vc->mode.v_display_start + overlay_port->rel_y;
601 
602 
603 	// clip to visible area
604 	if( dest_left < 0 ) {
605 		src_left += -dest_left * src_h_inc;
606 		dest_left = 0;
607 	}
608 	if( dest_top < 0 ) {
609 		src_top += -dest_top * src_v_inc;
610 		dest_top = 0;
611 	}
612 
613 	SHOW_FLOW( 3, "mode: w=%d, h=%d",
614 		overlay_port->mode.timing.h_display, overlay_port->mode.timing.v_display );
615 
616 	if( dest_right > overlay_port->mode.timing.h_display )
617 		dest_right = overlay_port->mode.timing.h_display;
618 	if( dest_bottom > overlay_port->mode.timing.v_display )
619 		dest_bottom = overlay_port->mode.timing.v_display;
620 
621 	SHOW_FLOW( 3, "src=(%d, %d, %d, %d)",
622 		src_left, src_top, src_right, src_bottom );
623 	SHOW_FLOW( 3, "dest=(%d, %d, %d, %d)",
624 		dest_left, dest_top, dest_right, dest_bottom );
625 
626 
627 	// especially with multi-screen modes the overlay may not be on screen at all
628 	if( dest_left >= dest_right || dest_top >= dest_bottom ||
629 		src_left >= src_right || src_top >= src_bottom )
630 	{
631 		Radeon_TempHideOverlay( ai );
632 		goto done;
633 	}
634 
635 
636 	// let's calculate all those nice register values
637 	SHOW_FLOW( 3, "ati_space=%d", node->ati_space );
638 	params = &space_params_table[node->ati_space];
639 
640 	// choose proper scaler
641 	{
642 		factors = getHScaleFactor( params, src_left >> 16, src_right >> 16, &h_inc );
643 		if( factors == NULL )
644 			return B_ERROR;
645 
646 		p1_h_inc = factors->p1_step_by > 0 ?
647 			h_inc >> (factors->p1_step_by - 1) : h_inc;
648 		p23_h_inc =
649 			(factors->p23_step_by > 0 ? h_inc >> (factors->p23_step_by - 1) : h_inc)
650 			>> params->h_uv_sub_sample_shift;
651 
652 		SHOW_FLOW( 3, "p1_h_inc=%x, p23_h_inc=%x", p1_h_inc, p23_h_inc );
653 	}
654 
655 	// get register value for start/end position of overlay image (pixel-precise only)
656 	{
657 		uint32 p1_step_size, p23_step_size;
658 		uint32 p1_left, p1_right, p1_width;
659 		uint32 p23_left, p23_right, p23_width;
660 
661 		p1_left = src_left >> 16;
662 		p1_right = src_right >> 16;
663 		p1_width = p1_right - p1_left;
664 
665 		p1_step_size = factors->p1_step_by > 0 ? (1 << (factors->p1_step_by - 1)) : 1;
666 		p1_x_start = p1_left % (16 >> params->bpp_shift);
667 		p1_x_end = ((p1_x_start + p1_width - 1) / p1_step_size) * p1_step_size;
668 
669 		SHOW_FLOW( 3, "p1_x_start=%d, p1_x_end=%d", p1_x_start, p1_x_end );
670 
671 		p23_left = (src_left >> 16) >> params->h_uv_sub_sample_shift;
672 		p23_right = (src_right >> 16) >> params->h_uv_sub_sample_shift;
673 		p23_width = p23_right - p23_left;
674 
675 		p23_step_size = factors->p23_step_by > 0 ? (1 << (factors->p23_step_by - 1)) : 1;
676 		// if resolution of Y and U/V differs but YUV are stored in one
677 		// plane then UV alignment depends on Y data, therefore the hack
678 		// (you are welcome to replace this with some cleaner code ;)
679 		p23_x_start = p23_left %
680 			((16 >> params->bpuv_shift) /
681 			 (node->ati_space == 11 || node->ati_space == 12 ? 2 : 1));
682 		p23_x_end = (int)((p23_x_start + p23_width - 1) / p23_step_size) * p23_step_size;
683 
684 		SHOW_FLOW( 3, "p23_x_start=%d, p23_x_end=%d", p23_x_start, p23_x_end );
685 
686 		// get memory location of first word to be read by scaler
687 		// (save relative offset for fast update)
688 		si->active_overlay.rel_offset = (src_top >> 16) * node->buffer.bytes_per_row +
689 			((p1_left << params->bpp_shift) & ~0xf);
690 		offset = node->mem_offset + si->active_overlay.rel_offset;
691 
692 		SHOW_FLOW( 3, "rel_offset=%x", si->active_overlay.rel_offset );
693 	}
694 
695 	// get active lines for scaler
696 	// (we could add additional blank lines for DVD letter box mode,
697 	//  but this is not supported by API; additionally, this only makes
698 	//  sense if want to put subtitles onto the black border, which is
699 	//  supported neither)
700 	{
701 		uint16 int_top, int_bottom;
702 
703 		int_top = src_top >> 16;
704 		int_bottom = (src_bottom >> 16);
705 
706 		p1_active_lines = int_bottom - int_top - 1;
707 		p23_active_lines =
708 			ceilShiftDiv( int_bottom - 1, params->v_uv_sub_sample_shift ) -
709 			(int_top >> params->v_uv_sub_sample_shift);
710 
711 		SHOW_FLOW( 3, "p1_active_lines=%d, p23_active_lines=%d",
712 			p1_active_lines, p23_active_lines );
713 	}
714 
715 	// if picture is stretched for flat panel, we need to scale all
716 	// vertical values accordingly
717 	// TBD: there is no description at all concerning this, so v_accum_init may
718 	//      need to be initialized based on original value
719 	{
720 		display_type_e disp_type;
721 
722 		disp_type = si->ports[overlay_port->physical_port].disp_type;
723 		if( disp_type == dt_lvds || disp_type == dt_dvi_1 ) {
724 			uint64 v_ratio;
725 
726 			// convert 32.32 format to 16.16 format; else we
727 			// cannot multiply two fixed point values without
728 			// overflow
729 			v_ratio = si->fp_port.v_ratio >> (FIX_SHIFT - 16);
730 
731 			v_inc = (v_inc * v_ratio) >> 16;
732 		}
733 
734 		SHOW_FLOW( 3, "v_inc=%x", v_inc );
735 	}
736 
737 	// get initial horizontal scaler values, taking care of precharge
738 	// don't ask questions about formulas - take them as is
739 	// (TBD: home-brewed sub-pixel source clipping may be wrong,
740 	//       especially for uv-planes)
741 	{
742 		uint32 p23_group_size;
743 
744 	    tmp = ((src_left & 0xffff) >> 11) + (
745 	    	(
746 		    	I2FF( p1_x_start % factors->group_size, 12 ) +
747 		    	I2FF( 2.5, 12 ) +
748 		    	p1_h_inc / 2 +
749 		    	I2FF( 0.5, 12-5 )	// rounding
750 	        ) >> (12 - 5));	// scaled by 1 << 5
751 
752 	    SHOW_FLOW( 3, "p1_h_accum_init=%x", tmp );
753 
754 		p1_h_accum_init =
755 			((tmp << 15) & RADEON_OV0_P1_H_ACCUM_INIT_MASK) |
756 			((tmp << 23) & RADEON_OV0_P1_PRESHIFT_MASK);
757 
758 
759 		p23_group_size = 2;
760 
761 		tmp = ((src_left & 0xffff) >> 11) + (
762 			(
763 				I2FF( p23_x_start % p23_group_size, 12 ) +
764 				I2FF( 2.5, 12 ) +
765 				p23_h_inc / 2 +
766 				I2FF( 0.5, 12-5 )	// rounding
767 			) >> (12 - 5)); // scaled by 1 << 5
768 
769 		SHOW_FLOW( 3, "p23_h_accum_init=%x", tmp );
770 
771 		p23_h_accum_init =
772 			((tmp << 15) & RADEON_OV0_P23_H_ACCUM_INIT_MASK) |
773 			((tmp << 23) & RADEON_OV0_P23_PRESHIFT_MASK);
774 	}
775 
776 	// get initial vertical scaler values, taking care of precharge
777 	{
778 		uint extra_full_line;
779 
780 		extra_full_line = factors->p1_step_by == 0 ? 1 : 0;
781 
782 	    tmp = ((src_top & 0x0000ffff) >> 11) + (
783 	    	(min(
784 		    	I2FF( 1.5, 20 ) + I2FF( extra_full_line, 20 ) + v_inc / 2,
785 	    		I2FF( 2.5, 20 ) + 2 * I2FF( extra_full_line, 20 )
786 	    	 ) + I2FF( 0.5, 20-5 )) // rounding
787 	    	>> (20 - 5)); // scaled by 1 << 5
788 
789 	    SHOW_FLOW( 3, "p1_v_accum_init=%x", tmp );
790 
791 		p1_v_accum_init =
792 			((tmp << 15) & RADEON_OV0_P1_V_ACCUM_INIT_MASK) | 0x00000001;
793 
794 
795 		extra_full_line = factors->p23_step_by == 0 ? 1 : 0;
796 
797 		if( params->v_uv_sub_sample_shift > 0 ) {
798 			tmp = ((src_top & 0x0000ffff) >> 11) + (
799 				(min(
800 					I2FF( 1.5, 20 ) +
801 						I2FF( extra_full_line, 20 ) +
802 						((v_inc / 2) >> params->v_uv_sub_sample_shift),
803 					I2FF( 2.5, 20 ) +
804 						2 * I2FF( extra_full_line, 20 )
805 				 ) + I2FF( 0.5, 20-5 )) // rounding
806 				>> (20 - 5)); // scaled by 1 << 5
807 		} else {
808 			tmp = ((src_top & 0x0000ffff) >> 11) + (
809 				(
810 					I2FF( 2.5, 20 ) +
811 					2 * I2FF( extra_full_line, 20 ) +
812 					I2FF( 0.5, 20-5 )	// rounding
813 				) >> (20 - 5)); // scaled by 1 << 5
814 		}
815 
816 		SHOW_FLOW( 3, "p23_v_accum_init=%x", tmp );
817 
818 		p23_v_accum_init =
819 			((tmp << 15) & RADEON_OV0_P23_V_ACCUM_INIT_MASK) | 0x00000001;
820 	}
821 
822 	// show me what you've got!
823 	// we could lock double buffering of overlay unit during update
824 	// (new values are copied during vertical blank, so if we've updated
825 	// only some of them, you get a whole frame of mismatched values)
826 	// but during tests I couldn't get the artifacts go away, so
827 	// we use the dangerous way which has the pro to not require any
828 	// waiting
829 	buffer[idx++] = CP_PACKET0( RADEON_OV0_VID_BUF0_BASE_ADRS, 0 );
830 	buffer[idx++] = offset;
831 	buffer[idx++] = CP_PACKET0( RADEON_OV0_VID_BUF_PITCH0_VALUE, 0 );
832 	buffer[idx++] = node->buffer.bytes_per_row;
833 
834 	buffer[idx++] = CP_PACKET0( RADEON_OV0_H_INC, 0 );
835 	buffer[idx++] = p1_h_inc | (p23_h_inc << 16);
836 	buffer[idx++] = CP_PACKET0( RADEON_OV0_STEP_BY, 0 );
837 	buffer[idx++] = factors->p1_step_by | (factors->p23_step_by << 8);
838 	buffer[idx++] = CP_PACKET0( RADEON_OV0_V_INC, 0 );
839 	buffer[idx++] = v_inc;
840 
841 	buffer[idx++] = CP_PACKET0(
842 		overlay_port->is_crtc2 ? RADEON_OV1_Y_X_START : RADEON_OV0_Y_X_START, 0 );
843 	buffer[idx++] = (dest_left) | (dest_top << 16);
844 	buffer[idx++] = CP_PACKET0(
845 		overlay_port->is_crtc2 ? RADEON_OV1_Y_X_END : RADEON_OV0_Y_X_END, 0 );
846 	buffer[idx++] = (dest_right - 1) | ((dest_bottom - 1) << 16);
847 
848 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P1_BLANK_LINES_AT_TOP, 0 );
849 	buffer[idx++] = RADEON_P1_BLNK_LN_AT_TOP_M1_MASK | (p1_active_lines << 16);
850 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P1_X_START_END, 0 );
851 	buffer[idx++] = p1_x_end | (p1_x_start << 16);
852 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P1_H_ACCUM_INIT, 0 );
853 	buffer[idx++] = p1_h_accum_init;
854 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P1_V_ACCUM_INIT, 0 );
855 	buffer[idx++] = p1_v_accum_init;
856 
857 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P23_BLANK_LINES_AT_TOP, 0 );
858 	buffer[idx++] = RADEON_P23_BLNK_LN_AT_TOP_M1_MASK | (p23_active_lines << 16);
859 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P2_X_START_END, 0 );
860 	buffer[idx++] = p23_x_end | (p23_x_start << 16);
861 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P3_X_START_END, 0 );
862 	buffer[idx++] = p23_x_end | (p23_x_start << 16);
863 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P23_H_ACCUM_INIT, 0 );
864 	buffer[idx++] = p23_h_accum_init;
865 	buffer[idx++] = CP_PACKET0( RADEON_OV0_P23_V_ACCUM_INIT, 0 );
866 	buffer[idx++] = p23_v_accum_init;
867 
868 	buffer[idx++] = CP_PACKET0( RADEON_OV0_TEST, 0 );
869 	buffer[idx++] = node->test_reg;
870 	buffer[idx++] = CP_PACKET0( RADEON_OV0_SCALE_CNTL, 0 );
871 	buffer[idx++] = RADEON_SCALER_ENABLE |
872 		RADEON_SCALER_DOUBLE_BUFFER |
873 		(node->ati_space << 8) |
874 		/*RADEON_SCALER_ADAPTIVE_DEINT |*/
875 		(overlay_port->is_crtc2 ? RADEON_SCALER_CRTC_SEL : 0  );
876 
877 	si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
878 
879 	buffer[idx++] = CP_PACKET0( RADEON_OV0_AUTO_FLIP_CNTRL, 0 );
880 	buffer[idx++] = si->overlay_mgr.auto_flip_reg;
881 
882 	Radeon_SendCP( ai, buffer, idx );
883 
884 done:
885 	ai->si->active_overlay.on = ai->si->pending_overlay.on;
886 	ai->si->active_overlay.ow = ai->si->pending_overlay.ow;
887 	ai->si->active_overlay.ov = ai->si->pending_overlay.ov;
888 	ai->si->active_overlay.ob = ai->si->pending_overlay.ob;
889 	ai->si->active_overlay.h_display_start = vc->mode.h_display_start;
890 	ai->si->active_overlay.v_display_start = vc->mode.v_display_start;
891 
892 	return B_OK;
893 }
894 
895 
896 // hide overlay, but not permanently
897 void Radeon_TempHideOverlay( accelerator_info *ai )
898 {
899 	SHOW_FLOW0( 3, "" );
900 
901 	Radeon_WriteRegCP( ai, RADEON_OV0_SCALE_CNTL, 0 );
902 }
903 
904 
905 // hide overlay (can be called even if there is none visible)
906 void Radeon_HideOverlay( accelerator_info *ai )
907 {
908 	shared_info *si = ai->si;
909 
910 	Radeon_TempHideOverlay( ai );
911 
912 	// save that there is no overlay to be shown
913 	si->active_overlay.on = NULL;
914 	si->pending_overlay.on = NULL;
915 
916 	// invalidate active port so it will be setup again once
917 	// a new overlay is shown
918 	si->active_overlay.port = -1;
919 }
920 
921 
922 // show new overlay buffer with same parameters as last one
923 void Radeon_ReplaceOverlayBuffer( accelerator_info *ai )
924 {
925 	shared_info *si = ai->si;
926 //	vuint8 *regs = ai->regs;
927 	uint32 offset;
928 	uint32 buffer[2*2];
929 	uint idx = 0;
930 
931 	offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;
932 
933 	buffer[idx++] = CP_PACKET0( RADEON_OV0_VID_BUF0_BASE_ADRS, 0 );
934 	buffer[idx++] = offset;
935 
936 	si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
937 	buffer[idx++] = CP_PACKET0( RADEON_OV0_AUTO_FLIP_CNTRL, 0 );
938 	buffer[idx++] = si->overlay_mgr.auto_flip_reg;
939 
940 	Radeon_SendCP( ai, buffer, idx );
941 
942 	ai->si->active_overlay.on = ai->si->pending_overlay.on;
943 }
944 
945 
946 // get number of pixels of overlay shown on virtual port
947 static int getIntersectArea( virtual_card *vc, overlay_window *ow, virtual_port *port )
948 {
949 	int left, top, right, bottom;
950 
951 	left = ow->h_start - (vc->mode.h_display_start + port->rel_x);
952 	top = ow->v_start - (vc->mode.v_display_start + port->rel_y);
953 	right = left + ow->width;
954 	bottom = top + ow->height;
955 
956 	if( left < 0 )
957 		left = 0;
958 	if( top < 0 )
959 		top = 0;
960 	if( right > port->mode.timing.h_display )
961 		right = port->mode.timing.h_display;
962 	if( bottom > port->mode.timing.v_display )
963 		bottom = port->mode.timing.v_display;
964 
965 	if( right < left || bottom < top )
966 		return 0;
967 
968 	return (right - left) * (bottom - top);
969 }
970 
971 
972 // update overlay, to be called whenever something in terms of
973 // overlay have or can have been changed
974 status_t Radeon_UpdateOverlay( accelerator_info *ai )
975 {
976 	virtual_card *vc = ai->vc;
977 	shared_info *si = ai->si;
978 	virtual_port *overlay_port;
979 
980 	float brightness = 0.0f;
981 	float contrast = 1.0f;
982 	float saturation = 1.0f;
983 	float hue = 0.0f;
984     int32 ref = 0;
985 
986     SHOW_FLOW0( 3, "" );
987 
988 	// don't mess around with overlay of someone else
989     if( !vc->uses_overlay )
990     	return B_OK;
991 
992 	// make sure there really is an overlay
993 	if( si->pending_overlay.on == NULL )
994 		return B_OK;
995 
996 	// verify that the overlay is still valid
997 	if( (uint32)si->pending_overlay.ot != si->overlay_mgr.token )
998 		return B_BAD_VALUE;
999 
1000 /*	SHOW_FLOW( 3, "num_ports=%d, whished_overlay_port=%d",
1001 		vc->num_ports, vc->whished_overlay_port );*/
1002 
1003 	if( vc->different_ports > 1 ) {
1004 		int area0, area1;
1005 
1006 		// determine on which port most of the overlay is shown
1007 		area0 = getIntersectArea( vc, &si->pending_overlay.ow, &vc->ports[0] );
1008 		area1 = getIntersectArea( vc, &si->pending_overlay.ow, &vc->ports[1] );
1009 
1010 		SHOW_FLOW( 3, "area0=%d, area1=%d", area0, area1 );
1011 
1012 		if( area0 >= area1 )
1013 			overlay_port = &vc->ports[0];
1014 		else
1015 			overlay_port = &vc->ports[1];
1016 	} else {
1017 		// both ports show the same, use "swap displays" to decide
1018 		// where to show the overlay (to be improved as this flag isn't
1019 		// really designed for that)
1020 		if( vc->independant_ports > 1 && vc->swapDisplays )
1021 			overlay_port = &vc->ports[1];
1022 		else
1023 			overlay_port = &vc->ports[0];
1024 	}
1025 
1026 	si->pending_overlay.port = overlay_port->physical_port;
1027 
1028 	// only update registers that have been changed to minimize work
1029 	if( si->active_overlay.port != si->pending_overlay.port ) {
1030 		Radeon_InitOverlay( ai, overlay_port );
1031 	}
1032 
1033 	if( si->active_overlay.ob.space != si->pending_overlay.ob.space ) {
1034 		Radeon_SetTransform( ai, brightness, contrast, saturation, hue, 0, 0, 0, ref );
1035 	}
1036 
1037 	if( memcmp( &si->active_overlay.ow, &si->pending_overlay.ow, sizeof( si->active_overlay.ow )) != 0 ||
1038 		memcmp( &si->active_overlay.ov, &si->pending_overlay.ov, sizeof( si->active_overlay.ov )) != 0 ||
1039 		si->active_overlay.h_display_start != vc->mode.h_display_start ||
1040 		si->active_overlay.v_display_start != vc->mode.v_display_start ||
1041 		si->active_overlay.ob.width != si->pending_overlay.ob.width ||
1042 		si->active_overlay.ob.height != si->pending_overlay.ob.height ||
1043 		si->active_overlay.ob.bytes_per_row != si->pending_overlay.ob.bytes_per_row )
1044 		Radeon_ShowOverlay( ai, overlay_port );
1045 
1046 	else if( si->active_overlay.on != si->pending_overlay.on )
1047 		Radeon_ReplaceOverlayBuffer( ai );
1048 
1049 	SHOW_FLOW0( 3, "success" );
1050 
1051 	return B_OK;
1052 }
1053