1 /* 2 Copyright (c) 2002-2004, Thomas Kurschel 3 4 Part of Radeon accelerant 5 6 Hardware access routines for overlays 7 */ 8 9 #include "GlobalData.h" 10 #include "radeon_interface.h" 11 #include "mmio.h" 12 #include "overlay_regs.h" 13 #include "pll_regs.h" 14 #include "capture_regs.h" 15 #include "utils.h" 16 #include "pll_access.h" 17 #include <math.h> 18 #include <string.h> 19 #include "CP.h" 20 21 22 void Radeon_TempHideOverlay( accelerator_info *ai ); 23 24 // standard (linear) gamma 25 static struct { 26 uint16 reg; 27 bool r200_or_above; 28 uint32 slope; 29 uint32 offset; 30 } std_gamma[] = { 31 { RADEON_OV0_GAMMA_0_F, false, 0x100, 0x0000 }, 32 { RADEON_OV0_GAMMA_10_1F, false, 0x100, 0x0020 }, 33 { RADEON_OV0_GAMMA_20_3F, false, 0x100, 0x0040 }, 34 { RADEON_OV0_GAMMA_40_7F, false, 0x100, 0x0080 }, 35 { RADEON_OV0_GAMMA_80_BF, true, 0x100, 0x0100 }, 36 { RADEON_OV0_GAMMA_C0_FF, true, 0x100, 0x0100 }, 37 { RADEON_OV0_GAMMA_100_13F, true, 0x100, 0x0200 }, 38 { RADEON_OV0_GAMMA_140_17F, true, 0x100, 0x0200 }, 39 { RADEON_OV0_GAMMA_180_1BF, true, 0x100, 0x0300 }, 40 { RADEON_OV0_GAMMA_1C0_1FF, true, 0x100, 0x0300 }, 41 { RADEON_OV0_GAMMA_200_23F, true, 0x100, 0x0400 }, 42 { RADEON_OV0_GAMMA_240_27F, true, 0x100, 0x0400 }, 43 { RADEON_OV0_GAMMA_280_2BF, true, 0x100, 0x0500 }, 44 { RADEON_OV0_GAMMA_2C0_2FF, true, 0x100, 0x0500 }, 45 { RADEON_OV0_GAMMA_300_33F, true, 0x100, 0x0600 }, 46 { RADEON_OV0_GAMMA_340_37F, true, 0x100, 0x0600 }, 47 { RADEON_OV0_GAMMA_380_3BF, false, 0x100, 0x0700 }, 48 { RADEON_OV0_GAMMA_3C0_3FF, false, 0x100, 0x0700 } 49 }; 50 51 52 // setup overlay unit before first use 53 void Radeon_InitOverlay( 54 accelerator_info *ai, int crtc_idx ) 55 { 56 vuint8 *regs = ai->regs; 57 shared_info *si = ai->si; 58 uint i; 59 uint32 ecp_div; 60 61 SHOW_FLOW0( 0, "" ); 62 63 // make sure we really write this value as the "toggle" bit 64 // contained in it (which is zero initially) is edge-sensitive! 
65 // for capturing, we need to select "software" video port 66 si->overlay_mgr.auto_flip_reg = RADEON_OV0_VID_PORT_SELECT_SOFTWARE; 67 68 OUTREG( regs, RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOFT_RESET ); 69 OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg ); 70 OUTREG( regs, RADEON_OV0_FILTER_CNTL, // use fixed filter coefficients 71 RADEON_OV0_HC_COEF_ON_HORZ_Y | 72 RADEON_OV0_HC_COEF_ON_HORZ_UV | 73 RADEON_OV0_HC_COEF_ON_VERT_Y | 74 RADEON_OV0_HC_COEF_ON_VERT_UV ); 75 OUTREG( regs, RADEON_OV0_KEY_CNTL, RADEON_GRAPHIC_KEY_FN_EQ | 76 RADEON_VIDEO_KEY_FN_FALSE | 77 RADEON_CMP_MIX_OR ); 78 OUTREG( regs, RADEON_OV0_TEST, 0 ); 79 // OUTREG( regs, RADEON_FCP_CNTL, RADEON_FCP_CNTL_GND ); // disable capture clock 80 // OUTREG( regs, RADEON_CAP0_TRIG_CNTL, 0 ); // disable capturing 81 OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 ); 82 // tell deinterlacer to always show recent field 83 OUTREG( regs, RADEON_OV0_DEINTERLACE_PATTERN, 84 0xaaaaa | (9 << RADEON_OV0_DEINT_PAT_LEN_M1_SHIFT) ); 85 86 // set gamma 87 for( i = 0; i < sizeof( std_gamma ) / sizeof( std_gamma[0] ); ++i ) { 88 if( !std_gamma[i].r200_or_above || si->asic >= rt_r200 ) { 89 OUTREG( regs, std_gamma[i].reg, 90 (std_gamma[i].slope << 16) | std_gamma[i].offset ); 91 } 92 } 93 94 // overlay unit can only handle up to 175 MHz, if pixel clock is higher, 95 // only every second pixel is handled 96 if( si->crtc[crtc_idx].mode.timing.pixel_clock < 175000 ) 97 ecp_div = 0; 98 else 99 ecp_div = 1; 100 101 Radeon_OUTPLLP( regs, si->asic, RADEON_VCLK_ECP_CNTL, 102 ecp_div << RADEON_ECP_DIV_SHIFT, ~RADEON_ECP_DIV_MASK ); 103 104 // Force the overlay clock on for integrated chips 105 if ((si->asic == rt_rs100) || 106 (si->asic == rt_rs200) || 107 (si->asic == rt_rs300)) { 108 Radeon_OUTPLL( regs, si->asic, RADEON_VCLK_ECP_CNTL, 109 (Radeon_INPLL( regs, si->asic, RADEON_VCLK_ECP_CNTL) | (1<<18))); 110 } 111 112 si->active_overlay.crtc_idx = si->pending_overlay.crtc_idx; 113 114 // invalidate active 
colour space 115 si->active_overlay.ob.space = -1; 116 117 // invalidate position/scaling 118 si->active_overlay.ob.width = -1; 119 } 120 121 // colour space transformation matrix 122 typedef struct space_transform 123 { 124 float RefLuma; // scaling of luma to use full RGB range 125 float RefRCb; // b/u -> r 126 float RefRY; // g/y -> r 127 float RefRCr; // r/v -> r 128 float RefGCb; 129 float RefGY; 130 float RefGCr; 131 float RefBCb; 132 float RefBY; 133 float RefBCr; 134 } space_transform; 135 136 137 // Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces 138 space_transform trans_yuv[2] = 139 { 140 { 1.1678, 0.0, 1, 1.6007, -0.3929, 1, -0.8154, 2.0232, 1, 0.0 }, /* BT.601 */ 141 { 1.1678, 0.0, 1, 1.7980, -0.2139, 1, -0.5345, 2.1186, 1, 0.0 } /* BT.709 */ 142 }; 143 144 145 // RGB is a pass through 146 space_transform trans_rgb = 147 { 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 }; 148 149 150 // set overlay colour space transformation matrix 151 static void Radeon_SetTransform( 152 accelerator_info *ai, 153 float bright, 154 float cont, 155 float sat, 156 float hue, 157 float red_intensity, 158 float green_intensity, 159 float blue_intensity, 160 uint ref) 161 { 162 vuint8 *regs = ai->regs; 163 shared_info *si = ai->si; 164 float OvHueSin, OvHueCos; 165 float CAdjOff; 166 float CAdjRY, CAdjGY, CAdjBY; 167 float CAdjRCb, CAdjRCr; 168 float CAdjGCb, CAdjGCr; 169 float CAdjBCb, CAdjBCr; 170 float RedAdj,GreenAdj,BlueAdj; 171 float OvROff, OvGOff, OvBOff; 172 float OvRY, OvGY, OvBY; 173 float OvRCb, OvRCr; 174 float OvGCb, OvGCr; 175 float OvBCb, OvBCr; 176 float Loff; 177 float Coff; 178 179 uint32 dwOvROff, dwOvGOff, dwOvBOff; 180 uint32 dwOvRY, dwOvGY, dwOvBY; 181 uint32 dwOvRCb, dwOvRCr; 182 uint32 dwOvGCb, dwOvGCr; 183 uint32 dwOvBCb, dwOvBCr; 184 185 space_transform *trans; 186 187 SHOW_FLOW0( 0, "" ); 188 189 // get proper conversion formula 190 switch( si->pending_overlay.ob.space ) { 191 case B_YCbCr422: 192 case B_YUV12: 193 Loff = 16 * 4; // internal 
representation is 10 Bits 194 Coff = 128 * 4; 195 196 if (ref >= 2) 197 ref = 0; 198 199 trans = &trans_yuv[ref]; 200 break; 201 202 case B_RGB15: 203 case B_RGB16: 204 case B_RGB32: 205 default: 206 Loff = 0; 207 Coff = 0; 208 trans = &trans_rgb; 209 } 210 211 OvHueSin = sin(hue); 212 OvHueCos = cos(hue); 213 214 // get matrix values to convert overlay colour space to RGB 215 // applying colour adjustment, saturation and luma scaling 216 // (saturation doesn't work with RGB input, perhaps it did with some 217 // maths; this is left to the reader :) 218 CAdjRY = cont * trans->RefLuma * trans->RefRY; 219 CAdjGY = cont * trans->RefLuma * trans->RefGY; 220 CAdjBY = cont * trans->RefLuma * trans->RefBY; 221 222 CAdjRCb = sat * -OvHueSin * trans->RefRCr; 223 CAdjRCr = sat * OvHueCos * trans->RefRCr; 224 CAdjGCb = sat * (OvHueCos * trans->RefGCb - OvHueSin * trans->RefGCr); 225 CAdjGCr = sat * (OvHueSin * trans->RefGCb + OvHueCos * trans->RefGCr); 226 CAdjBCb = sat * OvHueCos * trans->RefBCb; 227 CAdjBCr = sat * OvHueSin * trans->RefBCb; 228 229 // adjust black level 230 CAdjOff = cont * trans[ref].RefLuma * bright * 1023.0; 231 RedAdj = cont * trans[ref].RefLuma * red_intensity * 1023.0; 232 GreenAdj = cont * trans[ref].RefLuma * green_intensity * 1023.0; 233 BlueAdj = cont * trans[ref].RefLuma * blue_intensity * 1023.0; 234 235 OvRY = CAdjRY; 236 OvGY = CAdjGY; 237 OvBY = CAdjBY; 238 OvRCb = CAdjRCb; 239 OvRCr = CAdjRCr; 240 OvGCb = CAdjGCb; 241 OvGCr = CAdjGCr; 242 OvBCb = CAdjBCb; 243 OvBCr = CAdjBCr; 244 // apply offsets 245 OvROff = RedAdj + CAdjOff - CAdjRY * Loff - (OvRCb + OvRCr) * Coff; 246 OvGOff = GreenAdj + CAdjOff - CAdjGY * Loff - (OvGCb + OvGCr) * Coff; 247 OvBOff = BlueAdj + CAdjOff - CAdjBY * Loff - (OvBCb + OvBCr) * Coff; 248 249 dwOvROff = ((int32)(OvROff * 2.0)) & 0x1fff; 250 dwOvGOff = ((int32)(OvGOff * 2.0)) & 0x1fff; 251 dwOvBOff = ((int32)(OvBOff * 2.0)) & 0x1fff; 252 253 dwOvRY = (((int32)(OvRY * 2048.0))&0x7fff)<<17; 254 dwOvGY = (((int32)(OvGY 
* 2048.0))&0x7fff)<<17; 255 dwOvBY = (((int32)(OvBY * 2048.0))&0x7fff)<<17; 256 dwOvRCb = (((int32)(OvRCb * 2048.0))&0x7fff)<<1; 257 dwOvRCr = (((int32)(OvRCr * 2048.0))&0x7fff)<<17; 258 dwOvGCb = (((int32)(OvGCb * 2048.0))&0x7fff)<<1; 259 dwOvGCr = (((int32)(OvGCr * 2048.0))&0x7fff)<<17; 260 dwOvBCb = (((int32)(OvBCb * 2048.0))&0x7fff)<<1; 261 dwOvBCr = (((int32)(OvBCr * 2048.0))&0x7fff)<<17; 262 263 OUTREG( regs, RADEON_OV0_LIN_TRANS_A, dwOvRCb | dwOvRY ); 264 OUTREG( regs, RADEON_OV0_LIN_TRANS_B, dwOvROff | dwOvRCr ); 265 OUTREG( regs, RADEON_OV0_LIN_TRANS_C, dwOvGCb | dwOvGY ); 266 OUTREG( regs, RADEON_OV0_LIN_TRANS_D, dwOvGOff | dwOvGCr ); 267 OUTREG( regs, RADEON_OV0_LIN_TRANS_E, dwOvBCb | dwOvBY ); 268 OUTREG( regs, RADEON_OV0_LIN_TRANS_F, dwOvBOff | dwOvBCr ); 269 270 si->active_overlay.ob.space = si->pending_overlay.ob.space; 271 } 272 273 274 // convert Be colour key to rgb value 275 static uint32 colourKey2RGB32( 276 uint32 space, uint8 red, uint8 green, uint8 blue ) 277 { 278 uint32 res; 279 280 SHOW_FLOW0( 3, "" ); 281 282 // the way Be defines colour keys may be convinient to some driver developers, 283 // but it's not well defined - took me some time to find out the format used 284 // and still I have no idea how alpha is defined; Rudolf told me that alpha is 285 // never used 286 switch( space ) { 287 case B_RGB15: 288 res = 289 ((uint32)(red >> 0) << (16+3)) | 290 ((uint32)(green >> 0) << (8+3)) | 291 ((blue >> 0) << 3); 292 break; 293 case B_RGB16: 294 res = 295 ((uint32)(red >> 0) << (16+3)) | 296 ((uint32)(green >> 0) << (8+2)) | 297 ((blue >> 0) << 3); 298 break; 299 case B_RGB32: 300 case B_CMAP8: 301 res = ((uint32)(red) << 16) | ((uint32)(green) << 8) | blue; 302 break; 303 default: 304 res = 0; 305 } 306 307 SHOW_FLOW( 3, "key=%lx", res ); 308 return res; 309 } 310 311 312 // set colour key of overlay 313 static void Radeon_SetColourKey( 314 accelerator_info *ai, const overlay_window *ow ) 315 { 316 virtual_card *vc = ai->vc; 317 vuint8 
*regs = ai->regs; 318 uint32 rgb32, mask32, min32, max32; 319 320 /*SHOW_FLOW( 0, "value=%02x %02x %02x, mask=%02x %02x %02x", 321 ow->red.value, ow->green.value, ow->blue.value, 322 ow->red.mask, ow->green.mask, ow->blue.mask );*/ 323 324 // Radeons don't support value and mask as colour key but colour range 325 rgb32 = colourKey2RGB32( vc->mode.space, 326 ow->red.value, ow->green.value, ow->blue.value ); 327 mask32 = colourKey2RGB32( vc->mode.space, 328 ow->red.mask, ow->green.mask, ow->blue.mask ); 329 330 // ~mask32 are all unimportant (usually low order) bits 331 // oring this to the colour should give us the highest valid colour value 332 // (add would be more precise but may lead to overflows) 333 min32 = rgb32; 334 max32 = rgb32 | ~mask32; 335 336 OUTREG( regs, RADEON_OV0_GRAPHICS_KEY_CLR_LOW, min32 ); 337 OUTREG( regs, RADEON_OV0_GRAPHICS_KEY_CLR_HIGH, max32 ); 338 OUTREG( regs, RADEON_OV0_KEY_CNTL, 339 RADEON_GRAPHIC_KEY_FN_EQ | 340 RADEON_VIDEO_KEY_FN_FALSE | 341 RADEON_CMP_MIX_OR ); 342 } 343 344 typedef struct { 345 uint max_scale; // maximum src_width/dest_width, 346 // i.e. 
source increment per screen pixel 347 uint8 group_size; // size of one filter group in pixels 348 uint8 p1_step_by, p23_step_by; // > 0: log(source pixel increment)+1, 2-tap filter 349 // = 0: source pixel increment = 1, 4-tap filter 350 } hscale_factor; 351 352 #define count_of( a ) (sizeof( a ) / sizeof( a[0] )) 353 354 // scaling/filter tables depending on overlay colour space: 355 // magnifying pixels is no problem, but minifying can lead to overload, 356 // so we have to skip pixels and/or use 2-tap filters 357 static hscale_factor scale_RGB16[] = { 358 { (2 << 12), 2, 1, 1 }, 359 { (4 << 12), 2, 2, 2 }, 360 { (8 << 12), 2, 3, 3 }, 361 { (16 << 12), 2, 4, 4 }, 362 { (32 << 12), 2, 5, 5 } 363 }; 364 365 static hscale_factor scale_RGB32[] = { 366 { (2 << 12) / 3, 2, 0, 0 }, 367 { (4 << 12) / 3, 4, 1, 1 }, 368 { (8 << 12) / 3, 4, 2, 2 }, 369 { (4 << 12), 4, 2, 3 }, 370 { (16 << 12) / 3, 4, 3, 3 }, 371 { (8 << 12), 4, 3, 4 }, 372 { (32 << 12) / 3, 4, 4, 4 }, 373 { (16 << 12), 4, 5, 5 } 374 }; 375 376 static hscale_factor scale_YUV[] = { 377 { (16 << 12) / 16, 2, 0, 0 }, 378 { (16 << 12) / 12, 2, 0, 1 }, // mode 4, 1, 0 (as used by YUV12) is impossible 379 { (16 << 12) / 8, 4, 1, 1 }, 380 { (16 << 12) / 6, 4, 1, 2 }, 381 { (16 << 12) / 4, 4, 2, 2 }, 382 { (16 << 12) / 3, 4, 2, 3 }, 383 { (16 << 12) / 2, 4, 3, 3 }, 384 { (16 << 12) / 1, 4, 4, 4 } 385 }; 386 387 static hscale_factor scale_YUV12[] = { 388 { (16 << 12) / 16, 2, 0, 0 }, 389 { (16 << 12) / 12, 4, 1, 0 }, 390 { (16 << 12) / 12, 2, 0, 1 }, 391 { (16 << 12) / 8, 4, 1, 1 }, 392 { (16 << 12) / 6, 4, 1, 2 }, 393 { (16 << 12) / 4, 4, 2, 2 }, 394 { (16 << 12) / 3, 4, 2, 3 }, 395 { (16 << 12) / 2, 4, 3, 3 }, 396 { (int)((16 << 12) / 1.5), 4, 3, 4 }, 397 { (int)((16 << 12) / 1.0), 4, 4, 4 }, 398 { (int)((16 << 12) / 0.75), 4, 4, 5 }, 399 { (int)((16 << 12) / 0.5), 4, 5, 5 } 400 }; 401 402 #define min3( a, b, c ) (min( (a), min( (b), (c) ))) 403 404 static hscale_factor scale_YUV9[] = { 405 { min3( (16 << 12) / 12, 
(3 << 12) * 1, (2 << 12) * 4 * 1 ), 2, 0, 0 }, 406 { min3( (16 << 12) / 8, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 4, 1, 0 }, 407 { min3( (16 << 12) / 10, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 2, 0, 1 }, 408 { min3( (16 << 12) / 6, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 4, 1, 1 }, 409 { min3( (16 << 12) / 5, (3 << 12) * 1, (2 << 12) * 4 * 2 ), 4, 1, 2 }, 410 { min3( (16 << 12) / 3, (3 << 12) * 2, (2 << 12) * 4 * 2 ), 4, 2, 2 }, 411 { min3( (int)((16 << 12) / 2.5), (3 << 12) * 1, (2 << 12) * 4 * 4 ), 4, 2, 3 }, // probably, it should be (3 << 12) * 2 412 { min3( (int)((16 << 12) / 1.5), (3 << 12) * 4, (2 << 12) * 4 * 4 ), 4, 3, 3 }, 413 { min3( (int)((16 << 12) / 0.75), (3 << 12) * 8, (2 << 12) * 4 * 8 ), 4, 4, 4 }, 414 { min3( (int)((16 << 12) / 0.625), (3 << 12) * 8, (2 << 12) * 4 * 16 ), 4, 4, 5 }, 415 { min3( (int)((16 << 12) / 0.375), (3 << 12) * 16, (2 << 12) * 4 * 16 ), 4, 5, 5 } 416 }; 417 418 419 // parameters of an overlay colour space 420 typedef struct { 421 uint8 bpp_shift; // log2( bytes per pixel (main plain) ) 422 uint8 bpuv_shift; // log2( bytes per pixel (uv-plane) ); 423 // if there is one plane only: bpp=bpuv 424 uint8 num_planes; // number of planes 425 uint8 h_uv_sub_sample_shift; // log2( horizontal pixels per uv pair ) 426 uint8 v_uv_sub_sample_shift; // log2( vertical pixels per uv pair ) 427 hscale_factor *factors; // scaling/filter table 428 uint8 num_factors; 429 } space_params; 430 431 static space_params space_params_table[16] = { 432 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved 433 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved 434 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved 435 { 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) }, // RGB15 436 { 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) }, // RGB16 437 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved 438 { 2, 2, 1, 0, 0, scale_RGB32, count_of( scale_RGB32 ) }, // RGB32 439 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved 440 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved 441 { 0, 0, 3, 2, 2, scale_YUV9, count_of( 
scale_YUV9 ) }, // YUV9 442 { 0, 0, 3, 1, 1, scale_YUV12, count_of( scale_YUV12 ) }, // YUV12, three-plane 443 { 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) }, // VYUY422 444 { 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) }, // YVYU422 445 { 0, 1, 2, 1, 1, scale_YUV12, count_of( scale_YUV12 ) }, // YUV12, two-plane 446 { 0, 1, 2, 1, 1, NULL, 0 }, // ??? 447 { 0, 0, 0, 0, 0, NULL, 0 } // reserved 448 }; 449 450 // get appropriate scaling/filter parameters 451 static hscale_factor *getHScaleFactor( 452 accelerator_info *ai, 453 space_params *params, 454 uint32 src_left, uint32 src_right, uint32 *h_inc ) 455 { 456 uint words_per_p1_line, words_per_p23_line, max_words_per_line; 457 bool p1_4tap_allowed, p23_4tap_allowed; 458 uint i; 459 uint num_factors; 460 hscale_factor *factors; 461 462 SHOW_FLOW0( 3, "" ); 463 464 // check whether fifo is large enough to feed vertical 4-tap-filter 465 466 words_per_p1_line = 467 ceilShiftDiv( (src_right - 1) << params->bpp_shift, 4 ) - 468 ((src_left << params->bpp_shift) >> 4) + 1; 469 words_per_p23_line = 470 ceilShiftDiv( (src_right - 1) << params->bpuv_shift, 4 ) - 471 ((src_left << params->bpuv_shift) >> 4) + 1; 472 473 // overlay scaler line length differs for different revisions 474 // this needs to be maintained by hand 475 if (ai->si->asic == rt_r200 || ai->si->asic >= rt_r300) 476 max_words_per_line = 1920 / 16; 477 else 478 max_words_per_line = 1536 / 16; 479 480 switch (params->num_planes) { 481 case 3: 482 p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2; 483 p23_4tap_allowed = words_per_p23_line < max_words_per_line / 4; 484 break; 485 case 2: 486 p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2; 487 p23_4tap_allowed = words_per_p23_line < max_words_per_line / 2; 488 break; 489 case 1: 490 default: 491 p1_4tap_allowed = p23_4tap_allowed = words_per_p1_line < max_words_per_line; 492 break; 493 } 494 495 SHOW_FLOW( 3, "p1_4tap_allowed=%d, p23_4t_allowed=%d", 496 (int)p1_4tap_allowed, 
(int)p23_4tap_allowed ); 497 498 // search for proper scaling/filter entry 499 factors = params->factors; 500 num_factors = params->num_factors; 501 502 if (factors == NULL || num_factors == 0) 503 return NULL; 504 505 for (i = 0; i < num_factors; ++i, ++factors) { 506 if (*h_inc <= factors->max_scale 507 && (factors->p1_step_by > 0 || p1_4tap_allowed) 508 && (factors->p23_step_by > 0 || p23_4tap_allowed)) 509 break; 510 } 511 512 if (i == num_factors) { 513 // overlay is asked to be scaled down more than allowed, 514 // so use least scaling factor supported 515 --factors; 516 *h_inc = factors->max_scale; 517 } 518 519 SHOW_FLOW( 3, "group_size=%d, p1_step_by=%d, p23_step_by=%d", 520 factors->group_size, factors->p1_step_by, factors->p23_step_by ); 521 522 return factors; 523 } 524 525 526 #define I2FF( a, shift ) ((uint32)((a) * (1 << (shift)))) 527 528 529 // show overlay on screen 530 static status_t Radeon_ShowOverlay( 531 accelerator_info *ai, int crtc_idx ) 532 { 533 virtual_card *vc = ai->vc; 534 shared_info *si = ai->si; 535 vuint8 *regs = ai->regs; 536 overlay_info *overlay = &si->pending_overlay; 537 overlay_buffer_node *node = overlay->on; 538 crtc_info *crtc = &si->crtc[crtc_idx]; 539 540 uint32 ecp_div; 541 uint32 v_inc, h_inc; 542 uint32 src_v_inc, src_h_inc; 543 uint32 src_left, src_top, src_right, src_bottom; 544 int32 dest_left, dest_top, dest_right, dest_bottom; 545 uint32 offset; 546 uint32 tmp; 547 uint32 p1_h_accum_init, p23_h_accum_init, p1_v_accum_init, p23_v_accum_init; 548 uint32 p1_active_lines, p23_active_lines; 549 hscale_factor *factors; 550 space_params *params; 551 552 uint32 p1_h_inc, p23_h_inc; 553 uint32 p1_x_start, p1_x_end; 554 uint32 p23_x_start, p23_x_end; 555 556 uint scale_ctrl; 557 558 /*uint32 buffer[20*2]; 559 uint idx = 0;*/ 560 561 SHOW_FLOW0( 0, "" ); 562 563 Radeon_SetColourKey( ai, &overlay->ow ); 564 565 // overlay unit can only handle up to 175 MHz; if pixel clock is higher, 566 // only every second pixel is handled 
567 // (this devider is gets written into PLL by InitOverlay, 568 // so we don't need to do it ourself) 569 if( crtc->mode.timing.pixel_clock < 175000 ) 570 ecp_div = 0; 571 else 572 ecp_div = 1; 573 574 575 // scaling is independant of clipping, get this first 576 { 577 uint32 src_width, src_height; 578 579 src_width = overlay->ov.width; 580 src_height = overlay->ov.height; 581 582 // this is for graphics card 583 v_inc = (src_height << 20) / overlay->ow.height; 584 h_inc = (src_width << (12 + ecp_div)) / overlay->ow.width; 585 586 587 // this is for us 588 src_v_inc = (src_height << 16) / overlay->ow.height; 589 src_h_inc = (src_width << 16) / overlay->ow.width; 590 } 591 592 // calculate unclipped position/size 593 // TBD: I assume that overlay_window.offset_xyz is only a hint where 594 // no overlay is visible; another interpretation were to zoom 595 // the overlay so it fits into remaining space 596 src_left = (overlay->ov.h_start << 16) + overlay->ow.offset_left * src_h_inc; 597 src_top = (overlay->ov.v_start << 16) + overlay->ow.offset_top * src_v_inc; 598 src_right = ((overlay->ov.h_start + overlay->ov.width) << 16) - 599 overlay->ow.offset_right * src_h_inc; 600 src_bottom = ((overlay->ov.v_start + overlay->ov.height) << 16) - 601 overlay->ow.offset_top * src_v_inc; 602 dest_left = overlay->ow.h_start + overlay->ow.offset_left; 603 dest_top = overlay->ow.v_start + overlay->ow.offset_top; 604 dest_right = overlay->ow.h_start + overlay->ow.width - overlay->ow.offset_right; 605 dest_bottom = overlay->ow.v_start + overlay->ow.height - overlay->ow.offset_bottom; 606 607 SHOW_FLOW( 3, "ow: h=%d, v=%d, width=%d, height=%d", 608 overlay->ow.h_start, overlay->ow.v_start, 609 overlay->ow.width, overlay->ow.height ); 610 611 SHOW_FLOW( 3, "offset_left=%d, offset_right=%d, offset_top=%d, offset_bottom=%d", 612 overlay->ow.offset_left, overlay->ow.offset_right, 613 overlay->ow.offset_top, overlay->ow.offset_bottom ); 614 615 616 // apply virtual screen 617 dest_left -= 
vc->mode.h_display_start + crtc->rel_x; 618 dest_top -= vc->mode.v_display_start + crtc->rel_y; 619 dest_right -= vc->mode.h_display_start + crtc->rel_x; 620 dest_bottom -= vc->mode.v_display_start + crtc->rel_y; 621 622 // clip to visible area 623 if( dest_left < 0 ) { 624 src_left += -dest_left * src_h_inc; 625 dest_left = 0; 626 } 627 if( dest_top < 0 ) { 628 src_top += -dest_top * src_v_inc; 629 dest_top = 0; 630 } 631 632 SHOW_FLOW( 3, "mode: w=%d, h=%d", 633 crtc->mode.timing.h_display, crtc->mode.timing.v_display ); 634 635 if( dest_right > crtc->mode.timing.h_display ) 636 dest_right = crtc->mode.timing.h_display; 637 if( dest_bottom > crtc->mode.timing.v_display ) 638 dest_bottom = crtc->mode.timing.v_display; 639 640 SHOW_FLOW( 3, "src=(%d, %d, %d, %d)", 641 src_left, src_top, src_right, src_bottom ); 642 SHOW_FLOW( 3, "dest=(%d, %d, %d, %d)", 643 dest_left, dest_top, dest_right, dest_bottom ); 644 645 646 // especially with multi-screen modes the overlay may not be on screen at all 647 if( dest_left >= dest_right || dest_top >= dest_bottom || 648 src_left >= src_right || src_top >= src_bottom ) 649 { 650 Radeon_TempHideOverlay( ai ); 651 goto done; 652 } 653 654 655 // let's calculate all those nice register values 656 SHOW_FLOW( 3, "ati_space=%d", node->ati_space ); 657 params = &space_params_table[node->ati_space]; 658 659 // choose proper scaler 660 { 661 factors = getHScaleFactor( ai, params, src_left >> 16, src_right >> 16, &h_inc ); 662 if( factors == NULL ) 663 return B_ERROR; 664 665 p1_h_inc = factors->p1_step_by > 0 ? 666 h_inc >> (factors->p1_step_by - 1) : h_inc; 667 p23_h_inc = 668 (factors->p23_step_by > 0 ? 
h_inc >> (factors->p23_step_by - 1) : h_inc) 669 >> params->h_uv_sub_sample_shift; 670 671 SHOW_FLOW( 3, "p1_h_inc=%x, p23_h_inc=%x", p1_h_inc, p23_h_inc ); 672 } 673 674 // get register value for start/end position of overlay image (pixel-precise only) 675 { 676 uint32 p1_step_size, p23_step_size; 677 uint32 p1_left, p1_right, p1_width; 678 uint32 p23_left, p23_right, p23_width; 679 680 p1_left = src_left >> 16; 681 p1_right = src_right >> 16; 682 p1_width = p1_right - p1_left; 683 684 p1_step_size = factors->p1_step_by > 0 ? (1 << (factors->p1_step_by - 1)) : 1; 685 p1_x_start = p1_left % (16 >> params->bpp_shift); 686 p1_x_end = ((p1_x_start + p1_width - 1) / p1_step_size) * p1_step_size; 687 688 SHOW_FLOW( 3, "p1_x_start=%d, p1_x_end=%d", p1_x_start, p1_x_end ); 689 690 p23_left = (src_left >> 16) >> params->h_uv_sub_sample_shift; 691 p23_right = (src_right >> 16) >> params->h_uv_sub_sample_shift; 692 p23_width = p23_right - p23_left; 693 694 p23_step_size = factors->p23_step_by > 0 ? (1 << (factors->p23_step_by - 1)) : 1; 695 // if resolution of Y and U/V differs but YUV are stored in one 696 // plane then UV alignment depends on Y data, therefore the hack 697 // (you are welcome to replace this with some cleaner code ;) 698 p23_x_start = p23_left % 699 ((16 >> params->bpuv_shift) / 700 (node->ati_space == 11 || node->ati_space == 12 ? 
2 : 1)); 701 p23_x_end = (int)((p23_x_start + p23_width - 1) / p23_step_size) * p23_step_size; 702 703 SHOW_FLOW( 3, "p23_x_start=%d, p23_x_end=%d", p23_x_start, p23_x_end ); 704 705 // get memory location of first word to be read by scaler 706 // (save relative offset for fast update) 707 si->active_overlay.rel_offset = (src_top >> 16) * node->buffer.bytes_per_row + 708 ((p1_left << params->bpp_shift) & ~0xf); 709 offset = node->mem_offset + si->active_overlay.rel_offset; 710 711 SHOW_FLOW( 3, "rel_offset=%x", si->active_overlay.rel_offset ); 712 } 713 714 // get active lines for scaler 715 // (we could add additional blank lines for DVD letter box mode, 716 // but this is not supported by API; additionally, this only makes 717 // sense if want to put subtitles onto the black border, which is 718 // supported neither) 719 { 720 uint16 int_top, int_bottom; 721 722 int_top = src_top >> 16; 723 int_bottom = (src_bottom >> 16); 724 725 p1_active_lines = int_bottom - int_top - 1; 726 p23_active_lines = 727 ceilShiftDiv( int_bottom - 1, params->v_uv_sub_sample_shift ) - 728 (int_top >> params->v_uv_sub_sample_shift); 729 730 SHOW_FLOW( 3, "p1_active_lines=%d, p23_active_lines=%d", 731 p1_active_lines, p23_active_lines ); 732 } 733 734 // if picture is stretched for flat panel, we need to scale all 735 // vertical values accordingly 736 // TBD: there is no description at all concerning this, so v_accum_init may 737 // need to be initialized based on original value 738 { 739 if( (crtc->active_displays & (dd_lvds | dd_dvi)) != 0 ) { 740 uint64 v_ratio; 741 742 // convert 32.32 format to 16.16 format; else we 743 // cannot multiply two fixed point values without 744 // overflow 745 v_ratio = si->flatpanels[crtc->flatpanel_port].v_ratio >> (FIX_SHIFT - 16); 746 747 v_inc = (v_inc * v_ratio) >> 16; 748 } 749 750 SHOW_FLOW( 3, "v_inc=%x", v_inc ); 751 } 752 753 // get initial horizontal scaler values, taking care of precharge 754 // don't ask questions about formulas - take 
them as is 755 // (TBD: home-brewed sub-pixel source clipping may be wrong, 756 // especially for uv-planes) 757 { 758 uint32 p23_group_size; 759 760 tmp = ((src_left & 0xffff) >> 11) + ( 761 ( 762 I2FF( p1_x_start % factors->group_size, 12 ) + 763 I2FF( 2.5, 12 ) + 764 p1_h_inc / 2 + 765 I2FF( 0.5, 12-5 ) // rounding 766 ) >> (12 - 5)); // scaled by 1 << 5 767 768 SHOW_FLOW( 3, "p1_h_accum_init=%x", tmp ); 769 770 p1_h_accum_init = 771 ((tmp << 15) & RADEON_OV0_P1_H_ACCUM_INIT_MASK) | 772 ((tmp << 23) & RADEON_OV0_P1_PRESHIFT_MASK); 773 774 775 p23_group_size = 2; 776 777 tmp = ((src_left & 0xffff) >> 11) + ( 778 ( 779 I2FF( p23_x_start % p23_group_size, 12 ) + 780 I2FF( 2.5, 12 ) + 781 p23_h_inc / 2 + 782 I2FF( 0.5, 12-5 ) // rounding 783 ) >> (12 - 5)); // scaled by 1 << 5 784 785 SHOW_FLOW( 3, "p23_h_accum_init=%x", tmp ); 786 787 p23_h_accum_init = 788 ((tmp << 15) & RADEON_OV0_P23_H_ACCUM_INIT_MASK) | 789 ((tmp << 23) & RADEON_OV0_P23_PRESHIFT_MASK); 790 } 791 792 // get initial vertical scaler values, taking care of precharge 793 { 794 uint extra_full_line; 795 796 extra_full_line = factors->p1_step_by == 0 ? 1 : 0; 797 798 tmp = ((src_top & 0x0000ffff) >> 11) + ( 799 (min( 800 I2FF( 1.5, 20 ) + I2FF( extra_full_line, 20 ) + v_inc / 2, 801 I2FF( 2.5, 20 ) + 2 * I2FF( extra_full_line, 20 ) 802 ) + I2FF( 0.5, 20-5 )) // rounding 803 >> (20 - 5)); // scaled by 1 << 5 804 805 SHOW_FLOW( 3, "p1_v_accum_init=%x", tmp ); 806 807 p1_v_accum_init = 808 ((tmp << 15) & RADEON_OV0_P1_V_ACCUM_INIT_MASK) | 0x00000001; 809 810 811 extra_full_line = factors->p23_step_by == 0 ? 
1 : 0; 812 813 if( params->v_uv_sub_sample_shift > 0 ) { 814 tmp = ((src_top & 0x0000ffff) >> 11) + ( 815 (min( 816 I2FF( 1.5, 20 ) + 817 I2FF( extra_full_line, 20 ) + 818 ((v_inc / 2) >> params->v_uv_sub_sample_shift), 819 I2FF( 2.5, 20 ) + 820 2 * I2FF( extra_full_line, 20 ) 821 ) + I2FF( 0.5, 20-5 )) // rounding 822 >> (20 - 5)); // scaled by 1 << 5 823 } else { 824 tmp = ((src_top & 0x0000ffff) >> 11) + ( 825 ( 826 I2FF( 2.5, 20 ) + 827 2 * I2FF( extra_full_line, 20 ) + 828 I2FF( 0.5, 20-5 ) // rounding 829 ) >> (20 - 5)); // scaled by 1 << 5 830 } 831 832 SHOW_FLOW( 3, "p23_v_accum_init=%x", tmp ); 833 834 p23_v_accum_init = 835 ((tmp << 15) & RADEON_OV0_P23_V_ACCUM_INIT_MASK) | 0x00000001; 836 } 837 838 // show me what you've got! 839 // we could lock double buffering of overlay unit during update 840 // (new values are copied during vertical blank, so if we've updated 841 // only some of them, you get a whole frame of mismatched values) 842 // but during tests I couldn't get the artifacts go away, so 843 // we use the dangerous way which has the pro to not require any 844 // waiting 845 846 // let's try to lock overlay unit 847 // we had to wait now until the lock takes effect, but this is 848 // impossible with CCE; perhaps we have to convert this code to 849 // direct register access; did that - let's see what happens... 850 OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, RADEON_REG_LD_CTL_LOCK ); 851 852 // wait until register access is locked 853 while( (INREG( regs, RADEON_OV0_REG_LOAD_CNTL) 854 & RADEON_REG_LD_CTL_LOCK_READBACK) == 0 ) 855 ; 856 857 OUTREG( regs, RADEON_OV0_VID_BUF0_BASE_ADRS, offset ); 858 OUTREG( regs, RADEON_OV0_VID_BUF_PITCH0_VALUE, node->buffer.bytes_per_row ); 859 OUTREG( regs, RADEON_OV0_H_INC, p1_h_inc | (p23_h_inc << 16) ); 860 OUTREG( regs, RADEON_OV0_STEP_BY, factors->p1_step_by | (factors->p23_step_by << 8) ); 861 OUTREG( regs, RADEON_OV0_V_INC, v_inc ); 862 863 OUTREG( regs, 864 crtc->crtc_idx == 0 ? 
RADEON_OV0_Y_X_START : RADEON_OV1_Y_X_START, 865 (dest_left) | (dest_top << 16) ); 866 OUTREG( regs, 867 crtc->crtc_idx == 0 ? RADEON_OV0_Y_X_END : RADEON_OV1_Y_X_END, 868 (dest_right - 1) | ((dest_bottom - 1) << 16) ); 869 870 OUTREG( regs, RADEON_OV0_P1_BLANK_LINES_AT_TOP, 871 RADEON_P1_BLNK_LN_AT_TOP_M1_MASK | (p1_active_lines << 16) ); 872 OUTREG( regs, RADEON_OV0_P1_X_START_END, p1_x_end | (p1_x_start << 16) ); 873 OUTREG( regs, RADEON_OV0_P1_H_ACCUM_INIT, p1_h_accum_init ); 874 OUTREG( regs, RADEON_OV0_P1_V_ACCUM_INIT, p1_v_accum_init ); 875 876 OUTREG( regs, RADEON_OV0_P23_BLANK_LINES_AT_TOP, 877 RADEON_P23_BLNK_LN_AT_TOP_M1_MASK | (p23_active_lines << 16) ); 878 OUTREG( regs, RADEON_OV0_P2_X_START_END, 879 p23_x_end | (p23_x_start << 16) ); 880 OUTREG( regs, RADEON_OV0_P3_X_START_END, 881 p23_x_end | (p23_x_start << 16) ); 882 OUTREG( regs, RADEON_OV0_P23_H_ACCUM_INIT, p23_h_accum_init ); 883 OUTREG( regs, RADEON_OV0_P23_V_ACCUM_INIT, p23_v_accum_init ); 884 885 OUTREG( regs, RADEON_OV0_TEST, node->test_reg ); 886 887 scale_ctrl = RADEON_SCALER_ENABLE | 888 RADEON_SCALER_DOUBLE_BUFFER | 889 (node->ati_space << 8) | 890 /* RADEON_SCALER_ADAPTIVE_DEINT | */ 891 RADEON_SCALER_BURST_PER_PLANE | 892 (crtc->crtc_idx == 0 ? 0 : RADEON_SCALER_CRTC_SEL ); 893 894 switch (node->ati_space << 8) { 895 case RADEON_SCALER_SOURCE_15BPP: // RGB15 896 case RADEON_SCALER_SOURCE_16BPP: 897 case RADEON_SCALER_SOURCE_32BPP: 898 OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl | 899 RADEON_SCALER_LIN_TRANS_BYPASS); 900 break; 901 case RADEON_SCALER_SOURCE_VYUY422: // VYUY422 902 case RADEON_SCALER_SOURCE_YVYU422: // YVYU422 903 OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl); 904 break; 905 default: 906 SHOW_FLOW(4, "What overlay format is this??? %d", node->ati_space); 907 OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl | 908 (( ai->si->asic >= rt_r200) ? 
R200_SCALER_TEMPORAL_DEINT : 0));

	}

	// flip the software end-of-frame toggle and write it out; the toggle
	// bit is edge-sensitive (see Radeon_InitOverlay), so the value written
	// must differ from the previous one
	si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;

	OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL,
		si->overlay_mgr.auto_flip_reg );

	// NOTE(review): presumably releases a register lock taken earlier in
	// this function (the locking code is not visible here) - confirm
	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );

done:
	// remember what has just been programmed, so Radeon_UpdateOverlay can
	// compare it against the pending state and skip redundant updates
	ai->si->active_overlay.on = ai->si->pending_overlay.on;
	ai->si->active_overlay.ow = ai->si->pending_overlay.ow;
	ai->si->active_overlay.ov = ai->si->pending_overlay.ov;
	ai->si->active_overlay.ob = ai->si->pending_overlay.ob;
	ai->si->active_overlay.h_display_start = vc->mode.h_display_start;
	ai->si->active_overlay.v_display_start = vc->mode.v_display_start;

	return B_OK;
}


// hide overlay, but not permanently
// only the scaler control register is written; none of the overlay
// bookkeeping in shared_info is modified by this function
void Radeon_TempHideOverlay(
	accelerator_info *ai )
{
	SHOW_FLOW0( 3, "" );

	// writing zero clears every bit of the register, including the
	// RADEON_SCALER_ENABLE bit that Radeon_ShowOverlay sets
	OUTREG( ai->regs, RADEON_OV0_SCALE_CNTL, 0 );
}


// hide overlay (can be called even if there is none visible)
// in addition to blanking the scaler, this forgets both the active and
// the pending overlay buffer and invalidates the active crtc index
void Radeon_HideOverlay(
	accelerator_info *ai )
{
	shared_info *si = ai->si;

	Radeon_TempHideOverlay( ai );

	// remember that there is no overlay to be shown
	si->active_overlay.on = NULL;
	si->active_overlay.prev_on = NULL;
	si->pending_overlay.on = NULL;

	// invalidate active head so it will be setup again once
	// a new overlay is shown
	si->active_overlay.crtc_idx = -1;
}


// show new overlay buffer with same parameters as last one
// only the buffer base address and the auto-flip register are written;
// all other overlay registers are left as they are.
// the "#if 0" branch is disabled code; only the "#else" branch is compiled
static void Radeon_ReplaceOverlayBuffer(
	accelerator_info *ai )
{
#if 0
	shared_info *si = ai->si;
	vuint8 *regs = ai->regs;
	uint32 offset;
	int /*old_buf, */new_buf;

	offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;

	/*old_buf = si->overlay_mgr.auto_flip_reg & RADEON_OV0_SOFT_BUF_NUM_MASK;
	new_buf = old_buf == 0 ? 3 : 0;
	si->overlay_mgr.auto_flip_reg &= ~RADEON_OV0_SOFT_BUF_NUM_MASK;
	si->overlay_mgr.auto_flip_reg |= new_buf;*/
	new_buf = 0;

	// lock overlay registers
/*	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, RADEON_REG_LD_CTL_LOCK );

	// wait until register access is locked
	while( (INREG( regs, RADEON_OV0_REG_LOAD_CNTL)
		& RADEON_REG_LD_CTL_LOCK_READBACK) == 0 )
		;*/

	// setup new buffer
	/*OUTREG( regs,
		new_buf == 0 ? RADEON_OV0_VID_BUF_PITCH0_VALUE : RADEON_OV0_VID_BUF_PITCH1_VALUE,
		si->pending_overlay.on->buffer.bytes_per_row );*/
	OUTREG( regs,
		new_buf == 0 ? RADEON_OV0_VID_BUF0_BASE_ADRS : RADEON_OV0_VID_BUF3_BASE_ADRS,
		offset | (new_buf == 0 ? 0 : RADEON_VIF_BUF0_PITCH_SEL));

	// make changes visible
	si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;

	OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );

	// unlock overlay registers
//	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );

	ai->si->active_overlay.on = ai->si->pending_overlay.on;
#else
	shared_info *si = ai->si;
	uint32 offset;

	if ( ai->si->acc_dma )
	{
		// NOTE(review): START_IB/WRITE_IB_REG/SUBMIT_IB presumably queue
		// the register writes in an indirect buffer for the command
		// processor instead of touching MMIO directly - confirm in CP.h
		START_IB();

		// new buffer start = buffer base + offset into the buffer that was
		// stored in active_overlay.rel_offset
		offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;

		WRITE_IB_REG( RADEON_OV0_VID_BUF0_BASE_ADRS, offset);

		// toggle the edge-sensitive bit so the new address gets used
		si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
		WRITE_IB_REG( RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );

		SUBMIT_IB();
	} else {
		// direct register access: two registers are written below, so
		// two FIFO entries are requested first
		Radeon_WaitForFifo( ai, 2 );
		offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;

		OUTREG( ai->regs, RADEON_OV0_VID_BUF0_BASE_ADRS, offset);

		// toggle the edge-sensitive bit so the new address gets used
		si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
		OUTREG( ai->regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
	}
	// the pending buffer is now the one being displayed
	ai->si->active_overlay.on = ai->si->pending_overlay.on;
#endif
}


// get number
of pixels of overlay shown on virtual port 1034 static int getIntersectArea( 1035 accelerator_info *ai, overlay_window *ow, crtc_info *crtc ) 1036 { 1037 virtual_card *vc = ai->vc; 1038 int left, top, right, bottom; 1039 1040 left = ow->h_start - (vc->mode.h_display_start + crtc->rel_x); 1041 top = ow->v_start - (vc->mode.v_display_start + crtc->rel_y); 1042 right = left + ow->width; 1043 bottom = top + ow->height; 1044 1045 if( left < 0 ) 1046 left = 0; 1047 if( top < 0 ) 1048 top = 0; 1049 if( right > crtc->mode.timing.h_display ) 1050 right = crtc->mode.timing.h_display; 1051 if( bottom > crtc->mode.timing.v_display ) 1052 bottom = crtc->mode.timing.v_display; 1053 1054 if( right < left || bottom < top ) 1055 return 0; 1056 1057 return (right - left) * (bottom - top); 1058 } 1059 1060 1061 // update overlay, to be called whenever something in terms of 1062 // overlay have or can have been changed 1063 status_t Radeon_UpdateOverlay( 1064 accelerator_info *ai ) 1065 { 1066 virtual_card *vc = ai->vc; 1067 shared_info *si = ai->si; 1068 int crtc_idx; 1069 1070 float brightness = 0.0f; 1071 float contrast = 1.0f; 1072 float saturation = 1.0f; 1073 float hue = 0.0f; 1074 int32 ref = 0; 1075 1076 SHOW_FLOW0( 3, "" ); 1077 1078 // don't mess around with overlay of someone else 1079 if( !vc->uses_overlay ) 1080 return B_OK; 1081 1082 // make sure there really is an overlay 1083 if( si->pending_overlay.on == NULL ) 1084 return B_OK; 1085 1086 // verify that the overlay is still valid 1087 if( (uint32)si->pending_overlay.ot != si->overlay_mgr.token ) 1088 return B_BAD_VALUE; 1089 1090 if( vc->different_heads > 1 ) { 1091 int area0, area1; 1092 1093 // determine on which port most of the overlay is shown 1094 area0 = getIntersectArea( ai, &si->pending_overlay.ow, &si->crtc[0] ); 1095 area1 = getIntersectArea( ai, &si->pending_overlay.ow, &si->crtc[0] ); 1096 1097 SHOW_FLOW( 3, "area0=%d, area1=%d", area0, area1 ); 1098 1099 if( area0 >= area1 ) 1100 crtc_idx = 0; 1101 else 
1102 crtc_idx = 1; 1103 1104 } else if( vc->independant_heads > 1 ) { 1105 // both ports show the same, use "swap displays" to decide 1106 // where to show the overlay (to be improved as this flag isn't 1107 // really designed for that) 1108 if( vc->swap_displays ) 1109 crtc_idx = 1; 1110 else 1111 crtc_idx = 0; 1112 1113 } else { 1114 1115 // one crtc used only - pick the one that we use 1116 crtc_idx = vc->used_crtc[0] ? 0 : 1; 1117 } 1118 1119 si->pending_overlay.crtc_idx = crtc_idx; 1120 1121 // only update registers that have been changed to minimize work 1122 if( si->active_overlay.crtc_idx != si->pending_overlay.crtc_idx ) { 1123 Radeon_InitOverlay( ai, crtc_idx ); 1124 } 1125 1126 if( si->active_overlay.ob.space != si->pending_overlay.ob.space ) { 1127 Radeon_SetTransform( ai, brightness, contrast, saturation, hue, 0, 0, 0, ref ); 1128 } 1129 1130 if( memcmp( &si->active_overlay.ow, &si->pending_overlay.ow, sizeof( si->active_overlay.ow )) != 0 || 1131 memcmp( &si->active_overlay.ov, &si->pending_overlay.ov, sizeof( si->active_overlay.ov )) != 0 || 1132 si->active_overlay.h_display_start != vc->mode.h_display_start || 1133 si->active_overlay.v_display_start != vc->mode.v_display_start || 1134 si->active_overlay.ob.width != si->pending_overlay.ob.width || 1135 si->active_overlay.ob.height != si->pending_overlay.ob.height || 1136 si->active_overlay.ob.bytes_per_row != si->pending_overlay.ob.bytes_per_row ) 1137 Radeon_ShowOverlay( ai, crtc_idx ); 1138 1139 else if( si->active_overlay.on != si->pending_overlay.on ) 1140 Radeon_ReplaceOverlayBuffer( ai ); 1141 1142 SHOW_FLOW0( 3, "success" ); 1143 1144 return B_OK; 1145 } 1146