1 /* Nvidia TNT and GeForce Back End Scaler functions */ 2 /* Written by Rudolf Cornelissen 05/2002-9/2004 */ 3 4 #define MODULE_BIT 0x00000200 5 6 #include "std.h" 7 8 typedef struct move_overlay_info move_overlay_info; 9 10 struct move_overlay_info 11 { 12 uint32 hcoordv; /* left and right edges of video output window */ 13 uint32 vcoordv; /* top and bottom edges of video output window */ 14 uint32 hsrcstv; /* horizontal source start in source buffer (clipping) */ 15 uint32 v1srcstv; /* vertical source start in source buffer (clipping) */ 16 uint32 a1orgv; /* alternate source clipping via startadress of source buffer */ 17 }; 18 19 static void eng_bes_calc_move_overlay(move_overlay_info *moi); 20 static void eng_bes_program_move_overlay(move_overlay_info moi); 21 22 /* move the overlay output window in virtualscreens */ 23 /* Note: 24 * si->dm.h_display_start and si->dm.v_display_start determine where the new 25 * output window is located! */ 26 void eng_bes_move_overlay() 27 { 28 move_overlay_info moi; 29 30 /* abort if overlay is not active */ 31 if (!si->overlay.active) return; 32 33 eng_bes_calc_move_overlay(&moi); 34 eng_bes_program_move_overlay(moi); 35 } 36 37 static void eng_bes_calc_move_overlay(move_overlay_info *moi) 38 { 39 /* misc used variables */ 40 uint16 temp1, temp2; 41 /* visible screen window in virtual workspaces */ 42 uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend; 43 44 /* do 'overlay follow head' in dualhead modes on dualhead cards */ 45 if (si->ps.secondary_head) 46 { 47 switch (si->dm.flags & DUALHEAD_BITS) 48 { 49 case DUALHEAD_ON: 50 case DUALHEAD_SWITCH: 51 if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) < 52 (si->dm.h_display_start + si->dm.timing.h_display)) 53 eng_bes_to_crtc(si->crtc_switch_mode); 54 else 55 eng_bes_to_crtc(!si->crtc_switch_mode); 56 break; 57 default: 58 eng_bes_to_crtc(si->crtc_switch_mode); 59 break; 60 } 61 } 62 63 /* the BES does not respect virtual_workspaces, but adheres to CRTC 64 * constraints only */ 65 crtc_hstart = si->dm.h_display_start; 66 /* make dualhead stretch and switch mode work while we're at it.. */ 67 if (si->overlay.crtc) 68 { 69 crtc_hstart += si->dm.timing.h_display; 70 } 71 72 /* horizontal end is the first position beyond the displayed range on the CRTC */ 73 crtc_hend = crtc_hstart + si->dm.timing.h_display; 74 crtc_vstart = si->dm.v_display_start; 75 /* vertical end is the first position beyond the displayed range on the CRTC */ 76 crtc_vend = crtc_vstart + si->dm.timing.v_display; 77 78 79 /**************************************** 80 *** setup all edges of output window *** 81 ****************************************/ 82 83 /* setup left and right edges of output window */ 84 moi->hcoordv = 0; 85 /* left edge coordinate of output window, must be inside desktop */ 86 /* clipping on the left side */ 87 if (si->overlay.ow.h_start < crtc_hstart) 88 { 89 temp1 = 0; 90 } 91 else 92 { 93 /* clipping on the right side */ 94 if (si->overlay.ow.h_start >= (crtc_hend - 1)) 95 { 96 /* width < 2 is not allowed */ 97 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff; 98 } 99 else 100 /* no clipping here */ 101 { 102 temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff; 103 } 104 } 105 moi->hcoordv |= temp1 << 16; 106 /* right edge coordinate of output window, must be inside desktop */ 107 /* width < 2 is not allowed */ 108 if (si->overlay.ow.width < 2) 109 { 110 temp2 = (temp1 + 1) & 0x7ff; 111 } 112 else 113 { 114 /* clipping on the right side */ 115 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1)) 116 { 117 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff; 118 } 119 else 120 { 121 /* clipping on the left side */ 122 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 123 { 124 /* width < 2 is not allowed */ 125 temp2 = 1; 126 } 127 else 128 /* no clipping here */ 129 { 130 temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff; 131 } 132 } 133 } 134 moi->hcoordv |= temp2 << 0; 135 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2)); 136 137 /* setup top and bottom edges of output window */ 138 moi->vcoordv = 0; 139 /* top edge coordinate of output window, must be inside desktop */ 140 /* clipping on the top side */ 141 if (si->overlay.ow.v_start < crtc_vstart) 142 { 143 temp1 = 0; 144 } 145 else 146 { 147 /* clipping on the bottom side */ 148 if (si->overlay.ow.v_start >= (crtc_vend - 1)) 149 { 150 /* height < 2 is not allowed */ 151 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff; 152 } 153 else 154 /* no clipping here */ 155 { 156 temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff; 157 } 158 } 159 moi->vcoordv |= temp1 << 16; 160 /* bottom edge coordinate of output window, must be inside desktop */ 161 /* height < 2 is not allowed */ 162 if (si->overlay.ow.height < 2) 163 { 164 temp2 = (temp1 + 1) & 0x7ff; 165 } 166 else 167 { 168 /* clipping on the bottom side */ 169 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1)) 170 { 171 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff; 172 } 173 else 174 { 175 /* clipping on the top side */ 176 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 177 { 178 /* height < 2 is not allowed */ 179 temp2 = 1; 180 } 181 else 182 /* no clipping here */ 183 { 184 temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff; 185 } 186 } 187 } 188 moi->vcoordv |= temp2 << 0; 189 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2)); 190 191 192 /********************************* 193 *** setup horizontal clipping *** 194 *********************************/ 195 196 /* Setup horizontal source start: first (sub)pixel contributing to output picture */ 197 /* Note: 198 * The method is to calculate, based on 1:1 scaling, based on the output window. 199 * After this is done, include the scaling factor so you get a value based on the input bitmap. 200 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed. 201 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */ 202 /* Note also: 203 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 204 moi->hsrcstv = 0; 205 /* check for destination horizontal clipping at left side */ 206 if (si->overlay.ow.h_start < crtc_hstart) 207 { 208 /* check if entire destination picture is clipping left: 209 * (2 pixels will be clamped onscreen at least) */ 210 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 211 { 212 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */ 213 moi->hsrcstv += (si->overlay.ow.width - 2); 214 } 215 else 216 { 217 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */ 218 moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start); 219 } 220 LOG(4,("Overlay: clipping left...\n")); 221 222 /* The calculated value is based on scaling = 1x. So we now compensate for scaling. 223 * Note that this also already takes care of aligning the value to the BES register! */ 224 moi->hsrcstv *= si->overlay.h_ifactor; 225 } 226 /* take zoom into account */ 227 moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16; 228 /* AND below required by hardware */ 229 moi->hsrcstv &= 0x03fffffc; 230 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536)); 231 232 233 /******************************* 234 *** setup vertical clipping *** 235 *******************************/ 236 237 /* calculate inputbitmap origin adress */ 238 moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer); 239 moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer); 240 LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv)); 241 242 /* Setup vertical source start: first (sub)pixel contributing to output picture. */ 243 /* Note: 244 * The method is to calculate, based on 1:1 scaling, based on the output window. 245 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap. 246 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */ 247 /* Note also: 248 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 249 250 moi->v1srcstv = 0; 251 /* check for destination vertical clipping at top side */ 252 if (si->overlay.ow.v_start < crtc_vstart) 253 { 254 /* check if entire destination picture is clipping at top: 255 * (2 pixels will be clamped onscreen at least) */ 256 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 257 { 258 /* increase 'number of clipping pixels' with 'fixed value': 259 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */ 260 moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor; 261 /* on pre-NV10 we need to do clipping in the source 262 * bitmap because no seperate clipping registers exist... */ 263 if (si->ps.card_arch < NV10A) 264 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 265 } 266 else 267 { 268 /* increase 'first contributing pixel' with: 269 * number of destination picture clipping pixels * inverse scaling factor */ 270 moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor; 271 /* on pre-NV10 we need to do clipping in the source 272 * bitmap because no seperate clipping registers exist... */ 273 if (si->ps.card_arch < NV10A) 274 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 275 } 276 LOG(4,("Overlay: clipping at top...\n")); 277 } 278 /* take zoom into account */ 279 moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16); 280 if (si->ps.card_arch < NV10A) 281 { 282 moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row); 283 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv)); 284 } 285 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536)); 286 287 /* AND below is probably required by hardware. */ 288 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */ 289 moi->a1orgv &= 0xfffffff0; 290 } 291 292 static void eng_bes_program_move_overlay(move_overlay_info moi) 293 { 294 /************************************* 295 *** sync to BES (Back End Scaler) *** 296 *************************************/ 297 298 /* Done in card hardware: 299 * double buffered registers + trigger if programming complete feature. */ 300 301 302 /************************************** 303 *** actually program the registers *** 304 **************************************/ 305 306 if (si->ps.card_arch < NV10A) 307 { 308 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 309 BESW(NV04_OE_STATE, 0x00000000); 310 /* select buffer 0 as active (b16) */ 311 BESW(NV04_SU_STATE, 0x00000000); 312 /* unknown (no effect?) */ 313 BESW(NV04_RM_STATE, 0x00000000); 314 /* setup clipped(!) buffer startadress in RAM */ 315 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 316 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 317 /* (program both buffers to prevent sync distortions) */ 318 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 319 moi.a1orgv += ((moi.hsrcstv >> 16) * 2); 320 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 321 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03)); 322 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03)); 323 /* setup output window position */ 324 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 325 /* setup output window size */ 326 BESW(NV04_DSTSIZE, ( 327 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 328 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 329 )); 330 /* select buffer 1 as active (b16) */ 331 BESW(NV04_SU_STATE, 0x00010000); 332 } 333 else 334 { 335 /* >= NV10A */ 336 337 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 338 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff)); 339 /* setup output window position */ 340 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 341 /* setup output window size */ 342 BESW(NV10_0DSTSIZE, ( 343 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 344 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 345 )); 346 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 347 /* This also triggers activation of programmed values (double buffered registers feature) */ 348 BESW(NV10_BUFSEL, 0x00000001); 349 } 350 } 351 352 status_t eng_bes_to_crtc(bool crtc) 353 { 354 if (si->ps.secondary_head) 355 { 356 if (crtc) 357 { 358 LOG(4,("Overlay: switching overlay to CRTC2\n")); 359 /* switch overlay engine to CRTC2 */ 360 ENG_RG32(RG32_FUNCSEL) &= ~0x00001000; 361 ENG_RG32(RG32_2FUNCSEL) |= 0x00001000; 362 si->overlay.crtc = !si->crtc_switch_mode; 363 } 364 else 365 { 366 LOG(4,("Overlay: switching overlay to CRTC1\n")); 367 /* switch overlay engine to CRTC1 */ 368 ENG_RG32(RG32_2FUNCSEL) &= ~0x00001000; 369 ENG_RG32(RG32_FUNCSEL) |= 0x00001000; 370 si->overlay.crtc = si->crtc_switch_mode; 371 } 372 return B_OK; 373 } 374 else 375 { 376 return B_ERROR; 377 } 378 } 379 380 status_t eng_bes_init() 381 { 382 if (si->ps.card_arch < NV10A) 383 { 384 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 385 BESW(NV04_INTE, 0x00000000); 386 387 /* setup saturation to be 'neutral' */ 388 BESW(NV04_SAT, 0x00000000); 389 /* setup RGB brightness to be 'neutral' */ 390 BESW(NV04_RED_AMP, 0x00000069); 391 BESW(NV04_GRN_AMP, 0x0000003e); 392 BESW(NV04_BLU_AMP, 0x00000089); 393 394 /* setup fifo for fetching data */ 395 BESW(NV04_FIFOBURL, 0x00000003); 396 BESW(NV04_FIFOTHRS, 0x00000038); 397 398 /* unknown, but needed (registers only have b0 implemented) */ 399 /* (program both buffers to prevent sync distortions) */ 400 BESW(NV04_0OFFSET, 0x00000000); 401 BESW(NV04_1OFFSET, 0x00000000); 402 } 403 else 404 { 405 /* >= NV10A */ 406 407 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 408 BESW(NV10_INTE, 0x00000000); 409 /* shut off GeForce4MX MPEG2 decoder */ 410 BESW(DEC_GENCTRL, 0x00000000); 411 /* setup BES memory-range mask */ 412 BESW(NV10_0MEMMASK, (si->ps.memory_size - 1)); 413 /* unknown, but needed */ 414 BESW(NV10_0OFFSET, 0x00000000); 415 416 /* setup brightness, contrast and saturation to be 'neutral' */ 417 BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000)); 418 BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000)); 419 } 420 421 return B_OK; 422 } 423 424 status_t eng_configure_bes 425 (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset) 426 { 427 /* yuy2 (4:2:2) colorspace calculations */ 428 429 /* Note: 430 * in BeOS R5.0.3 and DANO: 431 * 'ow->offset_xxx' is always 0, so not used; 432 * 'ow->width' and 'ow->height' are the output window size: does not change 433 * if window is clipping; 434 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output 435 * window. These values can be negative: this means the window is clipping 436 * at the left or the top of the display, respectively. */ 437 438 /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually 439 * displayed on screen. This is used for the 'hardware zoom' function. */ 440 441 /* output window position and clipping info for source buffer */ 442 move_overlay_info moi; 443 /* calculated BES register values */ 444 uint32 hiscalv, viscalv; 445 /* interval representation, used for scaling calculations */ 446 uint16 intrep; 447 /* inverse scaling factor, used for source positioning */ 448 uint32 ifactor; 449 /* copy of overlay view which has checked valid values */ 450 overlay_view my_ov; 451 452 453 /************************************************************************************** 454 *** copy, check and limit if needed the user-specified view into the intput bitmap *** 455 **************************************************************************************/ 456 my_ov = *ov; 457 /* check for valid 'coordinates' */ 458 if (my_ov.width == 0) my_ov.width++; 459 if (my_ov.height == 0) my_ov.height++; 460 if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 461 my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1); 462 if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 463 my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1); 464 if (my_ov.v_start > (ob->height - 1)) 465 my_ov.v_start = (ob->height - 1); 466 if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1)) 467 my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1); 468 469 LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n", 470 my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height)); 471 472 /* save for eng_bes_calc_move_overlay() */ 473 si->overlay.ow = *ow; 474 si->overlay.ob = *ob; 475 si->overlay.my_ov = my_ov; 476 477 478 /******************************** 479 *** setup horizontal scaling *** 480 ********************************/ 481 LOG(4,("Overlay: total input picture width = %d, height = %d\n", 482 (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height)); 483 LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height)); 484 485 /* determine interval representation value, taking zoom into account */ 486 if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) 487 { 488 /* horizontal filtering is ON */ 489 if ((my_ov.width == ow->width) | (ow->width < 2)) 490 { 491 /* no horizontal scaling used, OR destination width < 2 */ 492 intrep = 0; 493 } 494 else 495 { 496 intrep = 1; 497 } 498 } 499 else 500 { 501 /* horizontal filtering is OFF */ 502 if ((ow->width < my_ov.width) & (ow->width >= 2)) 503 { 504 /* horizontal downscaling used AND destination width >= 2 */ 505 intrep = 1; 506 } 507 else 508 { 509 intrep = 0; 510 } 511 } 512 LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep)); 513 514 /* calculate inverse horizontal scaling factor, taking zoom into account */ 515 /* standard scaling formula: */ 516 ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep); 517 518 /* correct factor to prevent most-right visible 'line' from distorting */ 519 ifactor -= (1 << 2); 520 hiscalv = ifactor; 521 /* save for eng_bes_calc_move_overlay() */ 522 si->overlay.h_ifactor = ifactor; 523 LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor)); 524 525 /* check scaling factor (and modify if needed) to be within scaling limits */ 526 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 527 if (hiscalv < 0x00002000) 528 { 529 /* (non-inverse) factor too large, set factor to max. valid value */ 530 hiscalv = 0x00002000; 531 LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv)); 532 } 533 switch (si->ps.card_arch) 534 { 535 case NV04A: 536 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 537 * (16bit register with 0.11 format value) */ 538 if (hiscalv > 0x0000ffff) 539 { 540 /* (non-inverse) factor too small, set factor to min. valid value */ 541 hiscalv = 0x0000ffff; 542 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5))); 543 } 544 break; 545 case NV30A: 546 case NV40A: 547 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */ 548 if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31)) 549 { 550 /* (non-inverse) factor too small, set factor to min. valid value */ 551 hiscalv = (2 << 16); 552 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 553 } 554 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 555 * So let it fall through... */ 556 if (si->ps.card_type != NV31) break; 557 default: 558 /* the rest has a downscaling limit of 0.125 */ 559 if (hiscalv > (8 << 16)) 560 { 561 /* (non-inverse) factor too small, set factor to min. valid value */ 562 hiscalv = (8 << 16); 563 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 564 } 565 break; 566 } 567 /* AND below is required by hardware */ 568 hiscalv &= 0x001ffffc; 569 570 571 /****************************** 572 *** setup vertical scaling *** 573 ******************************/ 574 575 /* determine interval representation value, taking zoom into account */ 576 if (ow->flags & B_OVERLAY_VERTICAL_FILTERING) 577 { 578 /* vertical filtering is ON */ 579 if ((my_ov.height == ow->height) | (ow->height < 2)) 580 { 581 /* no vertical scaling used, OR destination height < 2 */ 582 intrep = 0; 583 } 584 else 585 { 586 intrep = 1; 587 } 588 } 589 else 590 { 591 /* vertical filtering is OFF */ 592 if ((ow->height < my_ov.height) & (ow->height >= 2)) 593 { 594 /* vertical downscaling used AND destination height >= 2 */ 595 intrep = 1; 596 } 597 else 598 { 599 intrep = 0; 600 } 601 } 602 LOG(4,("Overlay: vertical interval representation value is %d\n",intrep)); 603 604 /* calculate inverse vertical scaling factor, taking zoom into account */ 605 /* standard scaling formula: */ 606 ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep); 607 608 /* correct factor to prevent lowest visible line from distorting */ 609 ifactor -= (1 << 2); 610 LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor)); 611 612 /* preserve ifactor for source positioning calculations later on */ 613 viscalv = ifactor; 614 /* save for eng_bes_calc_move_overlay() */ 615 si->overlay.v_ifactor = ifactor; 616 617 /* check scaling factor (and modify if needed) to be within scaling limits */ 618 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 619 if (viscalv < 0x00002000) 620 { 621 /* (non-inverse) factor too large, set factor to max. valid value */ 622 viscalv = 0x00002000; 623 LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv)); 624 } 625 switch (si->ps.card_arch) 626 { 627 case NV04A: 628 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 629 * (16bit register with 0.11 format value) */ 630 if (viscalv > 0x0000ffff) 631 { 632 /* (non-inverse) factor too small, set factor to min. valid value */ 633 viscalv = 0x0000ffff; 634 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5))); 635 } 636 break; 637 case NV30A: 638 case NV40A: 639 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */ 640 if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31)) 641 { 642 /* (non-inverse) factor too small, set factor to min. valid value */ 643 viscalv = (2 << 16); 644 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 645 } 646 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 647 * So let it fall through... */ 648 if (si->ps.card_type != NV31) break; 649 default: 650 /* the rest has a downscaling limit of 0.125 */ 651 if (viscalv > (8 << 16)) 652 { 653 /* (non-inverse) factor too small, set factor to min. valid value */ 654 viscalv = (8 << 16); 655 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 656 } 657 break; 658 } 659 /* AND below is required by hardware */ 660 viscalv &= 0x001ffffc; 661 662 663 /******************************************************************************** 664 *** setup all edges of output window, setup horizontal and vertical clipping *** 665 ********************************************************************************/ 666 eng_bes_calc_move_overlay(&moi); 667 668 669 /***************************** 670 *** log color keying info *** 671 *****************************/ 672 673 LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n", 674 ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value)); 675 LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n", 676 ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask)); 677 678 679 /***************** 680 *** log flags *** 681 *****************/ 682 683 LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags)); 684 /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */ 685 686 687 /************************************* 688 *** sync to BES (Back End Scaler) *** 689 *************************************/ 690 691 /* Done in card hardware: 692 * double buffered registers + trigger if programming complete feature. */ 693 694 695 /************************************** 696 *** actually program the registers *** 697 **************************************/ 698 699 if (si->ps.card_arch < NV10A) 700 { 701 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 702 BESW(NV04_OE_STATE, 0x00000000); 703 /* select buffer 0 as active (b16) */ 704 BESW(NV04_SU_STATE, 0x00000000); 705 /* unknown (no effect?) */ 706 BESW(NV04_RM_STATE, 0x00000000); 707 /* setup clipped(!) buffer startadress in RAM */ 708 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 709 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 710 /* (program both buffers to prevent sync distortions) */ 711 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 712 moi.a1orgv += ((moi.hsrcstv >> 16) * 2); 713 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 714 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03)); 715 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03)); 716 /* setup buffer source pitch including slopspace (in bytes). 717 * Note: 718 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */ 719 /* (program both buffers to prevent sync distortions) */ 720 BESW(NV04_0SRCPTCH, (ob->width * 2)); 721 BESW(NV04_1SRCPTCH, (ob->width * 2)); 722 /* setup output window position */ 723 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 724 /* setup output window size */ 725 BESW(NV04_DSTSIZE, ( 726 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 727 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 728 )); 729 /* setup horizontal and vertical scaling */ 730 BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5))); 731 /* enable vertical filtering (b0) */ 732 BESW(NV04_CTRL_V, 0x00000001); 733 /* enable horizontal filtering (no effect?) */ 734 BESW(NV04_CTRL_H, 0x00000111); 735 736 /* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */ 737 BESW(NV04_GENCTRL, 0x00000111); 738 /* select buffer 1 as active (b16) */ 739 BESW(NV04_SU_STATE, 0x00010000); 740 741 /************************** 742 *** setup color keying *** 743 **************************/ 744 745 /* setup colorkeying */ 746 switch(si->dm.space) 747 { 748 case B_RGB15_LITTLE: 749 BESW(NV04_COLKEY, ( 750 ((ow->blue.value & ow->blue.mask) << 0) | 751 ((ow->green.value & ow->green.mask) << 5) | 752 ((ow->red.value & ow->red.mask) << 10) | 753 ((ow->alpha.value & ow->alpha.mask) << 15) 754 )); 755 break; 756 case B_RGB16_LITTLE: 757 BESW(NV04_COLKEY, ( 758 ((ow->blue.value & ow->blue.mask) << 0) | 759 ((ow->green.value & ow->green.mask) << 5) | 760 ((ow->red.value & ow->red.mask) << 11) 761 /* this space has no alpha bits */ 762 )); 763 break; 764 case B_CMAP8: 765 case B_RGB32_LITTLE: 766 default: 767 BESW(NV04_COLKEY, ( 768 ((ow->blue.value & ow->blue.mask) << 0) | 769 ((ow->green.value & ow->green.mask) << 8) | 770 ((ow->red.value & ow->red.mask) << 16) | 771 ((ow->alpha.value & ow->alpha.mask) << 24) 772 )); 773 break; 774 } 775 } 776 else 777 { 778 /* >= NV10A */ 779 780 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 781 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff)); 782 /* setup buffersize */ 783 //fixme if needed: width must be even officially... 784 BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width)); 785 /* setup source pitch including slopspace (in bytes), 786 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */ 787 /* Note: 788 * source pitch granularity = 32 pixels on GeForce cards!! */ 789 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24))); 790 /* setup output window position */ 791 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 792 /* setup output window size */ 793 BESW(NV10_0DSTSIZE, ( 794 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 795 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 796 )); 797 /* setup horizontal scaling */ 798 BESW(NV10_0ISCALH, (hiscalv << 4)); 799 /* setup vertical scaling */ 800 BESW(NV10_0ISCALV, (viscalv << 4)); 801 /* setup (unclipped!) buffer startadress in RAM */ 802 BESW(NV10_0BUFADR, moi.a1orgv); 803 /* enable BES (b0 = 0) */ 804 BESW(NV10_GENCTRL, 0x00000000); 805 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 806 /* This also triggers activation of programmed values (double buffered registers feature) */ 807 BESW(NV10_BUFSEL, 0x00000001); 808 809 /************************** 810 *** setup color keying *** 811 **************************/ 812 813 /* setup colorkeying */ 814 switch(si->dm.space) 815 { 816 case B_RGB15_LITTLE: 817 BESW(NV10_COLKEY, ( 818 ((ow->blue.value & ow->blue.mask) << 0) | 819 ((ow->green.value & ow->green.mask) << 5) | 820 ((ow->red.value & ow->red.mask) << 10) | 821 ((ow->alpha.value & ow->alpha.mask) << 15) 822 )); 823 break; 824 case B_RGB16_LITTLE: 825 BESW(NV10_COLKEY, ( 826 ((ow->blue.value & ow->blue.mask) << 0) | 827 ((ow->green.value & ow->green.mask) << 5) | 828 ((ow->red.value & ow->red.mask) << 11) 829 /* this space has no alpha bits */ 830 )); 831 break; 832 case B_CMAP8: 833 case B_RGB32_LITTLE: 834 default: 835 BESW(NV10_COLKEY, ( 836 ((ow->blue.value & ow->blue.mask) << 0) | 837 ((ow->green.value & ow->green.mask) << 8) | 838 ((ow->red.value & ow->red.mask) << 16) | 839 ((ow->alpha.value & ow->alpha.mask) << 24) 840 )); 841 break; 842 } 843 } 844 845 /* note that overlay is in use (for eng_bes_move_overlay()) */ 846 si->overlay.active = true; 847 848 return B_OK; 849 } 850 851 status_t eng_release_bes() 852 { 853 if (si->ps.card_arch < NV10A) 854 { 855 /* setup BES control: disable scaler (b0 = 0) */ 856 BESW(NV04_GENCTRL, 0x00000000); 857 } 858 else 859 { 860 /* setup BES control: disable scaler (b0 = 1) */ 861 BESW(NV10_GENCTRL, 0x00000001); 862 } 863 864 /* note that overlay is not in use (for eng_bes_move_overlay()) */ 865 si->overlay.active = false; 866 867 return B_OK; 868 } 869