1 /* Nvidia TNT and GeForce Back End Scaler functions */ 2 /* Written by Rudolf Cornelissen 05/2002-5/2009 */ 3 4 #define MODULE_BIT 0x00000200 5 6 #include "nv_std.h" 7 8 typedef struct move_overlay_info move_overlay_info; 9 10 struct move_overlay_info 11 { 12 uint32 hcoordv; /* left and right edges of video output window */ 13 uint32 vcoordv; /* top and bottom edges of video output window */ 14 uint32 hsrcstv; /* horizontal source start in source buffer (clipping) */ 15 uint32 v1srcstv; /* vertical source start in source buffer (clipping) */ 16 uintptr_t a1orgv; /* alternate source clipping via startadress of source buffer */ 17 }; 18 19 static void nv_bes_calc_move_overlay(move_overlay_info *moi); 20 static void nv_bes_program_move_overlay(move_overlay_info moi); 21 22 /* move the overlay output window in virtualscreens */ 23 /* Note: 24 * si->dm.h_display_start and si->dm.v_display_start determine where the new 25 * output window is located! */ 26 void nv_bes_move_overlay() 27 { 28 move_overlay_info moi; 29 30 /* abort if overlay is not active */ 31 if (!si->overlay.active) return; 32 33 nv_bes_calc_move_overlay(&moi); 34 nv_bes_program_move_overlay(moi); 35 } 36 37 static void nv_bes_calc_move_overlay(move_overlay_info *moi) 38 { 39 /* misc used variables */ 40 uint16 temp1, temp2; 41 /* visible screen window in virtual workspaces */ 42 uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend; 43 44 /* do 'overlay follow head' in dualhead modes on dualhead cards */ 45 if (si->ps.secondary_head) 46 { 47 switch (si->dm.flags & DUALHEAD_BITS) 48 { 49 case DUALHEAD_ON: 50 case DUALHEAD_SWITCH: 51 if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) < 52 (si->dm.h_display_start + si->dm.timing.h_display)) 53 nv_bes_to_crtc(si->crtc_switch_mode); 54 else 55 nv_bes_to_crtc(!si->crtc_switch_mode); 56 break; 57 default: 58 nv_bes_to_crtc(si->crtc_switch_mode); 59 break; 60 } 61 } 62 63 /* the BES does not respect virtual_workspaces, but adheres to CRTC 64 * constraints only */ 65 crtc_hstart = si->dm.h_display_start; 66 /* make dualhead stretch and switch mode work while we're at it.. */ 67 if (si->overlay.crtc) 68 { 69 crtc_hstart += si->dm.timing.h_display; 70 } 71 72 /* horizontal end is the first position beyond the displayed range on the CRTC */ 73 crtc_hend = crtc_hstart + si->dm.timing.h_display; 74 crtc_vstart = si->dm.v_display_start; 75 /* vertical end is the first position beyond the displayed range on the CRTC */ 76 crtc_vend = crtc_vstart + si->dm.timing.v_display; 77 78 79 /**************************************** 80 *** setup all edges of output window *** 81 ****************************************/ 82 83 /* setup left and right edges of output window */ 84 moi->hcoordv = 0; 85 /* left edge coordinate of output window, must be inside desktop */ 86 /* clipping on the left side */ 87 if (si->overlay.ow.h_start < crtc_hstart) 88 { 89 temp1 = 0; 90 } 91 else 92 { 93 /* clipping on the right side */ 94 if (si->overlay.ow.h_start >= (crtc_hend - 1)) 95 { 96 /* width < 2 is not allowed */ 97 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff; 98 } 99 else 100 /* no clipping here */ 101 { 102 temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff; 103 } 104 } 105 moi->hcoordv |= temp1 << 16; 106 /* right edge coordinate of output window, must be inside desktop */ 107 /* width < 2 is not allowed */ 108 if (si->overlay.ow.width < 2) 109 { 110 temp2 = (temp1 + 1) & 0x7ff; 111 } 112 else 113 { 114 /* clipping on the right side */ 115 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1)) 116 { 117 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff; 118 } 119 else 120 { 121 /* clipping on the left side */ 122 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 123 { 124 /* width < 2 is not allowed */ 125 temp2 = 1; 126 } 127 else 128 /* no clipping here */ 129 { 130 temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff; 131 } 132 } 133 } 134 moi->hcoordv |= temp2 << 0; 135 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2)); 136 137 /* setup top and bottom edges of output window */ 138 moi->vcoordv = 0; 139 /* top edge coordinate of output window, must be inside desktop */ 140 /* clipping on the top side */ 141 if (si->overlay.ow.v_start < crtc_vstart) 142 { 143 temp1 = 0; 144 } 145 else 146 { 147 /* clipping on the bottom side */ 148 if (si->overlay.ow.v_start >= (crtc_vend - 1)) 149 { 150 /* height < 2 is not allowed */ 151 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff; 152 } 153 else 154 /* no clipping here */ 155 { 156 temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff; 157 } 158 } 159 moi->vcoordv |= temp1 << 16; 160 /* bottom edge coordinate of output window, must be inside desktop */ 161 /* height < 2 is not allowed */ 162 if (si->overlay.ow.height < 2) 163 { 164 temp2 = (temp1 + 1) & 0x7ff; 165 } 166 else 167 { 168 /* clipping on the bottom side */ 169 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1)) 170 { 171 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff; 172 } 173 else 174 { 175 /* clipping on the top side */ 176 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 177 { 178 /* height < 2 is not allowed */ 179 temp2 = 1; 180 } 181 else 182 /* no clipping here */ 183 { 184 temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff; 185 } 186 } 187 } 188 moi->vcoordv |= temp2 << 0; 189 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2)); 190 191 192 /********************************* 193 *** setup horizontal clipping *** 194 *********************************/ 195 196 /* Setup horizontal source start: first (sub)pixel contributing to output picture */ 197 /* Note: 198 * The method is to calculate, based on 1:1 scaling, based on the output window. 199 * After this is done, include the scaling factor so you get a value based on the input bitmap. 200 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed. 201 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */ 202 /* Note also: 203 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 204 moi->hsrcstv = 0; 205 /* check for destination horizontal clipping at left side */ 206 if (si->overlay.ow.h_start < crtc_hstart) 207 { 208 /* check if entire destination picture is clipping left: 209 * (2 pixels will be clamped onscreen at least) */ 210 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 211 { 212 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */ 213 moi->hsrcstv += (si->overlay.ow.width - 2); 214 } 215 else 216 { 217 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */ 218 moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start); 219 } 220 LOG(4,("Overlay: clipping left...\n")); 221 222 /* The calculated value is based on scaling = 1x. So we now compensate for scaling. 223 * Note that this also already takes care of aligning the value to the BES register! */ 224 moi->hsrcstv *= si->overlay.h_ifactor; 225 } 226 /* take zoom into account */ 227 moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16; 228 /* AND below required by hardware (> 1024 support confirmed on all cards) */ 229 moi->hsrcstv &= 0x07fffffc; 230 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536)); 231 232 233 /******************************* 234 *** setup vertical clipping *** 235 *******************************/ 236 237 /* calculate inputbitmap origin adress */ 238 moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer); 239 moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer); 240 LOG(4, ("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv)); 241 242 /* Setup vertical source start: first (sub)pixel contributing to output picture. */ 243 /* Note: 244 * The method is to calculate, based on 1:1 scaling, based on the output window. 245 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap. 246 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */ 247 /* Note also: 248 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 249 250 moi->v1srcstv = 0; 251 /* check for destination vertical clipping at top side */ 252 if (si->overlay.ow.v_start < crtc_vstart) 253 { 254 /* check if entire destination picture is clipping at top: 255 * (2 pixels will be clamped onscreen at least) */ 256 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 257 { 258 /* increase 'number of clipping pixels' with 'fixed value': 259 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */ 260 moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor; 261 /* on pre-NV10 we need to do clipping in the source 262 * bitmap because no seperate clipping registers exist... */ 263 if (si->ps.card_arch < NV10A) 264 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 265 } 266 else 267 { 268 /* increase 'first contributing pixel' with: 269 * number of destination picture clipping pixels * inverse scaling factor */ 270 moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor; 271 /* on pre-NV10 we need to do clipping in the source 272 * bitmap because no seperate clipping registers exist... */ 273 if (si->ps.card_arch < NV10A) 274 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 275 } 276 LOG(4,("Overlay: clipping at top...\n")); 277 } 278 /* take zoom into account */ 279 moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16); 280 if (si->ps.card_arch < NV10A) 281 { 282 moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row); 283 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv)); 284 } 285 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536)); 286 287 /* AND below is probably required by hardware. */ 288 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */ 289 moi->a1orgv &= 0xfffffff0; 290 } 291 292 static void nv_bes_program_move_overlay(move_overlay_info moi) 293 { 294 /************************************* 295 *** sync to BES (Back End Scaler) *** 296 *************************************/ 297 298 /* Done in card hardware: 299 * double buffered registers + trigger if programming complete feature. */ 300 301 302 /************************************** 303 *** actually program the registers *** 304 **************************************/ 305 306 if (si->ps.card_arch < NV10A) 307 { 308 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 309 BESW(NV04_OE_STATE, 0x00000000); 310 /* select buffer 0 as active (b16) */ 311 BESW(NV04_SU_STATE, 0x00000000); 312 /* unknown (no effect?) */ 313 BESW(NV04_RM_STATE, 0x00000000); 314 /* setup clipped(!) buffer startadress in RAM */ 315 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 316 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 317 /* (program both buffers to prevent sync distortions) */ 318 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 319 moi.a1orgv += ((moi.hsrcstv >> 16) * 2); 320 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 321 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03)); 322 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03)); 323 /* setup output window position */ 324 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 325 /* setup output window size */ 326 BESW(NV04_DSTSIZE, ( 327 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 328 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 329 )); 330 /* select buffer 1 as active (b16) */ 331 BESW(NV04_SU_STATE, 0x00010000); 332 } 333 else 334 { 335 /* >= NV10A */ 336 337 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 338 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff)); 339 /* setup output window position */ 340 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 341 /* setup output window size */ 342 BESW(NV10_0DSTSIZE, ( 343 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 344 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 345 )); 346 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 347 /* This also triggers activation of programmed values (double buffered registers feature) */ 348 BESW(NV10_BUFSEL, 0x00000001); 349 } 350 } 351 352 status_t nv_bes_to_crtc(bool crtc) 353 { 354 if (si->ps.secondary_head) 355 { 356 if (crtc) 357 { 358 LOG(4,("Overlay: switching overlay to CRTC2\n")); 359 /* switch overlay engine to CRTC2 */ 360 NV_REG32(NV32_FUNCSEL) &= ~0x00001000; 361 NV_REG32(NV32_2FUNCSEL) |= 0x00001000; 362 si->overlay.crtc = !si->crtc_switch_mode; 363 } 364 else 365 { 366 LOG(4,("Overlay: switching overlay to CRTC1\n")); 367 /* switch overlay engine to CRTC1 */ 368 NV_REG32(NV32_2FUNCSEL) &= ~0x00001000; 369 NV_REG32(NV32_FUNCSEL) |= 0x00001000; 370 si->overlay.crtc = si->crtc_switch_mode; 371 } 372 return B_OK; 373 } 374 else 375 { 376 return B_ERROR; 377 } 378 } 379 380 status_t nv_bes_init() 381 { 382 if (si->ps.card_arch < NV10A) 383 { 384 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 385 BESW(NV04_INTE, 0x00000000); 386 387 /* setup saturation to be 'neutral' */ 388 BESW(NV04_SAT, 0x00000000); 389 /* setup RGB brightness to be 'neutral' */ 390 BESW(NV04_RED_AMP, 0x00000069); 391 BESW(NV04_GRN_AMP, 0x0000003e); 392 BESW(NV04_BLU_AMP, 0x00000089); 393 394 /* setup fifo for fetching data */ 395 BESW(NV04_FIFOBURL, 0x00000003); 396 BESW(NV04_FIFOTHRS, 0x00000038); 397 398 /* unknown, but needed (registers only have b0 implemented) */ 399 /* (program both buffers to prevent sync distortions) */ 400 BESW(NV04_0OFFSET, 0x00000000); 401 BESW(NV04_1OFFSET, 0x00000000); 402 } 403 else 404 { 405 /* >= NV10A */ 406 407 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 408 BESW(NV10_INTE, 0x00000000); 409 /* shut off GeForce4MX MPEG2 decoder */ 410 BESW(DEC_GENCTRL, 0x00000000); 411 /* setup BES memory-range mask */ 412 BESW(NV10_0MEMMASK, (si->ps.memory_size - 1)); 413 /* unknown, but needed */ 414 BESW(NV10_0OFFSET, 0x00000000); 415 416 /* setup brightness, contrast and saturation to be 'neutral' */ 417 BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000)); 418 BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000)); 419 } 420 421 /* make sure the engine is disabled. */ 422 nv_release_bes(); 423 424 return B_OK; 425 } 426 427 status_t nv_configure_bes 428 (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset) 429 { 430 /* yuy2 (4:2:2) colorspace calculations */ 431 432 /* Note: 433 * in BeOS R5.0.3 and DANO: 434 * 'ow->offset_xxx' is always 0, so not used; 435 * 'ow->width' and 'ow->height' are the output window size: does not change 436 * if window is clipping; 437 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output 438 * window. These values can be negative: this means the window is clipping 439 * at the left or the top of the display, respectively. */ 440 441 /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually 442 * displayed on screen. This is used for the 'hardware zoom' function. */ 443 444 /* output window position and clipping info for source buffer */ 445 move_overlay_info moi; 446 /* calculated BES register values */ 447 uint32 hiscalv, viscalv; 448 /* interval representation, used for scaling calculations */ 449 uint16 intrep; 450 /* inverse scaling factor, used for source positioning */ 451 uint32 ifactor; 452 /* copy of overlay view which has checked valid values */ 453 overlay_view my_ov; 454 455 456 /************************************************************************************** 457 *** copy, check and limit if needed the user-specified view into the intput bitmap *** 458 **************************************************************************************/ 459 my_ov = *ov; 460 /* check for valid 'coordinates' */ 461 if (my_ov.width == 0) my_ov.width++; 462 if (my_ov.height == 0) my_ov.height++; 463 if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 464 my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1); 465 if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 466 my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1); 467 if (my_ov.v_start > (ob->height - 1)) 468 my_ov.v_start = (ob->height - 1); 469 if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1)) 470 my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1); 471 472 LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n", 473 my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height)); 474 475 /* save for nv_bes_calc_move_overlay() */ 476 si->overlay.ow = *ow; 477 si->overlay.ob = *ob; 478 si->overlay.my_ov = my_ov; 479 480 481 /******************************** 482 *** setup horizontal scaling *** 483 ********************************/ 484 LOG(4,("Overlay: total input picture width = %d, height = %d\n", 485 (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height)); 486 LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height)); 487 488 /* determine interval representation value, taking zoom into account */ 489 if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) 490 { 491 /* horizontal filtering is ON */ 492 if ((my_ov.width == ow->width) | (ow->width < 2)) 493 { 494 /* no horizontal scaling used, OR destination width < 2 */ 495 intrep = 0; 496 } 497 else 498 { 499 intrep = 1; 500 } 501 } 502 else 503 { 504 /* horizontal filtering is OFF */ 505 if ((ow->width < my_ov.width) & (ow->width >= 2)) 506 { 507 /* horizontal downscaling used AND destination width >= 2 */ 508 intrep = 1; 509 } 510 else 511 { 512 intrep = 0; 513 } 514 } 515 LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep)); 516 517 /* calculate inverse horizontal scaling factor, taking zoom into account */ 518 /* standard scaling formula: */ 519 ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep); 520 521 /* correct factor to prevent most-right visible 'line' from distorting */ 522 ifactor -= (1 << 2); 523 hiscalv = ifactor; 524 /* save for nv_bes_calc_move_overlay() */ 525 si->overlay.h_ifactor = ifactor; 526 LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor)); 527 528 /* check scaling factor (and modify if needed) to be within scaling limits */ 529 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 530 if (hiscalv < 0x00002000) 531 { 532 /* (non-inverse) factor too large, set factor to max. valid value */ 533 hiscalv = 0x00002000; 534 LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv)); 535 } 536 switch (si->ps.card_arch) 537 { 538 case NV04A: 539 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 540 * (16bit register with 0.11 format value) */ 541 if (hiscalv > 0x0000ffff) 542 { 543 /* (non-inverse) factor too small, set factor to min. valid value */ 544 hiscalv = 0x0000ffff; 545 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5))); 546 } 547 break; 548 case NV30A: 549 case NV40A: 550 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */ 551 if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31)) 552 { 553 /* (non-inverse) factor too small, set factor to min. valid value */ 554 hiscalv = (2 << 16); 555 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 556 } 557 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 558 * So let it fall through... */ 559 if (si->ps.card_type != NV31) break; 560 default: 561 /* the rest has a downscaling limit of 0.125 */ 562 if (hiscalv > (8 << 16)) 563 { 564 /* (non-inverse) factor too small, set factor to min. valid value */ 565 hiscalv = (8 << 16); 566 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 567 } 568 break; 569 } 570 /* AND below is required by hardware */ 571 hiscalv &= 0x001ffffc; 572 573 574 /****************************** 575 *** setup vertical scaling *** 576 ******************************/ 577 578 /* determine interval representation value, taking zoom into account */ 579 if (ow->flags & B_OVERLAY_VERTICAL_FILTERING) 580 { 581 /* vertical filtering is ON */ 582 if ((my_ov.height == ow->height) | (ow->height < 2)) 583 { 584 /* no vertical scaling used, OR destination height < 2 */ 585 intrep = 0; 586 } 587 else 588 { 589 intrep = 1; 590 } 591 } 592 else 593 { 594 /* vertical filtering is OFF */ 595 if ((ow->height < my_ov.height) & (ow->height >= 2)) 596 { 597 /* vertical downscaling used AND destination height >= 2 */ 598 intrep = 1; 599 } 600 else 601 { 602 intrep = 0; 603 } 604 } 605 LOG(4,("Overlay: vertical interval representation value is %d\n",intrep)); 606 607 /* calculate inverse vertical scaling factor, taking zoom into account */ 608 /* standard scaling formula: */ 609 ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep); 610 611 /* correct factor to prevent lowest visible line from distorting */ 612 ifactor -= (1 << 2); 613 LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor)); 614 615 /* preserve ifactor for source positioning calculations later on */ 616 viscalv = ifactor; 617 /* save for nv_bes_calc_move_overlay() */ 618 si->overlay.v_ifactor = ifactor; 619 620 /* check scaling factor (and modify if needed) to be within scaling limits */ 621 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 622 if (viscalv < 0x00002000) 623 { 624 /* (non-inverse) factor too large, set factor to max. valid value */ 625 viscalv = 0x00002000; 626 LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv)); 627 } 628 switch (si->ps.card_arch) 629 { 630 case NV04A: 631 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 632 * (16bit register with 0.11 format value) */ 633 if (viscalv > 0x0000ffff) 634 { 635 /* (non-inverse) factor too small, set factor to min. valid value */ 636 viscalv = 0x0000ffff; 637 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5))); 638 } 639 break; 640 case NV30A: 641 case NV40A: 642 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */ 643 if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31)) 644 { 645 /* (non-inverse) factor too small, set factor to min. valid value */ 646 viscalv = (2 << 16); 647 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 648 } 649 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 650 * So let it fall through... */ 651 if (si->ps.card_type != NV31) break; 652 default: 653 /* the rest has a downscaling limit of 0.125 */ 654 if (viscalv > (8 << 16)) 655 { 656 /* (non-inverse) factor too small, set factor to min. valid value */ 657 viscalv = (8 << 16); 658 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 659 } 660 break; 661 } 662 /* AND below is required by hardware */ 663 viscalv &= 0x001ffffc; 664 665 666 /******************************************************************************** 667 *** setup all edges of output window, setup horizontal and vertical clipping *** 668 ********************************************************************************/ 669 nv_bes_calc_move_overlay(&moi); 670 671 672 /***************************** 673 *** log color keying info *** 674 *****************************/ 675 676 LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n", 677 ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value)); 678 LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n", 679 ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask)); 680 681 682 /***************** 683 *** log flags *** 684 *****************/ 685 686 LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags)); 687 /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */ 688 689 690 /************************************* 691 *** sync to BES (Back End Scaler) *** 692 *************************************/ 693 694 /* Done in card hardware: 695 * double buffered registers + trigger if programming complete feature. */ 696 697 698 /************************************** 699 *** actually program the registers *** 700 **************************************/ 701 702 if (si->ps.card_arch < NV10A) 703 { 704 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 705 BESW(NV04_OE_STATE, 0x00000000); 706 /* select buffer 0 as active (b16) */ 707 BESW(NV04_SU_STATE, 0x00000000); 708 /* unknown (no effect?) */ 709 BESW(NV04_RM_STATE, 0x00000000); 710 /* setup clipped(!) buffer startadress in RAM */ 711 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 712 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 713 /* (program both buffers to prevent sync distortions) */ 714 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 715 moi.a1orgv += ((moi.hsrcstv >> 16) * 2); 716 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 717 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03)); 718 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03)); 719 /* setup buffer source pitch including slopspace (in bytes). 720 * Note: 721 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */ 722 /* (program both buffers to prevent sync distortions) */ 723 BESW(NV04_0SRCPTCH, (ob->width * 2)); 724 BESW(NV04_1SRCPTCH, (ob->width * 2)); 725 /* setup output window position */ 726 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 727 /* setup output window size */ 728 BESW(NV04_DSTSIZE, ( 729 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 730 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 731 )); 732 /* setup horizontal and vertical scaling */ 733 BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5))); 734 /* enable vertical filtering (b0) */ 735 BESW(NV04_CTRL_V, 0x00000001); 736 /* enable horizontal filtering (no effect?) */ 737 BESW(NV04_CTRL_H, 0x00000111); 738 /* enable BES (b0), set colorkeying (b4), format yuy2 (b8: 0 = ccir) */ 739 if (ow->flags & B_OVERLAY_COLOR_KEY) 740 BESW(NV04_GENCTRL, 0x00000111); 741 else 742 BESW(NV04_GENCTRL, 0x00000101); 743 /* select buffer 1 as active (b16) */ 744 BESW(NV04_SU_STATE, 0x00010000); 745 746 /************************** 747 *** setup color keying *** 748 **************************/ 749 750 /* setup colorkeying */ 751 switch(si->dm.space) 752 { 753 case B_RGB15_LITTLE: 754 BESW(NV04_COLKEY, ( 755 ((ow->blue.value & ow->blue.mask) << 0) | 756 ((ow->green.value & ow->green.mask) << 5) | 757 ((ow->red.value & ow->red.mask) << 10) | 758 ((ow->alpha.value & ow->alpha.mask) << 15) 759 )); 760 break; 761 case B_RGB16_LITTLE: 762 BESW(NV04_COLKEY, ( 763 ((ow->blue.value & ow->blue.mask) << 0) | 764 ((ow->green.value & ow->green.mask) << 5) | 765 ((ow->red.value & ow->red.mask) << 11) 766 /* this space has no alpha bits */ 767 )); 768 break; 769 case B_CMAP8: 770 case B_RGB32_LITTLE: 771 default: 772 BESW(NV04_COLKEY, ( 773 ((ow->blue.value & ow->blue.mask) << 0) | 774 ((ow->green.value & ow->green.mask) << 8) | 775 ((ow->red.value & ow->red.mask) << 16) | 776 ((ow->alpha.value & ow->alpha.mask) << 24) 777 )); 778 break; 779 } 780 } 781 else 782 { 783 /* >= NV10A */ 784 785 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 786 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff)); 787 /* setup buffersize */ 788 //fixme if needed: width must be even officially... 789 BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width)); 790 /* setup source pitch including slopspace (in bytes), 791 * b16: select YUY2 (0 = YV12), b20: set colorkeying, b24: no iturbt_709 (do iturbt_601) */ 792 /* Note: 793 * source pitch granularity = 32 pixels on GeForce cards!! */ 794 if (ow->flags & B_OVERLAY_COLOR_KEY) 795 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24))); 796 else 797 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (0 << 20) | (0 << 24))); 798 /* setup output window position */ 799 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 800 /* setup output window size */ 801 BESW(NV10_0DSTSIZE, ( 802 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 803 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 804 )); 805 /* setup horizontal scaling */ 806 BESW(NV10_0ISCALH, (hiscalv << 4)); 807 /* setup vertical scaling */ 808 BESW(NV10_0ISCALV, (viscalv << 4)); 809 /* setup (unclipped!) buffer startadress in RAM */ 810 BESW(NV10_0BUFADR, moi.a1orgv); 811 /* enable BES (b0 = 0) */ 812 BESW(NV10_GENCTRL, 0x00000000); 813 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 814 /* This also triggers activation of programmed values (double buffered registers feature) */ 815 BESW(NV10_BUFSEL, 0x00000001); 816 817 /************************** 818 *** setup color keying *** 819 **************************/ 820 821 /* setup colorkeying */ 822 switch(si->dm.space) 823 { 824 case B_RGB15_LITTLE: 825 BESW(NV10_COLKEY, ( 826 ((ow->blue.value & ow->blue.mask) << 0) | 827 ((ow->green.value & ow->green.mask) << 5) | 828 ((ow->red.value & ow->red.mask) << 10) | 829 ((ow->alpha.value & ow->alpha.mask) << 15) 830 )); 831 break; 832 case B_RGB16_LITTLE: 833 BESW(NV10_COLKEY, ( 834 ((ow->blue.value & ow->blue.mask) << 0) | 835 ((ow->green.value & ow->green.mask) << 5) | 836 ((ow->red.value & ow->red.mask) << 11) 837 /* this space has no alpha bits */ 838 )); 839 break; 840 case B_CMAP8: 841 case B_RGB32_LITTLE: 842 default: 843 BESW(NV10_COLKEY, ( 844 ((ow->blue.value & ow->blue.mask) << 0) | 845 ((ow->green.value & ow->green.mask) << 8) | 846 ((ow->red.value & ow->red.mask) << 16) | 847 ((ow->alpha.value & ow->alpha.mask) << 24) 848 )); 849 break; 850 } 851 } 852 853 /* note that overlay is in use (for nv_bes_move_overlay()) */ 854 si->overlay.active = true; 855 856 return B_OK; 857 } 858 859 status_t nv_release_bes() 860 { 861 if (si->ps.card_arch < NV10A) 862 { 863 /* setup BES control: disable scaler (b0 = 0) */ 864 BESW(NV04_GENCTRL, 0x00000000); 865 } 866 else 867 { 868 /* setup BES control: disable scaler (b0 = 1) */ 869 BESW(NV10_GENCTRL, 0x00000001); 870 } 871 872 /* note that overlay is not in use (for nv_bes_move_overlay()) */ 873 si->overlay.active = false; 874 875 return B_OK; 876 } 877