1 /* Nvidia TNT and GeForce Back End Scaler functions */ 2 /* Written by Rudolf Cornelissen 05/2002-7/2004 */ 3 4 #define MODULE_BIT 0x00000200 5 6 #include "nv_std.h" 7 8 typedef struct move_overlay_info move_overlay_info; 9 10 struct move_overlay_info 11 { 12 uint32 hcoordv; /* left and right edges of video output window */ 13 uint32 vcoordv; /* top and bottom edges of video output window */ 14 uint32 hsrcstv; /* horizontal source start in source buffer (clipping) */ 15 uint32 v1srcstv; /* vertical source start in source buffer (clipping) */ 16 uint32 a1orgv; /* alternate source clipping via startadress of source buffer */ 17 }; 18 19 static void nv_bes_calc_move_overlay(move_overlay_info *moi); 20 static void nv_bes_program_move_overlay(move_overlay_info moi); 21 22 /* move the overlay output window in virtualscreens */ 23 /* Note: 24 * si->dm.h_display_start and si->dm.v_display_start determine where the new 25 * output window is located! */ 26 void nv_bes_move_overlay() 27 { 28 move_overlay_info moi; 29 30 /* abort if overlay is not active */ 31 if (!si->overlay.active) return; 32 33 nv_bes_calc_move_overlay(&moi); 34 nv_bes_program_move_overlay(moi); 35 } 36 37 static void nv_bes_calc_move_overlay(move_overlay_info *moi) 38 { 39 /* misc used variables */ 40 uint16 temp1, temp2; 41 /* visible screen window in virtual workspaces */ 42 uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend; 43 44 /* do 'overlay follow head' in dualhead modes on dualhead cards */ 45 if (si->ps.secondary_head) 46 { 47 switch (si->dm.flags & DUALHEAD_BITS) 48 { 49 case DUALHEAD_ON: 50 case DUALHEAD_SWITCH: 51 if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) < 52 (si->dm.h_display_start + si->dm.timing.h_display)) 53 nv_bes_to_crtc(si->crtc_switch_mode); 54 else 55 nv_bes_to_crtc(!si->crtc_switch_mode); 56 break; 57 default: 58 nv_bes_to_crtc(si->crtc_switch_mode); 59 break; 60 } 61 } 62 63 /* the BES does not respect virtual_workspaces, but adheres to CRTC 64 * constraints only */ 65 crtc_hstart = si->dm.h_display_start; 66 /* make dualhead stretch and switch mode work while we're at it.. */ 67 if (si->overlay.crtc) 68 { 69 crtc_hstart += si->dm.timing.h_display; 70 } 71 72 /* horizontal end is the first position beyond the displayed range on the CRTC */ 73 crtc_hend = crtc_hstart + si->dm.timing.h_display; 74 crtc_vstart = si->dm.v_display_start; 75 /* vertical end is the first position beyond the displayed range on the CRTC */ 76 crtc_vend = crtc_vstart + si->dm.timing.v_display; 77 78 79 /**************************************** 80 *** setup all edges of output window *** 81 ****************************************/ 82 83 /* setup left and right edges of output window */ 84 moi->hcoordv = 0; 85 /* left edge coordinate of output window, must be inside desktop */ 86 /* clipping on the left side */ 87 if (si->overlay.ow.h_start < crtc_hstart) 88 { 89 temp1 = 0; 90 } 91 else 92 { 93 /* clipping on the right side */ 94 if (si->overlay.ow.h_start >= (crtc_hend - 1)) 95 { 96 /* width < 2 is not allowed */ 97 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff; 98 } 99 else 100 /* no clipping here */ 101 { 102 temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff; 103 } 104 } 105 moi->hcoordv |= temp1 << 16; 106 /* right edge coordinate of output window, must be inside desktop */ 107 /* width < 2 is not allowed */ 108 if (si->overlay.ow.width < 2) 109 { 110 temp2 = (temp1 + 1) & 0x7ff; 111 } 112 else 113 { 114 /* clipping on the right side */ 115 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1)) 116 { 117 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff; 118 } 119 else 120 { 121 /* clipping on the left side */ 122 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 123 { 124 /* width < 2 is not allowed */ 125 temp2 = 1; 126 } 127 else 128 /* no clipping here */ 129 { 130 temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff; 131 } 132 } 133 } 134 moi->hcoordv |= temp2 << 0; 135 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2)); 136 137 /* setup top and bottom edges of output window */ 138 moi->vcoordv = 0; 139 /* top edge coordinate of output window, must be inside desktop */ 140 /* clipping on the top side */ 141 if (si->overlay.ow.v_start < crtc_vstart) 142 { 143 temp1 = 0; 144 } 145 else 146 { 147 /* clipping on the bottom side */ 148 if (si->overlay.ow.v_start >= (crtc_vend - 1)) 149 { 150 /* height < 2 is not allowed */ 151 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff; 152 } 153 else 154 /* no clipping here */ 155 { 156 temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff; 157 } 158 } 159 moi->vcoordv |= temp1 << 16; 160 /* bottom edge coordinate of output window, must be inside desktop */ 161 /* height < 2 is not allowed */ 162 if (si->overlay.ow.height < 2) 163 { 164 temp2 = (temp1 + 1) & 0x7ff; 165 } 166 else 167 { 168 /* clipping on the bottom side */ 169 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1)) 170 { 171 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff; 172 } 173 else 174 { 175 /* clipping on the top side */ 176 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 177 { 178 /* height < 2 is not allowed */ 179 temp2 = 1; 180 } 181 else 182 /* no clipping here */ 183 { 184 temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff; 185 } 186 } 187 } 188 moi->vcoordv |= temp2 << 0; 189 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2)); 190 191 192 /********************************* 193 *** setup horizontal clipping *** 194 *********************************/ 195 196 /* Setup horizontal source start: first (sub)pixel contributing to output picture */ 197 /* Note: 198 * The method is to calculate, based on 1:1 scaling, based on the output window. 199 * After this is done, include the scaling factor so you get a value based on the input bitmap. 200 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed. 201 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */ 202 /* Note also: 203 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 204 moi->hsrcstv = 0; 205 /* check for destination horizontal clipping at left side */ 206 if (si->overlay.ow.h_start < crtc_hstart) 207 { 208 /* check if entire destination picture is clipping left: 209 * (2 pixels will be clamped onscreen at least) */ 210 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 211 { 212 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */ 213 moi->hsrcstv += (si->overlay.ow.width - 2); 214 } 215 else 216 { 217 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */ 218 moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start); 219 } 220 LOG(4,("Overlay: clipping left...\n")); 221 222 /* The calculated value is based on scaling = 1x. So we now compensate for scaling. 223 * Note that this also already takes care of aligning the value to the BES register! */ 224 moi->hsrcstv *= si->overlay.h_ifactor; 225 } 226 /* take zoom into account */ 227 moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16; 228 /* AND below required by hardware */ 229 moi->hsrcstv &= 0x03fffffc; 230 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536)); 231 232 233 /******************************* 234 *** setup vertical clipping *** 235 *******************************/ 236 237 /* calculate inputbitmap origin adress */ 238 moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer); 239 moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer); 240 241 /* Setup vertical source start: first (sub)pixel contributing to output picture. */ 242 /* Note: 243 * The method is to calculate, based on 1:1 scaling, based on the output window. 244 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap. 245 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */ 246 /* Note also: 247 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 248 249 moi->v1srcstv = 0; 250 /* check for destination vertical clipping at top side */ 251 if (si->overlay.ow.v_start < crtc_vstart) 252 { 253 /* check if entire destination picture is clipping at top: 254 * (2 pixels will be clamped onscreen at least) */ 255 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 256 { 257 /* increase 'number of clipping pixels' with 'fixed value': 258 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */ 259 moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor; 260 /* on pre-NV10 we need to do clipping in the source 261 * bitmap because no seperate clipping registers exist... */ 262 if (si->ps.card_arch < NV10A) 263 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 264 } 265 else 266 { 267 /* increase 'first contributing pixel' with: 268 * number of destination picture clipping pixels * inverse scaling factor */ 269 moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor; 270 /* on pre-NV10 we need to do clipping in the source 271 * bitmap because no seperate clipping registers exist... */ 272 if (si->ps.card_arch < NV10A) 273 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 274 } 275 LOG(4,("Overlay: clipping at top...\n")); 276 } 277 /* take zoom into account */ 278 moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16); 279 if (si->ps.card_arch < NV10A) 280 { 281 moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row); 282 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv)); 283 } 284 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536)); 285 286 /* AND below is probably required by hardware. */ 287 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */ 288 moi->a1orgv &= 0xfffffff0; 289 LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv)); 290 } 291 292 static void nv_bes_program_move_overlay(move_overlay_info moi) 293 { 294 /************************************* 295 *** sync to BES (Back End Scaler) *** 296 *************************************/ 297 298 /* Done in card hardware: 299 * double buffered registers + trigger if programming complete feature. */ 300 301 302 /************************************** 303 *** actually program the registers *** 304 **************************************/ 305 306 if (si->ps.card_arch < NV10A) 307 { 308 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 309 BESW(NV04_OE_STATE, 0x00000000); 310 /* select buffer 0 as active (b16) */ 311 BESW(NV04_SU_STATE, 0x00000000); 312 /* unknown (no effect?) */ 313 BESW(NV04_RM_STATE, 0x00000000); 314 /* setup clipped(!) buffer startadress in RAM */ 315 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 316 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 317 /* (program both buffers to prevent sync distortions) */ 318 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 319 moi.a1orgv += ((moi.hsrcstv >> 16) * 2); 320 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 321 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03)); 322 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03)); 323 /* setup output window position */ 324 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 325 /* setup output window size */ 326 BESW(NV04_DSTSIZE, ( 327 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 328 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 329 )); 330 /* select buffer 1 as active (b16) */ 331 BESW(NV04_SU_STATE, 0x00010000); 332 } 333 else 334 { 335 /* >= NV10A */ 336 337 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 338 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff)); 339 /* setup output window position */ 340 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 341 /* setup output window size */ 342 BESW(NV10_0DSTSIZE, ( 343 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 344 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 345 )); 346 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 347 /* This also triggers activation of programmed values (double buffered registers feature) */ 348 BESW(NV10_BUFSEL, 0x00000001); 349 } 350 } 351 352 status_t nv_bes_to_crtc(bool crtc) 353 { 354 if (si->ps.secondary_head) 355 { 356 if (crtc) 357 { 358 LOG(4,("Overlay: switching overlay to CRTC2\n")); 359 /* switch overlay engine to CRTC2 */ 360 NV_REG32(NV32_FUNCSEL) &= ~0x00001000; 361 NV_REG32(NV32_2FUNCSEL) |= 0x00001000; 362 si->overlay.crtc = !si->crtc_switch_mode; 363 } 364 else 365 { 366 LOG(4,("Overlay: switching overlay to CRTC1\n")); 367 /* switch overlay engine to CRTC1 */ 368 NV_REG32(NV32_2FUNCSEL) &= ~0x00001000; 369 NV_REG32(NV32_FUNCSEL) |= 0x00001000; 370 si->overlay.crtc = si->crtc_switch_mode; 371 } 372 return B_OK; 373 } 374 else 375 { 376 return B_ERROR; 377 } 378 } 379 380 status_t nv_bes_init() 381 { 382 if (si->ps.card_arch < NV10A) 383 { 384 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 385 BESW(NV04_INTE, 0x00000000); 386 387 /* setup saturation to be 'neutral' */ 388 BESW(NV04_SAT, 0x00000000); 389 /* setup RGB brightness to be 'neutral' */ 390 BESW(NV04_RED_AMP, 0x00000069); 391 BESW(NV04_GRN_AMP, 0x0000003e); 392 BESW(NV04_BLU_AMP, 0x00000089); 393 394 /* setup fifo for fetching data */ 395 BESW(NV04_FIFOBURL, 0x00000003); 396 BESW(NV04_FIFOTHRS, 0x00000038); 397 398 /* unknown, but needed (registers only have b0 implemented) */ 399 /* (program both buffers to prevent sync distortions) */ 400 BESW(NV04_0OFFSET, 0x00000000); 401 BESW(NV04_1OFFSET, 0x00000000); 402 } 403 else 404 { 405 /* >= NV10A */ 406 407 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 408 BESW(NV10_INTE, 0x00000000); 409 /* shut off GeForce4MX MPEG2 decoder */ 410 BESW(DEC_GENCTRL, 0x00000000); 411 /* setup BES memory-range mask */ 412 BESW(NV10_0MEMMASK, (si->ps.memory_size - 1)); 413 /* unknown, but needed */ 414 BESW(NV10_0OFFSET, 0x00000000); 415 416 /* setup brightness, contrast and saturation to be 'neutral' */ 417 BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000)); 418 BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000)); 419 } 420 421 return B_OK; 422 } 423 424 status_t nv_configure_bes 425 (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset) 426 { 427 /* yuy2 (4:2:2) colorspace calculations */ 428 429 /* Note: 430 * in BeOS R5.0.3 and DANO: 431 * 'ow->offset_xxx' is always 0, so not used; 432 * 'ow->width' and 'ow->height' are the output window size: does not change 433 * if window is clipping; 434 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output 435 * window. These values can be negative: this means the window is clipping 436 * at the left or the top of the display, respectively. */ 437 438 /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually 439 * displayed on screen. This is used for the 'hardware zoom' function. */ 440 441 /* output window position and clipping info for source buffer */ 442 move_overlay_info moi; 443 /* calculated BES register values */ 444 uint32 hiscalv, viscalv; 445 /* interval representation, used for scaling calculations */ 446 uint16 intrep; 447 /* inverse scaling factor, used for source positioning */ 448 uint32 ifactor; 449 /* copy of overlay view which has checked valid values */ 450 overlay_view my_ov; 451 452 453 /************************************************************************************** 454 *** copy, check and limit if needed the user-specified view into the intput bitmap *** 455 **************************************************************************************/ 456 my_ov = *ov; 457 /* check for valid 'coordinates' */ 458 if (my_ov.width == 0) my_ov.width++; 459 if (my_ov.height == 0) my_ov.height++; 460 if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 461 my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1); 462 if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 463 my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1); 464 if (my_ov.v_start > (ob->height - 1)) 465 my_ov.v_start = (ob->height - 1); 466 if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1)) 467 my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1); 468 469 LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n", 470 my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height)); 471 472 /* save for nv_bes_calc_move_overlay() */ 473 si->overlay.ow = *ow; 474 si->overlay.ob = *ob; 475 si->overlay.my_ov = my_ov; 476 477 478 /******************************** 479 *** setup horizontal scaling *** 480 ********************************/ 481 LOG(4,("Overlay: total input picture width = %d, height = %d\n", 482 (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height)); 483 LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height)); 484 485 /* determine interval representation value, taking zoom into account */ 486 if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) 487 { 488 /* horizontal filtering is ON */ 489 if ((my_ov.width == ow->width) | (ow->width < 2)) 490 { 491 /* no horizontal scaling used, OR destination width < 2 */ 492 intrep = 0; 493 } 494 else 495 { 496 intrep = 1; 497 } 498 } 499 else 500 { 501 /* horizontal filtering is OFF */ 502 if ((ow->width < my_ov.width) & (ow->width >= 2)) 503 { 504 /* horizontal downscaling used AND destination width >= 2 */ 505 intrep = 1; 506 } 507 else 508 { 509 intrep = 0; 510 } 511 } 512 LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep)); 513 514 /* calculate inverse horizontal scaling factor, taking zoom into account */ 515 /* standard scaling formula: */ 516 ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep); 517 518 /* correct factor to prevent most-right visible 'line' from distorting */ 519 ifactor -= (1 << 2); 520 hiscalv = ifactor; 521 /* save for nv_bes_calc_move_overlay() */ 522 si->overlay.h_ifactor = ifactor; 523 LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor)); 524 525 /* check scaling factor (and modify if needed) to be within scaling limits */ 526 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 527 if (hiscalv < 0x00002000) 528 { 529 /* (non-inverse) factor too large, set factor to max. valid value */ 530 hiscalv = 0x00002000; 531 LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv)); 532 } 533 switch (si->ps.card_arch) 534 { 535 case NV04A: 536 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 537 * (16bit register with 0.11 format value) */ 538 if (hiscalv > 0x0000ffff) 539 { 540 /* (non-inverse) factor too small, set factor to min. valid value */ 541 hiscalv = 0x0000ffff; 542 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5))); 543 } 544 break; 545 case NV30A: 546 /* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */ 547 if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31)) 548 { 549 /* (non-inverse) factor too small, set factor to min. valid value */ 550 hiscalv = (2 << 16); 551 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 552 } 553 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 554 * So let it fall through... */ 555 if (si->ps.card_type != NV31) break; 556 default: 557 /* the rest has a downscaling limit of 0.125 */ 558 if (hiscalv > (8 << 16)) 559 { 560 /* (non-inverse) factor too small, set factor to min. valid value */ 561 hiscalv = (8 << 16); 562 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 563 } 564 break; 565 } 566 /* AND below is required by hardware */ 567 hiscalv &= 0x001ffffc; 568 569 570 /****************************** 571 *** setup vertical scaling *** 572 ******************************/ 573 574 /* determine interval representation value, taking zoom into account */ 575 if (ow->flags & B_OVERLAY_VERTICAL_FILTERING) 576 { 577 /* vertical filtering is ON */ 578 if ((my_ov.height == ow->height) | (ow->height < 2)) 579 { 580 /* no vertical scaling used, OR destination height < 2 */ 581 intrep = 0; 582 } 583 else 584 { 585 intrep = 1; 586 } 587 } 588 else 589 { 590 /* vertical filtering is OFF */ 591 if ((ow->height < my_ov.height) & (ow->height >= 2)) 592 { 593 /* vertical downscaling used AND destination height >= 2 */ 594 intrep = 1; 595 } 596 else 597 { 598 intrep = 0; 599 } 600 } 601 LOG(4,("Overlay: vertical interval representation value is %d\n",intrep)); 602 603 /* calculate inverse vertical scaling factor, taking zoom into account */ 604 /* standard scaling formula: */ 605 ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep); 606 607 /* correct factor to prevent lowest visible line from distorting */ 608 ifactor -= (1 << 2); 609 LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor)); 610 611 /* preserve ifactor for source positioning calculations later on */ 612 viscalv = ifactor; 613 /* save for nv_bes_calc_move_overlay() */ 614 si->overlay.v_ifactor = ifactor; 615 616 /* check scaling factor (and modify if needed) to be within scaling limits */ 617 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 618 if (viscalv < 0x00002000) 619 { 620 /* (non-inverse) factor too large, set factor to max. valid value */ 621 viscalv = 0x00002000; 622 LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv)); 623 } 624 switch (si->ps.card_arch) 625 { 626 case NV04A: 627 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 628 * (16bit register with 0.11 format value) */ 629 if (viscalv > 0x0000ffff) 630 { 631 /* (non-inverse) factor too small, set factor to min. valid value */ 632 viscalv = 0x0000ffff; 633 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5))); 634 } 635 break; 636 case NV30A: 637 /* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */ 638 if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31)) 639 { 640 /* (non-inverse) factor too small, set factor to min. valid value */ 641 viscalv = (2 << 16); 642 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 643 } 644 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 645 * So let it fall through... */ 646 if (si->ps.card_type != NV31) break; 647 default: 648 /* the rest has a downscaling limit of 0.125 */ 649 if (viscalv > (8 << 16)) 650 { 651 /* (non-inverse) factor too small, set factor to min. valid value */ 652 viscalv = (8 << 16); 653 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 654 } 655 break; 656 } 657 /* AND below is required by hardware */ 658 viscalv &= 0x001ffffc; 659 660 661 /******************************************************************************** 662 *** setup all edges of output window, setup horizontal and vertical clipping *** 663 ********************************************************************************/ 664 nv_bes_calc_move_overlay(&moi); 665 666 667 /***************************** 668 *** log color keying info *** 669 *****************************/ 670 671 LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n", 672 ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value)); 673 LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n", 674 ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask)); 675 676 677 /***************** 678 *** log flags *** 679 *****************/ 680 681 LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags)); 682 /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */ 683 684 685 /************************************* 686 *** sync to BES (Back End Scaler) *** 687 *************************************/ 688 689 /* Done in card hardware: 690 * double buffered registers + trigger if programming complete feature. */ 691 692 693 /************************************** 694 *** actually program the registers *** 695 **************************************/ 696 697 if (si->ps.card_arch < NV10A) 698 { 699 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 700 BESW(NV04_OE_STATE, 0x00000000); 701 /* select buffer 0 as active (b16) */ 702 BESW(NV04_SU_STATE, 0x00000000); 703 /* unknown (no effect?) */ 704 BESW(NV04_RM_STATE, 0x00000000); 705 /* setup clipped(!) buffer startadress in RAM */ 706 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 707 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 708 /* (program both buffers to prevent sync distortions) */ 709 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 710 moi.a1orgv += ((moi.hsrcstv >> 16) * 2); 711 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 712 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03)); 713 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03)); 714 /* setup buffer source pitch including slopspace (in bytes). 715 * Note: 716 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */ 717 /* (program both buffers to prevent sync distortions) */ 718 BESW(NV04_0SRCPTCH, (ob->width * 2)); 719 BESW(NV04_1SRCPTCH, (ob->width * 2)); 720 /* setup output window position */ 721 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 722 /* setup output window size */ 723 BESW(NV04_DSTSIZE, ( 724 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 725 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 726 )); 727 /* setup horizontal and vertical scaling */ 728 BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5))); 729 /* enable vertical filtering (b0) */ 730 BESW(NV04_CTRL_V, 0x00000001); 731 /* enable horizontal filtering (no effect?) */ 732 BESW(NV04_CTRL_H, 0x00000111); 733 734 /* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */ 735 BESW(NV04_GENCTRL, 0x00000111); 736 /* select buffer 1 as active (b16) */ 737 BESW(NV04_SU_STATE, 0x00010000); 738 739 /************************** 740 *** setup color keying *** 741 **************************/ 742 743 /* setup colorkeying */ 744 switch(si->dm.space) 745 { 746 case B_RGB15_LITTLE: 747 BESW(NV04_COLKEY, ( 748 ((ow->blue.value & ow->blue.mask) << 0) | 749 ((ow->green.value & ow->green.mask) << 5) | 750 ((ow->red.value & ow->red.mask) << 10) | 751 ((ow->alpha.value & ow->alpha.mask) << 15) 752 )); 753 break; 754 case B_RGB16_LITTLE: 755 BESW(NV04_COLKEY, ( 756 ((ow->blue.value & ow->blue.mask) << 0) | 757 ((ow->green.value & ow->green.mask) << 5) | 758 ((ow->red.value & ow->red.mask) << 11) 759 /* this space has no alpha bits */ 760 )); 761 break; 762 case B_CMAP8: 763 case B_RGB32_LITTLE: 764 default: 765 BESW(NV04_COLKEY, ( 766 ((ow->blue.value & ow->blue.mask) << 0) | 767 ((ow->green.value & ow->green.mask) << 8) | 768 ((ow->red.value & ow->red.mask) << 16) | 769 ((ow->alpha.value & ow->alpha.mask) << 24) 770 )); 771 break; 772 } 773 } 774 else 775 { 776 /* >= NV10A */ 777 778 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 779 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff)); 780 /* setup buffersize */ 781 //fixme if needed: width must be even officially... 782 BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width)); 783 /* setup source pitch including slopspace (in bytes), 784 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */ 785 /* Note: 786 * source pitch granularity = 32 pixels on GeForce cards!! */ 787 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24))); 788 /* setup output window position */ 789 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16))); 790 /* setup output window size */ 791 BESW(NV10_0DSTSIZE, ( 792 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) | 793 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1) 794 )); 795 /* setup horizontal scaling */ 796 BESW(NV10_0ISCALH, (hiscalv << 4)); 797 /* setup vertical scaling */ 798 BESW(NV10_0ISCALV, (viscalv << 4)); 799 /* setup (unclipped!) buffer startadress in RAM */ 800 BESW(NV10_0BUFADR, moi.a1orgv); 801 /* enable BES (b0 = 0) */ 802 BESW(NV10_GENCTRL, 0x00000000); 803 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 804 /* This also triggers activation of programmed values (double buffered registers feature) */ 805 BESW(NV10_BUFSEL, 0x00000001); 806 807 /************************** 808 *** setup color keying *** 809 **************************/ 810 811 /* setup colorkeying */ 812 switch(si->dm.space) 813 { 814 case B_RGB15_LITTLE: 815 BESW(NV10_COLKEY, ( 816 ((ow->blue.value & ow->blue.mask) << 0) | 817 ((ow->green.value & ow->green.mask) << 5) | 818 ((ow->red.value & ow->red.mask) << 10) | 819 ((ow->alpha.value & ow->alpha.mask) << 15) 820 )); 821 break; 822 case B_RGB16_LITTLE: 823 BESW(NV10_COLKEY, ( 824 ((ow->blue.value & ow->blue.mask) << 0) | 825 ((ow->green.value & ow->green.mask) << 5) | 826 ((ow->red.value & ow->red.mask) << 11) 827 /* this space has no alpha bits */ 828 )); 829 break; 830 case B_CMAP8: 831 case B_RGB32_LITTLE: 832 default: 833 BESW(NV10_COLKEY, ( 834 ((ow->blue.value & ow->blue.mask) << 0) | 835 ((ow->green.value & ow->green.mask) << 8) | 836 ((ow->red.value & ow->red.mask) << 16) | 837 ((ow->alpha.value & ow->alpha.mask) << 24) 838 )); 839 break; 840 } 841 } 842 843 /* note that overlay is in use (for nv_bes_move_overlay()) */ 844 si->overlay.active = true; 845 846 return B_OK; 847 } 848 849 status_t nv_release_bes() 850 { 851 if (si->ps.card_arch < NV10A) 852 { 853 /* setup BES control: disable scaler (b0 = 0) */ 854 BESW(NV04_GENCTRL, 0x00000000); 855 } 856 else 857 { 858 /* setup BES control: disable scaler (b0 = 1) */ 859 BESW(NV10_GENCTRL, 0x00000001); 860 } 861 862 /* note that overlay is not in use (for nv_bes_move_overlay()) */ 863 si->overlay.active = false; 864 865 return B_OK; 866 } 867