1 /* Nvidia TNT and GeForce Back End Scaler functions */ 2 /* Written by Rudolf Cornelissen 05/2002-5/2004 */ 3 4 #define MODULE_BIT 0x00000200 5 6 #include "nv_std.h" 7 8 /* move the overlay output window in virtualscreens */ 9 /* Note: 10 * si->dm.h_display_start and si->dm.v_display_start determine where the new 11 * output window is located! */ 12 void nv_bes_move_overlay() 13 { 14 /* calculated BES register values */ 15 uint32 hcoordv, vcoordv, hsrcstv, a1orgv, v1srcstv; 16 /* misc used variables */ 17 uint16 temp1, temp2; 18 /* visible screen window in virtual workspaces */ 19 uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend; 20 21 /* abort if overlay is not active */ 22 if (!si->overlay.active) return; 23 24 /* do 'overlay follow head' in dualhead modes on dualhead cards */ 25 if (si->ps.secondary_head) 26 { 27 switch (si->dm.flags & DUALHEAD_BITS) 28 { 29 case DUALHEAD_ON: 30 case DUALHEAD_SWITCH: 31 if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) < 32 (si->dm.h_display_start + si->dm.timing.h_display)) 33 nv_bes_to_crtc(si->crtc_switch_mode); 34 else 35 nv_bes_to_crtc(!si->crtc_switch_mode); 36 break; 37 default: 38 nv_bes_to_crtc(si->crtc_switch_mode); 39 break; 40 } 41 } 42 43 /* the BES does not respect virtual_workspaces, but adheres to CRTC 44 * constraints only */ 45 crtc_hstart = si->dm.h_display_start; 46 /* make dualhead stretch and switch mode work while we're at it.. */ 47 if (si->overlay.crtc) 48 { 49 crtc_hstart += si->dm.timing.h_display; 50 } 51 52 /* horizontal end is the first position beyond the displayed range on the CRTC */ 53 crtc_hend = crtc_hstart + si->dm.timing.h_display; 54 crtc_vstart = si->dm.v_display_start; 55 /* vertical end is the first position beyond the displayed range on the CRTC */ 56 crtc_vend = crtc_vstart + si->dm.timing.v_display; 57 58 59 /**************************************** 60 *** setup all edges of output window *** 61 ****************************************/ 62 63 /* setup left and right edges of output window */ 64 hcoordv = 0; 65 /* left edge coordinate of output window, must be inside desktop */ 66 /* clipping on the left side */ 67 if (si->overlay.ow.h_start < crtc_hstart) 68 { 69 temp1 = 0; 70 } 71 else 72 { 73 /* clipping on the right side */ 74 if (si->overlay.ow.h_start >= (crtc_hend - 1)) 75 { 76 /* width < 2 is not allowed */ 77 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff; 78 } 79 else 80 /* no clipping here */ 81 { 82 temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff; 83 } 84 } 85 hcoordv |= temp1 << 16; 86 /* right edge coordinate of output window, must be inside desktop */ 87 /* width < 2 is not allowed */ 88 if (si->overlay.ow.width < 2) 89 { 90 temp2 = (temp1 + 1) & 0x7ff; 91 } 92 else 93 { 94 /* clipping on the right side */ 95 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1)) 96 { 97 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff; 98 } 99 else 100 { 101 /* clipping on the left side */ 102 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 103 { 104 /* width < 2 is not allowed */ 105 temp2 = 1; 106 } 107 else 108 /* no clipping here */ 109 { 110 temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff; 111 } 112 } 113 } 114 hcoordv |= temp2 << 0; 115 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2)); 116 117 /* setup top and bottom edges of output window */ 118 vcoordv = 0; 119 /* top edge coordinate of output window, must be inside desktop */ 120 /* clipping on the top side */ 121 if (si->overlay.ow.v_start < crtc_vstart) 122 { 123 temp1 = 0; 124 } 125 else 126 { 127 /* clipping on the bottom side */ 128 if (si->overlay.ow.v_start >= (crtc_vend - 1)) 129 { 130 /* height < 2 is not allowed */ 131 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff; 132 } 133 else 134 /* no clipping here */ 135 { 136 temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff; 137 } 138 } 139 vcoordv |= temp1 << 16; 140 /* bottom edge coordinate of output window, must be inside desktop */ 141 /* height < 2 is not allowed */ 142 if (si->overlay.ow.height < 2) 143 { 144 temp2 = (temp1 + 1) & 0x7ff; 145 } 146 else 147 { 148 /* clipping on the bottom side */ 149 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1)) 150 { 151 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff; 152 } 153 else 154 { 155 /* clipping on the top side */ 156 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 157 { 158 /* height < 2 is not allowed */ 159 temp2 = 1; 160 } 161 else 162 /* no clipping here */ 163 { 164 temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff; 165 } 166 } 167 } 168 vcoordv |= temp2 << 0; 169 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2)); 170 171 172 /********************************* 173 *** setup horizontal clipping *** 174 *********************************/ 175 176 /* Setup horizontal source start: first (sub)pixel contributing to output picture */ 177 /* Note: 178 * The method is to calculate, based on 1:1 scaling, based on the output window. 179 * After this is done, include the scaling factor so you get a value based on the input bitmap. 180 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed. 181 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */ 182 /* Note also: 183 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 184 hsrcstv = 0; 185 /* check for destination horizontal clipping at left side */ 186 if (si->overlay.ow.h_start < crtc_hstart) 187 { 188 /* check if entire destination picture is clipping left: 189 * (2 pixels will be clamped onscreen at least) */ 190 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1)) 191 { 192 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */ 193 hsrcstv += (si->overlay.ow.width - 2); 194 } 195 else 196 { 197 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */ 198 hsrcstv += (crtc_hstart - si->overlay.ow.h_start); 199 } 200 LOG(4,("Overlay: clipping left...\n")); 201 202 /* The calculated value is based on scaling = 1x. So we now compensate for scaling. 203 * Note that this also already takes care of aligning the value to the BES register! */ 204 hsrcstv *= si->overlay.h_ifactor; 205 } 206 /* take zoom into account */ 207 hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16; 208 /* AND below required by hardware */ 209 hsrcstv &= 0x03fffffc; 210 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536)); 211 212 213 /******************************* 214 *** setup vertical clipping *** 215 *******************************/ 216 217 /* calculate inputbitmap origin adress */ 218 a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer); 219 a1orgv -= (uint32)((vuint32 *)si->framebuffer); 220 221 /* Setup vertical source start: first (sub)pixel contributing to output picture. */ 222 /* Note: 223 * The method is to calculate, based on 1:1 scaling, based on the output window. 224 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap. 225 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */ 226 /* Note also: 227 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 228 229 v1srcstv = 0; 230 /* check for destination vertical clipping at top side */ 231 if (si->overlay.ow.v_start < crtc_vstart) 232 { 233 /* check if entire destination picture is clipping at top: 234 * (2 pixels will be clamped onscreen at least) */ 235 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1)) 236 { 237 /* increase 'number of clipping pixels' with 'fixed value': 238 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */ 239 v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor; 240 /* on pre-NV10 we need to do clipping in the source 241 * bitmap because no seperate clipping registers exist... */ 242 if (si->ps.card_arch < NV10A) 243 a1orgv += ((v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 244 } 245 else 246 { 247 /* increase 'first contributing pixel' with: 248 * number of destination picture clipping pixels * inverse scaling factor */ 249 v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor; 250 /* on pre-NV10 we need to do clipping in the source 251 * bitmap because no seperate clipping registers exist... */ 252 if (si->ps.card_arch < NV10A) 253 a1orgv += ((v1srcstv >> 16) * si->overlay.ob.bytes_per_row); 254 } 255 LOG(4,("Overlay: clipping at top...\n")); 256 } 257 /* take zoom into account */ 258 v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16); 259 if (si->ps.card_arch < NV10A) 260 { 261 a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row); 262 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv)); 263 } 264 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536)); 265 266 /* AND below is probably required by hardware. */ 267 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */ 268 a1orgv &= 0xfffffff0; 269 LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv)); 270 271 272 /************************************* 273 *** sync to BES (Back End Scaler) *** 274 *************************************/ 275 276 /* Done in card hardware: 277 * double buffered registers + trigger if programming complete feature. */ 278 279 280 /************************************** 281 *** actually program the registers *** 282 **************************************/ 283 284 if (si->ps.card_arch < NV10A) 285 { 286 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 287 BESW(NV04_OE_STATE, 0x00000000); 288 /* select buffer 0 as active (b16) */ 289 BESW(NV04_SU_STATE, 0x00000000); 290 /* unknown (no effect?) */ 291 BESW(NV04_RM_STATE, 0x00000000); 292 /* setup clipped(!) buffer startadress in RAM */ 293 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 294 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 295 /* (program both buffers to prevent sync distortions) */ 296 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 297 a1orgv += ((hsrcstv >> 16) * 2); 298 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 299 BESW(NV04_0BUFADR, (a1orgv & ~0x03)); 300 BESW(NV04_1BUFADR, (a1orgv & ~0x03)); 301 302 /* setup buffer source pitch including slopspace (in bytes). 303 * Note: 304 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */ 305 /* (program both buffers to prevent sync distortions) */ 306 // BESW(NV04_0SRCPTCH, (ob->width * 2)); 307 // BESW(NV04_1SRCPTCH, (ob->width * 2)); 308 /* setup output window position */ 309 BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16))); 310 /* setup output window size */ 311 BESW(NV04_DSTSIZE, ( 312 (((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) | 313 ((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1) 314 )); 315 316 /* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */ 317 // BESW(NV04_GENCTRL, 0x00000111); 318 /* select buffer 1 as active (b16) */ 319 BESW(NV04_SU_STATE, 0x00010000); 320 } 321 else 322 { 323 /* >= NV10A */ 324 325 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 326 BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff)); 327 /* setup buffersize */ 328 //fixme if needed: width must be even officially... 329 // BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width)); 330 /* setup source pitch including slopspace (in bytes), 331 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */ 332 /* Note: 333 * source pitch granularity = 32 pixels on GeForce cards!! */ 334 // BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24))); 335 /* setup output window position */ 336 BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16))); 337 /* setup output window size */ 338 BESW(NV10_0DSTSIZE, ( 339 (((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) | 340 ((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1) 341 )); 342 /* setup (unclipped!) buffer startadress in RAM */ 343 // BESW(NV10_0BUFADR, a1orgv); 344 /* enable BES (b0 = 0) */ 345 // BESW(NV10_GENCTRL, 0x00000000); 346 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 347 /* This also triggers activation of programmed values (double buffered registers feature) */ 348 BESW(NV10_BUFSEL, 0x00000001); 349 } 350 } 351 352 status_t nv_bes_to_crtc(bool crtc) 353 { 354 if (si->ps.secondary_head) 355 { 356 if (crtc) 357 { 358 LOG(4,("Overlay: switching overlay to CRTC2\n")); 359 /* switch overlay engine to CRTC2 */ 360 NV_REG32(NV32_FUNCSEL) &= ~0x00001000; 361 NV_REG32(NV32_2FUNCSEL) |= 0x00001000; 362 si->overlay.crtc = !si->crtc_switch_mode; 363 } 364 else 365 { 366 LOG(4,("Overlay: switching overlay to CRTC1\n")); 367 /* switch overlay engine to CRTC1 */ 368 NV_REG32(NV32_2FUNCSEL) &= ~0x00001000; 369 NV_REG32(NV32_FUNCSEL) |= 0x00001000; 370 si->overlay.crtc = si->crtc_switch_mode; 371 } 372 return B_OK; 373 } 374 else 375 { 376 return B_ERROR; 377 } 378 } 379 380 status_t nv_bes_init() 381 { 382 if (si->ps.card_arch < NV10A) 383 { 384 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 385 BESW(NV04_INTE, 0x00000000); 386 387 /* setup saturation to be 'neutral' */ 388 BESW(NV04_SAT, 0x00000000); 389 /* setup RGB brightness to be 'neutral' */ 390 BESW(NV04_RED_AMP, 0x00000069); 391 BESW(NV04_GRN_AMP, 0x0000003e); 392 BESW(NV04_BLU_AMP, 0x00000089); 393 394 /* setup fifo for fetching data */ 395 BESW(NV04_FIFOBURL, 0x00000003); 396 BESW(NV04_FIFOTHRS, 0x00000038); 397 398 /* unknown, but needed (registers only have b0 implemented) */ 399 /* (program both buffers to prevent sync distortions) */ 400 BESW(NV04_0OFFSET, 0x00000000); 401 BESW(NV04_1OFFSET, 0x00000000); 402 } 403 else 404 { 405 /* >= NV10A */ 406 407 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 408 BESW(NV10_INTE, 0x00000000); 409 /* shut off GeForce4MX MPEG2 decoder */ 410 BESW(DEC_GENCTRL, 0x00000000); 411 /* setup BES memory-range mask */ 412 BESW(NV10_0MEMMASK, ((si->ps.memory_size << 20) - 1)); 413 /* unknown, but needed */ 414 BESW(NV10_0OFFSET, 0x00000000); 415 416 /* setup brightness, contrast and saturation to be 'neutral' */ 417 BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000)); 418 BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000)); 419 } 420 421 return B_OK; 422 } 423 424 status_t nv_configure_bes 425 (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset) 426 { 427 /* yuy2 (4:2:2) colorspace calculations */ 428 429 /* Note: 430 * in BeOS R5.0.3 and DANO: 431 * 'ow->offset_xxx' is always 0, so not used; 432 * 'ow->width' and 'ow->height' are the output window size: does not change 433 * if window is clipping; 434 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output 435 * window. These values can be negative: this means the window is clipping 436 * at the left or the top of the display, respectively. */ 437 438 /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually 439 * displayed on screen. This is used for the 'hardware zoom' function. */ 440 441 /* calculated BES register values */ 442 uint32 hcoordv, vcoordv, hiscalv, hsrcstv, viscalv, a1orgv, v1srcstv; 443 /* misc used variables */ 444 uint16 temp1, temp2; 445 /* interval representation, used for scaling calculations */ 446 uint16 intrep, crtc_hstart, crtc_vstart, crtc_hend, crtc_vend; 447 /* inverse scaling factor, used for source positioning */ 448 uint32 ifactor; 449 /* copy of overlay view which has checked valid values */ 450 overlay_view my_ov; 451 452 453 /************************************************************************************** 454 *** copy, check and limit if needed the user-specified view into the intput bitmap *** 455 **************************************************************************************/ 456 my_ov = *ov; 457 /* check for valid 'coordinates' */ 458 if (my_ov.width == 0) my_ov.width++; 459 if (my_ov.height == 0) my_ov.height++; 460 if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 461 my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1); 462 if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 463 my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1); 464 if (my_ov.v_start > (ob->height - 1)) 465 my_ov.v_start = (ob->height - 1); 466 if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1)) 467 my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1); 468 469 LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n", 470 my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height)); 471 472 /* save for nv_bes_move_overlay() */ 473 si->overlay.ow = *ow; 474 si->overlay.ob = *ob; 475 si->overlay.my_ov = my_ov; 476 477 /* the BES does not respect virtual_workspaces, but adheres to CRTC 478 * constraints only */ 479 crtc_hstart = si->dm.h_display_start; 480 /* make dualhead stretch and switch mode work while we're at it.. */ 481 if (si->overlay.crtc) 482 { 483 crtc_hstart += si->dm.timing.h_display; 484 } 485 486 /* horizontal end is the first position beyond the displayed range on the CRTC */ 487 crtc_hend = crtc_hstart + si->dm.timing.h_display; 488 crtc_vstart = si->dm.v_display_start; 489 /* vertical end is the first position beyond the displayed range on the CRTC */ 490 crtc_vend = crtc_vstart + si->dm.timing.v_display; 491 492 493 /**************************************** 494 *** setup all edges of output window *** 495 ****************************************/ 496 497 /* setup left and right edges of output window */ 498 hcoordv = 0; 499 /* left edge coordinate of output window, must be inside desktop */ 500 /* clipping on the left side */ 501 if (ow->h_start < crtc_hstart) 502 { 503 temp1 = 0; 504 } 505 else 506 { 507 /* clipping on the right side */ 508 if (ow->h_start >= (crtc_hend - 1)) 509 { 510 /* width < 2 is not allowed */ 511 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff; 512 } 513 else 514 /* no clipping here */ 515 { 516 temp1 = (ow->h_start - crtc_hstart) & 0x7ff; 517 } 518 } 519 hcoordv |= temp1 << 16; 520 /* right edge coordinate of output window, must be inside desktop */ 521 /* width < 2 is not allowed */ 522 if (ow->width < 2) 523 { 524 temp2 = (temp1 + 1) & 0x7ff; 525 } 526 else 527 { 528 /* clipping on the right side */ 529 if ((ow->h_start + ow->width - 1) > (crtc_hend - 1)) 530 { 531 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff; 532 } 533 else 534 { 535 /* clipping on the left side */ 536 if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1)) 537 { 538 /* width < 2 is not allowed */ 539 temp2 = 1; 540 } 541 else 542 /* no clipping here */ 543 { 544 temp2 = ((uint16)(ow->h_start + ow->width - crtc_hstart - 1)) & 0x7ff; 545 } 546 } 547 } 548 hcoordv |= temp2 << 0; 549 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2)); 550 551 /* setup top and bottom edges of output window */ 552 vcoordv = 0; 553 /* top edge coordinate of output window, must be inside desktop */ 554 /* clipping on the top side */ 555 if (ow->v_start < crtc_vstart) 556 { 557 temp1 = 0; 558 } 559 else 560 { 561 /* clipping on the bottom side */ 562 if (ow->v_start >= (crtc_vend - 1)) 563 { 564 /* height < 2 is not allowed */ 565 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff; 566 } 567 else 568 /* no clipping here */ 569 { 570 temp1 = (ow->v_start - crtc_vstart) & 0x7ff; 571 } 572 } 573 vcoordv |= temp1 << 16; 574 /* bottom edge coordinate of output window, must be inside desktop */ 575 /* height < 2 is not allowed */ 576 if (ow->height < 2) 577 { 578 temp2 = (temp1 + 1) & 0x7ff; 579 } 580 else 581 { 582 /* clipping on the bottom side */ 583 if ((ow->v_start + ow->height - 1) > (crtc_vend - 1)) 584 { 585 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff; 586 } 587 else 588 { 589 /* clipping on the top side */ 590 if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1)) 591 { 592 /* height < 2 is not allowed */ 593 temp2 = 1; 594 } 595 else 596 /* no clipping here */ 597 { 598 temp2 = ((uint16)(ow->v_start + ow->height - crtc_vstart - 1)) & 0x7ff; 599 } 600 } 601 } 602 vcoordv |= temp2 << 0; 603 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2)); 604 605 606 /********************************************* 607 *** setup horizontal scaling and clipping *** 608 *********************************************/ 609 610 LOG(6,("Overlay: total input picture width = %d, height = %d\n", 611 (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height)); 612 LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height)); 613 614 /* do horizontal scaling... */ 615 /* determine interval representation value, taking zoom into account */ 616 if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) 617 { 618 /* horizontal filtering is ON */ 619 if ((my_ov.width == ow->width) | (ow->width < 2)) 620 { 621 /* no horizontal scaling used, OR destination width < 2 */ 622 intrep = 0; 623 } 624 else 625 { 626 intrep = 1; 627 } 628 } 629 else 630 { 631 /* horizontal filtering is OFF */ 632 if ((ow->width < my_ov.width) & (ow->width >= 2)) 633 { 634 /* horizontal downscaling used AND destination width >= 2 */ 635 intrep = 1; 636 } 637 else 638 { 639 intrep = 0; 640 } 641 } 642 LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep)); 643 644 /* calculate inverse horizontal scaling factor, taking zoom into account */ 645 /* standard scaling formula: */ 646 ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep); 647 648 /* correct factor to prevent most-right visible 'line' from distorting */ 649 ifactor -= (1 << 2); 650 hiscalv = ifactor; 651 /* save for nv_bes_move_overlay() */ 652 si->overlay.h_ifactor = ifactor; 653 LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor)); 654 655 /* check scaling factor (and modify if needed) to be within scaling limits */ 656 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 657 if (hiscalv < 0x00002000) 658 { 659 /* (non-inverse) factor too large, set factor to max. valid value */ 660 hiscalv = 0x00002000; 661 LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv)); 662 } 663 switch (si->ps.card_arch) 664 { 665 case NV04A: 666 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 667 * (16bit register with 0.11 format value) */ 668 if (hiscalv > 0x0000ffff) 669 { 670 /* (non-inverse) factor too small, set factor to min. valid value */ 671 hiscalv = 0x0000ffff; 672 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5))); 673 } 674 break; 675 case NV30A: 676 /* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */ 677 if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31)) 678 { 679 /* (non-inverse) factor too small, set factor to min. valid value */ 680 hiscalv = (2 << 16); 681 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 682 } 683 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 684 * So let it fall through... */ 685 if (si->ps.card_type != NV31) break; 686 default: 687 /* the rest has a downscaling limit of 0.125 */ 688 if (hiscalv > (8 << 16)) 689 { 690 /* (non-inverse) factor too small, set factor to min. valid value */ 691 hiscalv = (8 << 16); 692 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 693 } 694 break; 695 } 696 /* AND below is required by hardware */ 697 hiscalv &= 0x001ffffc; 698 699 700 /* do horizontal clipping... */ 701 /* Setup horizontal source start: first (sub)pixel contributing to output picture */ 702 /* Note: 703 * The method is to calculate, based on 1:1 scaling, based on the output window. 704 * After this is done, include the scaling factor so you get a value based on the input bitmap. 705 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed. 706 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */ 707 /* Note also: 708 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 709 hsrcstv = 0; 710 /* check for destination horizontal clipping at left side */ 711 if (ow->h_start < crtc_hstart) 712 { 713 /* check if entire destination picture is clipping left: 714 * (2 pixels will be clamped onscreen at least) */ 715 if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1)) 716 { 717 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */ 718 hsrcstv += (ow->width - 2); 719 } 720 else 721 { 722 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */ 723 hsrcstv += (crtc_hstart - ow->h_start); 724 } 725 LOG(4,("Overlay: clipping left...\n")); 726 727 /* The calculated value is based on scaling = 1x. So we now compensate for scaling. 728 * Note that this also already takes care of aligning the value to the BES register! */ 729 hsrcstv *= ifactor; 730 } 731 /* take zoom into account */ 732 hsrcstv += ((uint32)my_ov.h_start) << 16; 733 /* AND below required by hardware */ 734 hsrcstv &= 0x03fffffc; 735 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536)); 736 737 738 /******************************************* 739 *** setup vertical scaling and clipping *** 740 *******************************************/ 741 742 /* do vertical scaling... */ 743 /* determine interval representation value, taking zoom into account */ 744 if (ow->flags & B_OVERLAY_VERTICAL_FILTERING) 745 { 746 /* vertical filtering is ON */ 747 if ((my_ov.height == ow->height) | (ow->height < 2)) 748 { 749 /* no vertical scaling used, OR destination height < 2 */ 750 intrep = 0; 751 } 752 else 753 { 754 intrep = 1; 755 } 756 } 757 else 758 { 759 /* vertical filtering is OFF */ 760 if ((ow->height < my_ov.height) & (ow->height >= 2)) 761 { 762 /* vertical downscaling used AND destination height >= 2 */ 763 intrep = 1; 764 } 765 else 766 { 767 intrep = 0; 768 } 769 } 770 LOG(4,("Overlay: vertical interval representation value is %d\n",intrep)); 771 772 /* calculate inverse vertical scaling factor, taking zoom into account */ 773 /* standard scaling formula: */ 774 ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep); 775 776 /* correct factor to prevent lowest visible line from distorting */ 777 ifactor -= (1 << 2); 778 LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor)); 779 780 /* preserve ifactor for source positioning calculations later on */ 781 viscalv = ifactor; 782 /* save for nv_bes_move_overlay() */ 783 si->overlay.v_ifactor = ifactor; 784 785 /* check scaling factor (and modify if needed) to be within scaling limits */ 786 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */ 787 if (viscalv < 0x00002000) 788 { 789 /* (non-inverse) factor too large, set factor to max. valid value */ 790 viscalv = 0x00002000; 791 LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv)); 792 } 793 switch (si->ps.card_arch) 794 { 795 case NV04A: 796 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 797 * (16bit register with 0.11 format value) */ 798 if (viscalv > 0x0000ffff) 799 { 800 /* (non-inverse) factor too small, set factor to min. valid value */ 801 viscalv = 0x0000ffff; 802 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5))); 803 } 804 break; 805 case NV30A: 806 /* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */ 807 if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31)) 808 { 809 /* (non-inverse) factor too small, set factor to min. valid value */ 810 viscalv = (2 << 16); 811 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 812 } 813 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 814 * So let it fall through... */ 815 if (si->ps.card_type != NV31) break; 816 default: 817 /* the rest has a downscaling limit of 0.125 */ 818 if (viscalv > (8 << 16)) 819 { 820 /* (non-inverse) factor too small, set factor to min. valid value */ 821 viscalv = (8 << 16); 822 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 823 } 824 break; 825 } 826 /* AND below is required by hardware */ 827 viscalv &= 0x001ffffc; 828 829 830 /* calculate inputbitmap origin adress */ 831 a1orgv = (uint32)((vuint32 *)ob->buffer); 832 a1orgv -= (uint32)((vuint32 *)si->framebuffer); 833 834 /* do vertical clipping... */ 835 /* Setup vertical source start: first (sub)pixel contributing to output picture. */ 836 /* Note: 837 * The method is to calculate, based on 1:1 scaling, based on the output window. 838 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap. 839 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */ 840 /* Note also: 841 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 842 843 v1srcstv = 0; 844 /* check for destination vertical clipping at top side */ 845 if (ow->v_start < crtc_vstart) 846 { 847 /* check if entire destination picture is clipping at top: 848 * (2 pixels will be clamped onscreen at least) */ 849 if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1)) 850 { 851 /* increase 'number of clipping pixels' with 'fixed value': 852 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */ 853 v1srcstv = (ow->height - 2) * ifactor; 854 /* on pre-NV10 we need to do clipping in the source 855 * bitmap because no seperate clipping registers exist... */ 856 if (si->ps.card_arch < NV10A) 857 a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row); 858 } 859 else 860 { 861 /* increase 'first contributing pixel' with: 862 * number of destination picture clipping pixels * inverse scaling factor */ 863 v1srcstv = (crtc_vstart - ow->v_start) * ifactor; 864 /* on pre-NV10 we need to do clipping in the source 865 * bitmap because no seperate clipping registers exist... */ 866 if (si->ps.card_arch < NV10A) 867 a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row); 868 } 869 LOG(4,("Overlay: clipping at top...\n")); 870 } 871 /* take zoom into account */ 872 v1srcstv += (((uint32)my_ov.v_start) << 16); 873 if (si->ps.card_arch < NV10A) 874 { 875 a1orgv += (my_ov.v_start * ob->bytes_per_row); 876 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv)); 877 } 878 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536)); 879 880 /* AND below is probably required by hardware. */ 881 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */ 882 a1orgv &= 0xfffffff0; 883 LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv)); 884 885 886 /***************************** 887 *** log color keying info *** 888 *****************************/ 889 890 LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n", 891 ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value)); 892 LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n", 893 ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask)); 894 895 896 /***************** 897 *** log flags *** 898 *****************/ 899 900 LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags)); 901 /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */ 902 903 904 /************************************* 905 *** sync to BES (Back End Scaler) *** 906 *************************************/ 907 908 /* Done in card hardware: 909 * double buffered registers + trigger if programming complete feature. */ 910 911 912 /************************************** 913 *** actually program the registers *** 914 **************************************/ 915 916 if (si->ps.card_arch < NV10A) 917 { 918 /* unknown, but needed (otherwise high-res distortions and only half the frames */ 919 BESW(NV04_OE_STATE, 0x00000000); 920 /* select buffer 0 as active (b16) */ 921 BESW(NV04_SU_STATE, 0x00000000); 922 /* unknown (no effect?) */ 923 BESW(NV04_RM_STATE, 0x00000000); 924 /* setup clipped(!) buffer startadress in RAM */ 925 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 926 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 927 /* (program both buffers to prevent sync distortions) */ 928 /* first include 'pixel precise' left clipping... (top clipping was already included) */ 929 a1orgv += ((hsrcstv >> 16) * 2); 930 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 931 BESW(NV04_0BUFADR, (a1orgv & ~0x03)); 932 BESW(NV04_1BUFADR, (a1orgv & ~0x03)); 933 /* setup buffer source pitch including slopspace (in bytes). 934 * Note: 935 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */ 936 /* (program both buffers to prevent sync distortions) */ 937 BESW(NV04_0SRCPTCH, (ob->width * 2)); 938 BESW(NV04_1SRCPTCH, (ob->width * 2)); 939 /* setup output window position */ 940 BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16))); 941 /* setup output window size */ 942 BESW(NV04_DSTSIZE, ( 943 (((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) | 944 ((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1) 945 )); 946 /* setup horizontal and vertical scaling */ 947 BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5))); 948 /* enable vertical filtering (b0) */ 949 BESW(NV04_CTRL_V, 0x00000001); 950 /* enable horizontal filtering (no effect?) */ 951 BESW(NV04_CTRL_H, 0x00000111); 952 953 /* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */ 954 BESW(NV04_GENCTRL, 0x00000111); 955 /* select buffer 1 as active (b16) */ 956 BESW(NV04_SU_STATE, 0x00010000); 957 958 /************************** 959 *** setup color keying *** 960 **************************/ 961 962 /* setup colorkeying */ 963 switch(si->dm.space) 964 { 965 case B_RGB15_LITTLE: 966 BESW(NV04_COLKEY, ( 967 ((ow->blue.value & ow->blue.mask) << 0) | 968 ((ow->green.value & ow->green.mask) << 5) | 969 ((ow->red.value & ow->red.mask) << 10) | 970 ((ow->alpha.value & ow->alpha.mask) << 15) 971 )); 972 break; 973 case B_RGB16_LITTLE: 974 BESW(NV04_COLKEY, ( 975 ((ow->blue.value & ow->blue.mask) << 0) | 976 ((ow->green.value & ow->green.mask) << 5) | 977 ((ow->red.value & ow->red.mask) << 11) 978 /* this space has no alpha bits */ 979 )); 980 break; 981 case B_CMAP8: 982 case B_RGB32_LITTLE: 983 default: 984 BESW(NV04_COLKEY, ( 985 ((ow->blue.value & ow->blue.mask) << 0) | 986 ((ow->green.value & ow->green.mask) << 8) | 987 ((ow->red.value & ow->red.mask) << 16) | 988 ((ow->alpha.value & ow->alpha.mask) << 24) 989 )); 990 break; 991 } 992 } 993 else 994 { 995 /* >= NV10A */ 996 997 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 998 BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff)); 999 /* setup buffersize */ 1000 //fixme if needed: width must be even officially... 1001 BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width)); 1002 /* setup source pitch including slopspace (in bytes), 1003 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */ 1004 /* Note: 1005 * source pitch granularity = 32 pixels on GeForce cards!! */ 1006 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24))); 1007 /* setup output window position */ 1008 BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16))); 1009 /* setup output window size */ 1010 BESW(NV10_0DSTSIZE, ( 1011 (((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) | 1012 ((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1) 1013 )); 1014 /* setup horizontal scaling */ 1015 BESW(NV10_0ISCALH, (hiscalv << 4)); 1016 /* setup vertical scaling */ 1017 BESW(NV10_0ISCALV, (viscalv << 4)); 1018 /* setup (unclipped!) buffer startadress in RAM */ 1019 BESW(NV10_0BUFADR, a1orgv); 1020 /* enable BES (b0 = 0) */ 1021 BESW(NV10_GENCTRL, 0x00000000); 1022 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 1023 /* This also triggers activation of programmed values (double buffered registers feature) */ 1024 BESW(NV10_BUFSEL, 0x00000001); 1025 1026 /************************** 1027 *** setup color keying *** 1028 **************************/ 1029 1030 /* setup colorkeying */ 1031 switch(si->dm.space) 1032 { 1033 case B_RGB15_LITTLE: 1034 BESW(NV10_COLKEY, ( 1035 ((ow->blue.value & ow->blue.mask) << 0) | 1036 ((ow->green.value & ow->green.mask) << 5) | 1037 ((ow->red.value & ow->red.mask) << 10) | 1038 ((ow->alpha.value & ow->alpha.mask) << 15) 1039 )); 1040 break; 1041 case B_RGB16_LITTLE: 1042 BESW(NV10_COLKEY, ( 1043 ((ow->blue.value & ow->blue.mask) << 0) | 1044 ((ow->green.value & ow->green.mask) << 5) | 1045 ((ow->red.value & ow->red.mask) << 11) 1046 /* this space has no alpha bits */ 1047 )); 1048 break; 1049 case B_CMAP8: 1050 case B_RGB32_LITTLE: 1051 default: 1052 BESW(NV10_COLKEY, ( 1053 ((ow->blue.value & ow->blue.mask) << 0) | 1054 ((ow->green.value & ow->green.mask) << 8) | 1055 ((ow->red.value & ow->red.mask) << 16) | 1056 ((ow->alpha.value & ow->alpha.mask) << 24) 1057 )); 1058 break; 1059 } 1060 } 1061 1062 /* note that overlay is in use (for nv_bes_move_overlay()) */ 1063 si->overlay.active = true; 1064 1065 return B_OK; 1066 } 1067 1068 status_t nv_release_bes() 1069 { 1070 if (si->ps.card_arch < NV10A) 1071 { 1072 /* setup BES control: disable scaler (b0 = 0) */ 1073 BESW(NV04_GENCTRL, 0x00000000); 1074 } 1075 else 1076 { 1077 /* setup BES control: disable scaler (b0 = 1) */ 1078 BESW(NV10_GENCTRL, 0x00000001); 1079 } 1080 1081 /* note that overlay is not in use (for nv_bes_move_overlay()) */ 1082 si->overlay.active = false; 1083 1084 return B_OK; 1085 } 1086