1aa1e552fSshatty /* Nvidia TNT and GeForce Back End Scaler functions */ 27b820745SRudolf Cornelissen /* Written by Rudolf Cornelissen 05/2002-1/2004 */ 308705d96Sshatty 408705d96Sshatty #define MODULE_BIT 0x00000200 508705d96Sshatty 608705d96Sshatty #include "nv_std.h" 708705d96Sshatty 808705d96Sshatty //fixme: implement: (used for virtual screens!) 908705d96Sshatty //void move_overlay(uint16 hdisp_start, uint16 vdisp_start); 1008705d96Sshatty 11*cc6f5542SRudolf Cornelissen status_t nv_bes_to_crtc(uint8 crtc) 12*cc6f5542SRudolf Cornelissen { 13*cc6f5542SRudolf Cornelissen if (si->ps.secondary_head) 14*cc6f5542SRudolf Cornelissen { 15*cc6f5542SRudolf Cornelissen if (crtc) 16*cc6f5542SRudolf Cornelissen { 17*cc6f5542SRudolf Cornelissen LOG(4,("Overlay: switching overlay to CRTC2\n")); 18*cc6f5542SRudolf Cornelissen /* switch overlay engine to CRTC2 */ 19*cc6f5542SRudolf Cornelissen NV_REG32(NV32_FUNCSEL) &= ~0x00001000; 20*cc6f5542SRudolf Cornelissen NV_REG32(NV32_2FUNCSEL) |= 0x00001000; 21*cc6f5542SRudolf Cornelissen si->overlay.crtc = 1; 22*cc6f5542SRudolf Cornelissen } 23*cc6f5542SRudolf Cornelissen else 24*cc6f5542SRudolf Cornelissen { 25*cc6f5542SRudolf Cornelissen LOG(4,("Overlay: switching overlay to CRTC1\n")); 26*cc6f5542SRudolf Cornelissen /* switch overlay engine to CRTC1 */ 27*cc6f5542SRudolf Cornelissen NV_REG32(NV32_2FUNCSEL) &= ~0x00001000; 28*cc6f5542SRudolf Cornelissen NV_REG32(NV32_FUNCSEL) |= 0x00001000; 29*cc6f5542SRudolf Cornelissen si->overlay.crtc = 0; 30*cc6f5542SRudolf Cornelissen } 31*cc6f5542SRudolf Cornelissen return B_OK; 32*cc6f5542SRudolf Cornelissen } 33*cc6f5542SRudolf Cornelissen else 34*cc6f5542SRudolf Cornelissen { 35*cc6f5542SRudolf Cornelissen return B_ERROR; 36*cc6f5542SRudolf Cornelissen } 37*cc6f5542SRudolf Cornelissen } 38*cc6f5542SRudolf Cornelissen 39b4c44701Sshatty status_t nv_bes_init() 40b4c44701Sshatty { 41aa1e552fSshatty if (si->ps.card_arch < NV10A) 42aa1e552fSshatty { 43aa1e552fSshatty /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 44aa1e552fSshatty BESW(NV04_INTE, 0x00000000); 45aa1e552fSshatty 46aa1e552fSshatty /* setup saturation to be 'neutral' */ 47aa1e552fSshatty BESW(NV04_SAT, 0x00000000); 48aa1e552fSshatty /* setup RGB brightness to be 'neutral' */ 49aa1e552fSshatty BESW(NV04_RED_AMP, 0x00000069); 50aa1e552fSshatty BESW(NV04_GRN_AMP, 0x0000003e); 51aa1e552fSshatty BESW(NV04_BLU_AMP, 0x00000089); 52aa1e552fSshatty 53aa1e552fSshatty /* setup fifo for fetching data */ 54aa1e552fSshatty BESW(NV04_FIFOBURL, 0x00000003); 55aa1e552fSshatty BESW(NV04_FIFOTHRS, 0x00000038); 56aa1e552fSshatty 57aa1e552fSshatty /* unknown, but needed (registers only have b0 implemented) */ 58aa1e552fSshatty /* (program both buffers to prevent sync distortions) */ 59aa1e552fSshatty BESW(NV04_0OFFSET, 0x00000000); 60aa1e552fSshatty BESW(NV04_1OFFSET, 0x00000000); 61aa1e552fSshatty } 62aa1e552fSshatty else 63aa1e552fSshatty { 64aa1e552fSshatty /* >= NV10A */ 65aa1e552fSshatty 66b4c44701Sshatty /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */ 67b4c44701Sshatty BESW(NV10_INTE, 0x00000000); 68b4c44701Sshatty /* shut off GeForce4MX MPEG2 decoder */ 69b4c44701Sshatty BESW(DEC_GENCTRL, 0x00000000); 70b4c44701Sshatty /* setup BES memory-range mask */ 71b4c44701Sshatty BESW(NV10_0MEMMASK, ((si->ps.memory_size << 20) - 1)); 72b4c44701Sshatty /* unknown, but needed */ 73aa1e552fSshatty BESW(NV10_0OFFSET, 0x00000000); 74b4c44701Sshatty 75b4c44701Sshatty /* setup brightness, contrast and saturation to be 'neutral' */ 76b4c44701Sshatty BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000)); 77b4c44701Sshatty BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000)); 78aa1e552fSshatty } 79b4c44701Sshatty 80b4c44701Sshatty return B_OK; 81b4c44701Sshatty } 82b4c44701Sshatty 8308705d96Sshatty status_t nv_configure_bes 8408705d96Sshatty (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset) 8508705d96Sshatty { 8608705d96Sshatty /* yuy2 (4:2:2) colorspace calculations */ 8708705d96Sshatty 8808705d96Sshatty /* Note: 8908705d96Sshatty * in BeOS R5.0.3 and DANO: 9008705d96Sshatty * 'ow->offset_xxx' is always 0, so not used; 9108705d96Sshatty * 'ow->width' and 'ow->height' are the output window size: does not change 9208705d96Sshatty * if window is clipping; 9308705d96Sshatty * 'ow->h_start' and 'ow->v_start' are the left-top position of the output 9408705d96Sshatty * window. These values can be negative: this means the window is clipping 9508705d96Sshatty * at the left or the top of the display, respectively. */ 9608705d96Sshatty 9708705d96Sshatty /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually 9808705d96Sshatty * displayed on screen. This is used for the 'hardware zoom' function. */ 9908705d96Sshatty 10008705d96Sshatty /* calculated BES register values */ 10105b269c0Sshatty uint32 hcoordv, vcoordv, hiscalv, hsrcstv, viscalv, a1orgv, v1srcstv; 10208705d96Sshatty /* misc used variables */ 10308705d96Sshatty uint16 temp1, temp2; 10408705d96Sshatty /* interval representation, used for scaling calculations */ 10508705d96Sshatty uint16 intrep, crtc_hstart, crtc_vstart, crtc_hend, crtc_vend; 10608705d96Sshatty /* inverse scaling factor, used for source positioning */ 10708705d96Sshatty uint32 ifactor; 10808705d96Sshatty /* copy of overlay view which has checked valid values */ 10908705d96Sshatty overlay_view my_ov; 11008705d96Sshatty 11108705d96Sshatty 11208705d96Sshatty /************************************************************************************** 11308705d96Sshatty *** copy, check and limit if needed the user-specified view into the intput bitmap *** 11408705d96Sshatty **************************************************************************************/ 11508705d96Sshatty my_ov = *ov; 11608705d96Sshatty /* check for valid 'coordinates' */ 11708705d96Sshatty if (my_ov.width == 0) my_ov.width++; 11808705d96Sshatty if (my_ov.height == 0) my_ov.height++; 11908705d96Sshatty if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 12008705d96Sshatty my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1); 12108705d96Sshatty if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1)) 12208705d96Sshatty my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1); 12308705d96Sshatty if (my_ov.v_start > (ob->height - 1)) 12408705d96Sshatty my_ov.v_start = (ob->height - 1); 12508705d96Sshatty if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1)) 12608705d96Sshatty my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1); 12708705d96Sshatty 12808705d96Sshatty LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n", 12908705d96Sshatty my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height)); 13008705d96Sshatty 13108705d96Sshatty /* the BES does not respect virtual_workspaces, but adheres to CRTC 13208705d96Sshatty * constraints only */ 13308705d96Sshatty crtc_hstart = si->dm.h_display_start; 134*cc6f5542SRudolf Cornelissen /* make dualhead stretch and switch mode work while we're at it.. */ 135*cc6f5542SRudolf Cornelissen if (si->overlay.crtc) 13608705d96Sshatty { 13708705d96Sshatty crtc_hstart += si->dm.timing.h_display; 13808705d96Sshatty } 139*cc6f5542SRudolf Cornelissen 14008705d96Sshatty /* horizontal end is the first position beyond the displayed range on the CRTC */ 14108705d96Sshatty crtc_hend = crtc_hstart + si->dm.timing.h_display; 14208705d96Sshatty crtc_vstart = si->dm.v_display_start; 14308705d96Sshatty /* vertical end is the first position beyond the displayed range on the CRTC */ 14408705d96Sshatty crtc_vend = crtc_vstart + si->dm.timing.v_display; 14508705d96Sshatty 14608705d96Sshatty 14708705d96Sshatty /**************************************** 14808705d96Sshatty *** setup all edges of output window *** 14908705d96Sshatty ****************************************/ 15008705d96Sshatty 15108705d96Sshatty /* setup left and right edges of output window */ 15208705d96Sshatty hcoordv = 0; 15308705d96Sshatty /* left edge coordinate of output window, must be inside desktop */ 15408705d96Sshatty /* clipping on the left side */ 15508705d96Sshatty if (ow->h_start < crtc_hstart) 15608705d96Sshatty { 15708705d96Sshatty temp1 = 0; 15808705d96Sshatty } 15908705d96Sshatty else 16008705d96Sshatty { 16108705d96Sshatty /* clipping on the right side */ 16208705d96Sshatty if (ow->h_start >= (crtc_hend - 1)) 16308705d96Sshatty { 16408705d96Sshatty /* width < 2 is not allowed */ 16508705d96Sshatty temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff; 16608705d96Sshatty } 16708705d96Sshatty else 16808705d96Sshatty /* no clipping here */ 16908705d96Sshatty { 17008705d96Sshatty temp1 = (ow->h_start - crtc_hstart) & 0x7ff; 17108705d96Sshatty } 17208705d96Sshatty } 17308705d96Sshatty hcoordv |= temp1 << 16; 17408705d96Sshatty /* right edge coordinate of output window, must be inside desktop */ 17508705d96Sshatty /* width < 2 is not allowed */ 17608705d96Sshatty if (ow->width < 2) 17708705d96Sshatty { 17808705d96Sshatty temp2 = (temp1 + 1) & 0x7ff; 17908705d96Sshatty } 18008705d96Sshatty else 18108705d96Sshatty { 18208705d96Sshatty /* clipping on the right side */ 18308705d96Sshatty if ((ow->h_start + ow->width - 1) > (crtc_hend - 1)) 18408705d96Sshatty { 18508705d96Sshatty temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff; 18608705d96Sshatty } 18708705d96Sshatty else 18808705d96Sshatty { 18908705d96Sshatty /* clipping on the left side */ 19008705d96Sshatty if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1)) 19108705d96Sshatty { 19208705d96Sshatty /* width < 2 is not allowed */ 19308705d96Sshatty temp2 = 1; 19408705d96Sshatty } 19508705d96Sshatty else 19608705d96Sshatty /* no clipping here */ 19708705d96Sshatty { 19808705d96Sshatty temp2 = ((uint16)(ow->h_start + ow->width - crtc_hstart - 1)) & 0x7ff; 19908705d96Sshatty } 20008705d96Sshatty } 20108705d96Sshatty } 20208705d96Sshatty hcoordv |= temp2 << 0; 20308705d96Sshatty LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2)); 20408705d96Sshatty 20508705d96Sshatty /* setup top and bottom edges of output window */ 20608705d96Sshatty vcoordv = 0; 20708705d96Sshatty /* top edge coordinate of output window, must be inside desktop */ 20808705d96Sshatty /* clipping on the top side */ 20908705d96Sshatty if (ow->v_start < crtc_vstart) 21008705d96Sshatty { 21108705d96Sshatty temp1 = 0; 21208705d96Sshatty } 21308705d96Sshatty else 21408705d96Sshatty { 21508705d96Sshatty /* clipping on the bottom side */ 21608705d96Sshatty if (ow->v_start >= (crtc_vend - 1)) 21708705d96Sshatty { 21808705d96Sshatty /* height < 2 is not allowed */ 21908705d96Sshatty temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff; 22008705d96Sshatty } 22108705d96Sshatty else 22208705d96Sshatty /* no clipping here */ 22308705d96Sshatty { 22408705d96Sshatty temp1 = (ow->v_start - crtc_vstart) & 0x7ff; 22508705d96Sshatty } 22608705d96Sshatty } 22708705d96Sshatty vcoordv |= temp1 << 16; 22808705d96Sshatty /* bottom edge coordinate of output window, must be inside desktop */ 22908705d96Sshatty /* height < 2 is not allowed */ 23008705d96Sshatty if (ow->height < 2) 23108705d96Sshatty { 23208705d96Sshatty temp2 = (temp1 + 1) & 0x7ff; 23308705d96Sshatty } 23408705d96Sshatty else 23508705d96Sshatty { 23608705d96Sshatty /* clipping on the bottom side */ 23708705d96Sshatty if ((ow->v_start + ow->height - 1) > (crtc_vend - 1)) 23808705d96Sshatty { 23908705d96Sshatty temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff; 24008705d96Sshatty } 24108705d96Sshatty else 24208705d96Sshatty { 24308705d96Sshatty /* clipping on the top side */ 24408705d96Sshatty if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1)) 24508705d96Sshatty { 24608705d96Sshatty /* height < 2 is not allowed */ 24708705d96Sshatty temp2 = 1; 24808705d96Sshatty } 24908705d96Sshatty else 25008705d96Sshatty /* no clipping here */ 25108705d96Sshatty { 25208705d96Sshatty temp2 = ((uint16)(ow->v_start + ow->height - crtc_vstart - 1)) & 0x7ff; 25308705d96Sshatty } 25408705d96Sshatty } 25508705d96Sshatty } 25608705d96Sshatty vcoordv |= temp2 << 0; 25708705d96Sshatty LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2)); 25808705d96Sshatty 25908705d96Sshatty 26008705d96Sshatty /********************************************* 26108705d96Sshatty *** setup horizontal scaling and clipping *** 26208705d96Sshatty *********************************************/ 26308705d96Sshatty 26408705d96Sshatty LOG(6,("Overlay: total input picture width = %d, height = %d\n", 26508705d96Sshatty (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height)); 26608705d96Sshatty LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height)); 26708705d96Sshatty 26808705d96Sshatty /* do horizontal scaling... */ 26908705d96Sshatty /* determine interval representation value, taking zoom into account */ 27008705d96Sshatty if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) 27108705d96Sshatty { 27208705d96Sshatty /* horizontal filtering is ON */ 27308705d96Sshatty if ((my_ov.width == ow->width) | (ow->width < 2)) 27408705d96Sshatty { 27508705d96Sshatty /* no horizontal scaling used, OR destination width < 2 */ 27608705d96Sshatty intrep = 0; 27708705d96Sshatty } 27808705d96Sshatty else 27908705d96Sshatty { 28008705d96Sshatty intrep = 1; 28108705d96Sshatty } 28208705d96Sshatty } 28308705d96Sshatty else 28408705d96Sshatty { 28508705d96Sshatty /* horizontal filtering is OFF */ 28608705d96Sshatty if ((ow->width < my_ov.width) & (ow->width >= 2)) 28708705d96Sshatty { 28808705d96Sshatty /* horizontal downscaling used AND destination width >= 2 */ 28908705d96Sshatty intrep = 1; 29008705d96Sshatty } 29108705d96Sshatty else 29208705d96Sshatty { 29308705d96Sshatty intrep = 0; 29408705d96Sshatty } 29508705d96Sshatty } 29608705d96Sshatty LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep)); 29708705d96Sshatty 29808705d96Sshatty /* calculate inverse horizontal scaling factor, taking zoom into account */ 29908705d96Sshatty /* standard scaling formula: */ 30008705d96Sshatty ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep); 30108705d96Sshatty 30208705d96Sshatty /* correct factor to prevent most-right visible 'line' from distorting */ 30308705d96Sshatty ifactor -= (1 << 2); 30405b269c0Sshatty hiscalv = ifactor; 30508705d96Sshatty LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor)); 30608705d96Sshatty 30708705d96Sshatty /* check scaling factor (and modify if needed) to be within scaling limits */ 308aa1e552fSshatty /* (assuming) all cards have a upscaling limit of 8.0 */ 309aa1e552fSshatty if (hiscalv < 0x00002000) 31008705d96Sshatty { 31108705d96Sshatty /* (non-inverse) factor too large, set factor to max. valid value */ 312aa1e552fSshatty hiscalv = 0x00002000; 31308705d96Sshatty LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv)); 31408705d96Sshatty } 315887d4abbSshatty switch (si->ps.card_arch) 316887d4abbSshatty { 317aa1e552fSshatty case NV04A: 318aa1e552fSshatty /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 319aa1e552fSshatty * (16bit register with 0.11 format value) */ 320aa1e552fSshatty if (hiscalv > 0x0000ffff) 321aa1e552fSshatty { 322aa1e552fSshatty /* (non-inverse) factor too small, set factor to min. valid value */ 323aa1e552fSshatty hiscalv = 0x0000ffff; 324aa1e552fSshatty LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5))); 325aa1e552fSshatty } 326aa1e552fSshatty break; 327887d4abbSshatty case NV30A: 328e8d5d47cSRudolf Cornelissen /* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */ 329e8d5d47cSRudolf Cornelissen if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31)) 330887d4abbSshatty { 331887d4abbSshatty /* (non-inverse) factor too small, set factor to min. valid value */ 332887d4abbSshatty hiscalv = (2 << 16); 333887d4abbSshatty LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 334887d4abbSshatty } 335e8d5d47cSRudolf Cornelissen /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 336e8d5d47cSRudolf Cornelissen * So let it fall through... */ 337e8d5d47cSRudolf Cornelissen if (si->ps.card_type != NV31) break; 338887d4abbSshatty default: 339aa1e552fSshatty /* the rest has a downscaling limit of 0.125 */ 34005b269c0Sshatty if (hiscalv > (8 << 16)) 34108705d96Sshatty { 34208705d96Sshatty /* (non-inverse) factor too small, set factor to min. valid value */ 34305b269c0Sshatty hiscalv = (8 << 16); 34408705d96Sshatty LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv)); 34508705d96Sshatty } 346887d4abbSshatty break; 347887d4abbSshatty } 34808705d96Sshatty /* AND below is required by hardware */ 34908705d96Sshatty hiscalv &= 0x001ffffc; 35008705d96Sshatty 35108705d96Sshatty 35208705d96Sshatty /* do horizontal clipping... */ 35308705d96Sshatty /* Setup horizontal source start: first (sub)pixel contributing to output picture */ 35408705d96Sshatty /* Note: 35508705d96Sshatty * The method is to calculate, based on 1:1 scaling, based on the output window. 35608705d96Sshatty * After this is done, include the scaling factor so you get a value based on the input bitmap. 35708705d96Sshatty * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed. 35808705d96Sshatty * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */ 35908705d96Sshatty /* Note also: 36008705d96Sshatty * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 36108705d96Sshatty hsrcstv = 0; 36208705d96Sshatty /* check for destination horizontal clipping at left side */ 36308705d96Sshatty if (ow->h_start < crtc_hstart) 36408705d96Sshatty { 36508705d96Sshatty /* check if entire destination picture is clipping left: 36608705d96Sshatty * (2 pixels will be clamped onscreen at least) */ 36708705d96Sshatty if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1)) 36808705d96Sshatty { 36908705d96Sshatty /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */ 37008705d96Sshatty hsrcstv += (ow->width - 2); 37108705d96Sshatty } 37208705d96Sshatty else 37308705d96Sshatty { 37408705d96Sshatty /* increase 'first contributing pixel' with actual number of dest. clipping pixels */ 37508705d96Sshatty hsrcstv += (crtc_hstart - ow->h_start); 37608705d96Sshatty } 37708705d96Sshatty LOG(4,("Overlay: clipping left...\n")); 37808705d96Sshatty 37908705d96Sshatty /* The calculated value is based on scaling = 1x. So we now compensate for scaling. 38008705d96Sshatty * Note that this also already takes care of aligning the value to the BES register! */ 38108705d96Sshatty hsrcstv *= ifactor; 38208705d96Sshatty } 38308705d96Sshatty /* take zoom into account */ 38408705d96Sshatty hsrcstv += ((uint32)my_ov.h_start) << 16; 38508705d96Sshatty /* AND below required by hardware */ 38608705d96Sshatty hsrcstv &= 0x03fffffc; 38708705d96Sshatty LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536)); 38808705d96Sshatty 38908705d96Sshatty 39008705d96Sshatty /******************************************* 39108705d96Sshatty *** setup vertical scaling and clipping *** 39208705d96Sshatty *******************************************/ 39308705d96Sshatty 39408705d96Sshatty /* do vertical scaling... */ 39508705d96Sshatty /* determine interval representation value, taking zoom into account */ 39608705d96Sshatty if (ow->flags & B_OVERLAY_VERTICAL_FILTERING) 39708705d96Sshatty { 39808705d96Sshatty /* vertical filtering is ON */ 39908705d96Sshatty if ((my_ov.height == ow->height) | (ow->height < 2)) 40008705d96Sshatty { 40108705d96Sshatty /* no vertical scaling used, OR destination height < 2 */ 40208705d96Sshatty intrep = 0; 40308705d96Sshatty } 40408705d96Sshatty else 40508705d96Sshatty { 40608705d96Sshatty intrep = 1; 40708705d96Sshatty } 40808705d96Sshatty } 40908705d96Sshatty else 41008705d96Sshatty { 41108705d96Sshatty /* vertical filtering is OFF */ 41208705d96Sshatty if ((ow->height < my_ov.height) & (ow->height >= 2)) 41308705d96Sshatty { 41408705d96Sshatty /* vertical downscaling used AND destination height >= 2 */ 41508705d96Sshatty intrep = 1; 41608705d96Sshatty } 41708705d96Sshatty else 41808705d96Sshatty { 41908705d96Sshatty intrep = 0; 42008705d96Sshatty } 42108705d96Sshatty } 42208705d96Sshatty LOG(4,("Overlay: vertical interval representation value is %d\n",intrep)); 42308705d96Sshatty 42408705d96Sshatty /* calculate inverse vertical scaling factor, taking zoom into account */ 42508705d96Sshatty /* standard scaling formula: */ 42608705d96Sshatty ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep); 42708705d96Sshatty 42808705d96Sshatty /* correct factor to prevent lowest visible line from distorting */ 42908705d96Sshatty ifactor -= (1 << 2); 43008705d96Sshatty LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor)); 43108705d96Sshatty 43208705d96Sshatty /* preserve ifactor for source positioning calculations later on */ 43308705d96Sshatty viscalv = ifactor; 43408705d96Sshatty 43508705d96Sshatty /* check scaling factor (and modify if needed) to be within scaling limits */ 436aa1e552fSshatty /* (assuming) all cards have a upscaling limit of 8.0 */ 437aa1e552fSshatty if (viscalv < 0x00002000) 43808705d96Sshatty { 43908705d96Sshatty /* (non-inverse) factor too large, set factor to max. valid value */ 440aa1e552fSshatty viscalv = 0x00002000; 44108705d96Sshatty LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv)); 44208705d96Sshatty } 443887d4abbSshatty switch (si->ps.card_arch) 444887d4abbSshatty { 445aa1e552fSshatty case NV04A: 446aa1e552fSshatty /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489 447aa1e552fSshatty * (16bit register with 0.11 format value) */ 448aa1e552fSshatty if (viscalv > 0x0000ffff) 449aa1e552fSshatty { 450aa1e552fSshatty /* (non-inverse) factor too small, set factor to min. valid value */ 451aa1e552fSshatty viscalv = 0x0000ffff; 452aa1e552fSshatty LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5))); 453aa1e552fSshatty } 454aa1e552fSshatty break; 455887d4abbSshatty case NV30A: 456e8d5d47cSRudolf Cornelissen /* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */ 457e8d5d47cSRudolf Cornelissen if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31)) 458887d4abbSshatty { 459887d4abbSshatty /* (non-inverse) factor too small, set factor to min. valid value */ 460887d4abbSshatty viscalv = (2 << 16); 461887d4abbSshatty LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 462887d4abbSshatty } 463e8d5d47cSRudolf Cornelissen /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits! 464e8d5d47cSRudolf Cornelissen * So let it fall through... */ 465e8d5d47cSRudolf Cornelissen if (si->ps.card_type != NV31) break; 466887d4abbSshatty default: 467aa1e552fSshatty /* the rest has a downscaling limit of 0.125 */ 46805b269c0Sshatty if (viscalv > (8 << 16)) 46908705d96Sshatty { 47008705d96Sshatty /* (non-inverse) factor too small, set factor to min. valid value */ 47105b269c0Sshatty viscalv = (8 << 16); 47208705d96Sshatty LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv)); 47308705d96Sshatty } 474887d4abbSshatty break; 475887d4abbSshatty } 47608705d96Sshatty /* AND below is required by hardware */ 47708705d96Sshatty viscalv &= 0x001ffffc; 47808705d96Sshatty 47908705d96Sshatty 480aa1e552fSshatty /* calculate inputbitmap origin adress */ 481aa1e552fSshatty a1orgv = (uint32)((vuint32 *)ob->buffer); 482aa1e552fSshatty a1orgv -= (uint32)((vuint32 *)si->framebuffer); 483aa1e552fSshatty 48408705d96Sshatty /* do vertical clipping... */ 48505b269c0Sshatty /* Setup vertical source start: first (sub)pixel contributing to output picture. */ 48608705d96Sshatty /* Note: 48708705d96Sshatty * The method is to calculate, based on 1:1 scaling, based on the output window. 48808705d96Sshatty * 'After' this is done, include the scaling factor so you get a value based on the input bitmap. 48908705d96Sshatty * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */ 49008705d96Sshatty /* Note also: 49108705d96Sshatty * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */ 49208705d96Sshatty 49305b269c0Sshatty v1srcstv = 0; 49408705d96Sshatty /* check for destination vertical clipping at top side */ 49508705d96Sshatty if (ow->v_start < crtc_vstart) 49608705d96Sshatty { 49708705d96Sshatty /* check if entire destination picture is clipping at top: 49808705d96Sshatty * (2 pixels will be clamped onscreen at least) */ 49908705d96Sshatty if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1)) 50008705d96Sshatty { 50105b269c0Sshatty /* increase 'number of clipping pixels' with 'fixed value': 50205b269c0Sshatty * 'total height - 2' of dest. picture in pixels * inverse scaling factor */ 50305b269c0Sshatty v1srcstv = (ow->height - 2) * ifactor; 504aa1e552fSshatty /* on pre-NV10 we need to do clipping in the source 505aa1e552fSshatty * bitmap because no seperate clipping registers exist... */ 506aa1e552fSshatty if (si->ps.card_arch < NV10A) 507aa1e552fSshatty a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row); 50808705d96Sshatty } 50908705d96Sshatty else 51008705d96Sshatty { 51105b269c0Sshatty /* increase 'first contributing pixel' with: 51205b269c0Sshatty * number of destination picture clipping pixels * inverse scaling factor */ 51305b269c0Sshatty v1srcstv = (crtc_vstart - ow->v_start) * ifactor; 514aa1e552fSshatty /* on pre-NV10 we need to do clipping in the source 515aa1e552fSshatty * bitmap because no seperate clipping registers exist... */ 516aa1e552fSshatty if (si->ps.card_arch < NV10A) 517aa1e552fSshatty a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row); 51808705d96Sshatty } 51908705d96Sshatty LOG(4,("Overlay: clipping at top...\n")); 52008705d96Sshatty } 52108705d96Sshatty /* take zoom into account */ 52205b269c0Sshatty v1srcstv += (((uint32)my_ov.v_start) << 16); 523aa1e552fSshatty if (si->ps.card_arch < NV10A) 524aa1e552fSshatty { 525aa1e552fSshatty a1orgv += (my_ov.v_start * ob->bytes_per_row); 526aa1e552fSshatty LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv)); 527aa1e552fSshatty } 52805b269c0Sshatty LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536)); 52908705d96Sshatty 53005b269c0Sshatty /* AND below is probably required by hardware. */ 53108705d96Sshatty /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */ 53205b269c0Sshatty a1orgv &= 0xfffffff0; 53305b269c0Sshatty LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv)); 53408705d96Sshatty 53508705d96Sshatty 53608705d96Sshatty /***************************** 53708705d96Sshatty *** log color keying info *** 53808705d96Sshatty *****************************/ 53908705d96Sshatty 54008705d96Sshatty LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n", 54108705d96Sshatty ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value)); 54208705d96Sshatty LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n", 54308705d96Sshatty ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask)); 54408705d96Sshatty 54508705d96Sshatty 54605b269c0Sshatty /***************** 54705b269c0Sshatty *** log flags *** 54805b269c0Sshatty *****************/ 54908705d96Sshatty 55008705d96Sshatty LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags)); 55105b269c0Sshatty /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */ 55208705d96Sshatty 55308705d96Sshatty 55408705d96Sshatty /************************************* 55508705d96Sshatty *** sync to BES (Back End Scaler) *** 55608705d96Sshatty *************************************/ 55708705d96Sshatty 558b4c44701Sshatty /* Done in card hardware: 559b4c44701Sshatty * double buffered registers + trigger if programming complete feature. */ 56008705d96Sshatty 56108705d96Sshatty 56208705d96Sshatty /************************************** 56308705d96Sshatty *** actually program the registers *** 56408705d96Sshatty **************************************/ 56508705d96Sshatty 566aa1e552fSshatty if (si->ps.card_arch < NV10A) 567aa1e552fSshatty { 568aa1e552fSshatty /* unknown, but needed (otherwise high-res distortions and only half the frames */ 569aa1e552fSshatty BESW(NV04_OE_STATE, 0x00000000); 570aa1e552fSshatty /* select buffer 0 as active (b16) */ 571aa1e552fSshatty BESW(NV04_SU_STATE, 0x00000000); 572aa1e552fSshatty /* unknown (no effect?) */ 573aa1e552fSshatty BESW(NV04_RM_STATE, 0x00000000); 574aa1e552fSshatty /* setup clipped(!) buffer startadress in RAM */ 575aa1e552fSshatty /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping 576aa1e552fSshatty * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */ 577aa1e552fSshatty /* (program both buffers to prevent sync distortions) */ 578aa1e552fSshatty /* first include 'pixel precise' left clipping... (top clipping was already included) */ 579aa1e552fSshatty a1orgv += ((hsrcstv >> 16) * 2); 580aa1e552fSshatty /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */ 581aa1e552fSshatty BESW(NV04_0BUFADR, (a1orgv & ~0x03)); 582aa1e552fSshatty BESW(NV04_1BUFADR, (a1orgv & ~0x03)); 583aa1e552fSshatty /* setup buffer source pitch including slopspace (in bytes). 584aa1e552fSshatty * Note: 585aa1e552fSshatty * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */ 586aa1e552fSshatty /* (program both buffers to prevent sync distortions) */ 587aa1e552fSshatty BESW(NV04_0SRCPTCH, (ob->width * 2)); 588aa1e552fSshatty BESW(NV04_1SRCPTCH, (ob->width * 2)); 589aa1e552fSshatty /* setup output window position */ 590aa1e552fSshatty BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16))); 591aa1e552fSshatty /* setup output window size */ 592aa1e552fSshatty BESW(NV04_DSTSIZE, ( 593aa1e552fSshatty (((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) | 594aa1e552fSshatty ((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1) 595aa1e552fSshatty )); 596aa1e552fSshatty /* setup horizontal and vertical scaling */ 597aa1e552fSshatty BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5))); 598aa1e552fSshatty /* enable vertical filtering (b0) */ 599aa1e552fSshatty BESW(NV04_CTRL_V, 0x00000001); 600aa1e552fSshatty /* enable horizontal filtering (no effect?) */ 601aa1e552fSshatty BESW(NV04_CTRL_H, 0x00000111); 602aa1e552fSshatty 603aa1e552fSshatty /* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */ 604aa1e552fSshatty BESW(NV04_GENCTRL, 0x00000111); 605aa1e552fSshatty /* select buffer 1 as active (b16) */ 606aa1e552fSshatty BESW(NV04_SU_STATE, 0x00010000); 607aa1e552fSshatty 608aa1e552fSshatty /************************** 609aa1e552fSshatty *** setup color keying *** 610aa1e552fSshatty **************************/ 611aa1e552fSshatty 612aa1e552fSshatty /* setup colorkeying */ 613aa1e552fSshatty switch(si->dm.space) 614aa1e552fSshatty { 615aa1e552fSshatty case B_RGB15_LITTLE: 616aa1e552fSshatty BESW(NV04_COLKEY, ( 617aa1e552fSshatty ((ow->blue.value & ow->blue.mask) << 0) | 618aa1e552fSshatty ((ow->green.value & ow->green.mask) << 5) | 619aa1e552fSshatty ((ow->red.value & ow->red.mask) << 10) | 620aa1e552fSshatty ((ow->alpha.value & ow->alpha.mask) << 15) 621aa1e552fSshatty )); 622aa1e552fSshatty break; 623aa1e552fSshatty case B_RGB16_LITTLE: 624aa1e552fSshatty BESW(NV04_COLKEY, ( 625aa1e552fSshatty ((ow->blue.value & ow->blue.mask) << 0) | 626aa1e552fSshatty ((ow->green.value & ow->green.mask) << 5) | 627aa1e552fSshatty ((ow->red.value & ow->red.mask) << 11) 628aa1e552fSshatty /* this space has no alpha bits */ 629aa1e552fSshatty )); 630aa1e552fSshatty break; 631aa1e552fSshatty case B_CMAP8: 632aa1e552fSshatty case B_RGB32_LITTLE: 633aa1e552fSshatty default: 634aa1e552fSshatty BESW(NV04_COLKEY, ( 635aa1e552fSshatty ((ow->blue.value & ow->blue.mask) << 0) | 636aa1e552fSshatty ((ow->green.value & ow->green.mask) << 8) | 637aa1e552fSshatty ((ow->red.value & ow->red.mask) << 16) | 638aa1e552fSshatty ((ow->alpha.value & ow->alpha.mask) << 24) 639aa1e552fSshatty )); 640aa1e552fSshatty break; 641aa1e552fSshatty } 642aa1e552fSshatty } 643aa1e552fSshatty else 644aa1e552fSshatty { 645aa1e552fSshatty /* >= NV10A */ 646aa1e552fSshatty 64705b269c0Sshatty /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */ 64805b269c0Sshatty BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff)); 64905b269c0Sshatty /* setup buffersize */ 650aa1e552fSshatty //fixme if needed: width must be even officially... 65105b269c0Sshatty BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width)); 65205b269c0Sshatty /* setup source pitch including slopspace (in bytes), 653aa1e552fSshatty * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */ 65405b269c0Sshatty /* Note: 65505b269c0Sshatty * source pitch granularity = 32 pixels on GeForce cards!! */ 65605b269c0Sshatty BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24))); 65705b269c0Sshatty /* setup output window position */ 65805b269c0Sshatty BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16))); 65905b269c0Sshatty /* setup output window size */ 66005b269c0Sshatty BESW(NV10_0DSTSIZE, ( 66105b269c0Sshatty (((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) | 66205b269c0Sshatty ((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1) 66305b269c0Sshatty )); 66405b269c0Sshatty /* setup horizontal scaling */ 66505b269c0Sshatty BESW(NV10_0ISCALH, (hiscalv << 4)); 66605b269c0Sshatty /* setup vertical scaling */ 66705b269c0Sshatty BESW(NV10_0ISCALV, (viscalv << 4)); 66805b269c0Sshatty /* setup (unclipped!) buffer startadress in RAM */ 66905b269c0Sshatty BESW(NV10_0BUFADR, a1orgv); 67005b269c0Sshatty /* enable BES (b0 = 0) */ 67105b269c0Sshatty BESW(NV10_GENCTRL, 0x00000000); 672aa1e552fSshatty /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */ 673aa1e552fSshatty /* This also triggers activation of programmed values (double buffered registers feature) */ 674aa1e552fSshatty BESW(NV10_BUFSEL, 0x00000001); 67508705d96Sshatty 67608705d96Sshatty /************************** 67708705d96Sshatty *** setup color keying *** 67808705d96Sshatty **************************/ 67908705d96Sshatty 68008705d96Sshatty /* setup colorkeying */ 68105b269c0Sshatty switch(si->dm.space) 68205b269c0Sshatty { 68305b269c0Sshatty case B_RGB15_LITTLE: 68405b269c0Sshatty BESW(NV10_COLKEY, ( 68505b269c0Sshatty ((ow->blue.value & ow->blue.mask) << 0) | 68605b269c0Sshatty ((ow->green.value & ow->green.mask) << 5) | 68705b269c0Sshatty ((ow->red.value & ow->red.mask) << 10) | 68805b269c0Sshatty ((ow->alpha.value & ow->alpha.mask) << 15) 68905b269c0Sshatty )); 69005b269c0Sshatty break; 69105b269c0Sshatty case B_RGB16_LITTLE: 69205b269c0Sshatty BESW(NV10_COLKEY, ( 69305b269c0Sshatty ((ow->blue.value & ow->blue.mask) << 0) | 69405b269c0Sshatty ((ow->green.value & ow->green.mask) << 5) | 69505b269c0Sshatty ((ow->red.value & ow->red.mask) << 11) 69605b269c0Sshatty /* this space has no alpha bits */ 69705b269c0Sshatty )); 69805b269c0Sshatty break; 69905b269c0Sshatty case B_CMAP8: 70005b269c0Sshatty case B_RGB32_LITTLE: 70105b269c0Sshatty default: 70205b269c0Sshatty BESW(NV10_COLKEY, ( 70305b269c0Sshatty ((ow->blue.value & ow->blue.mask) << 0) | 70405b269c0Sshatty ((ow->green.value & ow->green.mask) << 8) | 70505b269c0Sshatty ((ow->red.value & ow->red.mask) << 16) | 70605b269c0Sshatty ((ow->alpha.value & ow->alpha.mask) << 24) 70705b269c0Sshatty )); 70805b269c0Sshatty break; 70905b269c0Sshatty } 710aa1e552fSshatty } 71108705d96Sshatty 71208705d96Sshatty return B_OK; 71308705d96Sshatty } 71408705d96Sshatty 71508705d96Sshatty status_t nv_release_bes() 71608705d96Sshatty { 717aa1e552fSshatty if (si->ps.card_arch < NV10A) 718aa1e552fSshatty { 719aa1e552fSshatty /* setup BES control: disable scaler (b0 = 0) */ 720aa1e552fSshatty BESW(NV04_GENCTRL, 0x00000000); 721aa1e552fSshatty } 722aa1e552fSshatty else 723aa1e552fSshatty { 72405b269c0Sshatty /* setup BES control: disable scaler (b0 = 1) */ 72505b269c0Sshatty BESW(NV10_GENCTRL, 0x00000001); 726aa1e552fSshatty } 72708705d96Sshatty 72808705d96Sshatty return B_OK; 72908705d96Sshatty } 730