xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_bes.c (revision 5a84c3b178b092b0c4dc61f1e946639c2783d80a)
1aa1e552fSshatty /* Nvidia TNT and GeForce Back End Scaler functions */
2*5a84c3b1SRudolf Cornelissen /* Written by Rudolf Cornelissen 05/2002-2/2004 */
308705d96Sshatty 
408705d96Sshatty #define MODULE_BIT 0x00000200
508705d96Sshatty 
608705d96Sshatty #include "nv_std.h"
708705d96Sshatty 
808705d96Sshatty //fixme: implement: (used for virtual screens!)
908705d96Sshatty //void move_overlay(uint16 hdisp_start, uint16 vdisp_start);
1008705d96Sshatty 
11cc6f5542SRudolf Cornelissen status_t nv_bes_to_crtc(uint8 crtc)
12cc6f5542SRudolf Cornelissen {
13cc6f5542SRudolf Cornelissen 	if (si->ps.secondary_head)
14cc6f5542SRudolf Cornelissen 	{
15cc6f5542SRudolf Cornelissen 		if (crtc)
16cc6f5542SRudolf Cornelissen 		{
17cc6f5542SRudolf Cornelissen 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
18cc6f5542SRudolf Cornelissen 			/* switch overlay engine to CRTC2 */
19cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
20cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
21cc6f5542SRudolf Cornelissen 			si->overlay.crtc = 1;
22cc6f5542SRudolf Cornelissen 		}
23cc6f5542SRudolf Cornelissen 		else
24cc6f5542SRudolf Cornelissen 		{
25cc6f5542SRudolf Cornelissen 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
26cc6f5542SRudolf Cornelissen 			/* switch overlay engine to CRTC1 */
27cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
28cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_FUNCSEL) |= 0x00001000;
29cc6f5542SRudolf Cornelissen 			si->overlay.crtc = 0;
30cc6f5542SRudolf Cornelissen 		}
31cc6f5542SRudolf Cornelissen 		return B_OK;
32cc6f5542SRudolf Cornelissen 	}
33cc6f5542SRudolf Cornelissen 	else
34cc6f5542SRudolf Cornelissen 	{
35cc6f5542SRudolf Cornelissen 		return B_ERROR;
36cc6f5542SRudolf Cornelissen 	}
37cc6f5542SRudolf Cornelissen }
38cc6f5542SRudolf Cornelissen 
39b4c44701Sshatty status_t nv_bes_init()
40b4c44701Sshatty {
41aa1e552fSshatty 	if (si->ps.card_arch < NV10A)
42aa1e552fSshatty 	{
43aa1e552fSshatty 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
44aa1e552fSshatty 		BESW(NV04_INTE, 0x00000000);
45aa1e552fSshatty 
46aa1e552fSshatty 		/* setup saturation to be 'neutral' */
47aa1e552fSshatty 		BESW(NV04_SAT, 0x00000000);
48aa1e552fSshatty 		/* setup RGB brightness to be 'neutral' */
49aa1e552fSshatty 		BESW(NV04_RED_AMP, 0x00000069);
50aa1e552fSshatty 		BESW(NV04_GRN_AMP, 0x0000003e);
51aa1e552fSshatty 		BESW(NV04_BLU_AMP, 0x00000089);
52aa1e552fSshatty 
53aa1e552fSshatty 		/* setup fifo for fetching data */
54aa1e552fSshatty 		BESW(NV04_FIFOBURL, 0x00000003);
55aa1e552fSshatty 		BESW(NV04_FIFOTHRS, 0x00000038);
56aa1e552fSshatty 
57aa1e552fSshatty 		/* unknown, but needed (registers only have b0 implemented) */
58aa1e552fSshatty 		/* (program both buffers to prevent sync distortions) */
59aa1e552fSshatty 		BESW(NV04_0OFFSET, 0x00000000);
60aa1e552fSshatty 		BESW(NV04_1OFFSET, 0x00000000);
61aa1e552fSshatty 	}
62aa1e552fSshatty 	else
63aa1e552fSshatty 	{
64aa1e552fSshatty 		/* >= NV10A */
65aa1e552fSshatty 
66b4c44701Sshatty 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
67b4c44701Sshatty 		BESW(NV10_INTE, 0x00000000);
68b4c44701Sshatty 		/* shut off GeForce4MX MPEG2 decoder */
69b4c44701Sshatty 		BESW(DEC_GENCTRL, 0x00000000);
70b4c44701Sshatty 		/* setup BES memory-range mask */
71b4c44701Sshatty 		BESW(NV10_0MEMMASK, ((si->ps.memory_size << 20) - 1));
72b4c44701Sshatty 		/* unknown, but needed */
73aa1e552fSshatty 		BESW(NV10_0OFFSET, 0x00000000);
74b4c44701Sshatty 
75b4c44701Sshatty 		/* setup brightness, contrast and saturation to be 'neutral' */
76b4c44701Sshatty 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
77b4c44701Sshatty 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
78aa1e552fSshatty 	}
79b4c44701Sshatty 
80b4c44701Sshatty 	return B_OK;
81b4c44701Sshatty }
82b4c44701Sshatty 
8308705d96Sshatty status_t nv_configure_bes
8408705d96Sshatty 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
8508705d96Sshatty {
8608705d96Sshatty 	/* yuy2 (4:2:2) colorspace calculations */
8708705d96Sshatty 
8808705d96Sshatty 	/* Note:
8908705d96Sshatty 	 * in BeOS R5.0.3 and DANO:
9008705d96Sshatty 	 * 'ow->offset_xxx' is always 0, so not used;
9108705d96Sshatty 	 * 'ow->width' and 'ow->height' are the output window size: does not change
9208705d96Sshatty 	 * if window is clipping;
9308705d96Sshatty 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
9408705d96Sshatty 	 * window. These values can be negative: this means the window is clipping
9508705d96Sshatty 	 * at the left or the top of the display, respectively. */
9608705d96Sshatty 
9708705d96Sshatty 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
9808705d96Sshatty 	 * displayed on screen. This is used for the 'hardware zoom' function. */
9908705d96Sshatty 
10008705d96Sshatty 	/* calculated BES register values */
10105b269c0Sshatty 	uint32 	hcoordv, vcoordv, hiscalv, hsrcstv,	viscalv, a1orgv, v1srcstv;
10208705d96Sshatty 	/* misc used variables */
10308705d96Sshatty 	uint16 temp1, temp2;
10408705d96Sshatty 	/* interval representation, used for scaling calculations */
10508705d96Sshatty 	uint16 intrep, crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
10608705d96Sshatty 	/* inverse scaling factor, used for source positioning */
10708705d96Sshatty 	uint32 ifactor;
10808705d96Sshatty 	/* copy of overlay view which has checked valid values */
10908705d96Sshatty 	overlay_view my_ov;
11008705d96Sshatty 
11108705d96Sshatty 
11208705d96Sshatty 	/**************************************************************************************
11308705d96Sshatty 	 *** copy, check and limit if needed the user-specified view into the input bitmap  ***
11408705d96Sshatty 	 **************************************************************************************/
11508705d96Sshatty 	my_ov = *ov;
11608705d96Sshatty 	/* check for valid 'coordinates' */
11708705d96Sshatty 	if (my_ov.width == 0) my_ov.width++;
11808705d96Sshatty 	if (my_ov.height == 0) my_ov.height++;
11908705d96Sshatty 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
12008705d96Sshatty 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
12108705d96Sshatty 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
12208705d96Sshatty 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
12308705d96Sshatty 	if (my_ov.v_start > (ob->height - 1))
12408705d96Sshatty 		my_ov.v_start = (ob->height - 1);
12508705d96Sshatty 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
12608705d96Sshatty 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
12708705d96Sshatty 
12808705d96Sshatty 	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
12908705d96Sshatty 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
13008705d96Sshatty 
13108705d96Sshatty 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
13208705d96Sshatty 	 * constraints only */
13308705d96Sshatty 	crtc_hstart = si->dm.h_display_start;
134cc6f5542SRudolf Cornelissen 	/* make dualhead stretch and switch mode work while we're at it.. */
135cc6f5542SRudolf Cornelissen 	if (si->overlay.crtc)
13608705d96Sshatty 	{
13708705d96Sshatty 		crtc_hstart += si->dm.timing.h_display;
13808705d96Sshatty 	}
139cc6f5542SRudolf Cornelissen 
14008705d96Sshatty 	/* horizontal end is the first position beyond the displayed range on the CRTC */
14108705d96Sshatty 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
14208705d96Sshatty 	crtc_vstart = si->dm.v_display_start;
14308705d96Sshatty 	/* vertical end is the first position beyond the displayed range on the CRTC */
14408705d96Sshatty 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
14508705d96Sshatty 
14608705d96Sshatty 
14708705d96Sshatty 	/****************************************
14808705d96Sshatty 	 *** setup all edges of output window ***
14908705d96Sshatty 	 ****************************************/
15008705d96Sshatty 
15108705d96Sshatty 	/* setup left and right edges of output window */
15208705d96Sshatty 	hcoordv = 0;
15308705d96Sshatty 	/* left edge coordinate of output window, must be inside desktop */
15408705d96Sshatty 	/* clipping on the left side */
15508705d96Sshatty 	if (ow->h_start < crtc_hstart)
15608705d96Sshatty 	{
15708705d96Sshatty 		temp1 = 0;
15808705d96Sshatty 	}
15908705d96Sshatty 	else
16008705d96Sshatty 	{
16108705d96Sshatty 		/* clipping on the right side */
16208705d96Sshatty 		if (ow->h_start >= (crtc_hend - 1))
16308705d96Sshatty 		{
16408705d96Sshatty 			/* width < 2 is not allowed */
16508705d96Sshatty 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
16608705d96Sshatty 		}
16708705d96Sshatty 		else
16808705d96Sshatty 		/* no clipping here */
16908705d96Sshatty 		{
17008705d96Sshatty 			temp1 = (ow->h_start - crtc_hstart) & 0x7ff;
17108705d96Sshatty 		}
17208705d96Sshatty 	}
17308705d96Sshatty 	hcoordv |= temp1 << 16;
17408705d96Sshatty 	/* right edge coordinate of output window, must be inside desktop */
17508705d96Sshatty 	/* width < 2 is not allowed */
17608705d96Sshatty 	if (ow->width < 2)
17708705d96Sshatty 	{
17808705d96Sshatty 		temp2 = (temp1 + 1) & 0x7ff;
17908705d96Sshatty 	}
18008705d96Sshatty 	else
18108705d96Sshatty 	{
18208705d96Sshatty 		/* clipping on the right side */
18308705d96Sshatty 		if ((ow->h_start + ow->width - 1) > (crtc_hend - 1))
18408705d96Sshatty 		{
18508705d96Sshatty 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
18608705d96Sshatty 		}
18708705d96Sshatty 		else
18808705d96Sshatty 		{
18908705d96Sshatty 			/* clipping on the left side */
19008705d96Sshatty 			if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
19108705d96Sshatty 			{
19208705d96Sshatty 				/* width < 2 is not allowed */
19308705d96Sshatty 				temp2 = 1;
19408705d96Sshatty 			}
19508705d96Sshatty 			else
19608705d96Sshatty 			/* no clipping here */
19708705d96Sshatty 			{
19808705d96Sshatty 				temp2 = ((uint16)(ow->h_start + ow->width - crtc_hstart - 1)) & 0x7ff;
19908705d96Sshatty 			}
20008705d96Sshatty 		}
20108705d96Sshatty 	}
20208705d96Sshatty 	hcoordv |= temp2 << 0;
20308705d96Sshatty 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
20408705d96Sshatty 
20508705d96Sshatty 	/* setup top and bottom edges of output window */
20608705d96Sshatty 	vcoordv = 0;
20708705d96Sshatty 	/* top edge coordinate of output window, must be inside desktop */
20808705d96Sshatty 	/* clipping on the top side */
20908705d96Sshatty 	if (ow->v_start < crtc_vstart)
21008705d96Sshatty 	{
21108705d96Sshatty 		temp1 = 0;
21208705d96Sshatty 	}
21308705d96Sshatty 	else
21408705d96Sshatty 	{
21508705d96Sshatty 		/* clipping on the bottom side */
21608705d96Sshatty 		if (ow->v_start >= (crtc_vend - 1))
21708705d96Sshatty 		{
21808705d96Sshatty 			/* height < 2 is not allowed */
21908705d96Sshatty 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
22008705d96Sshatty 		}
22108705d96Sshatty 		else
22208705d96Sshatty 		/* no clipping here */
22308705d96Sshatty 		{
22408705d96Sshatty 			temp1 = (ow->v_start - crtc_vstart) & 0x7ff;
22508705d96Sshatty 		}
22608705d96Sshatty 	}
22708705d96Sshatty 	vcoordv |= temp1 << 16;
22808705d96Sshatty 	/* bottom edge coordinate of output window, must be inside desktop */
22908705d96Sshatty 	/* height < 2 is not allowed */
23008705d96Sshatty 	if (ow->height < 2)
23108705d96Sshatty 	{
23208705d96Sshatty 		temp2 = (temp1 + 1) & 0x7ff;
23308705d96Sshatty 	}
23408705d96Sshatty 	else
23508705d96Sshatty 	{
23608705d96Sshatty 		/* clipping on the bottom side */
23708705d96Sshatty 		if ((ow->v_start + ow->height - 1) > (crtc_vend - 1))
23808705d96Sshatty 		{
23908705d96Sshatty 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
24008705d96Sshatty 		}
24108705d96Sshatty 		else
24208705d96Sshatty 		{
24308705d96Sshatty 			/* clipping on the top side */
24408705d96Sshatty 			if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
24508705d96Sshatty 			{
24608705d96Sshatty 				/* height < 2 is not allowed */
24708705d96Sshatty 				temp2 = 1;
24808705d96Sshatty 			}
24908705d96Sshatty 			else
25008705d96Sshatty 			/* no clipping here */
25108705d96Sshatty 			{
25208705d96Sshatty 				temp2 = ((uint16)(ow->v_start + ow->height - crtc_vstart - 1)) & 0x7ff;
25308705d96Sshatty 			}
25408705d96Sshatty 		}
25508705d96Sshatty 	}
25608705d96Sshatty 	vcoordv |= temp2 << 0;
25708705d96Sshatty 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
25808705d96Sshatty 
25908705d96Sshatty 
26008705d96Sshatty 	/*********************************************
26108705d96Sshatty 	 *** setup horizontal scaling and clipping ***
26208705d96Sshatty 	 *********************************************/
26308705d96Sshatty 
26408705d96Sshatty 	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
26508705d96Sshatty 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
26608705d96Sshatty 	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
26708705d96Sshatty 
26808705d96Sshatty 	/* do horizontal scaling... */
26908705d96Sshatty 	/* determine interval representation value, taking zoom into account */
27008705d96Sshatty 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
27108705d96Sshatty 	{
27208705d96Sshatty 		/* horizontal filtering is ON */
27308705d96Sshatty 		if ((my_ov.width == ow->width) | (ow->width < 2))
27408705d96Sshatty 		{
27508705d96Sshatty 			/* no horizontal scaling used, OR destination width < 2 */
27608705d96Sshatty 			intrep = 0;
27708705d96Sshatty 		}
27808705d96Sshatty 		else
27908705d96Sshatty 		{
28008705d96Sshatty 			intrep = 1;
28108705d96Sshatty 		}
28208705d96Sshatty 	}
28308705d96Sshatty 	else
28408705d96Sshatty 	{
28508705d96Sshatty 		/* horizontal filtering is OFF */
28608705d96Sshatty 		if ((ow->width < my_ov.width) & (ow->width >= 2))
28708705d96Sshatty 		{
28808705d96Sshatty 			/* horizontal downscaling used AND destination width >= 2 */
28908705d96Sshatty 			intrep = 1;
29008705d96Sshatty 		}
29108705d96Sshatty 		else
29208705d96Sshatty 		{
29308705d96Sshatty 			intrep = 0;
29408705d96Sshatty 		}
29508705d96Sshatty 	}
29608705d96Sshatty 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
29708705d96Sshatty 
29808705d96Sshatty 	/* calculate inverse horizontal scaling factor, taking zoom into account */
29908705d96Sshatty 	/* standard scaling formula: */
30008705d96Sshatty 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
30108705d96Sshatty 
30208705d96Sshatty 	/* correct factor to prevent most-right visible 'line' from distorting */
30308705d96Sshatty 	ifactor -= (1 << 2);
30405b269c0Sshatty 	hiscalv = ifactor;
30508705d96Sshatty 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
30608705d96Sshatty 
30708705d96Sshatty 	/* check scaling factor (and modify if needed) to be within scaling limits */
308*5a84c3b1SRudolf Cornelissen 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
309aa1e552fSshatty 	if (hiscalv < 0x00002000)
31008705d96Sshatty 	{
31108705d96Sshatty 		/* (non-inverse) factor too large, set factor to max. valid value */
312aa1e552fSshatty 		hiscalv = 0x00002000;
31308705d96Sshatty 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
31408705d96Sshatty 	}
315887d4abbSshatty 	switch (si->ps.card_arch)
316887d4abbSshatty 	{
317aa1e552fSshatty 	case NV04A:
318aa1e552fSshatty 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
319aa1e552fSshatty 		 * (16bit register with 0.11 format value) */
320aa1e552fSshatty 		if (hiscalv > 0x0000ffff)
321aa1e552fSshatty 		{
322aa1e552fSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
323aa1e552fSshatty 			hiscalv = 0x0000ffff;
324aa1e552fSshatty 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
325aa1e552fSshatty 		}
326aa1e552fSshatty 		break;
327887d4abbSshatty 	case NV30A:
328e8d5d47cSRudolf Cornelissen 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
329e8d5d47cSRudolf Cornelissen 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
330887d4abbSshatty 		{
331887d4abbSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
332887d4abbSshatty 			hiscalv = (2 << 16);
333887d4abbSshatty 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
334887d4abbSshatty 		}
335e8d5d47cSRudolf Cornelissen 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
336e8d5d47cSRudolf Cornelissen 		 * So let it fall through... */
337e8d5d47cSRudolf Cornelissen 		if (si->ps.card_type != NV31) break;
338887d4abbSshatty 	default:
339aa1e552fSshatty 		/* the rest has a downscaling limit of 0.125 */
34005b269c0Sshatty 		if (hiscalv > (8 << 16))
34108705d96Sshatty 		{
34208705d96Sshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
34305b269c0Sshatty 			hiscalv = (8 << 16);
34408705d96Sshatty 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
34508705d96Sshatty 		}
346887d4abbSshatty 		break;
347887d4abbSshatty 	}
34808705d96Sshatty 	/* AND below is required by hardware */
34908705d96Sshatty 	hiscalv &= 0x001ffffc;
35008705d96Sshatty 
35108705d96Sshatty 
35208705d96Sshatty 	/* do horizontal clipping... */
35308705d96Sshatty 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
35408705d96Sshatty 	/* Note:
35508705d96Sshatty 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
35608705d96Sshatty 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
35708705d96Sshatty 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
35808705d96Sshatty 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
35908705d96Sshatty 	/* Note also:
36008705d96Sshatty 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
36108705d96Sshatty 	hsrcstv = 0;
36208705d96Sshatty 	/* check for destination horizontal clipping at left side */
36308705d96Sshatty 	if (ow->h_start < crtc_hstart)
36408705d96Sshatty 	{
36508705d96Sshatty 		/* check if entire destination picture is clipping left:
36608705d96Sshatty 		 * (2 pixels will be clamped onscreen at least) */
36708705d96Sshatty 		if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
36808705d96Sshatty 		{
36908705d96Sshatty 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
37008705d96Sshatty 			hsrcstv += (ow->width - 2);
37108705d96Sshatty 		}
37208705d96Sshatty 		else
37308705d96Sshatty 		{
37408705d96Sshatty 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
37508705d96Sshatty 			hsrcstv += (crtc_hstart - ow->h_start);
37608705d96Sshatty 		}
37708705d96Sshatty 		LOG(4,("Overlay: clipping left...\n"));
37808705d96Sshatty 
37908705d96Sshatty 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
38008705d96Sshatty 		 * Note that this also already takes care of aligning the value to the BES register! */
38108705d96Sshatty 		hsrcstv *= ifactor;
38208705d96Sshatty 	}
38308705d96Sshatty 	/* take zoom into account */
38408705d96Sshatty 	hsrcstv += ((uint32)my_ov.h_start) << 16;
38508705d96Sshatty 	/* AND below required by hardware */
38608705d96Sshatty 	hsrcstv &= 0x03fffffc;
38708705d96Sshatty 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536));
38808705d96Sshatty 
38908705d96Sshatty 
39008705d96Sshatty 	/*******************************************
39108705d96Sshatty 	 *** setup vertical scaling and clipping ***
39208705d96Sshatty 	 *******************************************/
39308705d96Sshatty 
39408705d96Sshatty 	/* do vertical scaling... */
39508705d96Sshatty 	/* determine interval representation value, taking zoom into account */
39608705d96Sshatty 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
39708705d96Sshatty 	{
39808705d96Sshatty 		/* vertical filtering is ON */
39908705d96Sshatty 		if ((my_ov.height == ow->height) | (ow->height < 2))
40008705d96Sshatty 		{
40108705d96Sshatty 			/* no vertical scaling used, OR destination height < 2 */
40208705d96Sshatty 			intrep = 0;
40308705d96Sshatty 		}
40408705d96Sshatty 		else
40508705d96Sshatty 		{
40608705d96Sshatty 			intrep = 1;
40708705d96Sshatty 		}
40808705d96Sshatty 	}
40908705d96Sshatty 	else
41008705d96Sshatty 	{
41108705d96Sshatty 		/* vertical filtering is OFF */
41208705d96Sshatty 		if ((ow->height < my_ov.height) & (ow->height >= 2))
41308705d96Sshatty 		{
41408705d96Sshatty 			/* vertical downscaling used AND destination height >= 2 */
41508705d96Sshatty 			intrep = 1;
41608705d96Sshatty 		}
41708705d96Sshatty 		else
41808705d96Sshatty 		{
41908705d96Sshatty 			intrep = 0;
42008705d96Sshatty 		}
42108705d96Sshatty 	}
42208705d96Sshatty 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
42308705d96Sshatty 
42408705d96Sshatty 	/* calculate inverse vertical scaling factor, taking zoom into account */
42508705d96Sshatty 	/* standard scaling formula: */
42608705d96Sshatty 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
42708705d96Sshatty 
42808705d96Sshatty 	/* correct factor to prevent lowest visible line from distorting */
42908705d96Sshatty 	ifactor -= (1 << 2);
43008705d96Sshatty 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
43108705d96Sshatty 
43208705d96Sshatty 	/* preserve ifactor for source positioning calculations later on */
43308705d96Sshatty 	viscalv = ifactor;
43408705d96Sshatty 
43508705d96Sshatty 	/* check scaling factor (and modify if needed) to be within scaling limits */
436*5a84c3b1SRudolf Cornelissen 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
437aa1e552fSshatty 	if (viscalv < 0x00002000)
43808705d96Sshatty 	{
43908705d96Sshatty 		/* (non-inverse) factor too large, set factor to max. valid value */
440aa1e552fSshatty 		viscalv = 0x00002000;
44108705d96Sshatty 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
44208705d96Sshatty 	}
443887d4abbSshatty 	switch (si->ps.card_arch)
444887d4abbSshatty 	{
445aa1e552fSshatty 	case NV04A:
446aa1e552fSshatty 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
447aa1e552fSshatty 		 * (16bit register with 0.11 format value) */
448aa1e552fSshatty 		if (viscalv > 0x0000ffff)
449aa1e552fSshatty 		{
450aa1e552fSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
451aa1e552fSshatty 			viscalv = 0x0000ffff;
452aa1e552fSshatty 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
453aa1e552fSshatty 		}
454aa1e552fSshatty 		break;
455887d4abbSshatty 	case NV30A:
456e8d5d47cSRudolf Cornelissen 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
457e8d5d47cSRudolf Cornelissen 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
458887d4abbSshatty 		{
459887d4abbSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
460887d4abbSshatty 			viscalv = (2 << 16);
461887d4abbSshatty 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
462887d4abbSshatty 		}
463e8d5d47cSRudolf Cornelissen 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
464e8d5d47cSRudolf Cornelissen 		 * So let it fall through... */
465e8d5d47cSRudolf Cornelissen 		if (si->ps.card_type != NV31) break;
466887d4abbSshatty 	default:
467aa1e552fSshatty 		/* the rest has a downscaling limit of 0.125 */
46805b269c0Sshatty 		if (viscalv > (8 << 16))
46908705d96Sshatty 		{
47008705d96Sshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
47105b269c0Sshatty 			viscalv = (8 << 16);
47208705d96Sshatty 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
47308705d96Sshatty 		}
474887d4abbSshatty 		break;
475887d4abbSshatty 	}
47608705d96Sshatty 	/* AND below is required by hardware */
47708705d96Sshatty 	viscalv &= 0x001ffffc;
47808705d96Sshatty 
47908705d96Sshatty 
480aa1e552fSshatty 	/* calculate input bitmap origin address */
481aa1e552fSshatty 	a1orgv = (uint32)((vuint32 *)ob->buffer);
482aa1e552fSshatty 	a1orgv -= (uint32)((vuint32 *)si->framebuffer);
483aa1e552fSshatty 
48408705d96Sshatty 	/* do vertical clipping... */
48505b269c0Sshatty 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
48608705d96Sshatty 	/* Note:
48708705d96Sshatty 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
48808705d96Sshatty 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
48908705d96Sshatty 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
49008705d96Sshatty 	/* Note also:
49108705d96Sshatty 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
49208705d96Sshatty 
49305b269c0Sshatty 	v1srcstv = 0;
49408705d96Sshatty 	/* check for destination vertical clipping at top side */
49508705d96Sshatty 	if (ow->v_start < crtc_vstart)
49608705d96Sshatty 	{
49708705d96Sshatty 		/* check if entire destination picture is clipping at top:
49808705d96Sshatty 		 * (2 pixels will be clamped onscreen at least) */
49908705d96Sshatty 		if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
50008705d96Sshatty 		{
50105b269c0Sshatty 			/* increase 'number of clipping pixels' with 'fixed value':
50205b269c0Sshatty 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
50305b269c0Sshatty 			v1srcstv = (ow->height - 2) * ifactor;
504aa1e552fSshatty 			/* on pre-NV10 we need to do clipping in the source
505aa1e552fSshatty 			 * bitmap because no seperate clipping registers exist... */
506aa1e552fSshatty 			if (si->ps.card_arch < NV10A)
507aa1e552fSshatty 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
50808705d96Sshatty 		}
50908705d96Sshatty 		else
51008705d96Sshatty 		{
51105b269c0Sshatty 			/* increase 'first contributing pixel' with:
51205b269c0Sshatty 			 * number of destination picture clipping pixels * inverse scaling factor */
51305b269c0Sshatty 			v1srcstv = (crtc_vstart - ow->v_start) * ifactor;
514aa1e552fSshatty 			/* on pre-NV10 we need to do clipping in the source
515aa1e552fSshatty 			 * bitmap because no seperate clipping registers exist... */
516aa1e552fSshatty 			if (si->ps.card_arch < NV10A)
517aa1e552fSshatty 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
51808705d96Sshatty 		}
51908705d96Sshatty 		LOG(4,("Overlay: clipping at top...\n"));
52008705d96Sshatty 	}
52108705d96Sshatty 	/* take zoom into account */
52205b269c0Sshatty 	v1srcstv += (((uint32)my_ov.v_start) << 16);
523aa1e552fSshatty 	if (si->ps.card_arch < NV10A)
524aa1e552fSshatty 	{
525aa1e552fSshatty 		a1orgv += (my_ov.v_start * ob->bytes_per_row);
526aa1e552fSshatty 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv));
527aa1e552fSshatty 	}
52805b269c0Sshatty 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536));
52908705d96Sshatty 
53005b269c0Sshatty 	/* AND below is probably required by hardware. */
53108705d96Sshatty 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
53205b269c0Sshatty 	a1orgv &= 0xfffffff0;
53305b269c0Sshatty 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv));
53408705d96Sshatty 
53508705d96Sshatty 
53608705d96Sshatty 	/*****************************
53708705d96Sshatty 	 *** log color keying info ***
53808705d96Sshatty 	 *****************************/
53908705d96Sshatty 
54008705d96Sshatty 	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
54108705d96Sshatty 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
54208705d96Sshatty 	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
54308705d96Sshatty 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
54408705d96Sshatty 
54508705d96Sshatty 
54605b269c0Sshatty 	/*****************
54705b269c0Sshatty 	 *** log flags ***
54805b269c0Sshatty 	 *****************/
54908705d96Sshatty 
55008705d96Sshatty 	LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags));
55105b269c0Sshatty 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
55208705d96Sshatty 
55308705d96Sshatty 
55408705d96Sshatty 	/*************************************
55508705d96Sshatty 	 *** sync to BES (Back End Scaler) ***
55608705d96Sshatty 	 *************************************/
55708705d96Sshatty 
558b4c44701Sshatty 	/* Done in card hardware:
559b4c44701Sshatty 	 * double buffered registers + trigger if programming complete feature. */
56008705d96Sshatty 
56108705d96Sshatty 
56208705d96Sshatty 	/**************************************
56308705d96Sshatty 	 *** actually program the registers ***
56408705d96Sshatty 	 **************************************/
56508705d96Sshatty 
566aa1e552fSshatty 	if (si->ps.card_arch < NV10A)
567aa1e552fSshatty 	{
568aa1e552fSshatty 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
569aa1e552fSshatty 		BESW(NV04_OE_STATE, 0x00000000);
570aa1e552fSshatty 		/* select buffer 0 as active (b16) */
571aa1e552fSshatty 		BESW(NV04_SU_STATE, 0x00000000);
572aa1e552fSshatty 		/* unknown (no effect?) */
573aa1e552fSshatty 		BESW(NV04_RM_STATE, 0x00000000);
574aa1e552fSshatty 		/* setup clipped(!) buffer startadress in RAM */
575aa1e552fSshatty 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
576aa1e552fSshatty 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
577aa1e552fSshatty 		/* (program both buffers to prevent sync distortions) */
578aa1e552fSshatty 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
579aa1e552fSshatty 		a1orgv += ((hsrcstv >> 16) * 2);
580aa1e552fSshatty 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
581aa1e552fSshatty 		BESW(NV04_0BUFADR, (a1orgv & ~0x03));
582aa1e552fSshatty 		BESW(NV04_1BUFADR, (a1orgv & ~0x03));
583aa1e552fSshatty 		/* setup buffer source pitch including slopspace (in bytes).
584aa1e552fSshatty 		 * Note:
585aa1e552fSshatty 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
586aa1e552fSshatty 		/* (program both buffers to prevent sync distortions) */
587aa1e552fSshatty 		BESW(NV04_0SRCPTCH, (ob->width * 2));
588aa1e552fSshatty 		BESW(NV04_1SRCPTCH, (ob->width * 2));
589aa1e552fSshatty 		/* setup output window position */
590aa1e552fSshatty 		BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
591aa1e552fSshatty 		/* setup output window size */
592aa1e552fSshatty 		BESW(NV04_DSTSIZE, (
593aa1e552fSshatty 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
594aa1e552fSshatty 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
595aa1e552fSshatty 			));
596aa1e552fSshatty 		/* setup horizontal and vertical scaling */
597aa1e552fSshatty 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
598aa1e552fSshatty 		/* enable vertical filtering (b0) */
599aa1e552fSshatty 		BESW(NV04_CTRL_V, 0x00000001);
600aa1e552fSshatty 		/* enable horizontal filtering (no effect?) */
601aa1e552fSshatty 		BESW(NV04_CTRL_H, 0x00000111);
602aa1e552fSshatty 
603aa1e552fSshatty 		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
604aa1e552fSshatty 		BESW(NV04_GENCTRL, 0x00000111);
605aa1e552fSshatty 		/* select buffer 1 as active (b16) */
606aa1e552fSshatty 		BESW(NV04_SU_STATE, 0x00010000);
607aa1e552fSshatty 
608aa1e552fSshatty 		/**************************
609aa1e552fSshatty 		 *** setup color keying ***
610aa1e552fSshatty 		 **************************/
611aa1e552fSshatty 
612aa1e552fSshatty 		/* setup colorkeying */
613aa1e552fSshatty 		switch(si->dm.space)
614aa1e552fSshatty 		{
615aa1e552fSshatty 		case B_RGB15_LITTLE:
616aa1e552fSshatty 			BESW(NV04_COLKEY, (
617aa1e552fSshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
618aa1e552fSshatty 				((ow->green.value & ow->green.mask) << 5) |
619aa1e552fSshatty 				((ow->red.value & ow->red.mask) << 10)    |
620aa1e552fSshatty 				((ow->alpha.value & ow->alpha.mask) << 15)
621aa1e552fSshatty 				));
622aa1e552fSshatty 			break;
623aa1e552fSshatty 		case B_RGB16_LITTLE:
624aa1e552fSshatty 			BESW(NV04_COLKEY, (
625aa1e552fSshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
626aa1e552fSshatty 				((ow->green.value & ow->green.mask) << 5) |
627aa1e552fSshatty 				((ow->red.value & ow->red.mask) << 11)
628aa1e552fSshatty 				/* this space has no alpha bits */
629aa1e552fSshatty 				));
630aa1e552fSshatty 			break;
631aa1e552fSshatty 		case B_CMAP8:
632aa1e552fSshatty 		case B_RGB32_LITTLE:
633aa1e552fSshatty 		default:
634aa1e552fSshatty 			BESW(NV04_COLKEY, (
635aa1e552fSshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
636aa1e552fSshatty 				((ow->green.value & ow->green.mask) << 8) |
637aa1e552fSshatty 				((ow->red.value & ow->red.mask) << 16)    |
638aa1e552fSshatty 				((ow->alpha.value & ow->alpha.mask) << 24)
639aa1e552fSshatty 				));
640aa1e552fSshatty 			break;
641aa1e552fSshatty 		}
642aa1e552fSshatty 	}
643aa1e552fSshatty 	else
644aa1e552fSshatty 	{
645aa1e552fSshatty 		/* >= NV10A */
646aa1e552fSshatty 
64705b269c0Sshatty 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
64805b269c0Sshatty 		BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff));
64905b269c0Sshatty 		/* setup buffersize */
650aa1e552fSshatty 		//fixme if needed: width must be even officially...
65105b269c0Sshatty 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
65205b269c0Sshatty 		/* setup source pitch including slopspace (in bytes),
653aa1e552fSshatty 		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
65405b269c0Sshatty 		/* Note:
65505b269c0Sshatty 		 * source pitch granularity = 32 pixels on GeForce cards!! */
65605b269c0Sshatty 		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
65705b269c0Sshatty 		/* setup output window position */
65805b269c0Sshatty 		BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
65905b269c0Sshatty 		/* setup output window size */
66005b269c0Sshatty 		BESW(NV10_0DSTSIZE, (
66105b269c0Sshatty 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
66205b269c0Sshatty 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
66305b269c0Sshatty 			));
66405b269c0Sshatty 		/* setup horizontal scaling */
66505b269c0Sshatty 		BESW(NV10_0ISCALH, (hiscalv << 4));
66605b269c0Sshatty 		/* setup vertical scaling */
66705b269c0Sshatty 		BESW(NV10_0ISCALV, (viscalv << 4));
66805b269c0Sshatty 		/* setup (unclipped!) buffer start address in RAM */
66905b269c0Sshatty 		BESW(NV10_0BUFADR, a1orgv);
67005b269c0Sshatty 		/* enable BES (b0 = 0) */
67105b269c0Sshatty 		BESW(NV10_GENCTRL, 0x00000000);
672aa1e552fSshatty 		/* We only use buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
673aa1e552fSshatty 		/* This also triggers activation of programmed values (double buffered registers feature) */
674aa1e552fSshatty 		BESW(NV10_BUFSEL, 0x00000001);
67508705d96Sshatty 
67608705d96Sshatty 		/**************************
67708705d96Sshatty 		 *** setup color keying ***
67808705d96Sshatty 		 **************************/
67908705d96Sshatty 
68008705d96Sshatty 		/* setup colorkeying */
68105b269c0Sshatty 		switch(si->dm.space)
68205b269c0Sshatty 		{
68305b269c0Sshatty 		case B_RGB15_LITTLE:
68405b269c0Sshatty 			BESW(NV10_COLKEY, (
68505b269c0Sshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
68605b269c0Sshatty 				((ow->green.value & ow->green.mask) << 5) |
68705b269c0Sshatty 				((ow->red.value & ow->red.mask) << 10)    |
68805b269c0Sshatty 				((ow->alpha.value & ow->alpha.mask) << 15)
68905b269c0Sshatty 				));
69005b269c0Sshatty 			break;
69105b269c0Sshatty 		case B_RGB16_LITTLE:
69205b269c0Sshatty 			BESW(NV10_COLKEY, (
69305b269c0Sshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
69405b269c0Sshatty 				((ow->green.value & ow->green.mask) << 5) |
69505b269c0Sshatty 				((ow->red.value & ow->red.mask) << 11)
69605b269c0Sshatty 				/* this space has no alpha bits */
69705b269c0Sshatty 				));
69805b269c0Sshatty 			break;
69905b269c0Sshatty 		case B_CMAP8:
70005b269c0Sshatty 		case B_RGB32_LITTLE:
70105b269c0Sshatty 		default:
70205b269c0Sshatty 			BESW(NV10_COLKEY, (
70305b269c0Sshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
70405b269c0Sshatty 				((ow->green.value & ow->green.mask) << 8) |
70505b269c0Sshatty 				((ow->red.value & ow->red.mask) << 16)    |
70605b269c0Sshatty 				((ow->alpha.value & ow->alpha.mask) << 24)
70705b269c0Sshatty 				));
70805b269c0Sshatty 			break;
70905b269c0Sshatty 		}
710aa1e552fSshatty 	}
71108705d96Sshatty 
71208705d96Sshatty 	return B_OK;
71308705d96Sshatty }
71408705d96Sshatty 
71508705d96Sshatty status_t nv_release_bes()
71608705d96Sshatty {
717aa1e552fSshatty 	if (si->ps.card_arch < NV10A)
718aa1e552fSshatty 	{
719aa1e552fSshatty 		/* setup BES control: disable scaler (b0 = 0) */
720aa1e552fSshatty 		BESW(NV04_GENCTRL, 0x00000000);
721aa1e552fSshatty 	}
722aa1e552fSshatty 	else
723aa1e552fSshatty 	{
72405b269c0Sshatty 		/* setup BES control: disable scaler (b0 = 1) */
72505b269c0Sshatty 		BESW(NV10_GENCTRL, 0x00000001);
726aa1e552fSshatty 	}
72708705d96Sshatty 
72808705d96Sshatty 	return B_OK;
72908705d96Sshatty }
730