xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_bes.c (revision dc234e798d854e75a197ee9fc85c6a1eede67fef)
/* Nvidia TNT and GeForce Back End Scaler functions */
/* Written by Rudolf Cornelissen 05/2002-5/2009 */
308705d96Sshatty 
408705d96Sshatty #define MODULE_BIT 0x00000200
508705d96Sshatty 
608705d96Sshatty #include "nv_std.h"
708705d96Sshatty 
804e6b7ceSRudolf Cornelissen typedef struct move_overlay_info move_overlay_info;
904e6b7ceSRudolf Cornelissen 
1004e6b7ceSRudolf Cornelissen struct move_overlay_info
1104e6b7ceSRudolf Cornelissen {
1204e6b7ceSRudolf Cornelissen 	uint32 hcoordv;		/* left and right edges of video output window */
1304e6b7ceSRudolf Cornelissen 	uint32 vcoordv;		/* top and bottom edges of video output window */
1404e6b7ceSRudolf Cornelissen 	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
1504e6b7ceSRudolf Cornelissen 	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
16*dc234e79SAdrien Destugues 	uintptr_t a1orgv;		/* alternate source clipping via startadress of source buffer */
1704e6b7ceSRudolf Cornelissen };
1804e6b7ceSRudolf Cornelissen 
1904e6b7ceSRudolf Cornelissen static void nv_bes_calc_move_overlay(move_overlay_info *moi);
2004e6b7ceSRudolf Cornelissen static void nv_bes_program_move_overlay(move_overlay_info moi);
2104e6b7ceSRudolf Cornelissen 
22ac83e70cSRudolf Cornelissen /* move the overlay output window in virtualscreens */
23ac83e70cSRudolf Cornelissen /* Note:
24ac83e70cSRudolf Cornelissen  * si->dm.h_display_start and si->dm.v_display_start determine where the new
25ac83e70cSRudolf Cornelissen  * output window is located! */
nv_bes_move_overlay()26ac83e70cSRudolf Cornelissen void nv_bes_move_overlay()
27ac83e70cSRudolf Cornelissen {
2804e6b7ceSRudolf Cornelissen 	move_overlay_info moi;
2904e6b7ceSRudolf Cornelissen 
3004e6b7ceSRudolf Cornelissen 	/* abort if overlay is not active */
3104e6b7ceSRudolf Cornelissen 	if (!si->overlay.active) return;
3204e6b7ceSRudolf Cornelissen 
3304e6b7ceSRudolf Cornelissen 	nv_bes_calc_move_overlay(&moi);
3404e6b7ceSRudolf Cornelissen 	nv_bes_program_move_overlay(moi);
3504e6b7ceSRudolf Cornelissen }
3604e6b7ceSRudolf Cornelissen 
/* Calculate everything needed to (re)position the overlay output window.
 *
 * Reads the overlay state saved in the shared info (si->overlay.ow, .ob,
 * .my_ov, .h_ifactor, .v_ifactor) together with the current display mode
 * (si->dm) and fills in every field of *moi:
 *  - hcoordv / vcoordv: output window edges relative to the visible CRTC
 *    area, clamped so that at least 2 pixels stay on screen;
 *  - hsrcstv / v1srcstv: first contributing source (sub)pixel, scaled by 65536;
 *  - a1orgv: source buffer origin as an offset relative to si->framebuffer
 *    (on pre-NV10 cards this already includes the vertical clipping).
 * On dualhead cards the BES is also routed to a head via nv_bes_to_crtc().
 * Apart from that head switch no registers are written here; programming is
 * done by nv_bes_program_move_overlay(). */
static void nv_bes_calc_move_overlay(move_overlay_info *moi)
{
	/* misc used variables */
	uint16 temp1, temp2;
	/* visible screen window in virtual workspaces */
	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;

	/* do 'overlay follow head' in dualhead modes on dualhead cards */
	if (si->ps.secondary_head)
	{
		switch (si->dm.flags & DUALHEAD_BITS)
		{
		case DUALHEAD_ON:
		case DUALHEAD_SWITCH:
			/* pick the head depending on whether the horizontal center of the
			 * output window lies before the end of the first displayed range */
			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
					(si->dm.h_display_start + si->dm.timing.h_display))
				nv_bes_to_crtc(si->crtc_switch_mode);
			else
				nv_bes_to_crtc(!si->crtc_switch_mode);
			break;
		default:
			nv_bes_to_crtc(si->crtc_switch_mode);
			break;
		}
	}

	/* the BES does not respect virtual_workspaces, but adheres to CRTC
	 * constraints only */
	crtc_hstart = si->dm.h_display_start;
	/* make dualhead stretch and switch mode work while we're at it.. */
	if (si->overlay.crtc)
	{
		crtc_hstart += si->dm.timing.h_display;
	}

	/* horizontal end is the first position beyond the displayed range on the CRTC */
	crtc_hend = crtc_hstart + si->dm.timing.h_display;
	crtc_vstart = si->dm.v_display_start;
	/* vertical end is the first position beyond the displayed range on the CRTC */
	crtc_vend = crtc_vstart + si->dm.timing.v_display;


	/****************************************
	 *** setup all edges of output window ***
	 ****************************************/

	/* setup left and right edges of output window */
	moi->hcoordv = 0;
	/* left edge coordinate of output window, must be inside desktop */
	if (si->overlay.ow.h_start < crtc_hstart)
	{
		/* clipping on the left side */
		temp1 = 0;
	}
	else if (si->overlay.ow.h_start >= (crtc_hend - 1))
	{
		/* clipping on the right side: width < 2 is not allowed */
		temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
	}
	else
	{
		/* no clipping here */
		temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
	}
	moi->hcoordv |= temp1 << 16;
	/* right edge coordinate of output window, must be inside desktop */
	if (si->overlay.ow.width < 2)
	{
		/* width < 2 is not allowed */
		temp2 = (temp1 + 1) & 0x7ff;
	}
	else if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
	{
		/* clipping on the right side */
		temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
	}
	else if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
	{
		/* clipping on the left side: width < 2 is not allowed */
		temp2 = 1;
	}
	else
	{
		/* no clipping here */
		temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
	}
	moi->hcoordv |= temp2 << 0;
	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));

	/* setup top and bottom edges of output window */
	moi->vcoordv = 0;
	/* top edge coordinate of output window, must be inside desktop */
	if (si->overlay.ow.v_start < crtc_vstart)
	{
		/* clipping on the top side */
		temp1 = 0;
	}
	else if (si->overlay.ow.v_start >= (crtc_vend - 1))
	{
		/* clipping on the bottom side: height < 2 is not allowed */
		temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
	}
	else
	{
		/* no clipping here */
		temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
	}
	moi->vcoordv |= temp1 << 16;
	/* bottom edge coordinate of output window, must be inside desktop */
	if (si->overlay.ow.height < 2)
	{
		/* height < 2 is not allowed */
		temp2 = (temp1 + 1) & 0x7ff;
	}
	else if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
	{
		/* clipping on the bottom side */
		temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
	}
	else if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
	{
		/* clipping on the top side: height < 2 is not allowed */
		temp2 = 1;
	}
	else
	{
		/* no clipping here */
		temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
	}
	moi->vcoordv |= temp2 << 0;
	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));


	/*********************************
	 *** setup horizontal clipping ***
	 *********************************/

	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
	/* Note:
	 * The method is to calculate, based on 1:1 scaling, based on the output window.
	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
	/* Note also:
	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
	moi->hsrcstv = 0;
	/* check for destination horizontal clipping at left side */
	if (si->overlay.ow.h_start < crtc_hstart)
	{
		/* check if entire destination picture is clipping left:
		 * (2 pixels will be clamped onscreen at least) */
		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
		{
			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
			/* NOTE(review): if ow.width can be < 2 here, (width - 2) wraps when
			 * added to the unsigned hsrcstv - confirm against callers. */
			moi->hsrcstv += (si->overlay.ow.width - 2);
		}
		else
		{
			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
		}
		LOG(4,("Overlay: clipping left...\n"));

		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
		 * Note that this also already takes care of aligning the value to the BES register! */
		moi->hsrcstv *= si->overlay.h_ifactor;
	}
	/* take zoom into account */
	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
	/* AND below required by hardware (> 1024 support confirmed on all cards) */
	moi->hsrcstv &= 0x07fffffc;
	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));


	/*******************************
	 *** setup vertical clipping ***
	 *******************************/

	/* calculate input bitmap origin address (as offset from the framebuffer start) */
	moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
	moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
	/* a1orgv is pointer-sized: cast so the argument matches the %08x conversion */
	LOG(4, ("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",
		(unsigned int)moi->a1orgv));

	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
	/* Note:
	 * The method is to calculate, based on 1:1 scaling, based on the output window.
	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
	/* Note also:
	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */

	moi->v1srcstv = 0;
	/* check for destination vertical clipping at top side */
	if (si->overlay.ow.v_start < crtc_vstart)
	{
		/* check if entire destination picture is clipping at top:
		 * (2 pixels will be clamped onscreen at least) */
		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
		{
			/* increase 'number of clipping pixels' with 'fixed value':
			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
			/* NOTE(review): if ow.height can be < 2 here, (height - 2) wraps in
			 * this unsigned product - confirm against callers. */
			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
		}
		else
		{
			/* increase 'first contributing pixel' with:
			 * number of destination picture clipping pixels * inverse scaling factor */
			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
		}
		/* on pre-NV10 we need to do clipping in the source
		 * bitmap because no separate clipping registers exist... */
		if (si->ps.card_arch < NV10A)
			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
		LOG(4,("Overlay: clipping at top...\n"));
	}
	/* take zoom into account */
	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
	if (si->ps.card_arch < NV10A)
	{
		moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
		/* a1orgv is pointer-sized: cast so the argument matches the %08x conversion */
		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n",
			(unsigned int)moi->a1orgv));
	}
	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));

	/* AND below is probably required by hardware. */
	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
	moi->a1orgv &= 0xfffffff0;
}
291ac83e70cSRudolf Cornelissen 
/* Program a previously calculated overlay position into the BES registers.
 *
 * moi: values produced by nv_bes_calc_move_overlay(); passed by value, so the
 * caller's copy is never modified.
 * Pre-NV10 cards get the clipped buffer start address plus window position
 * and size; NV10 and later get the source reference, window position and size.
 *
 * Syncing to the BES (Back End Scaler) is done in card hardware:
 * double buffered registers + trigger if programming complete feature. */
static void nv_bes_program_move_overlay(move_overlay_info moi)
{
	/* unpack the window edges: upper 16 bits = left/top, lower 16 bits = right/bottom */
	uint32 left_edge = (moi.hcoordv & 0xffff0000) >> 16;
	uint32 right_edge = moi.hcoordv & 0x0000ffff;
	uint32 top_edge = (moi.vcoordv & 0xffff0000) >> 16;
	uint32 bottom_edge = moi.vcoordv & 0x0000ffff;
	/* output window position: top in the upper half, left in the lower half */
	uint32 window_pos = (moi.vcoordv & 0xffff0000) | left_edge;
	/* output window size: height in the upper half, width in the lower half */
	uint32 window_size =
		((bottom_edge - top_edge + 1) << 16) | (right_edge - left_edge + 1);
	uintptr_t clipped_origin;

	if (si->ps.card_arch >= NV10A)
	{
		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
		/* setup output window position */
		BESW(NV10_0DSTREF, window_pos);
		/* setup output window size */
		BESW(NV10_0DSTSIZE, window_size);
		/* We only use buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
		/* This also triggers activation of programmed values (double buffered registers feature) */
		BESW(NV10_BUFSEL, 0x00000001);
		return;
	}

	/* < NV10A */

	/* unknown, but needed (otherwise high-res distortions and only half the frames) */
	BESW(NV04_OE_STATE, 0x00000000);
	/* select buffer 0 as active (b16) */
	BESW(NV04_SU_STATE, 0x00000000);
	/* unknown (no effect?) */
	BESW(NV04_RM_STATE, 0x00000000);
	/* setup clipped(!) buffer start address in RAM */
	/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
	 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
	/* first include 'pixel precise' left clipping (top clipping was already included),
	 * then step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
	clipped_origin = (moi.a1orgv + ((moi.hsrcstv >> 16) * 2)) & ~0x03;
	/* (program both buffers to prevent sync distortions) */
	BESW(NV04_0BUFADR, clipped_origin);
	BESW(NV04_1BUFADR, clipped_origin);
	/* setup output window position */
	BESW(NV04_DSTREF, window_pos);
	/* setup output window size */
	BESW(NV04_DSTSIZE, window_size);
	/* select buffer 1 as active (b16) */
	BESW(NV04_SU_STATE, 0x00010000);
}
35108705d96Sshatty 
nv_bes_to_crtc(bool crtc)35230f76422SRudolf Cornelissen status_t nv_bes_to_crtc(bool crtc)
353cc6f5542SRudolf Cornelissen {
354cc6f5542SRudolf Cornelissen 	if (si->ps.secondary_head)
355cc6f5542SRudolf Cornelissen 	{
356cc6f5542SRudolf Cornelissen 		if (crtc)
357cc6f5542SRudolf Cornelissen 		{
358cc6f5542SRudolf Cornelissen 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
359cc6f5542SRudolf Cornelissen 			/* switch overlay engine to CRTC2 */
360cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
361cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
36230f76422SRudolf Cornelissen 			si->overlay.crtc = !si->crtc_switch_mode;
363cc6f5542SRudolf Cornelissen 		}
364cc6f5542SRudolf Cornelissen 		else
365cc6f5542SRudolf Cornelissen 		{
366cc6f5542SRudolf Cornelissen 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
367cc6f5542SRudolf Cornelissen 			/* switch overlay engine to CRTC1 */
368cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
369cc6f5542SRudolf Cornelissen 			NV_REG32(NV32_FUNCSEL) |= 0x00001000;
37030f76422SRudolf Cornelissen 			si->overlay.crtc = si->crtc_switch_mode;
371cc6f5542SRudolf Cornelissen 		}
372cc6f5542SRudolf Cornelissen 		return B_OK;
373cc6f5542SRudolf Cornelissen 	}
374cc6f5542SRudolf Cornelissen 	else
375cc6f5542SRudolf Cornelissen 	{
376cc6f5542SRudolf Cornelissen 		return B_ERROR;
377cc6f5542SRudolf Cornelissen 	}
378cc6f5542SRudolf Cornelissen }
379cc6f5542SRudolf Cornelissen 
nv_bes_init()380b4c44701Sshatty status_t nv_bes_init()
381b4c44701Sshatty {
382aa1e552fSshatty 	if (si->ps.card_arch < NV10A)
383aa1e552fSshatty 	{
384aa1e552fSshatty 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385aa1e552fSshatty 		BESW(NV04_INTE, 0x00000000);
386aa1e552fSshatty 
387aa1e552fSshatty 		/* setup saturation to be 'neutral' */
388aa1e552fSshatty 		BESW(NV04_SAT, 0x00000000);
389aa1e552fSshatty 		/* setup RGB brightness to be 'neutral' */
390aa1e552fSshatty 		BESW(NV04_RED_AMP, 0x00000069);
391aa1e552fSshatty 		BESW(NV04_GRN_AMP, 0x0000003e);
392aa1e552fSshatty 		BESW(NV04_BLU_AMP, 0x00000089);
393aa1e552fSshatty 
394aa1e552fSshatty 		/* setup fifo for fetching data */
395aa1e552fSshatty 		BESW(NV04_FIFOBURL, 0x00000003);
396aa1e552fSshatty 		BESW(NV04_FIFOTHRS, 0x00000038);
397aa1e552fSshatty 
398aa1e552fSshatty 		/* unknown, but needed (registers only have b0 implemented) */
399aa1e552fSshatty 		/* (program both buffers to prevent sync distortions) */
400aa1e552fSshatty 		BESW(NV04_0OFFSET, 0x00000000);
401aa1e552fSshatty 		BESW(NV04_1OFFSET, 0x00000000);
402aa1e552fSshatty 	}
403aa1e552fSshatty 	else
404aa1e552fSshatty 	{
405aa1e552fSshatty 		/* >= NV10A */
406aa1e552fSshatty 
407b4c44701Sshatty 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408b4c44701Sshatty 		BESW(NV10_INTE, 0x00000000);
409b4c44701Sshatty 		/* shut off GeForce4MX MPEG2 decoder */
410b4c44701Sshatty 		BESW(DEC_GENCTRL, 0x00000000);
411b4c44701Sshatty 		/* setup BES memory-range mask */
412a7b24bbaSRudolf Cornelissen 		BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413b4c44701Sshatty 		/* unknown, but needed */
414aa1e552fSshatty 		BESW(NV10_0OFFSET, 0x00000000);
415b4c44701Sshatty 
416b4c44701Sshatty 		/* setup brightness, contrast and saturation to be 'neutral' */
417b4c44701Sshatty 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418b4c44701Sshatty 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419aa1e552fSshatty 	}
420b4c44701Sshatty 
4213d83865eSRudolf Cornelissen 	/* make sure the engine is disabled. */
4223d83865eSRudolf Cornelissen 	nv_release_bes();
4233d83865eSRudolf Cornelissen 
424b4c44701Sshatty 	return B_OK;
425b4c44701Sshatty }
426b4c44701Sshatty 
nv_configure_bes(const overlay_buffer * ob,const overlay_window * ow,const overlay_view * ov,int offset)42708705d96Sshatty status_t nv_configure_bes
42808705d96Sshatty 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
42908705d96Sshatty {
43008705d96Sshatty 	/* yuy2 (4:2:2) colorspace calculations */
43108705d96Sshatty 
43208705d96Sshatty 	/* Note:
43308705d96Sshatty 	 * in BeOS R5.0.3 and DANO:
43408705d96Sshatty 	 * 'ow->offset_xxx' is always 0, so not used;
43508705d96Sshatty 	 * 'ow->width' and 'ow->height' are the output window size: does not change
43608705d96Sshatty 	 * if window is clipping;
43708705d96Sshatty 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
43808705d96Sshatty 	 * window. These values can be negative: this means the window is clipping
43908705d96Sshatty 	 * at the left or the top of the display, respectively. */
44008705d96Sshatty 
44108705d96Sshatty 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
44208705d96Sshatty 	 * displayed on screen. This is used for the 'hardware zoom' function. */
44308705d96Sshatty 
44404e6b7ceSRudolf Cornelissen 	/* output window position and clipping info for source buffer */
44504e6b7ceSRudolf Cornelissen 	move_overlay_info moi;
44608705d96Sshatty 	/* calculated BES register values */
44704e6b7ceSRudolf Cornelissen 	uint32 	hiscalv, viscalv;
44808705d96Sshatty 	/* interval representation, used for scaling calculations */
44904e6b7ceSRudolf Cornelissen 	uint16 intrep;
45008705d96Sshatty 	/* inverse scaling factor, used for source positioning */
45108705d96Sshatty 	uint32 ifactor;
45208705d96Sshatty 	/* copy of overlay view which has checked valid values */
45308705d96Sshatty 	overlay_view my_ov;
45408705d96Sshatty 
45508705d96Sshatty 
45608705d96Sshatty 	/**************************************************************************************
45708705d96Sshatty 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
45808705d96Sshatty 	 **************************************************************************************/
45908705d96Sshatty 	my_ov = *ov;
46008705d96Sshatty 	/* check for valid 'coordinates' */
46108705d96Sshatty 	if (my_ov.width == 0) my_ov.width++;
46208705d96Sshatty 	if (my_ov.height == 0) my_ov.height++;
46308705d96Sshatty 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
46408705d96Sshatty 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
46508705d96Sshatty 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
46608705d96Sshatty 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
46708705d96Sshatty 	if (my_ov.v_start > (ob->height - 1))
46808705d96Sshatty 		my_ov.v_start = (ob->height - 1);
46908705d96Sshatty 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
47008705d96Sshatty 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
47108705d96Sshatty 
4725f1edbfbSRudolf Cornelissen 	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
47308705d96Sshatty 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
47408705d96Sshatty 
47504e6b7ceSRudolf Cornelissen 	/* save for nv_bes_calc_move_overlay() */
476ac83e70cSRudolf Cornelissen 	si->overlay.ow = *ow;
477ac83e70cSRudolf Cornelissen 	si->overlay.ob = *ob;
478ac83e70cSRudolf Cornelissen 	si->overlay.my_ov = my_ov;
479ac83e70cSRudolf Cornelissen 
480cc6f5542SRudolf Cornelissen 
48104e6b7ceSRudolf Cornelissen 	/********************************
48204e6b7ceSRudolf Cornelissen 	 *** setup horizontal scaling ***
48304e6b7ceSRudolf Cornelissen 	 ********************************/
4845f1edbfbSRudolf Cornelissen 	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
48508705d96Sshatty 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
4865f1edbfbSRudolf Cornelissen 	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
48708705d96Sshatty 
48808705d96Sshatty 	/* determine interval representation value, taking zoom into account */
48908705d96Sshatty 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
49008705d96Sshatty 	{
49108705d96Sshatty 		/* horizontal filtering is ON */
49208705d96Sshatty 		if ((my_ov.width == ow->width) | (ow->width < 2))
49308705d96Sshatty 		{
49408705d96Sshatty 			/* no horizontal scaling used, OR destination width < 2 */
49508705d96Sshatty 			intrep = 0;
49608705d96Sshatty 		}
49708705d96Sshatty 		else
49808705d96Sshatty 		{
49908705d96Sshatty 			intrep = 1;
50008705d96Sshatty 		}
50108705d96Sshatty 	}
50208705d96Sshatty 	else
50308705d96Sshatty 	{
50408705d96Sshatty 		/* horizontal filtering is OFF */
50508705d96Sshatty 		if ((ow->width < my_ov.width) & (ow->width >= 2))
50608705d96Sshatty 		{
50708705d96Sshatty 			/* horizontal downscaling used AND destination width >= 2 */
50808705d96Sshatty 			intrep = 1;
50908705d96Sshatty 		}
51008705d96Sshatty 		else
51108705d96Sshatty 		{
51208705d96Sshatty 			intrep = 0;
51308705d96Sshatty 		}
51408705d96Sshatty 	}
51508705d96Sshatty 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
51608705d96Sshatty 
51708705d96Sshatty 	/* calculate inverse horizontal scaling factor, taking zoom into account */
51808705d96Sshatty 	/* standard scaling formula: */
51908705d96Sshatty 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
52008705d96Sshatty 
52108705d96Sshatty 	/* correct factor to prevent most-right visible 'line' from distorting */
52208705d96Sshatty 	ifactor -= (1 << 2);
52305b269c0Sshatty 	hiscalv = ifactor;
52404e6b7ceSRudolf Cornelissen 	/* save for nv_bes_calc_move_overlay() */
525ac83e70cSRudolf Cornelissen 	si->overlay.h_ifactor = ifactor;
52608705d96Sshatty 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
52708705d96Sshatty 
52808705d96Sshatty 	/* check scaling factor (and modify if needed) to be within scaling limits */
5295a84c3b1SRudolf Cornelissen 	/* all cards have an upscaling limit of 8.0 (see official nVidia specsheets) */
530aa1e552fSshatty 	if (hiscalv < 0x00002000)
53108705d96Sshatty 	{
53208705d96Sshatty 		/* (non-inverse) factor too large, set factor to max. valid value */
533aa1e552fSshatty 		hiscalv = 0x00002000;
53408705d96Sshatty 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
53508705d96Sshatty 	}
536887d4abbSshatty 	switch (si->ps.card_arch)
537887d4abbSshatty 	{
538aa1e552fSshatty 	case NV04A:
539aa1e552fSshatty 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
540aa1e552fSshatty 		 * (16bit register with 0.11 format value) */
541aa1e552fSshatty 		if (hiscalv > 0x0000ffff)
542aa1e552fSshatty 		{
543aa1e552fSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
544aa1e552fSshatty 			hiscalv = 0x0000ffff;
545aa1e552fSshatty 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
546aa1e552fSshatty 		}
547aa1e552fSshatty 		break;
548887d4abbSshatty 	case NV30A:
54917cf1498SRudolf Cornelissen 	case NV40A:
55017cf1498SRudolf Cornelissen 		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
551e8d5d47cSRudolf Cornelissen 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
552887d4abbSshatty 		{
553887d4abbSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
554887d4abbSshatty 			hiscalv = (2 << 16);
555887d4abbSshatty 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
556887d4abbSshatty 		}
557e8d5d47cSRudolf Cornelissen 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
558e8d5d47cSRudolf Cornelissen 		 * So let it fall through... */
559e8d5d47cSRudolf Cornelissen 		if (si->ps.card_type != NV31) break;
560887d4abbSshatty 	default:
561aa1e552fSshatty 		/* the rest has a downscaling limit of 0.125 */
56205b269c0Sshatty 		if (hiscalv > (8 << 16))
56308705d96Sshatty 		{
56408705d96Sshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
56505b269c0Sshatty 			hiscalv = (8 << 16);
56608705d96Sshatty 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
56708705d96Sshatty 		}
568887d4abbSshatty 		break;
569887d4abbSshatty 	}
57008705d96Sshatty 	/* AND below is required by hardware */
57108705d96Sshatty 	hiscalv &= 0x001ffffc;
57208705d96Sshatty 
57308705d96Sshatty 
57404e6b7ceSRudolf Cornelissen 	/******************************
57504e6b7ceSRudolf Cornelissen 	 *** setup vertical scaling ***
57604e6b7ceSRudolf Cornelissen 	 ******************************/
57708705d96Sshatty 
57808705d96Sshatty 	/* determine interval representation value, taking zoom into account */
57908705d96Sshatty 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
58008705d96Sshatty 	{
58108705d96Sshatty 		/* vertical filtering is ON */
58208705d96Sshatty 		if ((my_ov.height == ow->height) | (ow->height < 2))
58308705d96Sshatty 		{
58408705d96Sshatty 			/* no vertical scaling used, OR destination height < 2 */
58508705d96Sshatty 			intrep = 0;
58608705d96Sshatty 		}
58708705d96Sshatty 		else
58808705d96Sshatty 		{
58908705d96Sshatty 			intrep = 1;
59008705d96Sshatty 		}
59108705d96Sshatty 	}
59208705d96Sshatty 	else
59308705d96Sshatty 	{
59408705d96Sshatty 		/* vertical filtering is OFF */
59508705d96Sshatty 		if ((ow->height < my_ov.height) & (ow->height >= 2))
59608705d96Sshatty 		{
59708705d96Sshatty 			/* vertical downscaling used AND destination height >= 2 */
59808705d96Sshatty 			intrep = 1;
59908705d96Sshatty 		}
60008705d96Sshatty 		else
60108705d96Sshatty 		{
60208705d96Sshatty 			intrep = 0;
60308705d96Sshatty 		}
60408705d96Sshatty 	}
60508705d96Sshatty 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
60608705d96Sshatty 
60708705d96Sshatty 	/* calculate inverse vertical scaling factor, taking zoom into account */
60808705d96Sshatty 	/* standard scaling formula: */
60908705d96Sshatty 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
61008705d96Sshatty 
61108705d96Sshatty 	/* correct factor to prevent lowest visible line from distorting */
61208705d96Sshatty 	ifactor -= (1 << 2);
61308705d96Sshatty 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
61408705d96Sshatty 
61508705d96Sshatty 	/* preserve ifactor for source positioning calculations later on */
61608705d96Sshatty 	viscalv = ifactor;
61704e6b7ceSRudolf Cornelissen 	/* save for nv_bes_calc_move_overlay() */
618ac83e70cSRudolf Cornelissen 	si->overlay.v_ifactor = ifactor;
61908705d96Sshatty 
62008705d96Sshatty 	/* check scaling factor (and modify if needed) to be within scaling limits */
6215a84c3b1SRudolf Cornelissen 	/* all cards have an upscaling limit of 8.0 (see official nVidia specsheets) */
622aa1e552fSshatty 	if (viscalv < 0x00002000)
62308705d96Sshatty 	{
62408705d96Sshatty 		/* (non-inverse) factor too large, set factor to max. valid value */
625aa1e552fSshatty 		viscalv = 0x00002000;
62608705d96Sshatty 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
62708705d96Sshatty 	}
628887d4abbSshatty 	switch (si->ps.card_arch)
629887d4abbSshatty 	{
630aa1e552fSshatty 	case NV04A:
631aa1e552fSshatty 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
632aa1e552fSshatty 		 * (16bit register with 0.11 format value) */
633aa1e552fSshatty 		if (viscalv > 0x0000ffff)
634aa1e552fSshatty 		{
635aa1e552fSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
636aa1e552fSshatty 			viscalv = 0x0000ffff;
637aa1e552fSshatty 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
638aa1e552fSshatty 		}
639aa1e552fSshatty 		break;
640887d4abbSshatty 	case NV30A:
64117cf1498SRudolf Cornelissen 	case NV40A:
64217cf1498SRudolf Cornelissen 		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
643e8d5d47cSRudolf Cornelissen 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
644887d4abbSshatty 		{
645887d4abbSshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
646887d4abbSshatty 			viscalv = (2 << 16);
647887d4abbSshatty 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
648887d4abbSshatty 		}
649e8d5d47cSRudolf Cornelissen 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
650e8d5d47cSRudolf Cornelissen 		 * So let it fall through... */
651e8d5d47cSRudolf Cornelissen 		if (si->ps.card_type != NV31) break;
652887d4abbSshatty 	default:
653aa1e552fSshatty 		/* the rest has a downscaling limit of 0.125 */
65405b269c0Sshatty 		if (viscalv > (8 << 16))
65508705d96Sshatty 		{
65608705d96Sshatty 			/* (non-inverse) factor too small, set factor to min. valid value */
65705b269c0Sshatty 			viscalv = (8 << 16);
65808705d96Sshatty 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
65908705d96Sshatty 		}
660887d4abbSshatty 		break;
661887d4abbSshatty 	}
66208705d96Sshatty 	/* AND below is required by hardware */
66308705d96Sshatty 	viscalv &= 0x001ffffc;
66408705d96Sshatty 
66508705d96Sshatty 
66604e6b7ceSRudolf Cornelissen 	/********************************************************************************
66704e6b7ceSRudolf Cornelissen 	 *** setup all edges of output window, setup horizontal and vertical clipping ***
66804e6b7ceSRudolf Cornelissen 	 ********************************************************************************/
66904e6b7ceSRudolf Cornelissen 	nv_bes_calc_move_overlay(&moi);
67008705d96Sshatty 
67108705d96Sshatty 
67208705d96Sshatty 	/*****************************
67308705d96Sshatty 	 *** log color keying info ***
67408705d96Sshatty 	 *****************************/
67508705d96Sshatty 
6765f1edbfbSRudolf Cornelissen 	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
67708705d96Sshatty 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
6785f1edbfbSRudolf Cornelissen 	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
67908705d96Sshatty 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
68008705d96Sshatty 
68108705d96Sshatty 
68205b269c0Sshatty 	/*****************
68305b269c0Sshatty 	 *** log flags ***
68405b269c0Sshatty 	 *****************/
68508705d96Sshatty 
6865f1edbfbSRudolf Cornelissen 	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
68705b269c0Sshatty 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
68808705d96Sshatty 
68908705d96Sshatty 
69008705d96Sshatty 	/*************************************
69108705d96Sshatty 	 *** sync to BES (Back End Scaler) ***
69208705d96Sshatty 	 *************************************/
69308705d96Sshatty 
694b4c44701Sshatty 	/* Done in card hardware:
695b4c44701Sshatty 	 * double buffered registers + trigger if programming complete feature. */
69608705d96Sshatty 
69708705d96Sshatty 
69808705d96Sshatty 	/**************************************
69908705d96Sshatty 	 *** actually program the registers ***
70008705d96Sshatty 	 **************************************/
70108705d96Sshatty 
702aa1e552fSshatty 	if (si->ps.card_arch < NV10A)
703aa1e552fSshatty 	{
704aa1e552fSshatty 		/* unknown, but needed (otherwise high-res distortions and only half the frames) */
705aa1e552fSshatty 		BESW(NV04_OE_STATE, 0x00000000);
706aa1e552fSshatty 		/* select buffer 0 as active (b16) */
707aa1e552fSshatty 		BESW(NV04_SU_STATE, 0x00000000);
708aa1e552fSshatty 		/* unknown (no effect?) */
709aa1e552fSshatty 		BESW(NV04_RM_STATE, 0x00000000);
710aa1e552fSshatty 		/* setup clipped(!) buffer start address in RAM */
711aa1e552fSshatty 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
712aa1e552fSshatty 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
713aa1e552fSshatty 		/* (program both buffers to prevent sync distortions) */
714aa1e552fSshatty 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
71504e6b7ceSRudolf Cornelissen 		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
716aa1e552fSshatty 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
71704e6b7ceSRudolf Cornelissen 		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
71804e6b7ceSRudolf Cornelissen 		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
719aa1e552fSshatty 		/* setup buffer source pitch including slopspace (in bytes).
720aa1e552fSshatty 		 * Note:
721aa1e552fSshatty 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
722aa1e552fSshatty 		/* (program both buffers to prevent sync distortions) */
723aa1e552fSshatty 		BESW(NV04_0SRCPTCH, (ob->width * 2));
724aa1e552fSshatty 		BESW(NV04_1SRCPTCH, (ob->width * 2));
725aa1e552fSshatty 		/* setup output window position */
72604e6b7ceSRudolf Cornelissen 		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
727aa1e552fSshatty 		/* setup output window size */
728aa1e552fSshatty 		BESW(NV04_DSTSIZE, (
72904e6b7ceSRudolf Cornelissen 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
73004e6b7ceSRudolf Cornelissen 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
731aa1e552fSshatty 			));
732aa1e552fSshatty 		/* setup horizontal and vertical scaling */
733aa1e552fSshatty 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
734aa1e552fSshatty 		/* enable vertical filtering (b0) */
735aa1e552fSshatty 		BESW(NV04_CTRL_V, 0x00000001);
736aa1e552fSshatty 		/* enable horizontal filtering (no effect?) */
737aa1e552fSshatty 		BESW(NV04_CTRL_H, 0x00000111);
738d040001cSRudolf Cornelissen 		/* enable BES (b0), set colorkeying (b4), format yuy2 (b8: 0 = ccir) */
739d040001cSRudolf Cornelissen 		if (ow->flags & B_OVERLAY_COLOR_KEY)
740aa1e552fSshatty 			BESW(NV04_GENCTRL, 0x00000111);
741d040001cSRudolf Cornelissen 		else
742d040001cSRudolf Cornelissen 			BESW(NV04_GENCTRL, 0x00000101);
743aa1e552fSshatty 		/* select buffer 1 as active (b16) */
744aa1e552fSshatty 		BESW(NV04_SU_STATE, 0x00010000);
745aa1e552fSshatty 
746aa1e552fSshatty 		/**************************
747aa1e552fSshatty 		 *** setup color keying ***
748aa1e552fSshatty 		 **************************/
749aa1e552fSshatty 
750aa1e552fSshatty 		/* setup colorkeying */
751aa1e552fSshatty 		switch(si->dm.space)
752aa1e552fSshatty 		{
753aa1e552fSshatty 		case B_RGB15_LITTLE:
754aa1e552fSshatty 			BESW(NV04_COLKEY, (
755aa1e552fSshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
756aa1e552fSshatty 				((ow->green.value & ow->green.mask) << 5) |
757aa1e552fSshatty 				((ow->red.value & ow->red.mask) << 10)    |
758aa1e552fSshatty 				((ow->alpha.value & ow->alpha.mask) << 15)
759aa1e552fSshatty 				));
760aa1e552fSshatty 			break;
761aa1e552fSshatty 		case B_RGB16_LITTLE:
762aa1e552fSshatty 			BESW(NV04_COLKEY, (
763aa1e552fSshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
764aa1e552fSshatty 				((ow->green.value & ow->green.mask) << 5) |
765aa1e552fSshatty 				((ow->red.value & ow->red.mask) << 11)
766aa1e552fSshatty 				/* this space has no alpha bits */
767aa1e552fSshatty 				));
768aa1e552fSshatty 			break;
769aa1e552fSshatty 		case B_CMAP8:
770aa1e552fSshatty 		case B_RGB32_LITTLE:
771aa1e552fSshatty 		default:
772aa1e552fSshatty 			BESW(NV04_COLKEY, (
773aa1e552fSshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
774aa1e552fSshatty 				((ow->green.value & ow->green.mask) << 8) |
775aa1e552fSshatty 				((ow->red.value & ow->red.mask) << 16)    |
776aa1e552fSshatty 				((ow->alpha.value & ow->alpha.mask) << 24)
777aa1e552fSshatty 				));
778aa1e552fSshatty 			break;
779aa1e552fSshatty 		}
780aa1e552fSshatty 	}
781aa1e552fSshatty 	else
782aa1e552fSshatty 	{
783aa1e552fSshatty 		/* >= NV10A */
784aa1e552fSshatty 
78505b269c0Sshatty 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
78604e6b7ceSRudolf Cornelissen 		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
78705b269c0Sshatty 		/* setup buffersize */
788aa1e552fSshatty 		//fixme if needed: width must be even officially...
78905b269c0Sshatty 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
79005b269c0Sshatty 		/* setup source pitch including slopspace (in bytes),
791d040001cSRudolf Cornelissen 		 * b16: select YUY2 (0 = YV12), b20: set colorkeying, b24: no iturbt_709 (do iturbt_601) */
79205b269c0Sshatty 		/* Note:
79305b269c0Sshatty 		 * source pitch granularity = 32 pixels on GeForce cards!! */
794d040001cSRudolf Cornelissen 		if (ow->flags & B_OVERLAY_COLOR_KEY)
79505b269c0Sshatty 			BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
796d040001cSRudolf Cornelissen 		else
797d040001cSRudolf Cornelissen 			BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (0 << 20) | (0 << 24)));
79805b269c0Sshatty 		/* setup output window position */
79904e6b7ceSRudolf Cornelissen 		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
80005b269c0Sshatty 		/* setup output window size */
80105b269c0Sshatty 		BESW(NV10_0DSTSIZE, (
80204e6b7ceSRudolf Cornelissen 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
80304e6b7ceSRudolf Cornelissen 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
80405b269c0Sshatty 			));
80505b269c0Sshatty 		/* setup horizontal scaling */
80605b269c0Sshatty 		BESW(NV10_0ISCALH, (hiscalv << 4));
80705b269c0Sshatty 		/* setup vertical scaling */
80805b269c0Sshatty 		BESW(NV10_0ISCALV, (viscalv << 4));
80905b269c0Sshatty 		/* setup (unclipped!) buffer start address in RAM */
81004e6b7ceSRudolf Cornelissen 		BESW(NV10_0BUFADR, moi.a1orgv);
81105b269c0Sshatty 		/* enable BES (b0 = 0) */
81205b269c0Sshatty 		BESW(NV10_GENCTRL, 0x00000000);
813aa1e552fSshatty 		/* We only use buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
814aa1e552fSshatty 		/* This also triggers activation of programmed values (double buffered registers feature) */
815aa1e552fSshatty 		BESW(NV10_BUFSEL, 0x00000001);
81608705d96Sshatty 
81708705d96Sshatty 		/**************************
81808705d96Sshatty 		 *** setup color keying ***
81908705d96Sshatty 		 **************************/
82008705d96Sshatty 
82108705d96Sshatty 		/* setup colorkeying */
82205b269c0Sshatty 		switch(si->dm.space)
82305b269c0Sshatty 		{
82405b269c0Sshatty 		case B_RGB15_LITTLE:
82505b269c0Sshatty 			BESW(NV10_COLKEY, (
82605b269c0Sshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
82705b269c0Sshatty 				((ow->green.value & ow->green.mask) << 5) |
82805b269c0Sshatty 				((ow->red.value & ow->red.mask) << 10)    |
82905b269c0Sshatty 				((ow->alpha.value & ow->alpha.mask) << 15)
83005b269c0Sshatty 				));
83105b269c0Sshatty 			break;
83205b269c0Sshatty 		case B_RGB16_LITTLE:
83305b269c0Sshatty 			BESW(NV10_COLKEY, (
83405b269c0Sshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
83505b269c0Sshatty 				((ow->green.value & ow->green.mask) << 5) |
83605b269c0Sshatty 				((ow->red.value & ow->red.mask) << 11)
83705b269c0Sshatty 				/* this space has no alpha bits */
83805b269c0Sshatty 				));
83905b269c0Sshatty 			break;
84005b269c0Sshatty 		case B_CMAP8:
84105b269c0Sshatty 		case B_RGB32_LITTLE:
84205b269c0Sshatty 		default:
84305b269c0Sshatty 			BESW(NV10_COLKEY, (
84405b269c0Sshatty 				((ow->blue.value & ow->blue.mask) << 0)   |
84505b269c0Sshatty 				((ow->green.value & ow->green.mask) << 8) |
84605b269c0Sshatty 				((ow->red.value & ow->red.mask) << 16)    |
84705b269c0Sshatty 				((ow->alpha.value & ow->alpha.mask) << 24)
84805b269c0Sshatty 				));
84905b269c0Sshatty 			break;
85005b269c0Sshatty 		}
851aa1e552fSshatty 	}
85208705d96Sshatty 
853ac83e70cSRudolf Cornelissen 	/* note that overlay is in use (for nv_bes_move_overlay()) */
854ac83e70cSRudolf Cornelissen 	si->overlay.active = true;
855ac83e70cSRudolf Cornelissen 
85608705d96Sshatty 	return B_OK;
85708705d96Sshatty }
85808705d96Sshatty 
nv_release_bes()85908705d96Sshatty status_t nv_release_bes()
86008705d96Sshatty {
861aa1e552fSshatty 	if (si->ps.card_arch < NV10A)
862aa1e552fSshatty 	{
863aa1e552fSshatty 		/* setup BES control: disable scaler (b0 = 0) */
864aa1e552fSshatty 		BESW(NV04_GENCTRL, 0x00000000);
865aa1e552fSshatty 	}
866aa1e552fSshatty 	else
867aa1e552fSshatty 	{
86805b269c0Sshatty 		/* setup BES control: disable scaler (b0 = 1) */
86905b269c0Sshatty 		BESW(NV10_GENCTRL, 0x00000001);
870aa1e552fSshatty 	}
87108705d96Sshatty 
872ac83e70cSRudolf Cornelissen 	/* note that overlay is not in use (for nv_bes_move_overlay()) */
873ac83e70cSRudolf Cornelissen 	si->overlay.active = false;
874ac83e70cSRudolf Cornelissen 
87508705d96Sshatty 	return B_OK;
87608705d96Sshatty }
877