xref: /haiku/src/add-ons/accelerants/skeleton/engine/bes.c (revision 95bac3fda53a4cb21880712d7b43f8c21db32a2e)
1 /* Nvidia TNT and GeForce Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-9/2004 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "std.h"
7 
8 typedef struct move_overlay_info move_overlay_info;
9 
10 struct move_overlay_info
11 {
12 	uint32 hcoordv;		/* left and right edges of video output window */
13 	uint32 vcoordv;		/* top and bottom edges of video output window */
14 	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15 	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
16 	uint32 a1orgv;		/* alternate source clipping via startadress of source buffer */
17 };
18 
19 static void eng_bes_calc_move_overlay(move_overlay_info *moi);
20 static void eng_bes_program_move_overlay(move_overlay_info moi);
21 
22 /* move the overlay output window in virtualscreens */
23 /* Note:
24  * si->dm.h_display_start and si->dm.v_display_start determine where the new
25  * output window is located! */
26 void eng_bes_move_overlay()
27 {
28 	move_overlay_info moi;
29 
30 	/* abort if overlay is not active */
31 	if (!si->overlay.active) return;
32 
33 	eng_bes_calc_move_overlay(&moi);
34 	eng_bes_program_move_overlay(moi);
35 }
36 
37 static void eng_bes_calc_move_overlay(move_overlay_info *moi)
38 {
39 	/* misc used variables */
40 	uint16 temp1, temp2;
41 	/* visible screen window in virtual workspaces */
42 	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
43 
44 	/* do 'overlay follow head' in dualhead modes on dualhead cards */
45 	if (si->ps.secondary_head)
46 	{
47 		switch (si->dm.flags & DUALHEAD_BITS)
48 		{
49 		case DUALHEAD_ON:
50 		case DUALHEAD_SWITCH:
51 			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52 					(si->dm.h_display_start + si->dm.timing.h_display))
53 				eng_bes_to_crtc(si->crtc_switch_mode);
54 			else
55 				eng_bes_to_crtc(!si->crtc_switch_mode);
56 			break;
57 		default:
58 				eng_bes_to_crtc(si->crtc_switch_mode);
59 			break;
60 		}
61 	}
62 
63 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
64 	 * constraints only */
65 	crtc_hstart = si->dm.h_display_start;
66 	/* make dualhead stretch and switch mode work while we're at it.. */
67 	if (si->overlay.crtc)
68 	{
69 		crtc_hstart += si->dm.timing.h_display;
70 	}
71 
72 	/* horizontal end is the first position beyond the displayed range on the CRTC */
73 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
74 	crtc_vstart = si->dm.v_display_start;
75 	/* vertical end is the first position beyond the displayed range on the CRTC */
76 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
77 
78 
79 	/****************************************
80 	 *** setup all edges of output window ***
81 	 ****************************************/
82 
83 	/* setup left and right edges of output window */
84 	moi->hcoordv = 0;
85 	/* left edge coordinate of output window, must be inside desktop */
86 	/* clipping on the left side */
87 	if (si->overlay.ow.h_start < crtc_hstart)
88 	{
89 		temp1 = 0;
90 	}
91 	else
92 	{
93 		/* clipping on the right side */
94 		if (si->overlay.ow.h_start >= (crtc_hend - 1))
95 		{
96 			/* width < 2 is not allowed */
97 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
98 		}
99 		else
100 		/* no clipping here */
101 		{
102 			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
103 		}
104 	}
105 	moi->hcoordv |= temp1 << 16;
106 	/* right edge coordinate of output window, must be inside desktop */
107 	/* width < 2 is not allowed */
108 	if (si->overlay.ow.width < 2)
109 	{
110 		temp2 = (temp1 + 1) & 0x7ff;
111 	}
112 	else
113 	{
114 		/* clipping on the right side */
115 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
116 		{
117 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
118 		}
119 		else
120 		{
121 			/* clipping on the left side */
122 			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
123 			{
124 				/* width < 2 is not allowed */
125 				temp2 = 1;
126 			}
127 			else
128 			/* no clipping here */
129 			{
130 				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
131 			}
132 		}
133 	}
134 	moi->hcoordv |= temp2 << 0;
135 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
136 
137 	/* setup top and bottom edges of output window */
138 	moi->vcoordv = 0;
139 	/* top edge coordinate of output window, must be inside desktop */
140 	/* clipping on the top side */
141 	if (si->overlay.ow.v_start < crtc_vstart)
142 	{
143 		temp1 = 0;
144 	}
145 	else
146 	{
147 		/* clipping on the bottom side */
148 		if (si->overlay.ow.v_start >= (crtc_vend - 1))
149 		{
150 			/* height < 2 is not allowed */
151 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
152 		}
153 		else
154 		/* no clipping here */
155 		{
156 			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
157 		}
158 	}
159 	moi->vcoordv |= temp1 << 16;
160 	/* bottom edge coordinate of output window, must be inside desktop */
161 	/* height < 2 is not allowed */
162 	if (si->overlay.ow.height < 2)
163 	{
164 		temp2 = (temp1 + 1) & 0x7ff;
165 	}
166 	else
167 	{
168 		/* clipping on the bottom side */
169 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
170 		{
171 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
172 		}
173 		else
174 		{
175 			/* clipping on the top side */
176 			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
177 			{
178 				/* height < 2 is not allowed */
179 				temp2 = 1;
180 			}
181 			else
182 			/* no clipping here */
183 			{
184 				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
185 			}
186 		}
187 	}
188 	moi->vcoordv |= temp2 << 0;
189 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
190 
191 
192 	/*********************************
193 	 *** setup horizontal clipping ***
194 	 *********************************/
195 
196 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
197 	/* Note:
198 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
199 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202 	/* Note also:
203 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204 	moi->hsrcstv = 0;
205 	/* check for destination horizontal clipping at left side */
206 	if (si->overlay.ow.h_start < crtc_hstart)
207 	{
208 		/* check if entire destination picture is clipping left:
209 		 * (2 pixels will be clamped onscreen at least) */
210 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
211 		{
212 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213 			moi->hsrcstv += (si->overlay.ow.width - 2);
214 		}
215 		else
216 		{
217 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218 			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
219 		}
220 		LOG(4,("Overlay: clipping left...\n"));
221 
222 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223 		 * Note that this also already takes care of aligning the value to the BES register! */
224 		moi->hsrcstv *= si->overlay.h_ifactor;
225 	}
226 	/* take zoom into account */
227 	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228 	/* AND below required by hardware */
229 	moi->hsrcstv &= 0x03fffffc;
230 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
231 
232 
233 	/*******************************
234 	 *** setup vertical clipping ***
235 	 *******************************/
236 
237 	/* calculate inputbitmap origin adress */
238 	moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
239 	moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer);
240 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
241 
242 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
243 	/* Note:
244 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
245 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
246 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
247 	/* Note also:
248 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
249 
250 	moi->v1srcstv = 0;
251 	/* check for destination vertical clipping at top side */
252 	if (si->overlay.ow.v_start < crtc_vstart)
253 	{
254 		/* check if entire destination picture is clipping at top:
255 		 * (2 pixels will be clamped onscreen at least) */
256 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
257 		{
258 			/* increase 'number of clipping pixels' with 'fixed value':
259 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
260 			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
261 			/* on pre-NV10 we need to do clipping in the source
262 			 * bitmap because no seperate clipping registers exist... */
263 			if (si->ps.card_arch < NV10A)
264 				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
265 		}
266 		else
267 		{
268 			/* increase 'first contributing pixel' with:
269 			 * number of destination picture clipping pixels * inverse scaling factor */
270 			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
271 			/* on pre-NV10 we need to do clipping in the source
272 			 * bitmap because no seperate clipping registers exist... */
273 			if (si->ps.card_arch < NV10A)
274 				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
275 		}
276 		LOG(4,("Overlay: clipping at top...\n"));
277 	}
278 	/* take zoom into account */
279 	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
280 	if (si->ps.card_arch < NV10A)
281 	{
282 		moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
283 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
284 	}
285 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
286 
287 	/* AND below is probably required by hardware. */
288 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
289 	moi->a1orgv &= 0xfffffff0;
290 }
291 
292 static void eng_bes_program_move_overlay(move_overlay_info moi)
293 {
294 	/*************************************
295 	 *** sync to BES (Back End Scaler) ***
296 	 *************************************/
297 
298 	/* Done in card hardware:
299 	 * double buffered registers + trigger if programming complete feature. */
300 
301 
302 	/**************************************
303 	 *** actually program the registers ***
304 	 **************************************/
305 
306 	if (si->ps.card_arch < NV10A)
307 	{
308 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
309 		BESW(NV04_OE_STATE, 0x00000000);
310 		/* select buffer 0 as active (b16) */
311 		BESW(NV04_SU_STATE, 0x00000000);
312 		/* unknown (no effect?) */
313 		BESW(NV04_RM_STATE, 0x00000000);
314 		/* setup clipped(!) buffer startadress in RAM */
315 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
316 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
317 		/* (program both buffers to prevent sync distortions) */
318 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
319 		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
320 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
321 		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
322 		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
323 		/* setup output window position */
324 		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
325 		/* setup output window size */
326 		BESW(NV04_DSTSIZE, (
327 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
328 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
329 			));
330 		/* select buffer 1 as active (b16) */
331 		BESW(NV04_SU_STATE, 0x00010000);
332 	}
333 	else
334 	{
335 		/* >= NV10A */
336 
337 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
338 		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
339 		/* setup output window position */
340 		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
341 		/* setup output window size */
342 		BESW(NV10_0DSTSIZE, (
343 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
344 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
345 			));
346 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
347 		/* This also triggers activation of programmed values (double buffered registers feature) */
348 		BESW(NV10_BUFSEL, 0x00000001);
349 	}
350 }
351 
352 status_t eng_bes_to_crtc(bool crtc)
353 {
354 	if (si->ps.secondary_head)
355 	{
356 		if (crtc)
357 		{
358 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
359 			/* switch overlay engine to CRTC2 */
360 			ENG_RG32(RG32_FUNCSEL) &= ~0x00001000;
361 			ENG_RG32(RG32_2FUNCSEL) |= 0x00001000;
362 			si->overlay.crtc = !si->crtc_switch_mode;
363 		}
364 		else
365 		{
366 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
367 			/* switch overlay engine to CRTC1 */
368 			ENG_RG32(RG32_2FUNCSEL) &= ~0x00001000;
369 			ENG_RG32(RG32_FUNCSEL) |= 0x00001000;
370 			si->overlay.crtc = si->crtc_switch_mode;
371 		}
372 		return B_OK;
373 	}
374 	else
375 	{
376 		return B_ERROR;
377 	}
378 }
379 
380 status_t eng_bes_init()
381 {
382 	if (si->ps.card_arch < NV10A)
383 	{
384 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385 		BESW(NV04_INTE, 0x00000000);
386 
387 		/* setup saturation to be 'neutral' */
388 		BESW(NV04_SAT, 0x00000000);
389 		/* setup RGB brightness to be 'neutral' */
390 		BESW(NV04_RED_AMP, 0x00000069);
391 		BESW(NV04_GRN_AMP, 0x0000003e);
392 		BESW(NV04_BLU_AMP, 0x00000089);
393 
394 		/* setup fifo for fetching data */
395 		BESW(NV04_FIFOBURL, 0x00000003);
396 		BESW(NV04_FIFOTHRS, 0x00000038);
397 
398 		/* unknown, but needed (registers only have b0 implemented) */
399 		/* (program both buffers to prevent sync distortions) */
400 		BESW(NV04_0OFFSET, 0x00000000);
401 		BESW(NV04_1OFFSET, 0x00000000);
402 	}
403 	else
404 	{
405 		/* >= NV10A */
406 
407 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408 		BESW(NV10_INTE, 0x00000000);
409 		/* shut off GeForce4MX MPEG2 decoder */
410 		BESW(DEC_GENCTRL, 0x00000000);
411 		/* setup BES memory-range mask */
412 		BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413 		/* unknown, but needed */
414 		BESW(NV10_0OFFSET, 0x00000000);
415 
416 		/* setup brightness, contrast and saturation to be 'neutral' */
417 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419 	}
420 
421 	return B_OK;
422 }
423 
424 status_t eng_configure_bes
425 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
426 {
427 	/* yuy2 (4:2:2) colorspace calculations */
428 
429 	/* Note:
430 	 * in BeOS R5.0.3 and DANO:
431 	 * 'ow->offset_xxx' is always 0, so not used;
432 	 * 'ow->width' and 'ow->height' are the output window size: does not change
433 	 * if window is clipping;
434 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
435 	 * window. These values can be negative: this means the window is clipping
436 	 * at the left or the top of the display, respectively. */
437 
438 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
439 	 * displayed on screen. This is used for the 'hardware zoom' function. */
440 
441 	/* output window position and clipping info for source buffer */
442 	move_overlay_info moi;
443 	/* calculated BES register values */
444 	uint32 	hiscalv, viscalv;
445 	/* interval representation, used for scaling calculations */
446 	uint16 intrep;
447 	/* inverse scaling factor, used for source positioning */
448 	uint32 ifactor;
449 	/* copy of overlay view which has checked valid values */
450 	overlay_view my_ov;
451 
452 
453 	/**************************************************************************************
454 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
455 	 **************************************************************************************/
456 	my_ov = *ov;
457 	/* check for valid 'coordinates' */
458 	if (my_ov.width == 0) my_ov.width++;
459 	if (my_ov.height == 0) my_ov.height++;
460 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
461 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
462 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
463 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
464 	if (my_ov.v_start > (ob->height - 1))
465 		my_ov.v_start = (ob->height - 1);
466 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
467 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
468 
469 	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
470 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
471 
472 	/* save for eng_bes_calc_move_overlay() */
473 	si->overlay.ow = *ow;
474 	si->overlay.ob = *ob;
475 	si->overlay.my_ov = my_ov;
476 
477 
478 	/********************************
479 	 *** setup horizontal scaling ***
480 	 ********************************/
481 	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
482 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
483 	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
484 
485 	/* determine interval representation value, taking zoom into account */
486 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
487 	{
488 		/* horizontal filtering is ON */
489 		if ((my_ov.width == ow->width) | (ow->width < 2))
490 		{
491 			/* no horizontal scaling used, OR destination width < 2 */
492 			intrep = 0;
493 		}
494 		else
495 		{
496 			intrep = 1;
497 		}
498 	}
499 	else
500 	{
501 		/* horizontal filtering is OFF */
502 		if ((ow->width < my_ov.width) & (ow->width >= 2))
503 		{
504 			/* horizontal downscaling used AND destination width >= 2 */
505 			intrep = 1;
506 		}
507 		else
508 		{
509 			intrep = 0;
510 		}
511 	}
512 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
513 
514 	/* calculate inverse horizontal scaling factor, taking zoom into account */
515 	/* standard scaling formula: */
516 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
517 
518 	/* correct factor to prevent most-right visible 'line' from distorting */
519 	ifactor -= (1 << 2);
520 	hiscalv = ifactor;
521 	/* save for eng_bes_calc_move_overlay() */
522 	si->overlay.h_ifactor = ifactor;
523 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
524 
525 	/* check scaling factor (and modify if needed) to be within scaling limits */
526 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
527 	if (hiscalv < 0x00002000)
528 	{
529 		/* (non-inverse) factor too large, set factor to max. valid value */
530 		hiscalv = 0x00002000;
531 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
532 	}
533 	switch (si->ps.card_arch)
534 	{
535 	case NV04A:
536 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
537 		 * (16bit register with 0.11 format value) */
538 		if (hiscalv > 0x0000ffff)
539 		{
540 			/* (non-inverse) factor too small, set factor to min. valid value */
541 			hiscalv = 0x0000ffff;
542 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
543 		}
544 		break;
545 	case NV30A:
546 	case NV40A:
547 		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
548 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
549 		{
550 			/* (non-inverse) factor too small, set factor to min. valid value */
551 			hiscalv = (2 << 16);
552 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
553 		}
554 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
555 		 * So let it fall through... */
556 		if (si->ps.card_type != NV31) break;
557 	default:
558 		/* the rest has a downscaling limit of 0.125 */
559 		if (hiscalv > (8 << 16))
560 		{
561 			/* (non-inverse) factor too small, set factor to min. valid value */
562 			hiscalv = (8 << 16);
563 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
564 		}
565 		break;
566 	}
567 	/* AND below is required by hardware */
568 	hiscalv &= 0x001ffffc;
569 
570 
571 	/******************************
572 	 *** setup vertical scaling ***
573 	 ******************************/
574 
575 	/* determine interval representation value, taking zoom into account */
576 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
577 	{
578 		/* vertical filtering is ON */
579 		if ((my_ov.height == ow->height) | (ow->height < 2))
580 		{
581 			/* no vertical scaling used, OR destination height < 2 */
582 			intrep = 0;
583 		}
584 		else
585 		{
586 			intrep = 1;
587 		}
588 	}
589 	else
590 	{
591 		/* vertical filtering is OFF */
592 		if ((ow->height < my_ov.height) & (ow->height >= 2))
593 		{
594 			/* vertical downscaling used AND destination height >= 2 */
595 			intrep = 1;
596 		}
597 		else
598 		{
599 			intrep = 0;
600 		}
601 	}
602 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
603 
604 	/* calculate inverse vertical scaling factor, taking zoom into account */
605 	/* standard scaling formula: */
606 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
607 
608 	/* correct factor to prevent lowest visible line from distorting */
609 	ifactor -= (1 << 2);
610 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
611 
612 	/* preserve ifactor for source positioning calculations later on */
613 	viscalv = ifactor;
614 	/* save for eng_bes_calc_move_overlay() */
615 	si->overlay.v_ifactor = ifactor;
616 
617 	/* check scaling factor (and modify if needed) to be within scaling limits */
618 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
619 	if (viscalv < 0x00002000)
620 	{
621 		/* (non-inverse) factor too large, set factor to max. valid value */
622 		viscalv = 0x00002000;
623 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
624 	}
625 	switch (si->ps.card_arch)
626 	{
627 	case NV04A:
628 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
629 		 * (16bit register with 0.11 format value) */
630 		if (viscalv > 0x0000ffff)
631 		{
632 			/* (non-inverse) factor too small, set factor to min. valid value */
633 			viscalv = 0x0000ffff;
634 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
635 		}
636 		break;
637 	case NV30A:
638 	case NV40A:
639 		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
640 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
641 		{
642 			/* (non-inverse) factor too small, set factor to min. valid value */
643 			viscalv = (2 << 16);
644 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
645 		}
646 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
647 		 * So let it fall through... */
648 		if (si->ps.card_type != NV31) break;
649 	default:
650 		/* the rest has a downscaling limit of 0.125 */
651 		if (viscalv > (8 << 16))
652 		{
653 			/* (non-inverse) factor too small, set factor to min. valid value */
654 			viscalv = (8 << 16);
655 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
656 		}
657 		break;
658 	}
659 	/* AND below is required by hardware */
660 	viscalv &= 0x001ffffc;
661 
662 
663 	/********************************************************************************
664 	 *** setup all edges of output window, setup horizontal and vertical clipping ***
665 	 ********************************************************************************/
666 	eng_bes_calc_move_overlay(&moi);
667 
668 
669 	/*****************************
670 	 *** log color keying info ***
671 	 *****************************/
672 
673 	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
674 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
675 	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
676 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
677 
678 
679 	/*****************
680 	 *** log flags ***
681 	 *****************/
682 
683 	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
684 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
685 
686 
687 	/*************************************
688 	 *** sync to BES (Back End Scaler) ***
689 	 *************************************/
690 
691 	/* Done in card hardware:
692 	 * double buffered registers + trigger if programming complete feature. */
693 
694 
695 	/**************************************
696 	 *** actually program the registers ***
697 	 **************************************/
698 
699 	if (si->ps.card_arch < NV10A)
700 	{
701 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
702 		BESW(NV04_OE_STATE, 0x00000000);
703 		/* select buffer 0 as active (b16) */
704 		BESW(NV04_SU_STATE, 0x00000000);
705 		/* unknown (no effect?) */
706 		BESW(NV04_RM_STATE, 0x00000000);
707 		/* setup clipped(!) buffer startadress in RAM */
708 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
709 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
710 		/* (program both buffers to prevent sync distortions) */
711 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
712 		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
713 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
714 		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
715 		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
716 		/* setup buffer source pitch including slopspace (in bytes).
717 		 * Note:
718 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
719 		/* (program both buffers to prevent sync distortions) */
720 		BESW(NV04_0SRCPTCH, (ob->width * 2));
721 		BESW(NV04_1SRCPTCH, (ob->width * 2));
722 		/* setup output window position */
723 		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
724 		/* setup output window size */
725 		BESW(NV04_DSTSIZE, (
726 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
727 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
728 			));
729 		/* setup horizontal and vertical scaling */
730 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
731 		/* enable vertical filtering (b0) */
732 		BESW(NV04_CTRL_V, 0x00000001);
733 		/* enable horizontal filtering (no effect?) */
734 		BESW(NV04_CTRL_H, 0x00000111);
735 
736 		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
737 		BESW(NV04_GENCTRL, 0x00000111);
738 		/* select buffer 1 as active (b16) */
739 		BESW(NV04_SU_STATE, 0x00010000);
740 
741 		/**************************
742 		 *** setup color keying ***
743 		 **************************/
744 
745 		/* setup colorkeying */
746 		switch(si->dm.space)
747 		{
748 		case B_RGB15_LITTLE:
749 			BESW(NV04_COLKEY, (
750 				((ow->blue.value & ow->blue.mask) << 0)   |
751 				((ow->green.value & ow->green.mask) << 5) |
752 				((ow->red.value & ow->red.mask) << 10)    |
753 				((ow->alpha.value & ow->alpha.mask) << 15)
754 				));
755 			break;
756 		case B_RGB16_LITTLE:
757 			BESW(NV04_COLKEY, (
758 				((ow->blue.value & ow->blue.mask) << 0)   |
759 				((ow->green.value & ow->green.mask) << 5) |
760 				((ow->red.value & ow->red.mask) << 11)
761 				/* this space has no alpha bits */
762 				));
763 			break;
764 		case B_CMAP8:
765 		case B_RGB32_LITTLE:
766 		default:
767 			BESW(NV04_COLKEY, (
768 				((ow->blue.value & ow->blue.mask) << 0)   |
769 				((ow->green.value & ow->green.mask) << 8) |
770 				((ow->red.value & ow->red.mask) << 16)    |
771 				((ow->alpha.value & ow->alpha.mask) << 24)
772 				));
773 			break;
774 		}
775 	}
776 	else
777 	{
778 		/* >= NV10A */
779 
780 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
781 		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
782 		/* setup buffersize */
783 		//fixme if needed: width must be even officially...
784 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
785 		/* setup source pitch including slopspace (in bytes),
786 		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
787 		/* Note:
788 		 * source pitch granularity = 32 pixels on GeForce cards!! */
789 		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
790 		/* setup output window position */
791 		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
792 		/* setup output window size */
793 		BESW(NV10_0DSTSIZE, (
794 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
795 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
796 			));
797 		/* setup horizontal scaling */
798 		BESW(NV10_0ISCALH, (hiscalv << 4));
799 		/* setup vertical scaling */
800 		BESW(NV10_0ISCALV, (viscalv << 4));
801 		/* setup (unclipped!) buffer startadress in RAM */
802 		BESW(NV10_0BUFADR, moi.a1orgv);
803 		/* enable BES (b0 = 0) */
804 		BESW(NV10_GENCTRL, 0x00000000);
805 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
806 		/* This also triggers activation of programmed values (double buffered registers feature) */
807 		BESW(NV10_BUFSEL, 0x00000001);
808 
809 		/**************************
810 		 *** setup color keying ***
811 		 **************************/
812 
813 		/* setup colorkeying */
814 		switch(si->dm.space)
815 		{
816 		case B_RGB15_LITTLE:
817 			BESW(NV10_COLKEY, (
818 				((ow->blue.value & ow->blue.mask) << 0)   |
819 				((ow->green.value & ow->green.mask) << 5) |
820 				((ow->red.value & ow->red.mask) << 10)    |
821 				((ow->alpha.value & ow->alpha.mask) << 15)
822 				));
823 			break;
824 		case B_RGB16_LITTLE:
825 			BESW(NV10_COLKEY, (
826 				((ow->blue.value & ow->blue.mask) << 0)   |
827 				((ow->green.value & ow->green.mask) << 5) |
828 				((ow->red.value & ow->red.mask) << 11)
829 				/* this space has no alpha bits */
830 				));
831 			break;
832 		case B_CMAP8:
833 		case B_RGB32_LITTLE:
834 		default:
835 			BESW(NV10_COLKEY, (
836 				((ow->blue.value & ow->blue.mask) << 0)   |
837 				((ow->green.value & ow->green.mask) << 8) |
838 				((ow->red.value & ow->red.mask) << 16)    |
839 				((ow->alpha.value & ow->alpha.mask) << 24)
840 				));
841 			break;
842 		}
843 	}
844 
845 	/* note that overlay is in use (for eng_bes_move_overlay()) */
846 	si->overlay.active = true;
847 
848 	return B_OK;
849 }
850 
851 status_t eng_release_bes()
852 {
853 	if (si->ps.card_arch < NV10A)
854 	{
855 		/* setup BES control: disable scaler (b0 = 0) */
856 		BESW(NV04_GENCTRL, 0x00000000);
857 	}
858 	else
859 	{
860 		/* setup BES control: disable scaler (b0 = 1) */
861 		BESW(NV10_GENCTRL, 0x00000001);
862 	}
863 
864 	/* note that overlay is not in use (for eng_bes_move_overlay()) */
865 	si->overlay.active = false;
866 
867 	return B_OK;
868 }
869