xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_bes.c (revision db10640de90f7f9519ba2da9577b7c1af3c64f6b)
1 /* Nvidia TNT and GeForce Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-7/2004 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "nv_std.h"
7 
8 typedef struct move_overlay_info move_overlay_info;
9 
10 struct move_overlay_info
11 {
12 	uint32 hcoordv;		/* left and right edges of video output window */
13 	uint32 vcoordv;		/* top and bottom edges of video output window */
14 	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15 	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
16 	uint32 a1orgv;		/* alternate source clipping via startadress of source buffer */
17 };
18 
19 static void nv_bes_calc_move_overlay(move_overlay_info *moi);
20 static void nv_bes_program_move_overlay(move_overlay_info moi);
21 
22 /* move the overlay output window in virtualscreens */
23 /* Note:
24  * si->dm.h_display_start and si->dm.v_display_start determine where the new
25  * output window is located! */
26 void nv_bes_move_overlay()
27 {
28 	move_overlay_info moi;
29 
30 	/* abort if overlay is not active */
31 	if (!si->overlay.active) return;
32 
33 	nv_bes_calc_move_overlay(&moi);
34 	nv_bes_program_move_overlay(moi);
35 }
36 
37 static void nv_bes_calc_move_overlay(move_overlay_info *moi)
38 {
39 	/* misc used variables */
40 	uint16 temp1, temp2;
41 	/* visible screen window in virtual workspaces */
42 	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
43 
44 	/* do 'overlay follow head' in dualhead modes on dualhead cards */
45 	if (si->ps.secondary_head)
46 	{
47 		switch (si->dm.flags & DUALHEAD_BITS)
48 		{
49 		case DUALHEAD_ON:
50 		case DUALHEAD_SWITCH:
51 			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52 					(si->dm.h_display_start + si->dm.timing.h_display))
53 				nv_bes_to_crtc(si->crtc_switch_mode);
54 			else
55 				nv_bes_to_crtc(!si->crtc_switch_mode);
56 			break;
57 		default:
58 				nv_bes_to_crtc(si->crtc_switch_mode);
59 			break;
60 		}
61 	}
62 
63 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
64 	 * constraints only */
65 	crtc_hstart = si->dm.h_display_start;
66 	/* make dualhead stretch and switch mode work while we're at it.. */
67 	if (si->overlay.crtc)
68 	{
69 		crtc_hstart += si->dm.timing.h_display;
70 	}
71 
72 	/* horizontal end is the first position beyond the displayed range on the CRTC */
73 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
74 	crtc_vstart = si->dm.v_display_start;
75 	/* vertical end is the first position beyond the displayed range on the CRTC */
76 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
77 
78 
79 	/****************************************
80 	 *** setup all edges of output window ***
81 	 ****************************************/
82 
83 	/* setup left and right edges of output window */
84 	moi->hcoordv = 0;
85 	/* left edge coordinate of output window, must be inside desktop */
86 	/* clipping on the left side */
87 	if (si->overlay.ow.h_start < crtc_hstart)
88 	{
89 		temp1 = 0;
90 	}
91 	else
92 	{
93 		/* clipping on the right side */
94 		if (si->overlay.ow.h_start >= (crtc_hend - 1))
95 		{
96 			/* width < 2 is not allowed */
97 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
98 		}
99 		else
100 		/* no clipping here */
101 		{
102 			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
103 		}
104 	}
105 	moi->hcoordv |= temp1 << 16;
106 	/* right edge coordinate of output window, must be inside desktop */
107 	/* width < 2 is not allowed */
108 	if (si->overlay.ow.width < 2)
109 	{
110 		temp2 = (temp1 + 1) & 0x7ff;
111 	}
112 	else
113 	{
114 		/* clipping on the right side */
115 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
116 		{
117 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
118 		}
119 		else
120 		{
121 			/* clipping on the left side */
122 			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
123 			{
124 				/* width < 2 is not allowed */
125 				temp2 = 1;
126 			}
127 			else
128 			/* no clipping here */
129 			{
130 				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
131 			}
132 		}
133 	}
134 	moi->hcoordv |= temp2 << 0;
135 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
136 
137 	/* setup top and bottom edges of output window */
138 	moi->vcoordv = 0;
139 	/* top edge coordinate of output window, must be inside desktop */
140 	/* clipping on the top side */
141 	if (si->overlay.ow.v_start < crtc_vstart)
142 	{
143 		temp1 = 0;
144 	}
145 	else
146 	{
147 		/* clipping on the bottom side */
148 		if (si->overlay.ow.v_start >= (crtc_vend - 1))
149 		{
150 			/* height < 2 is not allowed */
151 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
152 		}
153 		else
154 		/* no clipping here */
155 		{
156 			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
157 		}
158 	}
159 	moi->vcoordv |= temp1 << 16;
160 	/* bottom edge coordinate of output window, must be inside desktop */
161 	/* height < 2 is not allowed */
162 	if (si->overlay.ow.height < 2)
163 	{
164 		temp2 = (temp1 + 1) & 0x7ff;
165 	}
166 	else
167 	{
168 		/* clipping on the bottom side */
169 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
170 		{
171 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
172 		}
173 		else
174 		{
175 			/* clipping on the top side */
176 			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
177 			{
178 				/* height < 2 is not allowed */
179 				temp2 = 1;
180 			}
181 			else
182 			/* no clipping here */
183 			{
184 				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
185 			}
186 		}
187 	}
188 	moi->vcoordv |= temp2 << 0;
189 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
190 
191 
192 	/*********************************
193 	 *** setup horizontal clipping ***
194 	 *********************************/
195 
196 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
197 	/* Note:
198 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
199 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202 	/* Note also:
203 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204 	moi->hsrcstv = 0;
205 	/* check for destination horizontal clipping at left side */
206 	if (si->overlay.ow.h_start < crtc_hstart)
207 	{
208 		/* check if entire destination picture is clipping left:
209 		 * (2 pixels will be clamped onscreen at least) */
210 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
211 		{
212 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213 			moi->hsrcstv += (si->overlay.ow.width - 2);
214 		}
215 		else
216 		{
217 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218 			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
219 		}
220 		LOG(4,("Overlay: clipping left...\n"));
221 
222 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223 		 * Note that this also already takes care of aligning the value to the BES register! */
224 		moi->hsrcstv *= si->overlay.h_ifactor;
225 	}
226 	/* take zoom into account */
227 	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228 	/* AND below required by hardware */
229 	moi->hsrcstv &= 0x03fffffc;
230 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
231 
232 
233 	/*******************************
234 	 *** setup vertical clipping ***
235 	 *******************************/
236 
237 	/* calculate inputbitmap origin adress */
238 	moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
239 	moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer);
240 
241 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
242 	/* Note:
243 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
244 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
245 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
246 	/* Note also:
247 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
248 
249 	moi->v1srcstv = 0;
250 	/* check for destination vertical clipping at top side */
251 	if (si->overlay.ow.v_start < crtc_vstart)
252 	{
253 		/* check if entire destination picture is clipping at top:
254 		 * (2 pixels will be clamped onscreen at least) */
255 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
256 		{
257 			/* increase 'number of clipping pixels' with 'fixed value':
258 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
259 			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
260 			/* on pre-NV10 we need to do clipping in the source
261 			 * bitmap because no seperate clipping registers exist... */
262 			if (si->ps.card_arch < NV10A)
263 				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
264 		}
265 		else
266 		{
267 			/* increase 'first contributing pixel' with:
268 			 * number of destination picture clipping pixels * inverse scaling factor */
269 			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
270 			/* on pre-NV10 we need to do clipping in the source
271 			 * bitmap because no seperate clipping registers exist... */
272 			if (si->ps.card_arch < NV10A)
273 				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
274 		}
275 		LOG(4,("Overlay: clipping at top...\n"));
276 	}
277 	/* take zoom into account */
278 	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
279 	if (si->ps.card_arch < NV10A)
280 	{
281 		moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
282 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
283 	}
284 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
285 
286 	/* AND below is probably required by hardware. */
287 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
288 	moi->a1orgv &= 0xfffffff0;
289 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
290 }
291 
292 static void nv_bes_program_move_overlay(move_overlay_info moi)
293 {
294 	/*************************************
295 	 *** sync to BES (Back End Scaler) ***
296 	 *************************************/
297 
298 	/* Done in card hardware:
299 	 * double buffered registers + trigger if programming complete feature. */
300 
301 
302 	/**************************************
303 	 *** actually program the registers ***
304 	 **************************************/
305 
306 	if (si->ps.card_arch < NV10A)
307 	{
308 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
309 		BESW(NV04_OE_STATE, 0x00000000);
310 		/* select buffer 0 as active (b16) */
311 		BESW(NV04_SU_STATE, 0x00000000);
312 		/* unknown (no effect?) */
313 		BESW(NV04_RM_STATE, 0x00000000);
314 		/* setup clipped(!) buffer startadress in RAM */
315 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
316 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
317 		/* (program both buffers to prevent sync distortions) */
318 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
319 		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
320 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
321 		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
322 		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
323 		/* setup output window position */
324 		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
325 		/* setup output window size */
326 		BESW(NV04_DSTSIZE, (
327 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
328 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
329 			));
330 		/* select buffer 1 as active (b16) */
331 		BESW(NV04_SU_STATE, 0x00010000);
332 	}
333 	else
334 	{
335 		/* >= NV10A */
336 
337 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
338 		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
339 		/* setup output window position */
340 		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
341 		/* setup output window size */
342 		BESW(NV10_0DSTSIZE, (
343 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
344 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
345 			));
346 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
347 		/* This also triggers activation of programmed values (double buffered registers feature) */
348 		BESW(NV10_BUFSEL, 0x00000001);
349 	}
350 }
351 
352 status_t nv_bes_to_crtc(bool crtc)
353 {
354 	if (si->ps.secondary_head)
355 	{
356 		if (crtc)
357 		{
358 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
359 			/* switch overlay engine to CRTC2 */
360 			NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
361 			NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
362 			si->overlay.crtc = !si->crtc_switch_mode;
363 		}
364 		else
365 		{
366 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
367 			/* switch overlay engine to CRTC1 */
368 			NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
369 			NV_REG32(NV32_FUNCSEL) |= 0x00001000;
370 			si->overlay.crtc = si->crtc_switch_mode;
371 		}
372 		return B_OK;
373 	}
374 	else
375 	{
376 		return B_ERROR;
377 	}
378 }
379 
380 status_t nv_bes_init()
381 {
382 	if (si->ps.card_arch < NV10A)
383 	{
384 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385 		BESW(NV04_INTE, 0x00000000);
386 
387 		/* setup saturation to be 'neutral' */
388 		BESW(NV04_SAT, 0x00000000);
389 		/* setup RGB brightness to be 'neutral' */
390 		BESW(NV04_RED_AMP, 0x00000069);
391 		BESW(NV04_GRN_AMP, 0x0000003e);
392 		BESW(NV04_BLU_AMP, 0x00000089);
393 
394 		/* setup fifo for fetching data */
395 		BESW(NV04_FIFOBURL, 0x00000003);
396 		BESW(NV04_FIFOTHRS, 0x00000038);
397 
398 		/* unknown, but needed (registers only have b0 implemented) */
399 		/* (program both buffers to prevent sync distortions) */
400 		BESW(NV04_0OFFSET, 0x00000000);
401 		BESW(NV04_1OFFSET, 0x00000000);
402 	}
403 	else
404 	{
405 		/* >= NV10A */
406 
407 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408 		BESW(NV10_INTE, 0x00000000);
409 		/* shut off GeForce4MX MPEG2 decoder */
410 		BESW(DEC_GENCTRL, 0x00000000);
411 		/* setup BES memory-range mask */
412 		BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413 		/* unknown, but needed */
414 		BESW(NV10_0OFFSET, 0x00000000);
415 
416 		/* setup brightness, contrast and saturation to be 'neutral' */
417 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419 	}
420 
421 	return B_OK;
422 }
423 
424 status_t nv_configure_bes
425 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
426 {
427 	/* yuy2 (4:2:2) colorspace calculations */
428 
429 	/* Note:
430 	 * in BeOS R5.0.3 and DANO:
431 	 * 'ow->offset_xxx' is always 0, so not used;
432 	 * 'ow->width' and 'ow->height' are the output window size: does not change
433 	 * if window is clipping;
434 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
435 	 * window. These values can be negative: this means the window is clipping
436 	 * at the left or the top of the display, respectively. */
437 
438 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
439 	 * displayed on screen. This is used for the 'hardware zoom' function. */
440 
441 	/* output window position and clipping info for source buffer */
442 	move_overlay_info moi;
443 	/* calculated BES register values */
444 	uint32 	hiscalv, viscalv;
445 	/* interval representation, used for scaling calculations */
446 	uint16 intrep;
447 	/* inverse scaling factor, used for source positioning */
448 	uint32 ifactor;
449 	/* copy of overlay view which has checked valid values */
450 	overlay_view my_ov;
451 
452 
453 	/**************************************************************************************
454 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
455 	 **************************************************************************************/
456 	my_ov = *ov;
457 	/* check for valid 'coordinates' */
458 	if (my_ov.width == 0) my_ov.width++;
459 	if (my_ov.height == 0) my_ov.height++;
460 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
461 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
462 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
463 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
464 	if (my_ov.v_start > (ob->height - 1))
465 		my_ov.v_start = (ob->height - 1);
466 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
467 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
468 
469 	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
470 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
471 
472 	/* save for nv_bes_calc_move_overlay() */
473 	si->overlay.ow = *ow;
474 	si->overlay.ob = *ob;
475 	si->overlay.my_ov = my_ov;
476 
477 
478 	/********************************
479 	 *** setup horizontal scaling ***
480 	 ********************************/
481 	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
482 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
483 	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
484 
485 	/* determine interval representation value, taking zoom into account */
486 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
487 	{
488 		/* horizontal filtering is ON */
489 		if ((my_ov.width == ow->width) | (ow->width < 2))
490 		{
491 			/* no horizontal scaling used, OR destination width < 2 */
492 			intrep = 0;
493 		}
494 		else
495 		{
496 			intrep = 1;
497 		}
498 	}
499 	else
500 	{
501 		/* horizontal filtering is OFF */
502 		if ((ow->width < my_ov.width) & (ow->width >= 2))
503 		{
504 			/* horizontal downscaling used AND destination width >= 2 */
505 			intrep = 1;
506 		}
507 		else
508 		{
509 			intrep = 0;
510 		}
511 	}
512 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
513 
514 	/* calculate inverse horizontal scaling factor, taking zoom into account */
515 	/* standard scaling formula: */
516 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
517 
518 	/* correct factor to prevent most-right visible 'line' from distorting */
519 	ifactor -= (1 << 2);
520 	hiscalv = ifactor;
521 	/* save for nv_bes_calc_move_overlay() */
522 	si->overlay.h_ifactor = ifactor;
523 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
524 
525 	/* check scaling factor (and modify if needed) to be within scaling limits */
526 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
527 	if (hiscalv < 0x00002000)
528 	{
529 		/* (non-inverse) factor too large, set factor to max. valid value */
530 		hiscalv = 0x00002000;
531 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
532 	}
533 	switch (si->ps.card_arch)
534 	{
535 	case NV04A:
536 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
537 		 * (16bit register with 0.11 format value) */
538 		if (hiscalv > 0x0000ffff)
539 		{
540 			/* (non-inverse) factor too small, set factor to min. valid value */
541 			hiscalv = 0x0000ffff;
542 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
543 		}
544 		break;
545 	case NV30A:
546 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
547 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
548 		{
549 			/* (non-inverse) factor too small, set factor to min. valid value */
550 			hiscalv = (2 << 16);
551 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
552 		}
553 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
554 		 * So let it fall through... */
555 		if (si->ps.card_type != NV31) break;
556 	default:
557 		/* the rest has a downscaling limit of 0.125 */
558 		if (hiscalv > (8 << 16))
559 		{
560 			/* (non-inverse) factor too small, set factor to min. valid value */
561 			hiscalv = (8 << 16);
562 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
563 		}
564 		break;
565 	}
566 	/* AND below is required by hardware */
567 	hiscalv &= 0x001ffffc;
568 
569 
570 	/******************************
571 	 *** setup vertical scaling ***
572 	 ******************************/
573 
574 	/* determine interval representation value, taking zoom into account */
575 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
576 	{
577 		/* vertical filtering is ON */
578 		if ((my_ov.height == ow->height) | (ow->height < 2))
579 		{
580 			/* no vertical scaling used, OR destination height < 2 */
581 			intrep = 0;
582 		}
583 		else
584 		{
585 			intrep = 1;
586 		}
587 	}
588 	else
589 	{
590 		/* vertical filtering is OFF */
591 		if ((ow->height < my_ov.height) & (ow->height >= 2))
592 		{
593 			/* vertical downscaling used AND destination height >= 2 */
594 			intrep = 1;
595 		}
596 		else
597 		{
598 			intrep = 0;
599 		}
600 	}
601 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
602 
603 	/* calculate inverse vertical scaling factor, taking zoom into account */
604 	/* standard scaling formula: */
605 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
606 
607 	/* correct factor to prevent lowest visible line from distorting */
608 	ifactor -= (1 << 2);
609 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
610 
611 	/* preserve ifactor for source positioning calculations later on */
612 	viscalv = ifactor;
613 	/* save for nv_bes_calc_move_overlay() */
614 	si->overlay.v_ifactor = ifactor;
615 
616 	/* check scaling factor (and modify if needed) to be within scaling limits */
617 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
618 	if (viscalv < 0x00002000)
619 	{
620 		/* (non-inverse) factor too large, set factor to max. valid value */
621 		viscalv = 0x00002000;
622 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
623 	}
624 	switch (si->ps.card_arch)
625 	{
626 	case NV04A:
627 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
628 		 * (16bit register with 0.11 format value) */
629 		if (viscalv > 0x0000ffff)
630 		{
631 			/* (non-inverse) factor too small, set factor to min. valid value */
632 			viscalv = 0x0000ffff;
633 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
634 		}
635 		break;
636 	case NV30A:
637 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
638 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
639 		{
640 			/* (non-inverse) factor too small, set factor to min. valid value */
641 			viscalv = (2 << 16);
642 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
643 		}
644 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
645 		 * So let it fall through... */
646 		if (si->ps.card_type != NV31) break;
647 	default:
648 		/* the rest has a downscaling limit of 0.125 */
649 		if (viscalv > (8 << 16))
650 		{
651 			/* (non-inverse) factor too small, set factor to min. valid value */
652 			viscalv = (8 << 16);
653 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
654 		}
655 		break;
656 	}
657 	/* AND below is required by hardware */
658 	viscalv &= 0x001ffffc;
659 
660 
661 	/********************************************************************************
662 	 *** setup all edges of output window, setup horizontal and vertical clipping ***
663 	 ********************************************************************************/
664 	nv_bes_calc_move_overlay(&moi);
665 
666 
667 	/*****************************
668 	 *** log color keying info ***
669 	 *****************************/
670 
671 	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
672 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
673 	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
674 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
675 
676 
677 	/*****************
678 	 *** log flags ***
679 	 *****************/
680 
681 	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
682 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
683 
684 
685 	/*************************************
686 	 *** sync to BES (Back End Scaler) ***
687 	 *************************************/
688 
689 	/* Done in card hardware:
690 	 * double buffered registers + trigger if programming complete feature. */
691 
692 
693 	/**************************************
694 	 *** actually program the registers ***
695 	 **************************************/
696 
697 	if (si->ps.card_arch < NV10A)
698 	{
699 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
700 		BESW(NV04_OE_STATE, 0x00000000);
701 		/* select buffer 0 as active (b16) */
702 		BESW(NV04_SU_STATE, 0x00000000);
703 		/* unknown (no effect?) */
704 		BESW(NV04_RM_STATE, 0x00000000);
705 		/* setup clipped(!) buffer startadress in RAM */
706 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
707 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
708 		/* (program both buffers to prevent sync distortions) */
709 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
710 		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
711 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
712 		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
713 		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
714 		/* setup buffer source pitch including slopspace (in bytes).
715 		 * Note:
716 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
717 		/* (program both buffers to prevent sync distortions) */
718 		BESW(NV04_0SRCPTCH, (ob->width * 2));
719 		BESW(NV04_1SRCPTCH, (ob->width * 2));
720 		/* setup output window position */
721 		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
722 		/* setup output window size */
723 		BESW(NV04_DSTSIZE, (
724 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
725 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
726 			));
727 		/* setup horizontal and vertical scaling */
728 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
729 		/* enable vertical filtering (b0) */
730 		BESW(NV04_CTRL_V, 0x00000001);
731 		/* enable horizontal filtering (no effect?) */
732 		BESW(NV04_CTRL_H, 0x00000111);
733 
734 		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
735 		BESW(NV04_GENCTRL, 0x00000111);
736 		/* select buffer 1 as active (b16) */
737 		BESW(NV04_SU_STATE, 0x00010000);
738 
739 		/**************************
740 		 *** setup color keying ***
741 		 **************************/
742 
743 		/* setup colorkeying */
744 		switch(si->dm.space)
745 		{
746 		case B_RGB15_LITTLE:
747 			BESW(NV04_COLKEY, (
748 				((ow->blue.value & ow->blue.mask) << 0)   |
749 				((ow->green.value & ow->green.mask) << 5) |
750 				((ow->red.value & ow->red.mask) << 10)    |
751 				((ow->alpha.value & ow->alpha.mask) << 15)
752 				));
753 			break;
754 		case B_RGB16_LITTLE:
755 			BESW(NV04_COLKEY, (
756 				((ow->blue.value & ow->blue.mask) << 0)   |
757 				((ow->green.value & ow->green.mask) << 5) |
758 				((ow->red.value & ow->red.mask) << 11)
759 				/* this space has no alpha bits */
760 				));
761 			break;
762 		case B_CMAP8:
763 		case B_RGB32_LITTLE:
764 		default:
765 			BESW(NV04_COLKEY, (
766 				((ow->blue.value & ow->blue.mask) << 0)   |
767 				((ow->green.value & ow->green.mask) << 8) |
768 				((ow->red.value & ow->red.mask) << 16)    |
769 				((ow->alpha.value & ow->alpha.mask) << 24)
770 				));
771 			break;
772 		}
773 	}
774 	else
775 	{
776 		/* >= NV10A */
777 
778 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
779 		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
780 		/* setup buffersize */
781 		//fixme if needed: width must be even officially...
782 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
783 		/* setup source pitch including slopspace (in bytes),
784 		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
785 		/* Note:
786 		 * source pitch granularity = 32 pixels on GeForce cards!! */
787 		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
788 		/* setup output window position */
789 		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
790 		/* setup output window size */
791 		BESW(NV10_0DSTSIZE, (
792 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
793 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
794 			));
795 		/* setup horizontal scaling */
796 		BESW(NV10_0ISCALH, (hiscalv << 4));
797 		/* setup vertical scaling */
798 		BESW(NV10_0ISCALV, (viscalv << 4));
799 		/* setup (unclipped!) buffer startadress in RAM */
800 		BESW(NV10_0BUFADR, moi.a1orgv);
801 		/* enable BES (b0 = 0) */
802 		BESW(NV10_GENCTRL, 0x00000000);
803 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
804 		/* This also triggers activation of programmed values (double buffered registers feature) */
805 		BESW(NV10_BUFSEL, 0x00000001);
806 
807 		/**************************
808 		 *** setup color keying ***
809 		 **************************/
810 
811 		/* setup colorkeying */
812 		switch(si->dm.space)
813 		{
814 		case B_RGB15_LITTLE:
815 			BESW(NV10_COLKEY, (
816 				((ow->blue.value & ow->blue.mask) << 0)   |
817 				((ow->green.value & ow->green.mask) << 5) |
818 				((ow->red.value & ow->red.mask) << 10)    |
819 				((ow->alpha.value & ow->alpha.mask) << 15)
820 				));
821 			break;
822 		case B_RGB16_LITTLE:
823 			BESW(NV10_COLKEY, (
824 				((ow->blue.value & ow->blue.mask) << 0)   |
825 				((ow->green.value & ow->green.mask) << 5) |
826 				((ow->red.value & ow->red.mask) << 11)
827 				/* this space has no alpha bits */
828 				));
829 			break;
830 		case B_CMAP8:
831 		case B_RGB32_LITTLE:
832 		default:
833 			BESW(NV10_COLKEY, (
834 				((ow->blue.value & ow->blue.mask) << 0)   |
835 				((ow->green.value & ow->green.mask) << 8) |
836 				((ow->red.value & ow->red.mask) << 16)    |
837 				((ow->alpha.value & ow->alpha.mask) << 24)
838 				));
839 			break;
840 		}
841 	}
842 
843 	/* note that overlay is in use (for nv_bes_move_overlay()) */
844 	si->overlay.active = true;
845 
846 	return B_OK;
847 }
848 
849 status_t nv_release_bes()
850 {
851 	if (si->ps.card_arch < NV10A)
852 	{
853 		/* setup BES control: disable scaler (b0 = 0) */
854 		BESW(NV04_GENCTRL, 0x00000000);
855 	}
856 	else
857 	{
858 		/* setup BES control: disable scaler (b0 = 1) */
859 		BESW(NV10_GENCTRL, 0x00000001);
860 	}
861 
862 	/* note that overlay is not in use (for nv_bes_move_overlay()) */
863 	si->overlay.active = false;
864 
865 	return B_OK;
866 }
867