xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_bes.c (revision 81f5654c124bf46fba0fd251f208e2d88d81e1ce)
1 /* Nvidia TNT and GeForce Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-5/2004 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "nv_std.h"
7 
8 /* move the overlay output window in virtualscreens */
9 /* Note:
10  * si->dm.h_display_start and si->dm.v_display_start determine where the new
11  * output window is located! */
12 void nv_bes_move_overlay()
13 {
14 	/* calculated BES register values */
15 	uint32 	hcoordv, vcoordv, hsrcstv, a1orgv, v1srcstv;
16 	/* misc used variables */
17 	uint16 temp1, temp2;
18 	/* visible screen window in virtual workspaces */
19 	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
20 
21 	/* abort if overlay is not active */
22 	if (!si->overlay.active) return;
23 
24 	/* do 'overlay follow head' in dualhead modes on dualhead cards */
25 	if (si->ps.secondary_head)
26 	{
27 		switch (si->dm.flags & DUALHEAD_BITS)
28 		{
29 		case DUALHEAD_ON:
30 		case DUALHEAD_SWITCH:
31 			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
32 					(si->dm.h_display_start + si->dm.timing.h_display))
33 				nv_bes_to_crtc(si->crtc_switch_mode);
34 			else
35 				nv_bes_to_crtc(!si->crtc_switch_mode);
36 			break;
37 		default:
38 				nv_bes_to_crtc(si->crtc_switch_mode);
39 			break;
40 		}
41 	}
42 
43 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
44 	 * constraints only */
45 	crtc_hstart = si->dm.h_display_start;
46 	/* make dualhead stretch and switch mode work while we're at it.. */
47 	if (si->overlay.crtc)
48 	{
49 		crtc_hstart += si->dm.timing.h_display;
50 	}
51 
52 	/* horizontal end is the first position beyond the displayed range on the CRTC */
53 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
54 	crtc_vstart = si->dm.v_display_start;
55 	/* vertical end is the first position beyond the displayed range on the CRTC */
56 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
57 
58 
59 	/****************************************
60 	 *** setup all edges of output window ***
61 	 ****************************************/
62 
63 	/* setup left and right edges of output window */
64 	hcoordv = 0;
65 	/* left edge coordinate of output window, must be inside desktop */
66 	/* clipping on the left side */
67 	if (si->overlay.ow.h_start < crtc_hstart)
68 	{
69 		temp1 = 0;
70 	}
71 	else
72 	{
73 		/* clipping on the right side */
74 		if (si->overlay.ow.h_start >= (crtc_hend - 1))
75 		{
76 			/* width < 2 is not allowed */
77 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
78 		}
79 		else
80 		/* no clipping here */
81 		{
82 			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
83 		}
84 	}
85 	hcoordv |= temp1 << 16;
86 	/* right edge coordinate of output window, must be inside desktop */
87 	/* width < 2 is not allowed */
88 	if (si->overlay.ow.width < 2)
89 	{
90 		temp2 = (temp1 + 1) & 0x7ff;
91 	}
92 	else
93 	{
94 		/* clipping on the right side */
95 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
96 		{
97 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
98 		}
99 		else
100 		{
101 			/* clipping on the left side */
102 			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
103 			{
104 				/* width < 2 is not allowed */
105 				temp2 = 1;
106 			}
107 			else
108 			/* no clipping here */
109 			{
110 				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
111 			}
112 		}
113 	}
114 	hcoordv |= temp2 << 0;
115 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
116 
117 	/* setup top and bottom edges of output window */
118 	vcoordv = 0;
119 	/* top edge coordinate of output window, must be inside desktop */
120 	/* clipping on the top side */
121 	if (si->overlay.ow.v_start < crtc_vstart)
122 	{
123 		temp1 = 0;
124 	}
125 	else
126 	{
127 		/* clipping on the bottom side */
128 		if (si->overlay.ow.v_start >= (crtc_vend - 1))
129 		{
130 			/* height < 2 is not allowed */
131 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
132 		}
133 		else
134 		/* no clipping here */
135 		{
136 			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
137 		}
138 	}
139 	vcoordv |= temp1 << 16;
140 	/* bottom edge coordinate of output window, must be inside desktop */
141 	/* height < 2 is not allowed */
142 	if (si->overlay.ow.height < 2)
143 	{
144 		temp2 = (temp1 + 1) & 0x7ff;
145 	}
146 	else
147 	{
148 		/* clipping on the bottom side */
149 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
150 		{
151 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
152 		}
153 		else
154 		{
155 			/* clipping on the top side */
156 			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
157 			{
158 				/* height < 2 is not allowed */
159 				temp2 = 1;
160 			}
161 			else
162 			/* no clipping here */
163 			{
164 				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
165 			}
166 		}
167 	}
168 	vcoordv |= temp2 << 0;
169 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
170 
171 
172 	/*********************************
173 	 *** setup horizontal clipping ***
174 	 *********************************/
175 
176 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
177 	/* Note:
178 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
179 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
180 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
181 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
182 	/* Note also:
183 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
184 	hsrcstv = 0;
185 	/* check for destination horizontal clipping at left side */
186 	if (si->overlay.ow.h_start < crtc_hstart)
187 	{
188 		/* check if entire destination picture is clipping left:
189 		 * (2 pixels will be clamped onscreen at least) */
190 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
191 		{
192 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
193 			hsrcstv += (si->overlay.ow.width - 2);
194 		}
195 		else
196 		{
197 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
198 			hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
199 		}
200 		LOG(4,("Overlay: clipping left...\n"));
201 
202 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
203 		 * Note that this also already takes care of aligning the value to the BES register! */
204 		hsrcstv *= si->overlay.h_ifactor;
205 	}
206 	/* take zoom into account */
207 	hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
208 	/* AND below required by hardware */
209 	hsrcstv &= 0x03fffffc;
210 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536));
211 
212 
213 	/*******************************
214 	 *** setup vertical clipping ***
215 	 *******************************/
216 
217 	/* calculate inputbitmap origin adress */
218 	a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
219 	a1orgv -= (uint32)((vuint32 *)si->framebuffer);
220 
221 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
222 	/* Note:
223 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
224 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
225 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
226 	/* Note also:
227 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
228 
229 	v1srcstv = 0;
230 	/* check for destination vertical clipping at top side */
231 	if (si->overlay.ow.v_start < crtc_vstart)
232 	{
233 		/* check if entire destination picture is clipping at top:
234 		 * (2 pixels will be clamped onscreen at least) */
235 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
236 		{
237 			/* increase 'number of clipping pixels' with 'fixed value':
238 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
239 			v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
240 			/* on pre-NV10 we need to do clipping in the source
241 			 * bitmap because no seperate clipping registers exist... */
242 			if (si->ps.card_arch < NV10A)
243 				a1orgv += ((v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
244 		}
245 		else
246 		{
247 			/* increase 'first contributing pixel' with:
248 			 * number of destination picture clipping pixels * inverse scaling factor */
249 			v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
250 			/* on pre-NV10 we need to do clipping in the source
251 			 * bitmap because no seperate clipping registers exist... */
252 			if (si->ps.card_arch < NV10A)
253 				a1orgv += ((v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
254 		}
255 		LOG(4,("Overlay: clipping at top...\n"));
256 	}
257 	/* take zoom into account */
258 	v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
259 	if (si->ps.card_arch < NV10A)
260 	{
261 		a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
262 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv));
263 	}
264 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536));
265 
266 	/* AND below is probably required by hardware. */
267 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
268 	a1orgv &= 0xfffffff0;
269 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv));
270 
271 
272 	/*************************************
273 	 *** sync to BES (Back End Scaler) ***
274 	 *************************************/
275 
276 	/* Done in card hardware:
277 	 * double buffered registers + trigger if programming complete feature. */
278 
279 
280 	/**************************************
281 	 *** actually program the registers ***
282 	 **************************************/
283 
284 	if (si->ps.card_arch < NV10A)
285 	{
286 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
287 		BESW(NV04_OE_STATE, 0x00000000);
288 		/* select buffer 0 as active (b16) */
289 		BESW(NV04_SU_STATE, 0x00000000);
290 		/* unknown (no effect?) */
291 		BESW(NV04_RM_STATE, 0x00000000);
292 		/* setup clipped(!) buffer startadress in RAM */
293 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
294 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
295 		/* (program both buffers to prevent sync distortions) */
296 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
297 		a1orgv += ((hsrcstv >> 16) * 2);
298 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
299 		BESW(NV04_0BUFADR, (a1orgv & ~0x03));
300 		BESW(NV04_1BUFADR, (a1orgv & ~0x03));
301 
302 		/* setup buffer source pitch including slopspace (in bytes).
303 		 * Note:
304 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
305 		/* (program both buffers to prevent sync distortions) */
306 //		BESW(NV04_0SRCPTCH, (ob->width * 2));
307 //		BESW(NV04_1SRCPTCH, (ob->width * 2));
308 		/* setup output window position */
309 		BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
310 		/* setup output window size */
311 		BESW(NV04_DSTSIZE, (
312 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
313 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
314 			));
315 
316 		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
317 //		BESW(NV04_GENCTRL, 0x00000111);
318 		/* select buffer 1 as active (b16) */
319 		BESW(NV04_SU_STATE, 0x00010000);
320 	}
321 	else
322 	{
323 		/* >= NV10A */
324 
325 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
326 		BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff));
327 		/* setup buffersize */
328 		//fixme if needed: width must be even officially...
329 //		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
330 		/* setup source pitch including slopspace (in bytes),
331 		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
332 		/* Note:
333 		 * source pitch granularity = 32 pixels on GeForce cards!! */
334 //		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
335 		/* setup output window position */
336 		BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
337 		/* setup output window size */
338 		BESW(NV10_0DSTSIZE, (
339 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
340 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
341 			));
342 		/* setup (unclipped!) buffer startadress in RAM */
343 //		BESW(NV10_0BUFADR, a1orgv);
344 		/* enable BES (b0 = 0) */
345 //		BESW(NV10_GENCTRL, 0x00000000);
346 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
347 		/* This also triggers activation of programmed values (double buffered registers feature) */
348 		BESW(NV10_BUFSEL, 0x00000001);
349 	}
350 }
351 
352 status_t nv_bes_to_crtc(bool crtc)
353 {
354 	if (si->ps.secondary_head)
355 	{
356 		if (crtc)
357 		{
358 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
359 			/* switch overlay engine to CRTC2 */
360 			NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
361 			NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
362 			si->overlay.crtc = !si->crtc_switch_mode;
363 		}
364 		else
365 		{
366 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
367 			/* switch overlay engine to CRTC1 */
368 			NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
369 			NV_REG32(NV32_FUNCSEL) |= 0x00001000;
370 			si->overlay.crtc = si->crtc_switch_mode;
371 		}
372 		return B_OK;
373 	}
374 	else
375 	{
376 		return B_ERROR;
377 	}
378 }
379 
380 status_t nv_bes_init()
381 {
382 	if (si->ps.card_arch < NV10A)
383 	{
384 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385 		BESW(NV04_INTE, 0x00000000);
386 
387 		/* setup saturation to be 'neutral' */
388 		BESW(NV04_SAT, 0x00000000);
389 		/* setup RGB brightness to be 'neutral' */
390 		BESW(NV04_RED_AMP, 0x00000069);
391 		BESW(NV04_GRN_AMP, 0x0000003e);
392 		BESW(NV04_BLU_AMP, 0x00000089);
393 
394 		/* setup fifo for fetching data */
395 		BESW(NV04_FIFOBURL, 0x00000003);
396 		BESW(NV04_FIFOTHRS, 0x00000038);
397 
398 		/* unknown, but needed (registers only have b0 implemented) */
399 		/* (program both buffers to prevent sync distortions) */
400 		BESW(NV04_0OFFSET, 0x00000000);
401 		BESW(NV04_1OFFSET, 0x00000000);
402 	}
403 	else
404 	{
405 		/* >= NV10A */
406 
407 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408 		BESW(NV10_INTE, 0x00000000);
409 		/* shut off GeForce4MX MPEG2 decoder */
410 		BESW(DEC_GENCTRL, 0x00000000);
411 		/* setup BES memory-range mask */
412 		BESW(NV10_0MEMMASK, ((si->ps.memory_size << 20) - 1));
413 		/* unknown, but needed */
414 		BESW(NV10_0OFFSET, 0x00000000);
415 
416 		/* setup brightness, contrast and saturation to be 'neutral' */
417 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419 	}
420 
421 	return B_OK;
422 }
423 
424 status_t nv_configure_bes
425 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
426 {
427 	/* yuy2 (4:2:2) colorspace calculations */
428 
429 	/* Note:
430 	 * in BeOS R5.0.3 and DANO:
431 	 * 'ow->offset_xxx' is always 0, so not used;
432 	 * 'ow->width' and 'ow->height' are the output window size: does not change
433 	 * if window is clipping;
434 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
435 	 * window. These values can be negative: this means the window is clipping
436 	 * at the left or the top of the display, respectively. */
437 
438 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
439 	 * displayed on screen. This is used for the 'hardware zoom' function. */
440 
441 	/* calculated BES register values */
442 	uint32 	hcoordv, vcoordv, hiscalv, hsrcstv,	viscalv, a1orgv, v1srcstv;
443 	/* misc used variables */
444 	uint16 temp1, temp2;
445 	/* interval representation, used for scaling calculations */
446 	uint16 intrep, crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
447 	/* inverse scaling factor, used for source positioning */
448 	uint32 ifactor;
449 	/* copy of overlay view which has checked valid values */
450 	overlay_view my_ov;
451 
452 
453 	/**************************************************************************************
454 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
455 	 **************************************************************************************/
456 	my_ov = *ov;
457 	/* check for valid 'coordinates' */
458 	if (my_ov.width == 0) my_ov.width++;
459 	if (my_ov.height == 0) my_ov.height++;
460 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
461 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
462 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
463 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
464 	if (my_ov.v_start > (ob->height - 1))
465 		my_ov.v_start = (ob->height - 1);
466 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
467 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
468 
469 	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
470 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
471 
472 	/* save for nv_bes_move_overlay() */
473 	si->overlay.ow = *ow;
474 	si->overlay.ob = *ob;
475 	si->overlay.my_ov = my_ov;
476 
477 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
478 	 * constraints only */
479 	crtc_hstart = si->dm.h_display_start;
480 	/* make dualhead stretch and switch mode work while we're at it.. */
481 	if (si->overlay.crtc)
482 	{
483 		crtc_hstart += si->dm.timing.h_display;
484 	}
485 
486 	/* horizontal end is the first position beyond the displayed range on the CRTC */
487 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
488 	crtc_vstart = si->dm.v_display_start;
489 	/* vertical end is the first position beyond the displayed range on the CRTC */
490 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
491 
492 
493 	/****************************************
494 	 *** setup all edges of output window ***
495 	 ****************************************/
496 
497 	/* setup left and right edges of output window */
498 	hcoordv = 0;
499 	/* left edge coordinate of output window, must be inside desktop */
500 	/* clipping on the left side */
501 	if (ow->h_start < crtc_hstart)
502 	{
503 		temp1 = 0;
504 	}
505 	else
506 	{
507 		/* clipping on the right side */
508 		if (ow->h_start >= (crtc_hend - 1))
509 		{
510 			/* width < 2 is not allowed */
511 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
512 		}
513 		else
514 		/* no clipping here */
515 		{
516 			temp1 = (ow->h_start - crtc_hstart) & 0x7ff;
517 		}
518 	}
519 	hcoordv |= temp1 << 16;
520 	/* right edge coordinate of output window, must be inside desktop */
521 	/* width < 2 is not allowed */
522 	if (ow->width < 2)
523 	{
524 		temp2 = (temp1 + 1) & 0x7ff;
525 	}
526 	else
527 	{
528 		/* clipping on the right side */
529 		if ((ow->h_start + ow->width - 1) > (crtc_hend - 1))
530 		{
531 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
532 		}
533 		else
534 		{
535 			/* clipping on the left side */
536 			if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
537 			{
538 				/* width < 2 is not allowed */
539 				temp2 = 1;
540 			}
541 			else
542 			/* no clipping here */
543 			{
544 				temp2 = ((uint16)(ow->h_start + ow->width - crtc_hstart - 1)) & 0x7ff;
545 			}
546 		}
547 	}
548 	hcoordv |= temp2 << 0;
549 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
550 
551 	/* setup top and bottom edges of output window */
552 	vcoordv = 0;
553 	/* top edge coordinate of output window, must be inside desktop */
554 	/* clipping on the top side */
555 	if (ow->v_start < crtc_vstart)
556 	{
557 		temp1 = 0;
558 	}
559 	else
560 	{
561 		/* clipping on the bottom side */
562 		if (ow->v_start >= (crtc_vend - 1))
563 		{
564 			/* height < 2 is not allowed */
565 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
566 		}
567 		else
568 		/* no clipping here */
569 		{
570 			temp1 = (ow->v_start - crtc_vstart) & 0x7ff;
571 		}
572 	}
573 	vcoordv |= temp1 << 16;
574 	/* bottom edge coordinate of output window, must be inside desktop */
575 	/* height < 2 is not allowed */
576 	if (ow->height < 2)
577 	{
578 		temp2 = (temp1 + 1) & 0x7ff;
579 	}
580 	else
581 	{
582 		/* clipping on the bottom side */
583 		if ((ow->v_start + ow->height - 1) > (crtc_vend - 1))
584 		{
585 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
586 		}
587 		else
588 		{
589 			/* clipping on the top side */
590 			if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
591 			{
592 				/* height < 2 is not allowed */
593 				temp2 = 1;
594 			}
595 			else
596 			/* no clipping here */
597 			{
598 				temp2 = ((uint16)(ow->v_start + ow->height - crtc_vstart - 1)) & 0x7ff;
599 			}
600 		}
601 	}
602 	vcoordv |= temp2 << 0;
603 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
604 
605 
606 	/*********************************************
607 	 *** setup horizontal scaling and clipping ***
608 	 *********************************************/
609 
610 	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
611 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
612 	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
613 
614 	/* do horizontal scaling... */
615 	/* determine interval representation value, taking zoom into account */
616 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
617 	{
618 		/* horizontal filtering is ON */
619 		if ((my_ov.width == ow->width) | (ow->width < 2))
620 		{
621 			/* no horizontal scaling used, OR destination width < 2 */
622 			intrep = 0;
623 		}
624 		else
625 		{
626 			intrep = 1;
627 		}
628 	}
629 	else
630 	{
631 		/* horizontal filtering is OFF */
632 		if ((ow->width < my_ov.width) & (ow->width >= 2))
633 		{
634 			/* horizontal downscaling used AND destination width >= 2 */
635 			intrep = 1;
636 		}
637 		else
638 		{
639 			intrep = 0;
640 		}
641 	}
642 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
643 
644 	/* calculate inverse horizontal scaling factor, taking zoom into account */
645 	/* standard scaling formula: */
646 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
647 
648 	/* correct factor to prevent most-right visible 'line' from distorting */
649 	ifactor -= (1 << 2);
650 	hiscalv = ifactor;
651 	/* save for nv_bes_move_overlay() */
652 	si->overlay.h_ifactor = ifactor;
653 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
654 
655 	/* check scaling factor (and modify if needed) to be within scaling limits */
656 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
657 	if (hiscalv < 0x00002000)
658 	{
659 		/* (non-inverse) factor too large, set factor to max. valid value */
660 		hiscalv = 0x00002000;
661 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
662 	}
663 	switch (si->ps.card_arch)
664 	{
665 	case NV04A:
666 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
667 		 * (16bit register with 0.11 format value) */
668 		if (hiscalv > 0x0000ffff)
669 		{
670 			/* (non-inverse) factor too small, set factor to min. valid value */
671 			hiscalv = 0x0000ffff;
672 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
673 		}
674 		break;
675 	case NV30A:
676 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
677 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
678 		{
679 			/* (non-inverse) factor too small, set factor to min. valid value */
680 			hiscalv = (2 << 16);
681 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
682 		}
683 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
684 		 * So let it fall through... */
685 		if (si->ps.card_type != NV31) break;
686 	default:
687 		/* the rest has a downscaling limit of 0.125 */
688 		if (hiscalv > (8 << 16))
689 		{
690 			/* (non-inverse) factor too small, set factor to min. valid value */
691 			hiscalv = (8 << 16);
692 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
693 		}
694 		break;
695 	}
696 	/* AND below is required by hardware */
697 	hiscalv &= 0x001ffffc;
698 
699 
700 	/* do horizontal clipping... */
701 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
702 	/* Note:
703 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
704 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
705 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
706 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
707 	/* Note also:
708 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
709 	hsrcstv = 0;
710 	/* check for destination horizontal clipping at left side */
711 	if (ow->h_start < crtc_hstart)
712 	{
713 		/* check if entire destination picture is clipping left:
714 		 * (2 pixels will be clamped onscreen at least) */
715 		if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
716 		{
717 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
718 			hsrcstv += (ow->width - 2);
719 		}
720 		else
721 		{
722 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
723 			hsrcstv += (crtc_hstart - ow->h_start);
724 		}
725 		LOG(4,("Overlay: clipping left...\n"));
726 
727 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
728 		 * Note that this also already takes care of aligning the value to the BES register! */
729 		hsrcstv *= ifactor;
730 	}
731 	/* take zoom into account */
732 	hsrcstv += ((uint32)my_ov.h_start) << 16;
733 	/* AND below required by hardware */
734 	hsrcstv &= 0x03fffffc;
735 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536));
736 
737 
738 	/*******************************************
739 	 *** setup vertical scaling and clipping ***
740 	 *******************************************/
741 
742 	/* do vertical scaling... */
743 	/* determine interval representation value, taking zoom into account */
744 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
745 	{
746 		/* vertical filtering is ON */
747 		if ((my_ov.height == ow->height) | (ow->height < 2))
748 		{
749 			/* no vertical scaling used, OR destination height < 2 */
750 			intrep = 0;
751 		}
752 		else
753 		{
754 			intrep = 1;
755 		}
756 	}
757 	else
758 	{
759 		/* vertical filtering is OFF */
760 		if ((ow->height < my_ov.height) & (ow->height >= 2))
761 		{
762 			/* vertical downscaling used AND destination height >= 2 */
763 			intrep = 1;
764 		}
765 		else
766 		{
767 			intrep = 0;
768 		}
769 	}
770 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
771 
772 	/* calculate inverse vertical scaling factor, taking zoom into account */
773 	/* standard scaling formula: */
774 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
775 
776 	/* correct factor to prevent lowest visible line from distorting */
777 	ifactor -= (1 << 2);
778 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
779 
780 	/* preserve ifactor for source positioning calculations later on */
781 	viscalv = ifactor;
782 	/* save for nv_bes_move_overlay() */
783 	si->overlay.v_ifactor = ifactor;
784 
785 	/* check scaling factor (and modify if needed) to be within scaling limits */
786 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
787 	if (viscalv < 0x00002000)
788 	{
789 		/* (non-inverse) factor too large, set factor to max. valid value */
790 		viscalv = 0x00002000;
791 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
792 	}
793 	switch (si->ps.card_arch)
794 	{
795 	case NV04A:
796 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
797 		 * (16bit register with 0.11 format value) */
798 		if (viscalv > 0x0000ffff)
799 		{
800 			/* (non-inverse) factor too small, set factor to min. valid value */
801 			viscalv = 0x0000ffff;
802 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
803 		}
804 		break;
805 	case NV30A:
806 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
807 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
808 		{
809 			/* (non-inverse) factor too small, set factor to min. valid value */
810 			viscalv = (2 << 16);
811 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
812 		}
813 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
814 		 * So let it fall through... */
815 		if (si->ps.card_type != NV31) break;
816 	default:
817 		/* the rest has a downscaling limit of 0.125 */
818 		if (viscalv > (8 << 16))
819 		{
820 			/* (non-inverse) factor too small, set factor to min. valid value */
821 			viscalv = (8 << 16);
822 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
823 		}
824 		break;
825 	}
826 	/* AND below is required by hardware */
827 	viscalv &= 0x001ffffc;
828 
829 
830 	/* calculate inputbitmap origin adress */
831 	a1orgv = (uint32)((vuint32 *)ob->buffer);
832 	a1orgv -= (uint32)((vuint32 *)si->framebuffer);
833 
834 	/* do vertical clipping... */
835 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
836 	/* Note:
837 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
838 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
839 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
840 	/* Note also:
841 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
842 
843 	v1srcstv = 0;
844 	/* check for destination vertical clipping at top side */
845 	if (ow->v_start < crtc_vstart)
846 	{
847 		/* check if entire destination picture is clipping at top:
848 		 * (2 pixels will be clamped onscreen at least) */
849 		if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
850 		{
851 			/* increase 'number of clipping pixels' with 'fixed value':
852 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
853 			v1srcstv = (ow->height - 2) * ifactor;
854 			/* on pre-NV10 we need to do clipping in the source
855 			 * bitmap because no seperate clipping registers exist... */
856 			if (si->ps.card_arch < NV10A)
857 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
858 		}
859 		else
860 		{
861 			/* increase 'first contributing pixel' with:
862 			 * number of destination picture clipping pixels * inverse scaling factor */
863 			v1srcstv = (crtc_vstart - ow->v_start) * ifactor;
864 			/* on pre-NV10 we need to do clipping in the source
865 			 * bitmap because no seperate clipping registers exist... */
866 			if (si->ps.card_arch < NV10A)
867 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
868 		}
869 		LOG(4,("Overlay: clipping at top...\n"));
870 	}
871 	/* take zoom into account */
872 	v1srcstv += (((uint32)my_ov.v_start) << 16);
873 	if (si->ps.card_arch < NV10A)
874 	{
875 		a1orgv += (my_ov.v_start * ob->bytes_per_row);
876 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv));
877 	}
878 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536));
879 
880 	/* AND below is probably required by hardware. */
881 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
882 	a1orgv &= 0xfffffff0;
883 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv));
884 
885 
886 	/*****************************
887 	 *** log color keying info ***
888 	 *****************************/
889 
890 	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
891 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
892 	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
893 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
894 
895 
896 	/*****************
897 	 *** log flags ***
898 	 *****************/
899 
900 	LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags));
901 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
902 
903 
904 	/*************************************
905 	 *** sync to BES (Back End Scaler) ***
906 	 *************************************/
907 
908 	/* Done in card hardware:
909 	 * double buffered registers + trigger if programming complete feature. */
910 
911 
912 	/**************************************
913 	 *** actually program the registers ***
914 	 **************************************/
915 
916 	if (si->ps.card_arch < NV10A)
917 	{
918 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
919 		BESW(NV04_OE_STATE, 0x00000000);
920 		/* select buffer 0 as active (b16) */
921 		BESW(NV04_SU_STATE, 0x00000000);
922 		/* unknown (no effect?) */
923 		BESW(NV04_RM_STATE, 0x00000000);
924 		/* setup clipped(!) buffer startadress in RAM */
925 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
926 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
927 		/* (program both buffers to prevent sync distortions) */
928 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
929 		a1orgv += ((hsrcstv >> 16) * 2);
930 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
931 		BESW(NV04_0BUFADR, (a1orgv & ~0x03));
932 		BESW(NV04_1BUFADR, (a1orgv & ~0x03));
933 		/* setup buffer source pitch including slopspace (in bytes).
934 		 * Note:
935 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
936 		/* (program both buffers to prevent sync distortions) */
937 		BESW(NV04_0SRCPTCH, (ob->width * 2));
938 		BESW(NV04_1SRCPTCH, (ob->width * 2));
939 		/* setup output window position */
940 		BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
941 		/* setup output window size */
942 		BESW(NV04_DSTSIZE, (
943 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
944 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
945 			));
946 		/* setup horizontal and vertical scaling */
947 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
948 		/* enable vertical filtering (b0) */
949 		BESW(NV04_CTRL_V, 0x00000001);
950 		/* enable horizontal filtering (no effect?) */
951 		BESW(NV04_CTRL_H, 0x00000111);
952 
953 		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
954 		BESW(NV04_GENCTRL, 0x00000111);
955 		/* select buffer 1 as active (b16) */
956 		BESW(NV04_SU_STATE, 0x00010000);
957 
958 		/**************************
959 		 *** setup color keying ***
960 		 **************************/
961 
962 		/* setup colorkeying */
963 		switch(si->dm.space)
964 		{
965 		case B_RGB15_LITTLE:
966 			BESW(NV04_COLKEY, (
967 				((ow->blue.value & ow->blue.mask) << 0)   |
968 				((ow->green.value & ow->green.mask) << 5) |
969 				((ow->red.value & ow->red.mask) << 10)    |
970 				((ow->alpha.value & ow->alpha.mask) << 15)
971 				));
972 			break;
973 		case B_RGB16_LITTLE:
974 			BESW(NV04_COLKEY, (
975 				((ow->blue.value & ow->blue.mask) << 0)   |
976 				((ow->green.value & ow->green.mask) << 5) |
977 				((ow->red.value & ow->red.mask) << 11)
978 				/* this space has no alpha bits */
979 				));
980 			break;
981 		case B_CMAP8:
982 		case B_RGB32_LITTLE:
983 		default:
984 			BESW(NV04_COLKEY, (
985 				((ow->blue.value & ow->blue.mask) << 0)   |
986 				((ow->green.value & ow->green.mask) << 8) |
987 				((ow->red.value & ow->red.mask) << 16)    |
988 				((ow->alpha.value & ow->alpha.mask) << 24)
989 				));
990 			break;
991 		}
992 	}
993 	else
994 	{
995 		/* >= NV10A */
996 
997 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
998 		BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff));
999 		/* setup buffersize */
1000 		//fixme if needed: width must be even officially...
1001 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
1002 		/* setup source pitch including slopspace (in bytes),
1003 		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
1004 		/* Note:
1005 		 * source pitch granularity = 32 pixels on GeForce cards!! */
1006 		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
1007 		/* setup output window position */
1008 		BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
1009 		/* setup output window size */
1010 		BESW(NV10_0DSTSIZE, (
1011 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
1012 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
1013 			));
1014 		/* setup horizontal scaling */
1015 		BESW(NV10_0ISCALH, (hiscalv << 4));
1016 		/* setup vertical scaling */
1017 		BESW(NV10_0ISCALV, (viscalv << 4));
1018 		/* setup (unclipped!) buffer startadress in RAM */
1019 		BESW(NV10_0BUFADR, a1orgv);
1020 		/* enable BES (b0 = 0) */
1021 		BESW(NV10_GENCTRL, 0x00000000);
1022 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
1023 		/* This also triggers activation of programmed values (double buffered registers feature) */
1024 		BESW(NV10_BUFSEL, 0x00000001);
1025 
1026 		/**************************
1027 		 *** setup color keying ***
1028 		 **************************/
1029 
1030 		/* setup colorkeying */
1031 		switch(si->dm.space)
1032 		{
1033 		case B_RGB15_LITTLE:
1034 			BESW(NV10_COLKEY, (
1035 				((ow->blue.value & ow->blue.mask) << 0)   |
1036 				((ow->green.value & ow->green.mask) << 5) |
1037 				((ow->red.value & ow->red.mask) << 10)    |
1038 				((ow->alpha.value & ow->alpha.mask) << 15)
1039 				));
1040 			break;
1041 		case B_RGB16_LITTLE:
1042 			BESW(NV10_COLKEY, (
1043 				((ow->blue.value & ow->blue.mask) << 0)   |
1044 				((ow->green.value & ow->green.mask) << 5) |
1045 				((ow->red.value & ow->red.mask) << 11)
1046 				/* this space has no alpha bits */
1047 				));
1048 			break;
1049 		case B_CMAP8:
1050 		case B_RGB32_LITTLE:
1051 		default:
1052 			BESW(NV10_COLKEY, (
1053 				((ow->blue.value & ow->blue.mask) << 0)   |
1054 				((ow->green.value & ow->green.mask) << 8) |
1055 				((ow->red.value & ow->red.mask) << 16)    |
1056 				((ow->alpha.value & ow->alpha.mask) << 24)
1057 				));
1058 			break;
1059 		}
1060 	}
1061 
1062 	/* note that overlay is in use (for nv_bes_move_overlay()) */
1063 	si->overlay.active = true;
1064 
1065 	return B_OK;
1066 }
1067 
/* Switch the Back End Scaler (overlay unit) off and mark the overlay as unused.
 *
 * Selects the general control register that matches the card architecture
 * (si->ps.card_arch), writes the 'scaler disabled' value to it and then clears
 * si->overlay.active.
 *
 * Note:
 * The enable bit (b0) has opposite polarity on the two register sets:
 * NV04_GENCTRL is written with b0 = 0 to disable, while NV10_GENCTRL is written
 * with b0 = 1 to disable.
 *
 * Returns: always B_OK (the register writes are not verified). */
1068 status_t nv_release_bes()
1069 {
1070 	if (si->ps.card_arch < NV10A)
1071 	{
1072 		/* pre-NV10 BES control: writing 0 clears b0, which disables the scaler */
1073 		BESW(NV04_GENCTRL, 0x00000000);
1074 	}
1075 	else
1076 	{
1077 		/* NV10 and later BES control: here b0 = 1 is what disables the scaler */
1078 		BESW(NV10_GENCTRL, 0x00000001);
1079 	}
1080 
1081 	/* note that overlay is not in use: nv_bes_move_overlay() tests this flag
	 * and returns immediately while it is false */
1082 	si->overlay.active = false;
1083 
1084 	return B_OK;
1085 }
1086