/* Source: haiku/src/add-ons/accelerants/nvidia/engine/nv_bes.c
 * (revision dc234e798d854e75a197ee9fc85c6a1eede67fef) */
/* Nvidia TNT and GeForce Back End Scaler functions */
/* Written by Rudolf Cornelissen 05/2002-5/2009 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "nv_std.h"
7 
8 typedef struct move_overlay_info move_overlay_info;
9 
10 struct move_overlay_info
11 {
12 	uint32 hcoordv;		/* left and right edges of video output window */
13 	uint32 vcoordv;		/* top and bottom edges of video output window */
14 	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15 	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
16 	uintptr_t a1orgv;		/* alternate source clipping via startadress of source buffer */
17 };
18 
19 static void nv_bes_calc_move_overlay(move_overlay_info *moi);
20 static void nv_bes_program_move_overlay(move_overlay_info moi);
21 
22 /* move the overlay output window in virtualscreens */
23 /* Note:
24  * si->dm.h_display_start and si->dm.v_display_start determine where the new
25  * output window is located! */
nv_bes_move_overlay()26 void nv_bes_move_overlay()
27 {
28 	move_overlay_info moi;
29 
30 	/* abort if overlay is not active */
31 	if (!si->overlay.active) return;
32 
33 	nv_bes_calc_move_overlay(&moi);
34 	nv_bes_program_move_overlay(moi);
35 }
36 
nv_bes_calc_move_overlay(move_overlay_info * moi)37 static void nv_bes_calc_move_overlay(move_overlay_info *moi)
38 {
39 	/* misc used variables */
40 	uint16 temp1, temp2;
41 	/* visible screen window in virtual workspaces */
42 	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
43 
44 	/* do 'overlay follow head' in dualhead modes on dualhead cards */
45 	if (si->ps.secondary_head)
46 	{
47 		switch (si->dm.flags & DUALHEAD_BITS)
48 		{
49 		case DUALHEAD_ON:
50 		case DUALHEAD_SWITCH:
51 			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52 					(si->dm.h_display_start + si->dm.timing.h_display))
53 				nv_bes_to_crtc(si->crtc_switch_mode);
54 			else
55 				nv_bes_to_crtc(!si->crtc_switch_mode);
56 			break;
57 		default:
58 				nv_bes_to_crtc(si->crtc_switch_mode);
59 			break;
60 		}
61 	}
62 
63 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
64 	 * constraints only */
65 	crtc_hstart = si->dm.h_display_start;
66 	/* make dualhead stretch and switch mode work while we're at it.. */
67 	if (si->overlay.crtc)
68 	{
69 		crtc_hstart += si->dm.timing.h_display;
70 	}
71 
72 	/* horizontal end is the first position beyond the displayed range on the CRTC */
73 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
74 	crtc_vstart = si->dm.v_display_start;
75 	/* vertical end is the first position beyond the displayed range on the CRTC */
76 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
77 
78 
79 	/****************************************
80 	 *** setup all edges of output window ***
81 	 ****************************************/
82 
83 	/* setup left and right edges of output window */
84 	moi->hcoordv = 0;
85 	/* left edge coordinate of output window, must be inside desktop */
86 	/* clipping on the left side */
87 	if (si->overlay.ow.h_start < crtc_hstart)
88 	{
89 		temp1 = 0;
90 	}
91 	else
92 	{
93 		/* clipping on the right side */
94 		if (si->overlay.ow.h_start >= (crtc_hend - 1))
95 		{
96 			/* width < 2 is not allowed */
97 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
98 		}
99 		else
100 		/* no clipping here */
101 		{
102 			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
103 		}
104 	}
105 	moi->hcoordv |= temp1 << 16;
106 	/* right edge coordinate of output window, must be inside desktop */
107 	/* width < 2 is not allowed */
108 	if (si->overlay.ow.width < 2)
109 	{
110 		temp2 = (temp1 + 1) & 0x7ff;
111 	}
112 	else
113 	{
114 		/* clipping on the right side */
115 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
116 		{
117 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
118 		}
119 		else
120 		{
121 			/* clipping on the left side */
122 			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
123 			{
124 				/* width < 2 is not allowed */
125 				temp2 = 1;
126 			}
127 			else
128 			/* no clipping here */
129 			{
130 				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
131 			}
132 		}
133 	}
134 	moi->hcoordv |= temp2 << 0;
135 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
136 
137 	/* setup top and bottom edges of output window */
138 	moi->vcoordv = 0;
139 	/* top edge coordinate of output window, must be inside desktop */
140 	/* clipping on the top side */
141 	if (si->overlay.ow.v_start < crtc_vstart)
142 	{
143 		temp1 = 0;
144 	}
145 	else
146 	{
147 		/* clipping on the bottom side */
148 		if (si->overlay.ow.v_start >= (crtc_vend - 1))
149 		{
150 			/* height < 2 is not allowed */
151 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
152 		}
153 		else
154 		/* no clipping here */
155 		{
156 			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
157 		}
158 	}
159 	moi->vcoordv |= temp1 << 16;
160 	/* bottom edge coordinate of output window, must be inside desktop */
161 	/* height < 2 is not allowed */
162 	if (si->overlay.ow.height < 2)
163 	{
164 		temp2 = (temp1 + 1) & 0x7ff;
165 	}
166 	else
167 	{
168 		/* clipping on the bottom side */
169 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
170 		{
171 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
172 		}
173 		else
174 		{
175 			/* clipping on the top side */
176 			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
177 			{
178 				/* height < 2 is not allowed */
179 				temp2 = 1;
180 			}
181 			else
182 			/* no clipping here */
183 			{
184 				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
185 			}
186 		}
187 	}
188 	moi->vcoordv |= temp2 << 0;
189 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
190 
191 
192 	/*********************************
193 	 *** setup horizontal clipping ***
194 	 *********************************/
195 
196 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
197 	/* Note:
198 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
199 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202 	/* Note also:
203 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204 	moi->hsrcstv = 0;
205 	/* check for destination horizontal clipping at left side */
206 	if (si->overlay.ow.h_start < crtc_hstart)
207 	{
208 		/* check if entire destination picture is clipping left:
209 		 * (2 pixels will be clamped onscreen at least) */
210 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
211 		{
212 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213 			moi->hsrcstv += (si->overlay.ow.width - 2);
214 		}
215 		else
216 		{
217 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218 			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
219 		}
220 		LOG(4,("Overlay: clipping left...\n"));
221 
222 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223 		 * Note that this also already takes care of aligning the value to the BES register! */
224 		moi->hsrcstv *= si->overlay.h_ifactor;
225 	}
226 	/* take zoom into account */
227 	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228 	/* AND below required by hardware (> 1024 support confirmed on all cards) */
229 	moi->hsrcstv &= 0x07fffffc;
230 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
231 
232 
233 	/*******************************
234 	 *** setup vertical clipping ***
235 	 *******************************/
236 
237 	/* calculate inputbitmap origin adress */
238 	moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
239 	moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
240 	LOG(4, ("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
241 
242 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
243 	/* Note:
244 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
245 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
246 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
247 	/* Note also:
248 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
249 
250 	moi->v1srcstv = 0;
251 	/* check for destination vertical clipping at top side */
252 	if (si->overlay.ow.v_start < crtc_vstart)
253 	{
254 		/* check if entire destination picture is clipping at top:
255 		 * (2 pixels will be clamped onscreen at least) */
256 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
257 		{
258 			/* increase 'number of clipping pixels' with 'fixed value':
259 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
260 			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
261 			/* on pre-NV10 we need to do clipping in the source
262 			 * bitmap because no seperate clipping registers exist... */
263 			if (si->ps.card_arch < NV10A)
264 				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
265 		}
266 		else
267 		{
268 			/* increase 'first contributing pixel' with:
269 			 * number of destination picture clipping pixels * inverse scaling factor */
270 			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
271 			/* on pre-NV10 we need to do clipping in the source
272 			 * bitmap because no seperate clipping registers exist... */
273 			if (si->ps.card_arch < NV10A)
274 				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
275 		}
276 		LOG(4,("Overlay: clipping at top...\n"));
277 	}
278 	/* take zoom into account */
279 	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
280 	if (si->ps.card_arch < NV10A)
281 	{
282 		moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
283 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
284 	}
285 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
286 
287 	/* AND below is probably required by hardware. */
288 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
289 	moi->a1orgv &= 0xfffffff0;
290 }
291 
/* Write a previously calculated output window placement ('moi', passed by
 * value so it can be adjusted locally) to the BES registers. */
static void nv_bes_program_move_overlay(move_overlay_info moi)
{
	/* output window position: top edge in the upper, left edge in the lower word */
	uint32 dst_ref = (moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16);
	/* output window size: height in the upper, width in the lower word
	 * (both are 'last edge - first edge + 1') */
	uint32 dst_size =
		(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
		((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1);

	/* Syncing to the BES (Back End Scaler) is done in card hardware:
	 * double buffered registers + trigger if programming complete feature. */

	if (si->ps.card_arch >= NV10A)
	{
		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
		/* setup output window position */
		BESW(NV10_0DSTREF, dst_ref);
		/* setup output window size */
		BESW(NV10_0DSTSIZE, dst_size);
		/* We only use buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
		/* This also triggers activation of programmed values (double buffered registers feature) */
		BESW(NV10_BUFSEL, 0x00000001);
		return;
	}

	/* pre-NV10 cards from here on */

	/* unknown, but needed (otherwise high-res distortions and only half the frames) */
	BESW(NV04_OE_STATE, 0x00000000);
	/* select buffer 0 as active (b16) */
	BESW(NV04_SU_STATE, 0x00000000);
	/* unknown (no effect?) */
	BESW(NV04_RM_STATE, 0x00000000);
	/* setup clipped(!) buffer start address in RAM */
	/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
	 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
	/* first include 'pixel precise' left clipping at 2 bytes per pixel...
	 * (top clipping was already included) */
	moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
	/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
	/* (program both buffers to prevent sync distortions) */
	BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
	BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
	/* setup output window position */
	BESW(NV04_DSTREF, dst_ref);
	/* setup output window size */
	BESW(NV04_DSTSIZE, dst_size);
	/* select buffer 1 as active (b16) */
	BESW(NV04_SU_STATE, 0x00010000);
}
351 
nv_bes_to_crtc(bool crtc)352 status_t nv_bes_to_crtc(bool crtc)
353 {
354 	if (si->ps.secondary_head)
355 	{
356 		if (crtc)
357 		{
358 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
359 			/* switch overlay engine to CRTC2 */
360 			NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
361 			NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
362 			si->overlay.crtc = !si->crtc_switch_mode;
363 		}
364 		else
365 		{
366 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
367 			/* switch overlay engine to CRTC1 */
368 			NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
369 			NV_REG32(NV32_FUNCSEL) |= 0x00001000;
370 			si->overlay.crtc = si->crtc_switch_mode;
371 		}
372 		return B_OK;
373 	}
374 	else
375 	{
376 		return B_ERROR;
377 	}
378 }
379 
nv_bes_init()380 status_t nv_bes_init()
381 {
382 	if (si->ps.card_arch < NV10A)
383 	{
384 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385 		BESW(NV04_INTE, 0x00000000);
386 
387 		/* setup saturation to be 'neutral' */
388 		BESW(NV04_SAT, 0x00000000);
389 		/* setup RGB brightness to be 'neutral' */
390 		BESW(NV04_RED_AMP, 0x00000069);
391 		BESW(NV04_GRN_AMP, 0x0000003e);
392 		BESW(NV04_BLU_AMP, 0x00000089);
393 
394 		/* setup fifo for fetching data */
395 		BESW(NV04_FIFOBURL, 0x00000003);
396 		BESW(NV04_FIFOTHRS, 0x00000038);
397 
398 		/* unknown, but needed (registers only have b0 implemented) */
399 		/* (program both buffers to prevent sync distortions) */
400 		BESW(NV04_0OFFSET, 0x00000000);
401 		BESW(NV04_1OFFSET, 0x00000000);
402 	}
403 	else
404 	{
405 		/* >= NV10A */
406 
407 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408 		BESW(NV10_INTE, 0x00000000);
409 		/* shut off GeForce4MX MPEG2 decoder */
410 		BESW(DEC_GENCTRL, 0x00000000);
411 		/* setup BES memory-range mask */
412 		BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413 		/* unknown, but needed */
414 		BESW(NV10_0OFFSET, 0x00000000);
415 
416 		/* setup brightness, contrast and saturation to be 'neutral' */
417 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419 	}
420 
421 	/* make sure the engine is disabled. */
422 	nv_release_bes();
423 
424 	return B_OK;
425 }
426 
nv_configure_bes(const overlay_buffer * ob,const overlay_window * ow,const overlay_view * ov,int offset)427 status_t nv_configure_bes
428 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
429 {
430 	/* yuy2 (4:2:2) colorspace calculations */
431 
432 	/* Note:
433 	 * in BeOS R5.0.3 and DANO:
434 	 * 'ow->offset_xxx' is always 0, so not used;
435 	 * 'ow->width' and 'ow->height' are the output window size: does not change
436 	 * if window is clipping;
437 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
438 	 * window. These values can be negative: this means the window is clipping
439 	 * at the left or the top of the display, respectively. */
440 
441 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
442 	 * displayed on screen. This is used for the 'hardware zoom' function. */
443 
444 	/* output window position and clipping info for source buffer */
445 	move_overlay_info moi;
446 	/* calculated BES register values */
447 	uint32 	hiscalv, viscalv;
448 	/* interval representation, used for scaling calculations */
449 	uint16 intrep;
450 	/* inverse scaling factor, used for source positioning */
451 	uint32 ifactor;
452 	/* copy of overlay view which has checked valid values */
453 	overlay_view my_ov;
454 
455 
456 	/**************************************************************************************
457 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
458 	 **************************************************************************************/
459 	my_ov = *ov;
460 	/* check for valid 'coordinates' */
461 	if (my_ov.width == 0) my_ov.width++;
462 	if (my_ov.height == 0) my_ov.height++;
463 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
464 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
465 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
466 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
467 	if (my_ov.v_start > (ob->height - 1))
468 		my_ov.v_start = (ob->height - 1);
469 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
470 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
471 
472 	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
473 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
474 
475 	/* save for nv_bes_calc_move_overlay() */
476 	si->overlay.ow = *ow;
477 	si->overlay.ob = *ob;
478 	si->overlay.my_ov = my_ov;
479 
480 
481 	/********************************
482 	 *** setup horizontal scaling ***
483 	 ********************************/
484 	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
485 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
486 	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
487 
488 	/* determine interval representation value, taking zoom into account */
489 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
490 	{
491 		/* horizontal filtering is ON */
492 		if ((my_ov.width == ow->width) | (ow->width < 2))
493 		{
494 			/* no horizontal scaling used, OR destination width < 2 */
495 			intrep = 0;
496 		}
497 		else
498 		{
499 			intrep = 1;
500 		}
501 	}
502 	else
503 	{
504 		/* horizontal filtering is OFF */
505 		if ((ow->width < my_ov.width) & (ow->width >= 2))
506 		{
507 			/* horizontal downscaling used AND destination width >= 2 */
508 			intrep = 1;
509 		}
510 		else
511 		{
512 			intrep = 0;
513 		}
514 	}
515 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
516 
517 	/* calculate inverse horizontal scaling factor, taking zoom into account */
518 	/* standard scaling formula: */
519 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
520 
521 	/* correct factor to prevent most-right visible 'line' from distorting */
522 	ifactor -= (1 << 2);
523 	hiscalv = ifactor;
524 	/* save for nv_bes_calc_move_overlay() */
525 	si->overlay.h_ifactor = ifactor;
526 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
527 
528 	/* check scaling factor (and modify if needed) to be within scaling limits */
529 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
530 	if (hiscalv < 0x00002000)
531 	{
532 		/* (non-inverse) factor too large, set factor to max. valid value */
533 		hiscalv = 0x00002000;
534 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
535 	}
536 	switch (si->ps.card_arch)
537 	{
538 	case NV04A:
539 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
540 		 * (16bit register with 0.11 format value) */
541 		if (hiscalv > 0x0000ffff)
542 		{
543 			/* (non-inverse) factor too small, set factor to min. valid value */
544 			hiscalv = 0x0000ffff;
545 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
546 		}
547 		break;
548 	case NV30A:
549 	case NV40A:
550 		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
551 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
552 		{
553 			/* (non-inverse) factor too small, set factor to min. valid value */
554 			hiscalv = (2 << 16);
555 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
556 		}
557 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
558 		 * So let it fall through... */
559 		if (si->ps.card_type != NV31) break;
560 	default:
561 		/* the rest has a downscaling limit of 0.125 */
562 		if (hiscalv > (8 << 16))
563 		{
564 			/* (non-inverse) factor too small, set factor to min. valid value */
565 			hiscalv = (8 << 16);
566 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
567 		}
568 		break;
569 	}
570 	/* AND below is required by hardware */
571 	hiscalv &= 0x001ffffc;
572 
573 
574 	/******************************
575 	 *** setup vertical scaling ***
576 	 ******************************/
577 
578 	/* determine interval representation value, taking zoom into account */
579 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
580 	{
581 		/* vertical filtering is ON */
582 		if ((my_ov.height == ow->height) | (ow->height < 2))
583 		{
584 			/* no vertical scaling used, OR destination height < 2 */
585 			intrep = 0;
586 		}
587 		else
588 		{
589 			intrep = 1;
590 		}
591 	}
592 	else
593 	{
594 		/* vertical filtering is OFF */
595 		if ((ow->height < my_ov.height) & (ow->height >= 2))
596 		{
597 			/* vertical downscaling used AND destination height >= 2 */
598 			intrep = 1;
599 		}
600 		else
601 		{
602 			intrep = 0;
603 		}
604 	}
605 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
606 
607 	/* calculate inverse vertical scaling factor, taking zoom into account */
608 	/* standard scaling formula: */
609 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
610 
611 	/* correct factor to prevent lowest visible line from distorting */
612 	ifactor -= (1 << 2);
613 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
614 
615 	/* preserve ifactor for source positioning calculations later on */
616 	viscalv = ifactor;
617 	/* save for nv_bes_calc_move_overlay() */
618 	si->overlay.v_ifactor = ifactor;
619 
620 	/* check scaling factor (and modify if needed) to be within scaling limits */
621 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
622 	if (viscalv < 0x00002000)
623 	{
624 		/* (non-inverse) factor too large, set factor to max. valid value */
625 		viscalv = 0x00002000;
626 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
627 	}
628 	switch (si->ps.card_arch)
629 	{
630 	case NV04A:
631 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
632 		 * (16bit register with 0.11 format value) */
633 		if (viscalv > 0x0000ffff)
634 		{
635 			/* (non-inverse) factor too small, set factor to min. valid value */
636 			viscalv = 0x0000ffff;
637 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
638 		}
639 		break;
640 	case NV30A:
641 	case NV40A:
642 		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
643 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
644 		{
645 			/* (non-inverse) factor too small, set factor to min. valid value */
646 			viscalv = (2 << 16);
647 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
648 		}
649 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
650 		 * So let it fall through... */
651 		if (si->ps.card_type != NV31) break;
652 	default:
653 		/* the rest has a downscaling limit of 0.125 */
654 		if (viscalv > (8 << 16))
655 		{
656 			/* (non-inverse) factor too small, set factor to min. valid value */
657 			viscalv = (8 << 16);
658 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
659 		}
660 		break;
661 	}
662 	/* AND below is required by hardware */
663 	viscalv &= 0x001ffffc;
664 
665 
666 	/********************************************************************************
667 	 *** setup all edges of output window, setup horizontal and vertical clipping ***
668 	 ********************************************************************************/
669 	nv_bes_calc_move_overlay(&moi);
670 
671 
672 	/*****************************
673 	 *** log color keying info ***
674 	 *****************************/
675 
676 	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
677 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
678 	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
679 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
680 
681 
682 	/*****************
683 	 *** log flags ***
684 	 *****************/
685 
686 	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
687 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
688 
689 
690 	/*************************************
691 	 *** sync to BES (Back End Scaler) ***
692 	 *************************************/
693 
694 	/* Done in card hardware:
695 	 * double buffered registers + trigger if programming complete feature. */
696 
697 
698 	/**************************************
699 	 *** actually program the registers ***
700 	 **************************************/
701 
702 	if (si->ps.card_arch < NV10A)
703 	{
704 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
705 		BESW(NV04_OE_STATE, 0x00000000);
706 		/* select buffer 0 as active (b16) */
707 		BESW(NV04_SU_STATE, 0x00000000);
708 		/* unknown (no effect?) */
709 		BESW(NV04_RM_STATE, 0x00000000);
710 		/* setup clipped(!) buffer startadress in RAM */
711 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
712 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
713 		/* (program both buffers to prevent sync distortions) */
714 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
715 		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
716 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
717 		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
718 		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
719 		/* setup buffer source pitch including slopspace (in bytes).
720 		 * Note:
721 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
722 		/* (program both buffers to prevent sync distortions) */
723 		BESW(NV04_0SRCPTCH, (ob->width * 2));
724 		BESW(NV04_1SRCPTCH, (ob->width * 2));
725 		/* setup output window position */
726 		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
727 		/* setup output window size */
728 		BESW(NV04_DSTSIZE, (
729 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
730 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
731 			));
732 		/* setup horizontal and vertical scaling */
733 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
734 		/* enable vertical filtering (b0) */
735 		BESW(NV04_CTRL_V, 0x00000001);
736 		/* enable horizontal filtering (no effect?) */
737 		BESW(NV04_CTRL_H, 0x00000111);
738 		/* enable BES (b0), set colorkeying (b4), format yuy2 (b8: 0 = ccir) */
739 		if (ow->flags & B_OVERLAY_COLOR_KEY)
740 			BESW(NV04_GENCTRL, 0x00000111);
741 		else
742 			BESW(NV04_GENCTRL, 0x00000101);
743 		/* select buffer 1 as active (b16) */
744 		BESW(NV04_SU_STATE, 0x00010000);
745 
746 		/**************************
747 		 *** setup color keying ***
748 		 **************************/
749 
750 		/* setup colorkeying */
751 		switch(si->dm.space)
752 		{
753 		case B_RGB15_LITTLE:
754 			BESW(NV04_COLKEY, (
755 				((ow->blue.value & ow->blue.mask) << 0)   |
756 				((ow->green.value & ow->green.mask) << 5) |
757 				((ow->red.value & ow->red.mask) << 10)    |
758 				((ow->alpha.value & ow->alpha.mask) << 15)
759 				));
760 			break;
761 		case B_RGB16_LITTLE:
762 			BESW(NV04_COLKEY, (
763 				((ow->blue.value & ow->blue.mask) << 0)   |
764 				((ow->green.value & ow->green.mask) << 5) |
765 				((ow->red.value & ow->red.mask) << 11)
766 				/* this space has no alpha bits */
767 				));
768 			break;
769 		case B_CMAP8:
770 		case B_RGB32_LITTLE:
771 		default:
772 			BESW(NV04_COLKEY, (
773 				((ow->blue.value & ow->blue.mask) << 0)   |
774 				((ow->green.value & ow->green.mask) << 8) |
775 				((ow->red.value & ow->red.mask) << 16)    |
776 				((ow->alpha.value & ow->alpha.mask) << 24)
777 				));
778 			break;
779 		}
780 	}
781 	else
782 	{
783 		/* >= NV10A */
784 
785 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
786 		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
787 		/* setup buffersize */
788 		//fixme if needed: width must be even officially...
789 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
790 		/* setup source pitch including slopspace (in bytes),
791 		 * b16: select YUY2 (0 = YV12), b20: set colorkeying, b24: no iturbt_709 (do iturbt_601) */
792 		/* Note:
793 		 * source pitch granularity = 32 pixels on GeForce cards!! */
794 		if (ow->flags & B_OVERLAY_COLOR_KEY)
795 			BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
796 		else
797 			BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (0 << 20) | (0 << 24)));
798 		/* setup output window position */
799 		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
800 		/* setup output window size */
801 		BESW(NV10_0DSTSIZE, (
802 			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
803 			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
804 			));
805 		/* setup horizontal scaling */
806 		BESW(NV10_0ISCALH, (hiscalv << 4));
807 		/* setup vertical scaling */
808 		BESW(NV10_0ISCALV, (viscalv << 4));
809 		/* setup (unclipped!) buffer startadress in RAM */
810 		BESW(NV10_0BUFADR, moi.a1orgv);
811 		/* enable BES (b0 = 0) */
812 		BESW(NV10_GENCTRL, 0x00000000);
813 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
814 		/* This also triggers activation of programmed values (double buffered registers feature) */
815 		BESW(NV10_BUFSEL, 0x00000001);
816 
817 		/**************************
818 		 *** setup color keying ***
819 		 **************************/
820 
821 		/* setup colorkeying */
822 		switch(si->dm.space)
823 		{
824 		case B_RGB15_LITTLE:
825 			BESW(NV10_COLKEY, (
826 				((ow->blue.value & ow->blue.mask) << 0)   |
827 				((ow->green.value & ow->green.mask) << 5) |
828 				((ow->red.value & ow->red.mask) << 10)    |
829 				((ow->alpha.value & ow->alpha.mask) << 15)
830 				));
831 			break;
832 		case B_RGB16_LITTLE:
833 			BESW(NV10_COLKEY, (
834 				((ow->blue.value & ow->blue.mask) << 0)   |
835 				((ow->green.value & ow->green.mask) << 5) |
836 				((ow->red.value & ow->red.mask) << 11)
837 				/* this space has no alpha bits */
838 				));
839 			break;
840 		case B_CMAP8:
841 		case B_RGB32_LITTLE:
842 		default:
843 			BESW(NV10_COLKEY, (
844 				((ow->blue.value & ow->blue.mask) << 0)   |
845 				((ow->green.value & ow->green.mask) << 8) |
846 				((ow->red.value & ow->red.mask) << 16)    |
847 				((ow->alpha.value & ow->alpha.mask) << 24)
848 				));
849 			break;
850 		}
851 	}
852 
853 	/* note that overlay is in use (for nv_bes_move_overlay()) */
854 	si->overlay.active = true;
855 
856 	return B_OK;
857 }
858 
nv_release_bes()859 status_t nv_release_bes()
860 {
861 	if (si->ps.card_arch < NV10A)
862 	{
863 		/* setup BES control: disable scaler (b0 = 0) */
864 		BESW(NV04_GENCTRL, 0x00000000);
865 	}
866 	else
867 	{
868 		/* setup BES control: disable scaler (b0 = 1) */
869 		BESW(NV10_GENCTRL, 0x00000001);
870 	}
871 
872 	/* note that overlay is not in use (for nv_bes_move_overlay()) */
873 	si->overlay.active = false;
874 
875 	return B_OK;
876 }
877