xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_bes.c (revision 5a84c3b178b092b0c4dc61f1e946639c2783d80a)
1 /* Nvidia TNT and GeForce Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-2/2004 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "nv_std.h"
7 
8 //fixme: implement: (used for virtual screens!)
9 //void move_overlay(uint16 hdisp_start, uint16 vdisp_start);
10 
11 status_t nv_bes_to_crtc(uint8 crtc)
12 {
13 	if (si->ps.secondary_head)
14 	{
15 		if (crtc)
16 		{
17 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
18 			/* switch overlay engine to CRTC2 */
19 			NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
20 			NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
21 			si->overlay.crtc = 1;
22 		}
23 		else
24 		{
25 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
26 			/* switch overlay engine to CRTC1 */
27 			NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
28 			NV_REG32(NV32_FUNCSEL) |= 0x00001000;
29 			si->overlay.crtc = 0;
30 		}
31 		return B_OK;
32 	}
33 	else
34 	{
35 		return B_ERROR;
36 	}
37 }
38 
39 status_t nv_bes_init()
40 {
41 	if (si->ps.card_arch < NV10A)
42 	{
43 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
44 		BESW(NV04_INTE, 0x00000000);
45 
46 		/* setup saturation to be 'neutral' */
47 		BESW(NV04_SAT, 0x00000000);
48 		/* setup RGB brightness to be 'neutral' */
49 		BESW(NV04_RED_AMP, 0x00000069);
50 		BESW(NV04_GRN_AMP, 0x0000003e);
51 		BESW(NV04_BLU_AMP, 0x00000089);
52 
53 		/* setup fifo for fetching data */
54 		BESW(NV04_FIFOBURL, 0x00000003);
55 		BESW(NV04_FIFOTHRS, 0x00000038);
56 
57 		/* unknown, but needed (registers only have b0 implemented) */
58 		/* (program both buffers to prevent sync distortions) */
59 		BESW(NV04_0OFFSET, 0x00000000);
60 		BESW(NV04_1OFFSET, 0x00000000);
61 	}
62 	else
63 	{
64 		/* >= NV10A */
65 
66 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
67 		BESW(NV10_INTE, 0x00000000);
68 		/* shut off GeForce4MX MPEG2 decoder */
69 		BESW(DEC_GENCTRL, 0x00000000);
70 		/* setup BES memory-range mask */
71 		BESW(NV10_0MEMMASK, ((si->ps.memory_size << 20) - 1));
72 		/* unknown, but needed */
73 		BESW(NV10_0OFFSET, 0x00000000);
74 
75 		/* setup brightness, contrast and saturation to be 'neutral' */
76 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
77 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
78 	}
79 
80 	return B_OK;
81 }
82 
83 status_t nv_configure_bes
84 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
85 {
86 	/* yuy2 (4:2:2) colorspace calculations */
87 
88 	/* Note:
89 	 * in BeOS R5.0.3 and DANO:
90 	 * 'ow->offset_xxx' is always 0, so not used;
91 	 * 'ow->width' and 'ow->height' are the output window size: does not change
92 	 * if window is clipping;
93 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
94 	 * window. These values can be negative: this means the window is clipping
95 	 * at the left or the top of the display, respectively. */
96 
97 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
98 	 * displayed on screen. This is used for the 'hardware zoom' function. */
99 
100 	/* calculated BES register values */
101 	uint32 	hcoordv, vcoordv, hiscalv, hsrcstv,	viscalv, a1orgv, v1srcstv;
102 	/* misc used variables */
103 	uint16 temp1, temp2;
104 	/* interval representation, used for scaling calculations */
105 	uint16 intrep, crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
106 	/* inverse scaling factor, used for source positioning */
107 	uint32 ifactor;
108 	/* copy of overlay view which has checked valid values */
109 	overlay_view my_ov;
110 
111 
112 	/**************************************************************************************
113 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
114 	 **************************************************************************************/
115 	my_ov = *ov;
116 	/* check for valid 'coordinates' */
117 	if (my_ov.width == 0) my_ov.width++;
118 	if (my_ov.height == 0) my_ov.height++;
119 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
120 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
121 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
122 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
123 	if (my_ov.v_start > (ob->height - 1))
124 		my_ov.v_start = (ob->height - 1);
125 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
126 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
127 
128 	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
129 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
130 
131 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
132 	 * constraints only */
133 	crtc_hstart = si->dm.h_display_start;
134 	/* make dualhead stretch and switch mode work while we're at it.. */
135 	if (si->overlay.crtc)
136 	{
137 		crtc_hstart += si->dm.timing.h_display;
138 	}
139 
140 	/* horizontal end is the first position beyond the displayed range on the CRTC */
141 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
142 	crtc_vstart = si->dm.v_display_start;
143 	/* vertical end is the first position beyond the displayed range on the CRTC */
144 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
145 
146 
147 	/****************************************
148 	 *** setup all edges of output window ***
149 	 ****************************************/
150 
151 	/* setup left and right edges of output window */
152 	hcoordv = 0;
153 	/* left edge coordinate of output window, must be inside desktop */
154 	/* clipping on the left side */
155 	if (ow->h_start < crtc_hstart)
156 	{
157 		temp1 = 0;
158 	}
159 	else
160 	{
161 		/* clipping on the right side */
162 		if (ow->h_start >= (crtc_hend - 1))
163 		{
164 			/* width < 2 is not allowed */
165 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
166 		}
167 		else
168 		/* no clipping here */
169 		{
170 			temp1 = (ow->h_start - crtc_hstart) & 0x7ff;
171 		}
172 	}
173 	hcoordv |= temp1 << 16;
174 	/* right edge coordinate of output window, must be inside desktop */
175 	/* width < 2 is not allowed */
176 	if (ow->width < 2)
177 	{
178 		temp2 = (temp1 + 1) & 0x7ff;
179 	}
180 	else
181 	{
182 		/* clipping on the right side */
183 		if ((ow->h_start + ow->width - 1) > (crtc_hend - 1))
184 		{
185 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
186 		}
187 		else
188 		{
189 			/* clipping on the left side */
190 			if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
191 			{
192 				/* width < 2 is not allowed */
193 				temp2 = 1;
194 			}
195 			else
196 			/* no clipping here */
197 			{
198 				temp2 = ((uint16)(ow->h_start + ow->width - crtc_hstart - 1)) & 0x7ff;
199 			}
200 		}
201 	}
202 	hcoordv |= temp2 << 0;
203 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
204 
205 	/* setup top and bottom edges of output window */
206 	vcoordv = 0;
207 	/* top edge coordinate of output window, must be inside desktop */
208 	/* clipping on the top side */
209 	if (ow->v_start < crtc_vstart)
210 	{
211 		temp1 = 0;
212 	}
213 	else
214 	{
215 		/* clipping on the bottom side */
216 		if (ow->v_start >= (crtc_vend - 1))
217 		{
218 			/* height < 2 is not allowed */
219 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
220 		}
221 		else
222 		/* no clipping here */
223 		{
224 			temp1 = (ow->v_start - crtc_vstart) & 0x7ff;
225 		}
226 	}
227 	vcoordv |= temp1 << 16;
228 	/* bottom edge coordinate of output window, must be inside desktop */
229 	/* height < 2 is not allowed */
230 	if (ow->height < 2)
231 	{
232 		temp2 = (temp1 + 1) & 0x7ff;
233 	}
234 	else
235 	{
236 		/* clipping on the bottom side */
237 		if ((ow->v_start + ow->height - 1) > (crtc_vend - 1))
238 		{
239 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
240 		}
241 		else
242 		{
243 			/* clipping on the top side */
244 			if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
245 			{
246 				/* height < 2 is not allowed */
247 				temp2 = 1;
248 			}
249 			else
250 			/* no clipping here */
251 			{
252 				temp2 = ((uint16)(ow->v_start + ow->height - crtc_vstart - 1)) & 0x7ff;
253 			}
254 		}
255 	}
256 	vcoordv |= temp2 << 0;
257 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
258 
259 
260 	/*********************************************
261 	 *** setup horizontal scaling and clipping ***
262 	 *********************************************/
263 
264 	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
265 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
266 	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
267 
268 	/* do horizontal scaling... */
269 	/* determine interval representation value, taking zoom into account */
270 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
271 	{
272 		/* horizontal filtering is ON */
273 		if ((my_ov.width == ow->width) | (ow->width < 2))
274 		{
275 			/* no horizontal scaling used, OR destination width < 2 */
276 			intrep = 0;
277 		}
278 		else
279 		{
280 			intrep = 1;
281 		}
282 	}
283 	else
284 	{
285 		/* horizontal filtering is OFF */
286 		if ((ow->width < my_ov.width) & (ow->width >= 2))
287 		{
288 			/* horizontal downscaling used AND destination width >= 2 */
289 			intrep = 1;
290 		}
291 		else
292 		{
293 			intrep = 0;
294 		}
295 	}
296 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
297 
298 	/* calculate inverse horizontal scaling factor, taking zoom into account */
299 	/* standard scaling formula: */
300 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
301 
302 	/* correct factor to prevent most-right visible 'line' from distorting */
303 	ifactor -= (1 << 2);
304 	hiscalv = ifactor;
305 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
306 
307 	/* check scaling factor (and modify if needed) to be within scaling limits */
308 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
309 	if (hiscalv < 0x00002000)
310 	{
311 		/* (non-inverse) factor too large, set factor to max. valid value */
312 		hiscalv = 0x00002000;
313 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
314 	}
315 	switch (si->ps.card_arch)
316 	{
317 	case NV04A:
318 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
319 		 * (16bit register with 0.11 format value) */
320 		if (hiscalv > 0x0000ffff)
321 		{
322 			/* (non-inverse) factor too small, set factor to min. valid value */
323 			hiscalv = 0x0000ffff;
324 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
325 		}
326 		break;
327 	case NV30A:
328 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
329 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
330 		{
331 			/* (non-inverse) factor too small, set factor to min. valid value */
332 			hiscalv = (2 << 16);
333 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
334 		}
335 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
336 		 * So let it fall through... */
337 		if (si->ps.card_type != NV31) break;
338 	default:
339 		/* the rest has a downscaling limit of 0.125 */
340 		if (hiscalv > (8 << 16))
341 		{
342 			/* (non-inverse) factor too small, set factor to min. valid value */
343 			hiscalv = (8 << 16);
344 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
345 		}
346 		break;
347 	}
348 	/* AND below is required by hardware */
349 	hiscalv &= 0x001ffffc;
350 
351 
352 	/* do horizontal clipping... */
353 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
354 	/* Note:
355 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
356 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
357 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
358 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
359 	/* Note also:
360 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
361 	hsrcstv = 0;
362 	/* check for destination horizontal clipping at left side */
363 	if (ow->h_start < crtc_hstart)
364 	{
365 		/* check if entire destination picture is clipping left:
366 		 * (2 pixels will be clamped onscreen at least) */
367 		if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
368 		{
369 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
370 			hsrcstv += (ow->width - 2);
371 		}
372 		else
373 		{
374 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
375 			hsrcstv += (crtc_hstart - ow->h_start);
376 		}
377 		LOG(4,("Overlay: clipping left...\n"));
378 
379 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
380 		 * Note that this also already takes care of aligning the value to the BES register! */
381 		hsrcstv *= ifactor;
382 	}
383 	/* take zoom into account */
384 	hsrcstv += ((uint32)my_ov.h_start) << 16;
385 	/* AND below required by hardware */
386 	hsrcstv &= 0x03fffffc;
387 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536));
388 
389 
390 	/*******************************************
391 	 *** setup vertical scaling and clipping ***
392 	 *******************************************/
393 
394 	/* do vertical scaling... */
395 	/* determine interval representation value, taking zoom into account */
396 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
397 	{
398 		/* vertical filtering is ON */
399 		if ((my_ov.height == ow->height) | (ow->height < 2))
400 		{
401 			/* no vertical scaling used, OR destination height < 2 */
402 			intrep = 0;
403 		}
404 		else
405 		{
406 			intrep = 1;
407 		}
408 	}
409 	else
410 	{
411 		/* vertical filtering is OFF */
412 		if ((ow->height < my_ov.height) & (ow->height >= 2))
413 		{
414 			/* vertical downscaling used AND destination height >= 2 */
415 			intrep = 1;
416 		}
417 		else
418 		{
419 			intrep = 0;
420 		}
421 	}
422 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
423 
424 	/* calculate inverse vertical scaling factor, taking zoom into account */
425 	/* standard scaling formula: */
426 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
427 
428 	/* correct factor to prevent lowest visible line from distorting */
429 	ifactor -= (1 << 2);
430 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
431 
432 	/* preserve ifactor for source positioning calculations later on */
433 	viscalv = ifactor;
434 
435 	/* check scaling factor (and modify if needed) to be within scaling limits */
436 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
437 	if (viscalv < 0x00002000)
438 	{
439 		/* (non-inverse) factor too large, set factor to max. valid value */
440 		viscalv = 0x00002000;
441 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
442 	}
443 	switch (si->ps.card_arch)
444 	{
445 	case NV04A:
446 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
447 		 * (16bit register with 0.11 format value) */
448 		if (viscalv > 0x0000ffff)
449 		{
450 			/* (non-inverse) factor too small, set factor to min. valid value */
451 			viscalv = 0x0000ffff;
452 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
453 		}
454 		break;
455 	case NV30A:
456 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
457 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
458 		{
459 			/* (non-inverse) factor too small, set factor to min. valid value */
460 			viscalv = (2 << 16);
461 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
462 		}
463 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
464 		 * So let it fall through... */
465 		if (si->ps.card_type != NV31) break;
466 	default:
467 		/* the rest has a downscaling limit of 0.125 */
468 		if (viscalv > (8 << 16))
469 		{
470 			/* (non-inverse) factor too small, set factor to min. valid value */
471 			viscalv = (8 << 16);
472 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
473 		}
474 		break;
475 	}
476 	/* AND below is required by hardware */
477 	viscalv &= 0x001ffffc;
478 
479 
480 	/* calculate inputbitmap origin adress */
481 	a1orgv = (uint32)((vuint32 *)ob->buffer);
482 	a1orgv -= (uint32)((vuint32 *)si->framebuffer);
483 
484 	/* do vertical clipping... */
485 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
486 	/* Note:
487 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
488 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
489 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
490 	/* Note also:
491 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
492 
493 	v1srcstv = 0;
494 	/* check for destination vertical clipping at top side */
495 	if (ow->v_start < crtc_vstart)
496 	{
497 		/* check if entire destination picture is clipping at top:
498 		 * (2 pixels will be clamped onscreen at least) */
499 		if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
500 		{
501 			/* increase 'number of clipping pixels' with 'fixed value':
502 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
503 			v1srcstv = (ow->height - 2) * ifactor;
504 			/* on pre-NV10 we need to do clipping in the source
505 			 * bitmap because no seperate clipping registers exist... */
506 			if (si->ps.card_arch < NV10A)
507 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
508 		}
509 		else
510 		{
511 			/* increase 'first contributing pixel' with:
512 			 * number of destination picture clipping pixels * inverse scaling factor */
513 			v1srcstv = (crtc_vstart - ow->v_start) * ifactor;
514 			/* on pre-NV10 we need to do clipping in the source
515 			 * bitmap because no seperate clipping registers exist... */
516 			if (si->ps.card_arch < NV10A)
517 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
518 		}
519 		LOG(4,("Overlay: clipping at top...\n"));
520 	}
521 	/* take zoom into account */
522 	v1srcstv += (((uint32)my_ov.v_start) << 16);
523 	if (si->ps.card_arch < NV10A)
524 	{
525 		a1orgv += (my_ov.v_start * ob->bytes_per_row);
526 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv));
527 	}
528 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536));
529 
530 	/* AND below is probably required by hardware. */
531 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
532 	a1orgv &= 0xfffffff0;
533 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv));
534 
535 
536 	/*****************************
537 	 *** log color keying info ***
538 	 *****************************/
539 
540 	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
541 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
542 	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
543 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
544 
545 
546 	/*****************
547 	 *** log flags ***
548 	 *****************/
549 
550 	LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags));
551 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
552 
553 
554 	/*************************************
555 	 *** sync to BES (Back End Scaler) ***
556 	 *************************************/
557 
558 	/* Done in card hardware:
559 	 * double buffered registers + trigger if programming complete feature. */
560 
561 
562 	/**************************************
563 	 *** actually program the registers ***
564 	 **************************************/
565 
566 	if (si->ps.card_arch < NV10A)
567 	{
568 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
569 		BESW(NV04_OE_STATE, 0x00000000);
570 		/* select buffer 0 as active (b16) */
571 		BESW(NV04_SU_STATE, 0x00000000);
572 		/* unknown (no effect?) */
573 		BESW(NV04_RM_STATE, 0x00000000);
574 		/* setup clipped(!) buffer startadress in RAM */
575 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
576 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
577 		/* (program both buffers to prevent sync distortions) */
578 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
579 		a1orgv += ((hsrcstv >> 16) * 2);
580 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
581 		BESW(NV04_0BUFADR, (a1orgv & ~0x03));
582 		BESW(NV04_1BUFADR, (a1orgv & ~0x03));
583 		/* setup buffer source pitch including slopspace (in bytes).
584 		 * Note:
585 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
586 		/* (program both buffers to prevent sync distortions) */
587 		BESW(NV04_0SRCPTCH, (ob->width * 2));
588 		BESW(NV04_1SRCPTCH, (ob->width * 2));
589 		/* setup output window position */
590 		BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
591 		/* setup output window size */
592 		BESW(NV04_DSTSIZE, (
593 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
594 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
595 			));
596 		/* setup horizontal and vertical scaling */
597 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
598 		/* enable vertical filtering (b0) */
599 		BESW(NV04_CTRL_V, 0x00000001);
600 		/* enable horizontal filtering (no effect?) */
601 		BESW(NV04_CTRL_H, 0x00000111);
602 
603 		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
604 		BESW(NV04_GENCTRL, 0x00000111);
605 		/* select buffer 1 as active (b16) */
606 		BESW(NV04_SU_STATE, 0x00010000);
607 
608 		/**************************
609 		 *** setup color keying ***
610 		 **************************/
611 
612 		/* setup colorkeying */
613 		switch(si->dm.space)
614 		{
615 		case B_RGB15_LITTLE:
616 			BESW(NV04_COLKEY, (
617 				((ow->blue.value & ow->blue.mask) << 0)   |
618 				((ow->green.value & ow->green.mask) << 5) |
619 				((ow->red.value & ow->red.mask) << 10)    |
620 				((ow->alpha.value & ow->alpha.mask) << 15)
621 				));
622 			break;
623 		case B_RGB16_LITTLE:
624 			BESW(NV04_COLKEY, (
625 				((ow->blue.value & ow->blue.mask) << 0)   |
626 				((ow->green.value & ow->green.mask) << 5) |
627 				((ow->red.value & ow->red.mask) << 11)
628 				/* this space has no alpha bits */
629 				));
630 			break;
631 		case B_CMAP8:
632 		case B_RGB32_LITTLE:
633 		default:
634 			BESW(NV04_COLKEY, (
635 				((ow->blue.value & ow->blue.mask) << 0)   |
636 				((ow->green.value & ow->green.mask) << 8) |
637 				((ow->red.value & ow->red.mask) << 16)    |
638 				((ow->alpha.value & ow->alpha.mask) << 24)
639 				));
640 			break;
641 		}
642 	}
643 	else
644 	{
645 		/* >= NV10A */
646 
647 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
648 		BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff));
649 		/* setup buffersize */
650 		//fixme if needed: width must be even officially...
651 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
652 		/* setup source pitch including slopspace (in bytes),
653 		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
654 		/* Note:
655 		 * source pitch granularity = 32 pixels on GeForce cards!! */
656 		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
657 		/* setup output window position */
658 		BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
659 		/* setup output window size */
660 		BESW(NV10_0DSTSIZE, (
661 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
662 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
663 			));
664 		/* setup horizontal scaling */
665 		BESW(NV10_0ISCALH, (hiscalv << 4));
666 		/* setup vertical scaling */
667 		BESW(NV10_0ISCALV, (viscalv << 4));
668 		/* setup (unclipped!) buffer startadress in RAM */
669 		BESW(NV10_0BUFADR, a1orgv);
670 		/* enable BES (b0 = 0) */
671 		BESW(NV10_GENCTRL, 0x00000000);
672 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
673 		/* This also triggers activation of programmed values (double buffered registers feature) */
674 		BESW(NV10_BUFSEL, 0x00000001);
675 
676 		/**************************
677 		 *** setup color keying ***
678 		 **************************/
679 
680 		/* setup colorkeying */
681 		switch(si->dm.space)
682 		{
683 		case B_RGB15_LITTLE:
684 			BESW(NV10_COLKEY, (
685 				((ow->blue.value & ow->blue.mask) << 0)   |
686 				((ow->green.value & ow->green.mask) << 5) |
687 				((ow->red.value & ow->red.mask) << 10)    |
688 				((ow->alpha.value & ow->alpha.mask) << 15)
689 				));
690 			break;
691 		case B_RGB16_LITTLE:
692 			BESW(NV10_COLKEY, (
693 				((ow->blue.value & ow->blue.mask) << 0)   |
694 				((ow->green.value & ow->green.mask) << 5) |
695 				((ow->red.value & ow->red.mask) << 11)
696 				/* this space has no alpha bits */
697 				));
698 			break;
699 		case B_CMAP8:
700 		case B_RGB32_LITTLE:
701 		default:
702 			BESW(NV10_COLKEY, (
703 				((ow->blue.value & ow->blue.mask) << 0)   |
704 				((ow->green.value & ow->green.mask) << 8) |
705 				((ow->red.value & ow->red.mask) << 16)    |
706 				((ow->alpha.value & ow->alpha.mask) << 24)
707 				));
708 			break;
709 		}
710 	}
711 
712 	return B_OK;
713 }
714 
715 status_t nv_release_bes()
716 {
717 	if (si->ps.card_arch < NV10A)
718 	{
719 		/* setup BES control: disable scaler (b0 = 0) */
720 		BESW(NV04_GENCTRL, 0x00000000);
721 	}
722 	else
723 	{
724 		/* setup BES control: disable scaler (b0 = 1) */
725 		BESW(NV10_GENCTRL, 0x00000001);
726 	}
727 
728 	return B_OK;
729 }
730