xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_bes.c (revision e8d5d47c1a14e1c78484406cf23cb04248774b58)
1 /* Nvidia TNT and GeForce Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-12/2003 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "nv_std.h"
7 
8 //fixme: implement: (used for virtual screens!)
9 //void move_overlay(uint16 hdisp_start, uint16 vdisp_start);
10 
11 status_t nv_bes_init()
12 {
13 	if (si->ps.card_arch < NV10A)
14 	{
15 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
16 		BESW(NV04_INTE, 0x00000000);
17 
18 		/* setup saturation to be 'neutral' */
19 		BESW(NV04_SAT, 0x00000000);
20 		/* setup RGB brightness to be 'neutral' */
21 		BESW(NV04_RED_AMP, 0x00000069);
22 		BESW(NV04_GRN_AMP, 0x0000003e);
23 		BESW(NV04_BLU_AMP, 0x00000089);
24 
25 		/* setup fifo for fetching data */
26 		BESW(NV04_FIFOBURL, 0x00000003);
27 		BESW(NV04_FIFOTHRS, 0x00000038);
28 
29 		/* unknown, but needed (registers only have b0 implemented) */
30 		/* (program both buffers to prevent sync distortions) */
31 		BESW(NV04_0OFFSET, 0x00000000);
32 		BESW(NV04_1OFFSET, 0x00000000);
33 	}
34 	else
35 	{
36 		/* >= NV10A */
37 
38 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
39 		BESW(NV10_INTE, 0x00000000);
40 		/* shut off GeForce4MX MPEG2 decoder */
41 		BESW(DEC_GENCTRL, 0x00000000);
42 		/* setup BES memory-range mask */
43 		BESW(NV10_0MEMMASK, ((si->ps.memory_size << 20) - 1));
44 		/* unknown, but needed */
45 		BESW(NV10_0OFFSET, 0x00000000);
46 
47 		/* setup brightness, contrast and saturation to be 'neutral' */
48 		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
49 		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
50 	}
51 
52 	return B_OK;
53 }
54 
55 status_t nv_configure_bes
56 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
57 {
58 	/* yuy2 (4:2:2) colorspace calculations */
59 
60 	/* Note:
61 	 * in BeOS R5.0.3 and DANO:
62 	 * 'ow->offset_xxx' is always 0, so not used;
63 	 * 'ow->width' and 'ow->height' are the output window size: does not change
64 	 * if window is clipping;
65 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
66 	 * window. These values can be negative: this means the window is clipping
67 	 * at the left or the top of the display, respectively. */
68 
69 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
70 	 * displayed on screen. This is used for the 'hardware zoom' function. */
71 
72 	/* calculated BES register values */
73 	uint32 	hcoordv, vcoordv, hiscalv, hsrcstv,	viscalv, a1orgv, v1srcstv;
74 	/* misc used variables */
75 	uint16 temp1, temp2;
76 	/* interval representation, used for scaling calculations */
77 	uint16 intrep, crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
78 	/* inverse scaling factor, used for source positioning */
79 	uint32 ifactor;
80 	/* copy of overlay view which has checked valid values */
81 	overlay_view my_ov;
82 
83 
84 	/**************************************************************************************
85 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
86 	 **************************************************************************************/
87 	my_ov = *ov;
88 	/* check for valid 'coordinates' */
89 	if (my_ov.width == 0) my_ov.width++;
90 	if (my_ov.height == 0) my_ov.height++;
91 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
92 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
93 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
94 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
95 	if (my_ov.v_start > (ob->height - 1))
96 		my_ov.v_start = (ob->height - 1);
97 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
98 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
99 
100 	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
101 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
102 
103 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
104 	 * constraints only */
105 	crtc_hstart = si->dm.h_display_start;
106 	/* make dualhead switch mode with TVout enabled work while we're at it.. */
107 	if (si->switched_crtcs)
108 	{
109 		crtc_hstart += si->dm.timing.h_display;
110 	}
111 	/* horizontal end is the first position beyond the displayed range on the CRTC */
112 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
113 	crtc_vstart = si->dm.v_display_start;
114 	/* vertical end is the first position beyond the displayed range on the CRTC */
115 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
116 
117 
118 	/****************************************
119 	 *** setup all edges of output window ***
120 	 ****************************************/
121 
122 	/* setup left and right edges of output window */
123 	hcoordv = 0;
124 	/* left edge coordinate of output window, must be inside desktop */
125 	/* clipping on the left side */
126 	if (ow->h_start < crtc_hstart)
127 	{
128 		temp1 = 0;
129 	}
130 	else
131 	{
132 		/* clipping on the right side */
133 		if (ow->h_start >= (crtc_hend - 1))
134 		{
135 			/* width < 2 is not allowed */
136 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
137 		}
138 		else
139 		/* no clipping here */
140 		{
141 			temp1 = (ow->h_start - crtc_hstart) & 0x7ff;
142 		}
143 	}
144 	hcoordv |= temp1 << 16;
145 	/* right edge coordinate of output window, must be inside desktop */
146 	/* width < 2 is not allowed */
147 	if (ow->width < 2)
148 	{
149 		temp2 = (temp1 + 1) & 0x7ff;
150 	}
151 	else
152 	{
153 		/* clipping on the right side */
154 		if ((ow->h_start + ow->width - 1) > (crtc_hend - 1))
155 		{
156 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
157 		}
158 		else
159 		{
160 			/* clipping on the left side */
161 			if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
162 			{
163 				/* width < 2 is not allowed */
164 				temp2 = 1;
165 			}
166 			else
167 			/* no clipping here */
168 			{
169 				temp2 = ((uint16)(ow->h_start + ow->width - crtc_hstart - 1)) & 0x7ff;
170 			}
171 		}
172 	}
173 	hcoordv |= temp2 << 0;
174 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
175 
176 	/* setup top and bottom edges of output window */
177 	vcoordv = 0;
178 	/* top edge coordinate of output window, must be inside desktop */
179 	/* clipping on the top side */
180 	if (ow->v_start < crtc_vstart)
181 	{
182 		temp1 = 0;
183 	}
184 	else
185 	{
186 		/* clipping on the bottom side */
187 		if (ow->v_start >= (crtc_vend - 1))
188 		{
189 			/* height < 2 is not allowed */
190 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
191 		}
192 		else
193 		/* no clipping here */
194 		{
195 			temp1 = (ow->v_start - crtc_vstart) & 0x7ff;
196 		}
197 	}
198 	vcoordv |= temp1 << 16;
199 	/* bottom edge coordinate of output window, must be inside desktop */
200 	/* height < 2 is not allowed */
201 	if (ow->height < 2)
202 	{
203 		temp2 = (temp1 + 1) & 0x7ff;
204 	}
205 	else
206 	{
207 		/* clipping on the bottom side */
208 		if ((ow->v_start + ow->height - 1) > (crtc_vend - 1))
209 		{
210 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
211 		}
212 		else
213 		{
214 			/* clipping on the top side */
215 			if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
216 			{
217 				/* height < 2 is not allowed */
218 				temp2 = 1;
219 			}
220 			else
221 			/* no clipping here */
222 			{
223 				temp2 = ((uint16)(ow->v_start + ow->height - crtc_vstart - 1)) & 0x7ff;
224 			}
225 		}
226 	}
227 	vcoordv |= temp2 << 0;
228 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
229 
230 
231 	/*********************************************
232 	 *** setup horizontal scaling and clipping ***
233 	 *********************************************/
234 
235 	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
236 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
237 	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
238 
239 	/* do horizontal scaling... */
240 	/* determine interval representation value, taking zoom into account */
241 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
242 	{
243 		/* horizontal filtering is ON */
244 		if ((my_ov.width == ow->width) | (ow->width < 2))
245 		{
246 			/* no horizontal scaling used, OR destination width < 2 */
247 			intrep = 0;
248 		}
249 		else
250 		{
251 			intrep = 1;
252 		}
253 	}
254 	else
255 	{
256 		/* horizontal filtering is OFF */
257 		if ((ow->width < my_ov.width) & (ow->width >= 2))
258 		{
259 			/* horizontal downscaling used AND destination width >= 2 */
260 			intrep = 1;
261 		}
262 		else
263 		{
264 			intrep = 0;
265 		}
266 	}
267 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
268 
269 	/* calculate inverse horizontal scaling factor, taking zoom into account */
270 	/* standard scaling formula: */
271 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
272 
273 	/* correct factor to prevent most-right visible 'line' from distorting */
274 	ifactor -= (1 << 2);
275 	hiscalv = ifactor;
276 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
277 
278 	/* check scaling factor (and modify if needed) to be within scaling limits */
279 	/* (assuming) all cards have a upscaling limit of 8.0 */
280 	if (hiscalv < 0x00002000)
281 	{
282 		/* (non-inverse) factor too large, set factor to max. valid value */
283 		hiscalv = 0x00002000;
284 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
285 	}
286 	switch (si->ps.card_arch)
287 	{
288 	case NV04A:
289 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
290 		 * (16bit register with 0.11 format value) */
291 		if (hiscalv > 0x0000ffff)
292 		{
293 			/* (non-inverse) factor too small, set factor to min. valid value */
294 			hiscalv = 0x0000ffff;
295 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
296 		}
297 		break;
298 	case NV30A:
299 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
300 		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
301 		{
302 			/* (non-inverse) factor too small, set factor to min. valid value */
303 			hiscalv = (2 << 16);
304 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
305 		}
306 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
307 		 * So let it fall through... */
308 		if (si->ps.card_type != NV31) break;
309 	default:
310 		/* the rest has a downscaling limit of 0.125 */
311 		if (hiscalv > (8 << 16))
312 		{
313 			/* (non-inverse) factor too small, set factor to min. valid value */
314 			hiscalv = (8 << 16);
315 			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
316 		}
317 		break;
318 	}
319 	/* AND below is required by hardware */
320 	hiscalv &= 0x001ffffc;
321 
322 
323 	/* do horizontal clipping... */
324 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
325 	/* Note:
326 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
327 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
328 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
329 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
330 	/* Note also:
331 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
332 	hsrcstv = 0;
333 	/* check for destination horizontal clipping at left side */
334 	if (ow->h_start < crtc_hstart)
335 	{
336 		/* check if entire destination picture is clipping left:
337 		 * (2 pixels will be clamped onscreen at least) */
338 		if ((ow->h_start + ow->width - 1) < (crtc_hstart + 1))
339 		{
340 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
341 			hsrcstv += (ow->width - 2);
342 		}
343 		else
344 		{
345 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
346 			hsrcstv += (crtc_hstart - ow->h_start);
347 		}
348 		LOG(4,("Overlay: clipping left...\n"));
349 
350 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
351 		 * Note that this also already takes care of aligning the value to the BES register! */
352 		hsrcstv *= ifactor;
353 	}
354 	/* take zoom into account */
355 	hsrcstv += ((uint32)my_ov.h_start) << 16;
356 	/* AND below required by hardware */
357 	hsrcstv &= 0x03fffffc;
358 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", hsrcstv / (float)65536));
359 
360 
361 	/*******************************************
362 	 *** setup vertical scaling and clipping ***
363 	 *******************************************/
364 
365 	/* do vertical scaling... */
366 	/* determine interval representation value, taking zoom into account */
367 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
368 	{
369 		/* vertical filtering is ON */
370 		if ((my_ov.height == ow->height) | (ow->height < 2))
371 		{
372 			/* no vertical scaling used, OR destination height < 2 */
373 			intrep = 0;
374 		}
375 		else
376 		{
377 			intrep = 1;
378 		}
379 	}
380 	else
381 	{
382 		/* vertical filtering is OFF */
383 		if ((ow->height < my_ov.height) & (ow->height >= 2))
384 		{
385 			/* vertical downscaling used AND destination height >= 2 */
386 			intrep = 1;
387 		}
388 		else
389 		{
390 			intrep = 0;
391 		}
392 	}
393 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
394 
395 	/* calculate inverse vertical scaling factor, taking zoom into account */
396 	/* standard scaling formula: */
397 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
398 
399 	/* correct factor to prevent lowest visible line from distorting */
400 	ifactor -= (1 << 2);
401 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
402 
403 	/* preserve ifactor for source positioning calculations later on */
404 	viscalv = ifactor;
405 
406 	/* check scaling factor (and modify if needed) to be within scaling limits */
407 	/* (assuming) all cards have a upscaling limit of 8.0 */
408 	if (viscalv < 0x00002000)
409 	{
410 		/* (non-inverse) factor too large, set factor to max. valid value */
411 		viscalv = 0x00002000;
412 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
413 	}
414 	switch (si->ps.card_arch)
415 	{
416 	case NV04A:
417 		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
418 		 * (16bit register with 0.11 format value) */
419 		if (viscalv > 0x0000ffff)
420 		{
421 			/* (non-inverse) factor too small, set factor to min. valid value */
422 			viscalv = 0x0000ffff;
423 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
424 		}
425 		break;
426 	case NV30A:
427 		/* GeForceFX series have a downscaling limit of 0.5 (except NV31!) */
428 		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
429 		{
430 			/* (non-inverse) factor too small, set factor to min. valid value */
431 			viscalv = (2 << 16);
432 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
433 		}
434 		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
435 		 * So let it fall through... */
436 		if (si->ps.card_type != NV31) break;
437 	default:
438 		/* the rest has a downscaling limit of 0.125 */
439 		if (viscalv > (8 << 16))
440 		{
441 			/* (non-inverse) factor too small, set factor to min. valid value */
442 			viscalv = (8 << 16);
443 			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
444 		}
445 		break;
446 	}
447 	/* AND below is required by hardware */
448 	viscalv &= 0x001ffffc;
449 
450 
451 	/* calculate inputbitmap origin adress */
452 	a1orgv = (uint32)((vuint32 *)ob->buffer);
453 	a1orgv -= (uint32)((vuint32 *)si->framebuffer);
454 
455 	/* do vertical clipping... */
456 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
457 	/* Note:
458 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
459 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
460 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
461 	/* Note also:
462 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
463 
464 	v1srcstv = 0;
465 	/* check for destination vertical clipping at top side */
466 	if (ow->v_start < crtc_vstart)
467 	{
468 		/* check if entire destination picture is clipping at top:
469 		 * (2 pixels will be clamped onscreen at least) */
470 		if ((ow->v_start + ow->height - 1) < (crtc_vstart + 1))
471 		{
472 			/* increase 'number of clipping pixels' with 'fixed value':
473 			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
474 			v1srcstv = (ow->height - 2) * ifactor;
475 			/* on pre-NV10 we need to do clipping in the source
476 			 * bitmap because no seperate clipping registers exist... */
477 			if (si->ps.card_arch < NV10A)
478 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
479 		}
480 		else
481 		{
482 			/* increase 'first contributing pixel' with:
483 			 * number of destination picture clipping pixels * inverse scaling factor */
484 			v1srcstv = (crtc_vstart - ow->v_start) * ifactor;
485 			/* on pre-NV10 we need to do clipping in the source
486 			 * bitmap because no seperate clipping registers exist... */
487 			if (si->ps.card_arch < NV10A)
488 				a1orgv += ((v1srcstv >> 16) * ob->bytes_per_row);
489 		}
490 		LOG(4,("Overlay: clipping at top...\n"));
491 	}
492 	/* take zoom into account */
493 	v1srcstv += (((uint32)my_ov.v_start) << 16);
494 	if (si->ps.card_arch < NV10A)
495 	{
496 		a1orgv += (my_ov.v_start * ob->bytes_per_row);
497 		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", a1orgv));
498 	}
499 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", v1srcstv / (float)65536));
500 
501 	/* AND below is probably required by hardware. */
502 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
503 	a1orgv &= 0xfffffff0;
504 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n",a1orgv));
505 
506 
507 	/*****************************
508 	 *** log color keying info ***
509 	 *****************************/
510 
511 	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
512 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
513 	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
514 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
515 
516 
517 	/*****************
518 	 *** log flags ***
519 	 *****************/
520 
521 	LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags));
522 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
523 
524 
525 	/*************************************
526 	 *** sync to BES (Back End Scaler) ***
527 	 *************************************/
528 
529 	/* Done in card hardware:
530 	 * double buffered registers + trigger if programming complete feature. */
531 
532 
533 	/**************************************
534 	 *** actually program the registers ***
535 	 **************************************/
536 
537 	if (si->ps.card_arch < NV10A)
538 	{
539 		/* unknown, but needed (otherwise high-res distortions and only half the frames */
540 		BESW(NV04_OE_STATE, 0x00000000);
541 		/* select buffer 0 as active (b16) */
542 		BESW(NV04_SU_STATE, 0x00000000);
543 		/* unknown (no effect?) */
544 		BESW(NV04_RM_STATE, 0x00000000);
545 		/* setup clipped(!) buffer startadress in RAM */
546 		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
547 		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
548 		/* (program both buffers to prevent sync distortions) */
549 		/* first include 'pixel precise' left clipping... (top clipping was already included) */
550 		a1orgv += ((hsrcstv >> 16) * 2);
551 		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
552 		BESW(NV04_0BUFADR, (a1orgv & ~0x03));
553 		BESW(NV04_1BUFADR, (a1orgv & ~0x03));
554 		/* setup buffer source pitch including slopspace (in bytes).
555 		 * Note:
556 		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
557 		/* (program both buffers to prevent sync distortions) */
558 		BESW(NV04_0SRCPTCH, (ob->width * 2));
559 		BESW(NV04_1SRCPTCH, (ob->width * 2));
560 		/* setup output window position */
561 		BESW(NV04_DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
562 		/* setup output window size */
563 		BESW(NV04_DSTSIZE, (
564 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
565 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
566 			));
567 		/* setup horizontal and vertical scaling */
568 		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
569 		/* enable vertical filtering (b0) */
570 		BESW(NV04_CTRL_V, 0x00000001);
571 		/* enable horizontal filtering (no effect?) */
572 		BESW(NV04_CTRL_H, 0x00000111);
573 
574 		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
575 		BESW(NV04_GENCTRL, 0x00000111);
576 		/* select buffer 1 as active (b16) */
577 		BESW(NV04_SU_STATE, 0x00010000);
578 
579 		/**************************
580 		 *** setup color keying ***
581 		 **************************/
582 
583 		/* setup colorkeying */
584 		switch(si->dm.space)
585 		{
586 		case B_RGB15_LITTLE:
587 			BESW(NV04_COLKEY, (
588 				((ow->blue.value & ow->blue.mask) << 0)   |
589 				((ow->green.value & ow->green.mask) << 5) |
590 				((ow->red.value & ow->red.mask) << 10)    |
591 				((ow->alpha.value & ow->alpha.mask) << 15)
592 				));
593 			break;
594 		case B_RGB16_LITTLE:
595 			BESW(NV04_COLKEY, (
596 				((ow->blue.value & ow->blue.mask) << 0)   |
597 				((ow->green.value & ow->green.mask) << 5) |
598 				((ow->red.value & ow->red.mask) << 11)
599 				/* this space has no alpha bits */
600 				));
601 			break;
602 		case B_CMAP8:
603 		case B_RGB32_LITTLE:
604 		default:
605 			BESW(NV04_COLKEY, (
606 				((ow->blue.value & ow->blue.mask) << 0)   |
607 				((ow->green.value & ow->green.mask) << 8) |
608 				((ow->red.value & ow->red.mask) << 16)    |
609 				((ow->alpha.value & ow->alpha.mask) << 24)
610 				));
611 			break;
612 		}
613 	}
614 	else
615 	{
616 		/* >= NV10A */
617 
618 		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
619 		BESW(NV10_0SRCREF, ((v1srcstv << 4) & 0xffff0000) | ((hsrcstv >> 12) & 0x0000ffff));
620 		/* setup buffersize */
621 		//fixme if needed: width must be even officially...
622 		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
623 		/* setup source pitch including slopspace (in bytes),
624 		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
625 		/* Note:
626 		 * source pitch granularity = 32 pixels on GeForce cards!! */
627 		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
628 		/* setup output window position */
629 		BESW(NV10_0DSTREF, ((vcoordv & 0xffff0000) | ((hcoordv & 0xffff0000) >> 16)));
630 		/* setup output window size */
631 		BESW(NV10_0DSTSIZE, (
632 			(((vcoordv & 0x0000ffff) - ((vcoordv & 0xffff0000) >> 16) + 1) << 16) |
633 			((hcoordv & 0x0000ffff) - ((hcoordv & 0xffff0000) >> 16) + 1)
634 			));
635 		/* setup horizontal scaling */
636 		BESW(NV10_0ISCALH, (hiscalv << 4));
637 		/* setup vertical scaling */
638 		BESW(NV10_0ISCALV, (viscalv << 4));
639 		/* setup (unclipped!) buffer startadress in RAM */
640 		BESW(NV10_0BUFADR, a1orgv);
641 		/* enable BES (b0 = 0) */
642 		BESW(NV10_GENCTRL, 0x00000000);
643 		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
644 		/* This also triggers activation of programmed values (double buffered registers feature) */
645 		BESW(NV10_BUFSEL, 0x00000001);
646 
647 		/**************************
648 		 *** setup color keying ***
649 		 **************************/
650 
651 		/* setup colorkeying */
652 		switch(si->dm.space)
653 		{
654 		case B_RGB15_LITTLE:
655 			BESW(NV10_COLKEY, (
656 				((ow->blue.value & ow->blue.mask) << 0)   |
657 				((ow->green.value & ow->green.mask) << 5) |
658 				((ow->red.value & ow->red.mask) << 10)    |
659 				((ow->alpha.value & ow->alpha.mask) << 15)
660 				));
661 			break;
662 		case B_RGB16_LITTLE:
663 			BESW(NV10_COLKEY, (
664 				((ow->blue.value & ow->blue.mask) << 0)   |
665 				((ow->green.value & ow->green.mask) << 5) |
666 				((ow->red.value & ow->red.mask) << 11)
667 				/* this space has no alpha bits */
668 				));
669 			break;
670 		case B_CMAP8:
671 		case B_RGB32_LITTLE:
672 		default:
673 			BESW(NV10_COLKEY, (
674 				((ow->blue.value & ow->blue.mask) << 0)   |
675 				((ow->green.value & ow->green.mask) << 8) |
676 				((ow->red.value & ow->red.mask) << 16)    |
677 				((ow->alpha.value & ow->alpha.mask) << 24)
678 				));
679 			break;
680 		}
681 	}
682 
683 	return B_OK;
684 }
685 
686 status_t nv_release_bes()
687 {
688 	if (si->ps.card_arch < NV10A)
689 	{
690 		/* setup BES control: disable scaler (b0 = 0) */
691 		BESW(NV04_GENCTRL, 0x00000000);
692 	}
693 	else
694 	{
695 		/* setup BES control: disable scaler (b0 = 1) */
696 		BESW(NV10_GENCTRL, 0x00000001);
697 	}
698 
699 	return B_OK;
700 }
701