xref: /haiku/src/add-ons/accelerants/via/engine/bes.c (revision c90684742e7361651849be4116d0e5de3a817194)
1 /* VIA Unichrome Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-1/2006 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "std.h"
7 
8 typedef struct move_overlay_info move_overlay_info;
9 
10 struct move_overlay_info
11 {
12 	uint32 hcoordv;		/* left and right edges of video output window */
13 	uint32 vcoordv;		/* top and bottom edges of video output window */
14 	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15 	uint32 hsrcendv;	/* horizontal source end in source buffer (clipping) */
16 	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
17 	uint32 a1orgv;		/* alternate source clipping via startadress of source buffer */
18 };
19 
20 static void eng_bes_calc_move_overlay(move_overlay_info *moi);
21 static void eng_bes_program_move_overlay(move_overlay_info moi);
22 
23 /* returns true if the current displaymode leaves enough bandwidth for overlay
24  * support, false if not. */
25 bool eng_bes_chk_bandwidth()
26 {
27 	float refresh, bandwidth;
28 	uint8 depth;
29 
30 	switch(si->dm.space)
31 	{
32 	case B_CMAP8:        depth =  8; break;
33 	case B_RGB15_LITTLE: depth = 16; break;
34 	case B_RGB16_LITTLE: depth = 16; break;
35 	case B_RGB32_LITTLE: depth = 32; break;
36 	default:
37 		LOG(8,("Overlay: Invalid colour depth 0x%08x\n", si->dm.space));
38 		return false;
39 	}
40 
41 	refresh =
42 		(si->dm.timing.pixel_clock * 1000) /
43 		(si->dm.timing.h_total * si->dm.timing.v_total);
44 	bandwidth =
45 		si->dm.timing.h_display * si->dm.timing.v_display * refresh * depth;
46 	LOG(8,("Overlay: Current mode's refreshrate is %.2fHz, bandwidth is %.0f\n",
47 		refresh, bandwidth));
48 
49 	switch (((CRTCR(MEMCLK)) & 0x70) >> 4)
50 	{
51 	case 0: /* SDR  66 */
52 	case 1: /* SDR 100 */
53 	case 2: /* SDR 133 */
54 		/* memory is too slow, sorry. */
55 		return false;
56 		break;
57 	case 3: /* DDR 100 */
58 		/* DDR100's basic limit... */
59 		if (bandwidth > 921600000.0) return false;
60 		/* ... but we have constraints at higher than 800x600 */
61 		if (si->dm.timing.h_display > 800)
62 		{
63 			if (depth != 8) return false;
64 			if (si->dm.timing.v_display > 768) return false;
65 			if (refresh > 60.2) return false;
66 		}
67 		break;
68 	case 4: /* DDR 133 */
69 		if (bandwidth > 4045440000.0) return false;
70 		break;
71 	default: /* not (yet?) used */
72 		return false;
73 		break;
74 	}
75 
76 	return true;
77 }
78 
79 /* move the overlay output window in virtualscreens */
80 /* Note:
81  * si->dm.h_display_start and si->dm.v_display_start determine where the new
82  * output window is located! */
83 void eng_bes_move_overlay()
84 {
85 	move_overlay_info moi;
86 
87 	/* abort if overlay is not active */
88 	if (!si->overlay.active) return;
89 
90 	eng_bes_calc_move_overlay(&moi);
91 	eng_bes_program_move_overlay(moi);
92 }
93 
/* Calculate the output window edges and the source clipping values for the
 * overlay, based on the window, buffer, view and scaling factors saved in
 * si->overlay and on the display start/size in si->dm.
 *
 * moi: receives the results; every field is (re)initialized here.
 *
 * This function writes no BES registers itself. On cards with a secondary
 * head it does call eng_bes_to_crtc(), which updates si->overlay.crtc. */
static void eng_bes_calc_move_overlay(move_overlay_info *moi)
{
	/* misc used variables: first (left/top) and second (right/bottom) edge */
	uint16 temp1, temp2;
	/* visible screen window in virtual workspaces */
	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;

	/* do 'overlay follow head' in dualhead modes on dualhead cards */
	if (si->ps.secondary_head)
	{
		switch (si->dm.flags & DUALHEAD_BITS)
		{
		case DUALHEAD_ON:
		case DUALHEAD_SWITCH:
			/* pick the head on which the horizontal center of the output
			 * window is located */
			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
					(si->dm.h_display_start + si->dm.timing.h_display))
				eng_bes_to_crtc(si->crtc_switch_mode);
			else
				eng_bes_to_crtc(!si->crtc_switch_mode);
			break;
		default:
				eng_bes_to_crtc(si->crtc_switch_mode);
			break;
		}
	}

	/* the BES does not respect virtual_workspaces, but adheres to CRTC
	 * constraints only */
	crtc_hstart = si->dm.h_display_start;
	/* make dualhead stretch and switch mode work while we're at it.. */
	if (si->overlay.crtc)
	{
		/* the visible range of this head starts one display width further on */
		crtc_hstart += si->dm.timing.h_display;
	}

	/* horizontal end is the first position beyond the displayed range on the CRTC */
	crtc_hend = crtc_hstart + si->dm.timing.h_display;
	crtc_vstart = si->dm.v_display_start;
	/* vertical end is the first position beyond the displayed range on the CRTC */
	crtc_vend = crtc_vstart + si->dm.timing.v_display;


	/****************************************
	 *** setup all edges of output window ***
	 ****************************************/

	/* setup left and right edges of output window */
	moi->hcoordv = 0;
	/* left edge coordinate of output window, must be inside desktop */
	/* clipping on the left side */
	if (si->overlay.ow.h_start < crtc_hstart)
	{
		temp1 = 0;
	}
	else
	{
		/* clipping on the right side */
		if (si->overlay.ow.h_start >= (crtc_hend - 1))
		{
			/* width < 2 is not allowed */
			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
		}
		else
		/* no clipping here */
		{
			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
		}
	}
	/* left edge goes into the upper 16 bits */
	moi->hcoordv |= temp1 << 16;
	/* right edge coordinate of output window, must be inside desktop */
	/* width < 2 is not allowed */
	if (si->overlay.ow.width < 2)
	{
		temp2 = (temp1 + 1) & 0x7ff;
	}
	else
	{
		/* clipping on the right side */
		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
		{
			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
		}
		else
		{
			/* clipping on the left side */
			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
			{
				/* width < 2 is not allowed */
				temp2 = 1;
			}
			else
			/* no clipping here */
			{
				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
			}
		}
	}
	/* right edge goes into the lower 16 bits */
	moi->hcoordv |= temp2 << 0;
	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));

	/* setup top and bottom edges of output window */
	moi->vcoordv = 0;
	/* top edge coordinate of output window, must be inside desktop */
	/* clipping on the top side */
	if (si->overlay.ow.v_start < crtc_vstart)
	{
		temp1 = 0;
	}
	else
	{
		/* clipping on the bottom side */
		if (si->overlay.ow.v_start >= (crtc_vend - 1))
		{
			/* height < 2 is not allowed */
			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
		}
		else
		/* no clipping here */
		{
			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
		}
	}
	/* top edge goes into the upper 16 bits */
	moi->vcoordv |= temp1 << 16;
	/* bottom edge coordinate of output window, must be inside desktop */
	/* height < 2 is not allowed */
	if (si->overlay.ow.height < 2)
	{
		temp2 = (temp1 + 1) & 0x7ff;
	}
	else
	{
		/* clipping on the bottom side */
		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
		{
			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
		}
		else
		{
			/* clipping on the top side */
			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
			{
				/* height < 2 is not allowed */
				temp2 = 1;
			}
			else
			/* no clipping here */
			{
				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
			}
		}
	}
	/* bottom edge goes into the lower 16 bits */
	moi->vcoordv |= temp2 << 0;
	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));


	/*********************************
	 *** setup horizontal clipping ***
	 *********************************/

	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
	/* Note:
	 * The method is to calculate, based on 1:1 scaling, based on the output window.
	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
	 * Note: The input bitmap's slopspace is automatically excluded from the calculations this way! */
	/* Note also:
	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
	moi->hsrcstv = 0;
	/* check for destination horizontal clipping at left side */
	if (si->overlay.ow.h_start < crtc_hstart)
	{
		/* check if entire destination picture is clipping left:
		 * (2 pixels will be clamped onscreen at least) */
		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
		{
			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
			moi->hsrcstv += (si->overlay.ow.width - 2);
		}
		else
		{
			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
		}
		LOG(4,("Overlay: clipping left...\n"));

		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
		 * Note that this also already takes care of aligning the value to the BES register! */
		moi->hsrcstv *= si->overlay.h_ifactor;
	}
	/* take zoom into account: the view's start is in whole pixels, so shift it
	 * into the upper 16 bits */
	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
	/* AND below required by hardware */
	moi->hsrcstv &= 0x03fffffc;
	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));

	/* Setup horizontal source end: last (sub)pixel contributing to output picture */
	/* Note:
	 * The method is to calculate, based on 1:1 scaling, based on the output window.
	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
	 * Then add the right ending position of the bitmap's view (zoom function) to get the final value needed. */
	/* Note also:
	 * Even if the scaling factor is clamping we instruct the BES to use the correct source end pos.! */

	moi->hsrcendv = 0;
	/* check for destination horizontal clipping at right side */
	if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
	{
		/* check if entire destination picture is clipping right:
		 * (2 pixels will be clamped onscreen at least) */
		if (si->overlay.ow.h_start > (crtc_hend - 2))
		{
			/* increase 'number of clipping pixels' with 'fixed value': (total dest. width - 2) */
			moi->hsrcendv += (si->overlay.ow.width - 2);
		}
		else
		{
			/* increase 'number of clipping pixels' with actual number of dest. clipping pixels */
			moi->hsrcendv += ((si->overlay.ow.h_start + si->overlay.ow.width - 1) - (crtc_hend - 1));
		}
		LOG(4,("Overlay: clipping right...\n"));

		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
		 * Note that this also already takes care of aligning the value to the BES register! */
		moi->hsrcendv *= si->overlay.h_ifactor;
		/* now subtract this value from the last used pixel in (zoomed) inputbuffer, aligned to BES */
		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16) - moi->hsrcendv;
	}
	else
	{
		/* set last contributing pixel to last used pixel in (zoomed) inputbuffer, aligned to BES */
		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16);
	}
	/* AND below required by hardware */
	moi->hsrcendv &= 0x03ffffff;
	LOG(4,("Overlay: last horizontal (sub)pixel of input bitmap contributing %f\n", moi->hsrcendv / (float)65536));


	/*******************************
	 *** setup vertical clipping ***
	 *******************************/

	/* calculate input bitmap origin address, as an offset from the start of
	 * the framebuffer */
	moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
	moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer);
	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));

	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
	/* Note:
	 * The method is to calculate, based on 1:1 scaling, based on the output window.
	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
	/* Note also:
	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */

	moi->v1srcstv = 0;
	/* check for destination vertical clipping at top side */
	if (si->overlay.ow.v_start < crtc_vstart)
	{
		/* check if entire destination picture is clipping at top:
		 * (2 pixels will be clamped onscreen at least) */
		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
		{
			/* increase 'number of clipping pixels' with 'fixed value':
			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
			/* we need to do clipping in the source bitmap because no separate clipping
			 * registers exist: skip the clipped whole lines via the origin address */
			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
		}
		else
		{
			/* increase 'first contributing pixel' with:
			 * number of destination picture clipping pixels * inverse scaling factor */
			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
			/* we need to do clipping in the source bitmap because no separate clipping
			 * registers exist: skip the clipped whole lines via the origin address */
			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
		}
		LOG(4,("Overlay: clipping at top...\n"));
	}
	/* take zoom into account: both in the source start value and in the
	 * origin address (one row per line of the view's vertical start) */
	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
	moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
	LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));

	/* AND below is probably required by hardware. */
	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
	moi->a1orgv &= 0x07fffff0;
}
384 
385 static void eng_bes_program_move_overlay(move_overlay_info moi)
386 {
387 	/*************************************
388 	 *** sync to BES (Back End Scaler) ***
389 	 *************************************/
390 
391 	/* Done in card hardware:
392 	 * double buffered registers + trigger during 'BES-'VBI feature. */
393 
394 
395 	/**************************************
396 	 *** actually program the registers ***
397 	 **************************************/
398 
399 	/* setup clipped(!) buffer startadress in RAM */
400 	/* VIA bes doesn't have clipping registers, so no subpixelprecise clipping
401 	 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
402 	/* first include 'pixel precise' left clipping... (top clipping was already included) */
403 	moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
404 	/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
405 	BESW(VID1Y_ADDR0, (moi.a1orgv & 0x07fffffc));
406 
407 	/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
408 	/* notes:
409 	 * - make absolutely sure the engine can fetch the last pixel needed from
410 	 *   the sourcebitmap even if only to generate a tiny subpixel from it!
411 	 * - the engine uses byte format instead of pixel format;
412 	 * - the engine uses 16 bytes, so 8 pixels granularity. */
413 	BESW(VID1_FETCH, (((((moi.hsrcendv >> 16) + 1 + 0x0007) & ~0x0007) * 2) << (20 - 4)));
414 
415 	/* setup output window position */
416 	BESW(VID1_HVSTART, ((moi.hcoordv & 0xffff0000) | ((moi.vcoordv & 0xffff0000) >> 16)));
417 
418 	/* setup output window size */
419 	BESW(VID1_SIZE, (((moi.hcoordv & 0x0000ffff) << 16) | (moi.vcoordv & 0x0000ffff)));
420 
421 	/* enable colorkeying (b0 = 1), disable chromakeying (b1 = 0), Vid1 on top of Vid3 (b20 = 0),
422 	 * all registers are loaded during the next 'BES-'VBI (b28 = 1), Vid1 cmds fire (b31 = 1) */
423 	BESW(COMPOSE, 0x90000001);
424 }
425 
426 status_t eng_bes_to_crtc(bool crtc)
427 {
428 	if (si->ps.secondary_head)
429 	{
430 		if (crtc)
431 		{
432 			LOG(4,("Overlay: switching overlay to CRTC2\n"));
433 			/* switch overlay engine to CRTC2 */
434 //			ENG_REG32(RG32_FUNCSEL) &= ~0x00001000;
435 //			ENG_REG32(RG32_2FUNCSEL) |= 0x00001000;
436 			si->overlay.crtc = !si->crtc_switch_mode;
437 		}
438 		else
439 		{
440 			LOG(4,("Overlay: switching overlay to CRTC1\n"));
441 			/* switch overlay engine to CRTC1 */
442 //			ENG_REG32(RG32_2FUNCSEL) &= ~0x00001000;
443 //			ENG_REG32(RG32_FUNCSEL) |= 0x00001000;
444 			si->overlay.crtc = si->crtc_switch_mode;
445 		}
446 		return B_OK;
447 	}
448 	else
449 	{
450 		return B_ERROR;
451 	}
452 }
453 
454 status_t eng_bes_init()
455 {
456 	if (si->ps.chip_rev < 0x10)
457 	{
458 		/* select colorspace setup for B_YCbCr422 */
459 		BESW(VID1_COLSPAC1, 0x140020f2);
460 		BESW(VID1_COLSPAC2, 0x0a0a2c00);
461 		/* fifo depth is $20 (b0-5), threshold $10 (b8-13), prethreshold $1d (b24-29) */
462 		BESW(VID1_FIFO, 0x1d00101f);
463 	}
464 	else
465 	{
466 		/* select colorspace setup for B_YCbCr422 */
467 		BESW(VID1_COLSPAC1, 0x13000ded);
468 		BESW(VID1_COLSPAC2, 0x13171000);
469 		/* fifo depth is $40 (b0-5), threshold $38 (b8-13), prethreshold $38 (b24-29) */
470 		BESW(VID1_FIFO, 0x3800383f);
471 	}
472 
473 		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
474 //		BESW(NV04_INTE, 0x00000000);
475 		/* shut off GeForce4MX MPEG2 decoder */
476 //		BESW(DEC_GENCTRL, 0x00000000);
477 		/* setup BES memory-range mask */
478 //		BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
479 		/* setup brightness, contrast and saturation to be 'neutral' */
480 //		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
481 //		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
482 
483 	return B_OK;
484 }
485 
486 status_t eng_configure_bes
487 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
488 {
489 	/* yuy2 (4:2:2) colorspace calculations */
490 
491 	/* Note:
492 	 * in BeOS R5.0.3 and DANO:
493 	 * 'ow->offset_xxx' is always 0, so not used;
494 	 * 'ow->width' and 'ow->height' are the output window size: does not change
495 	 * if window is clipping;
496 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
497 	 * window. These values can be negative: this means the window is clipping
498 	 * at the left or the top of the display, respectively. */
499 
500 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
501 	 * displayed on screen. This is used for the 'hardware zoom' function. */
502 
503 	/* output window position and clipping info for source buffer */
504 	move_overlay_info moi;
505 	/* calculated BES register values */
506 	uint32 	hiscalv, viscalv;
507 	/* interval representation, used for scaling calculations */
508 	uint16 intrep;
509 	/* inverse scaling factor, used for source positioning */
510 	uint32 ifactor;
511 	/* copy of overlay view which has checked valid values */
512 	overlay_view my_ov;
513 	/* true if scaling needed */
514 	bool scale_x, scale_y;
515 	/* for computing scaling register value */
516 	uint32 scaleval;
517 	/* for computing 'pre-scaling' on downscaling */
518 	uint32 minictrl;
519 
520 	/**************************************************************************************
521 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
522 	 **************************************************************************************/
523 	my_ov = *ov;
524 	/* check for valid 'coordinates' */
525 	if (my_ov.width == 0) my_ov.width++;
526 	if (my_ov.height == 0) my_ov.height++;
527 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
528 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
529 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
530 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
531 	if (my_ov.v_start > (ob->height - 1))
532 		my_ov.v_start = (ob->height - 1);
533 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
534 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
535 
536 	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
537 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
538 
539 	/* save for eng_bes_calc_move_overlay() */
540 	si->overlay.ow = *ow;
541 	si->overlay.ob = *ob;
542 	si->overlay.my_ov = my_ov;
543 
544 
545 	/********************************
546 	 *** setup horizontal scaling ***
547 	 ********************************/
548 	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
549 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
550 	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
551 
552 	/* preset X and Y prescaling to be 1x */
553 	minictrl = 0x00000000;
554 	/* determine interval representation value, taking zoom into account */
555 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
556 	{
557 		/* horizontal filtering is ON */
558 		if ((my_ov.width == ow->width) | (ow->width < 2))
559 		{
560 			/* no horizontal scaling used, OR destination width < 2 */
561 			intrep = 0;
562 		}
563 		else
564 		{
565 			intrep = 1;
566 		}
567 	}
568 	else
569 	{
570 		/* horizontal filtering is OFF */
571 		if ((ow->width < my_ov.width) & (ow->width >= 2))
572 		{
573 			/* horizontal downscaling used AND destination width >= 2 */
574 			intrep = 1;
575 		}
576 		else
577 		{
578 			intrep = 0;
579 		}
580 	}
581 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
582 
583 	/* calculate inverse horizontal scaling factor, taking zoom into account */
584 	/* standard scaling formula: */
585 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
586 
587 	/* correct factor to prevent most-right visible 'line' from distorting */
588 	ifactor -= (1 << 5);
589 	hiscalv = ifactor;
590 	/* save for eng_bes_calc_move_overlay() */
591 	si->overlay.h_ifactor = ifactor;
592 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
593 
594 	/* check scaling factor (and modify if needed) to be within scaling limits */
595 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
596 	//fixme: checkout...
597 	if (hiscalv < 0x00002000)
598 	{
599 		/* (non-inverse) factor too large, set factor to max. valid value */
600 		hiscalv = 0x00002000;
601 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
602 	}
603 	/* VIA has a 'downscaling' limit of 1.0, but seperate prescaling to 1/16th can be done.
604 	 * (X-scaler has 11bit register with 0.11 format value, with special 1.0 scaling factor setting;
605 	 *  prescaler has fixed 1x, 1/2x, 1/4x, 1/8x and 1/16x settings.) */
606 	if (hiscalv > 0x00100000)
607 	{
608 		/* (non-inverse) factor too small, set factor to min. valid value */
609 		hiscalv = 0x00100000;
610 		LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
611 	}
612 
613 	/* setup pre-downscaling if 'requested' */
614 	if ((hiscalv > 0x00010000) && (hiscalv <= 0x00020000))
615 	{
616 		/* instruct BES to horizontal prescale 0.5x */
617 		minictrl |= 0x01000000;
618 		/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
619 		hiscalv >>= 1;
620 	}
621 	else
622 		if ((hiscalv > 0x00020000) && (hiscalv <= 0x00040000))
623 		{
624 			/* instruct BES to horizontal prescale 0.25x */
625 			minictrl |= 0x03000000;
626 			/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
627 			hiscalv >>= 2;
628 		}
629 		else
630 			if ((hiscalv > 0x00040000) && (hiscalv <= 0x00080000))
631 			{
632 				/* instruct BES to horizontal prescale 0.125x */
633 				minictrl |= 0x05000000;
634 				/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
635 				hiscalv >>= 3;
636 			}
637 			else
638 				if ((hiscalv > 0x00080000) && (hiscalv <= 0x00100000))
639 				{
					/* instruct BES to horizontal prescale 0.0625x (1/16) */
641 					minictrl |= 0x07000000;
642 					/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
643 					hiscalv >>= 4;
644 				}
645 
646 	/* only instruct normal scaler to scale if it must do so */
647 	scale_x = true;
648 	if (hiscalv == 0x00010000) scale_x = false;
649 
650 	/* AND below is required by hardware */
651 	hiscalv &= 0x0000ffe0;
652 
653 
654 	/******************************
655 	 *** setup vertical scaling ***
656 	 ******************************/
657 
658 	/* determine interval representation value, taking zoom into account */
659 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
660 	{
661 		/* vertical filtering is ON */
662 		if ((my_ov.height == ow->height) | (ow->height < 2))
663 		{
664 			/* no vertical scaling used, OR destination height < 2 */
665 			intrep = 0;
666 		}
667 		else
668 		{
669 			intrep = 1;
670 		}
671 	}
672 	else
673 	{
674 		/* vertical filtering is OFF */
675 		if ((ow->height < my_ov.height) & (ow->height >= 2))
676 		{
677 			/* vertical downscaling used AND destination height >= 2 */
678 			intrep = 1;
679 		}
680 		else
681 		{
682 			intrep = 0;
683 		}
684 	}
685 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
686 
687 	/* calculate inverse vertical scaling factor, taking zoom into account */
688 	/* standard scaling formula: */
689 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
690 
691 	/* correct factor to prevent lowest visible line from distorting */
692 	ifactor -= (1 << 6);
693 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
694 
695 	/* preserve ifactor for source positioning calculations later on */
696 	viscalv = ifactor;
697 	/* save for eng_bes_calc_move_overlay() */
698 	si->overlay.v_ifactor = ifactor;
699 
700 	/* check scaling factor (and modify if needed) to be within scaling limits */
701 	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
702 	//fixme: checkout...
703 	if (viscalv < 0x00002000)
704 	{
705 		/* (non-inverse) factor too large, set factor to max. valid value */
706 		viscalv = 0x00002000;
707 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
708 	}
709 	/* VIA has a 'downscaling' limit of 1.0, but seperate prescaling to 1/16th can be done.
710 	 * (Y-scaler has 10bit register with 0.10 format value, with special 1.0 scaling factor setting;
711 	 *  prescaler has fixed 1x, 1/2x, 1/4x, 1/8x and 1/16x settings.) */
712 	if (viscalv > 0x00100000)
713 	{
714 		/* (non-inverse) factor too small, set factor to min. valid value */
715 		viscalv = 0x00100000;
716 		LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)1024 / (viscalv >> 6)));
717 	}
718 
719 	/* setup pre-downscaling if 'requested' */
720 	if ((viscalv > 0x00010000) && (viscalv <= 0x00020000))
721 	{
		/* instruct BES to vertical prescale 0.5x */
723 		minictrl |= 0x00010000;
724 		/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
725 		viscalv >>= 1;
726 	}
727 	else
728 		if ((viscalv > 0x00020000) && (viscalv <= 0x00040000))
729 		{
			/* instruct BES to vertical prescale 0.25x */
731 			minictrl |= 0x00030000;
732 			/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
733 			viscalv >>= 2;
734 		}
735 		else
736 			if ((viscalv > 0x00040000) && (viscalv <= 0x00080000))
737 			{
				/* instruct BES to vertical prescale 0.125x */
739 				minictrl |= 0x00050000;
740 				/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
741 				viscalv >>= 3;
742 			}
743 			else
744 				if ((viscalv > 0x00080000) && (viscalv <= 0x00100000))
745 				{
					/* instruct BES to vertical prescale 0.0625x (1/16) */
747 					minictrl |= 0x00070000;
748 					/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
749 					viscalv >>= 4;
750 				}
751 
752 	/* only instruct normal scaler to scale if it must do so */
753 	scale_y = true;
754 	if (viscalv == 0x00010000) scale_y = false;
755 
756 	/* AND below is required by hardware */
757 	viscalv &= 0x0000ffc0;
758 
759 
760 	/********************************************************************************
761 	 *** setup all edges of output window, setup horizontal and vertical clipping ***
762 	 ********************************************************************************/
763 	eng_bes_calc_move_overlay(&moi);
764 
765 
766 	/*****************************
767 	 *** log color keying info ***
768 	 *****************************/
769 
770 	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
771 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
772 	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
773 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
774 
775 
776 	/*****************
777 	 *** log flags ***
778 	 *****************/
779 
780 	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
781 	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
782 
783 
784 	/*************************************
785 	 *** sync to BES (Back End Scaler) ***
786 	 *************************************/
787 
788 	/* Done in card hardware:
789 	 * double buffered registers + trigger during 'BES-'VBI feature. */
790 
791 
792 	/**************************************
793 	 *** actually program the registers ***
794 	 **************************************/
795 
796 	/* setup clipped(!) buffer startadress in RAM */
797 	/* VIA bes doesn't have clipping registers, so no subpixelprecise clipping
798 	 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
799 	/* first include 'pixel precise' left clipping... (top clipping was already included) */
800 	moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
801 	/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
802 	BESW(VID1Y_ADDR0, (moi.a1orgv & 0x07fffffc));
803 
804 	/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
805 	/* notes:
806 	 * - make absolutely sure the engine can fetch the last pixel needed from
807 	 *   the sourcebitmap even if only to generate a tiny subpixel from it!
808 	 * - the engine uses byte format instead of pixel format;
809 	 * - the engine uses 16 bytes, so 8 pixels granularity. */
810 	BESW(VID1_FETCH, (((((moi.hsrcendv >> 16) + 1 + 0x0007) & ~0x0007) * 2) << (20 - 4)));
811 
812 	/* enable horizontal filtering if asked for */
813 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
814 	{
815 		minictrl |= (1 << 1);
816 		LOG(4,("Overlay: using horizontal interpolation on scaling\n"));
817 	}
818 	/* enable vertical filtering if asked for */
819 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
820 	{
821 		/* vertical interpolation b0, interpolation on Y, Cb and Cr all (b2) */
822 		minictrl |= ((1 << 2) | (1 << 0));
823 		LOG(4,("Overlay: using vertical interpolation on scaling\n"));
824 	}
825 	/* and program horizontal and vertical 'prescaling' for downscaling */
826 	BESW(VID1_MINI_CTL, minictrl);
827 
828 	/* setup buffersize */
829 	BESW(V1_SOURCE_WH, ((ob->height << 16) | (ob->width)));
830 
831 	/* setup buffer source pitch including slopspace (in bytes) */
832 	BESW(VID1_STRIDE, (ob->width * 2));
833 
834 	/* setup output window position */
835 	BESW(VID1_HVSTART, ((moi.hcoordv & 0xffff0000) | ((moi.vcoordv & 0xffff0000) >> 16)));
836 
837 	/* setup output window size */
838 	BESW(VID1_SIZE, (((moi.hcoordv & 0x0000ffff) << 16) | (moi.vcoordv & 0x0000ffff)));
839 
840 	/* setup horizontal and vertical scaling:
841 	 * setup horizontal scaling enable (b31), setup vertical scaling enable (b15).
842 	 * Note:
843 	 * Vertical scaling has a different resolution than horizontal scaling(!).  */
844 	scaleval = 0x00000000;
845 	if (scale_x) scaleval |= 0x80000000;
846 	if (scale_y) scaleval |= 0x00008000;
847 	BESW(VID1_ZOOM, (scaleval | ((hiscalv << 16) >> 5) | (viscalv >> 6)));
848 
849 	if (si->ps.chip_rev < 0x10)
850 	{
851 		/* enable BES (b0), format yuv422 (b2-4 = %000), set colorspace sign (b7 = 1),
852 		 * input is frame (not field) picture (b9 = 0), expire = $5 (b16-19),
853 		 * select field (not frame)(!) base (b24 = 0) */
854 		BESW(VID1_CTL, 0x00050081);
855 	}
856 	else
857 	{
858 		/* enable BES (b0), format yuv422 (b2-4 = %000), set colorspace sign (b7 = 1),
859 		 * input is frame (not field) picture (b9 = 0), expire = $f (b16-19),
860 		 * select field (not frame)(!) base (b24 = 0) */
861 		BESW(VID1_CTL, 0x000f0081);
862 	}
863 
864 
865 	/**************************
866 	 *** setup color keying ***
867 	 **************************/
868 
869 	/* setup colorkeying */
870 	switch(si->dm.space)
871 	{
872 	case B_CMAP8:
873 		{
874 			/* do color palette index lookup for current colorkey */
875 			/* note:
876 			 * since apparantly some hardware works with color indexes instead of colors,
877 			 * it might be a good idea(!!) to include the colorindex in the system's
878 			 * overlay_window struct. */
879 			static uint8 *r,*g,*b;
880 			static uint32 idx;
881 			r = si->color_data;
882 			g = r + 256;
883 			b = g + 256;
884 			/* if index 1 doesn't help us, we assume 0 will (got to program something anyway) */
885 			//fixme, note, tweakalert:
886 			//I'm counting down for a reason:
887 			//BeOS assigns the color white (0x00ffffff) to two indexes in the palette:
888 			//index 0x3f and 0xff. In the framebuffer index 0xff is used (apparantly).
889 			//The hardware compares framebuffer to given key, so the BES must receive 0xff.
890 			for (idx = 255; idx > 0; idx--)
891 			{
892 				if ((r[idx] == ow->red.value) &&
893 					(g[idx] == ow->green.value) &&
894 					(b[idx] == ow->blue.value))
895 						break;
896 			}
897 			LOG(4,("Overlay: colorkey's palette index is $%02x\n", idx));
898 			/* program color palette index into BES engine */
899 			BESW(COLKEY, idx);
900 		}
901 		break;
902 	case B_RGB15_LITTLE:
903 		BESW(COLKEY, (
904 			((ow->blue.value & ow->blue.mask) << 0)   |
905 			((ow->green.value & ow->green.mask) << 5) |
906 			((ow->red.value & ow->red.mask) << 10)
907 			/* alpha keying is not supported here */
908 			));
909 		break;
910 	case B_RGB16_LITTLE:
911 		BESW(COLKEY, (
912 			((ow->blue.value & ow->blue.mask) << 0)   |
913 			((ow->green.value & ow->green.mask) << 5) |
914 			((ow->red.value & ow->red.mask) << 11)
915 			/* this space has no alpha bits */
916 			));
917 		break;
918 	case B_RGB32_LITTLE:
919 	default:
920 		BESW(COLKEY, (
921 			((ow->blue.value & ow->blue.mask) << 0)   |
922 			((ow->green.value & ow->green.mask) << 8) |
923 			((ow->red.value & ow->red.mask) << 16)
924 			/* alpha keying is not supported here */
925 			));
926 		break;
927 	}
928 
929 	/* disable chromakeying (b1 = 0), Vid1 on top of Vid3 (b20 = 0),
930 	 * all registers are loaded during the next 'BES-'VBI (b28 = 1), Vid1 cmds fire (b31 = 1) */
931 	if (ow->flags & B_OVERLAY_COLOR_KEY)
932 	{
933 		/* enable colorkeying (b0 = 1) */
934 		BESW(COMPOSE, 0x90000001);
935 	}
936 	else
937 	{
938 		/* disable colorkeying (b0 = 0) */
939 		BESW(COMPOSE, 0x90000000);
940 	}
941 
942 	/* note that overlay is in use (for eng_bes_move_overlay()) */
943 	si->overlay.active = true;
944 
945 	return B_OK;
946 }
947 
948 status_t eng_release_bes()
949 {
950 	/* setup BES control: disable scaler (b0 = 0) */
951 	BESW(VID1_CTL, 0x00000000);
952 
953 	/* make sure the 'disable' command really gets executed: (no 'VBI' anymore if BES disabled) */
954 	/* all registers are loaded immediately (b29 = 1), Vid1 cmds fire (b31 = 1) */
955 	BESW(COMPOSE, 0xa0000000);
956 
957 	/* note that overlay is not in use (for eng_bes_move_overlay()) */
958 	si->overlay.active = false;
959 
960 	return B_OK;
961 }
962