xref: /haiku/src/add-ons/accelerants/matrox/engine/mga_bes.c (revision d3d8b26997fac34a84981e6d2b649521de2cc45a)
1 /* G200-G550 Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-12/2005 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "mga_std.h"
7 
8 typedef struct move_overlay_info move_overlay_info;
9 
10 struct move_overlay_info
11 {
12 	uint32 hcoordv;		/* left and right edges of video output window */
13 	uint32 vcoordv;		/* top and bottom edges of video output window */
14 	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15 	uint32 hsrcendv;	/* horizontal source end in source buffer (clipping) */
16 	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
17 	uint32 a1orgv;		/* alternate source clipping via startadress of source buffer */
18 };
19 
20 static void gx00_bes_calc_move_overlay(move_overlay_info *moi);
21 static void gx00_bes_program_move_overlay(move_overlay_info moi);
22 
23 /* move the overlay output window in virtualscreens */
24 /* Note:
25  * si->dm.h_display_start and si->dm.v_display_start determine where the new
26  * output window is located! */
27 void gx00_bes_move_overlay()
28 {
29 	move_overlay_info moi;
30 
31 	/* abort if overlay is not active */
32 	if (!si->overlay.active) return;
33 
34 	gx00_bes_calc_move_overlay(&moi);
35 	gx00_bes_program_move_overlay(moi);
36 }
37 
38 static void gx00_bes_calc_move_overlay(move_overlay_info *moi)
39 {
40 	/* misc used variables */
41 	uint16 temp1, temp2;
42 	/* visible screen window in virtual workspaces */
43 	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
44 
45 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
46 	 * constraints only */
47 	crtc_hstart = si->dm.h_display_start;
48 	/* make dualhead switch mode with TVout enabled work while we're at it.. */
49 	if (si->switched_crtcs)
50 	{
51 		crtc_hstart += si->dm.timing.h_display;
52 	}
53 	/* horizontal end is the first position beyond the displayed range on the CRTC */
54 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
55 	crtc_vstart = si->dm.v_display_start;
56 	/* vertical end is the first position beyond the displayed range on the CRTC */
57 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
58 
59 
60 	/****************************************
61 	 *** setup all edges of output window ***
62 	 ****************************************/
63 
64 	/* setup left and right edges of output window */
65 	moi->hcoordv = 0;
66 	/* left edge coordinate of output window, must be inside desktop */
67 	/* clipping on the left side */
68 	if (si->overlay.ow.h_start < crtc_hstart)
69 	{
70 		temp1 = 0;
71 	}
72 	else
73 	{
74 		/* clipping on the right side */
75 		if (si->overlay.ow.h_start >= (crtc_hend - 1))
76 		{
77 			/* width < 2 is not allowed */
78 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
79 		}
80 		else
81 		/* no clipping here */
82 		{
83 			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
84 		}
85 	}
86 	moi->hcoordv |= temp1 << 16;
87 	/* right edge coordinate of output window, must be inside desktop */
88 	/* width < 2 is not allowed */
89 	if (si->overlay.ow.width < 2)
90 	{
91 		temp2 = (temp1 + 1) & 0x7ff;
92 	}
93 	else
94 	{
95 		/* clipping on the right side */
96 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
97 		{
98 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
99 		}
100 		else
101 		{
102 			/* clipping on the left side */
103 			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
104 			{
105 				/* width < 2 is not allowed */
106 				temp2 = 1;
107 			}
108 			else
109 			/* no clipping here */
110 			{
111 				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
112 			}
113 		}
114 	}
115 	moi->hcoordv |= temp2 << 0;
116 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
117 
118 	/* setup top and bottom edges of output window */
119 	moi->vcoordv = 0;
120 	/* top edge coordinate of output window, must be inside desktop */
121 	/* clipping on the top side */
122 	if (si->overlay.ow.v_start < crtc_vstart)
123 	{
124 		temp1 = 0;
125 	}
126 	else
127 	{
128 		/* clipping on the bottom side */
129 		if (si->overlay.ow.v_start >= (crtc_vend - 1))
130 		{
131 			/* height < 2 is not allowed */
132 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
133 		}
134 		else
135 		/* no clipping here */
136 		{
137 			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
138 		}
139 	}
140 	moi->vcoordv |= temp1 << 16;
141 	/* bottom edge coordinate of output window, must be inside desktop */
142 	/* height < 2 is not allowed */
143 	if (si->overlay.ow.height < 2)
144 	{
145 		temp2 = (temp1 + 1) & 0x7ff;
146 	}
147 	else
148 	{
149 		/* clipping on the bottom side */
150 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
151 		{
152 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
153 		}
154 		else
155 		{
156 			/* clipping on the top side */
157 			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
158 			{
159 				/* height < 2 is not allowed */
160 				temp2 = 1;
161 			}
162 			else
163 			/* no clipping here */
164 			{
165 				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
166 			}
167 		}
168 	}
169 	moi->vcoordv |= temp2 << 0;
170 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
171 
172 
173 	/*********************************
174 	 *** setup horizontal clipping ***
175 	 *********************************/
176 
177 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
178 	/* Note:
179 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
180 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
181 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
182 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
183 	/* Note also:
184 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
185 	moi->hsrcstv = 0;
186 	/* check for destination horizontal clipping at left side */
187 	if (si->overlay.ow.h_start < crtc_hstart)
188 	{
189 		/* check if entire destination picture is clipping left:
190 		 * (2 pixels will be clamped onscreen at least) */
191 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
192 		{
193 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
194 			moi->hsrcstv += (si->overlay.ow.width - 2);
195 		}
196 		else
197 		{
198 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
199 			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
200 		}
201 		LOG(4,("Overlay: clipping left...\n"));
202 
203 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
204 		 * Note that this also already takes care of aligning the value to the BES register! */
205 		moi->hsrcstv *= si->overlay.h_ifactor;
206 	}
207 	/* take zoom into account */
208 	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
209 	/* AND below required by hardware */
210 	moi->hsrcstv &= 0x03fffffc;
211 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
212 
213 	/* Setup horizontal source end: last (sub)pixel contributing to output picture */
214 	/* Note:
215 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
216 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
217 	 * Then add the right ending position of the bitmap's view (zoom function) to get the final value needed. */
218 	/* Note also:
219 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source end pos.! */
220 	moi->hsrcendv = 0;
221 	/* check for destination horizontal clipping at right side */
222 	if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
223 	{
224 		/* check if entire destination picture is clipping right:
225 		 * (2 pixels will be clamped onscreen at least) */
226 		if (si->overlay.ow.h_start > (crtc_hend - 2))
227 		{
228 			/* increase 'number of clipping pixels' with 'fixed value': (total dest. width - 2) */
229 			moi->hsrcendv += (si->overlay.ow.width - 2);
230 		}
231 		else
232 		{
233 			/* increase 'number of clipping pixels' with actual number of dest. clipping pixels */
234 			moi->hsrcendv += ((si->overlay.ow.h_start + si->overlay.ow.width - 1) - (crtc_hend - 1));
235 		}
236 		LOG(4,("Overlay: clipping right...\n"));
237 
238 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
239 		 * Note that this also already takes care of aligning the value to the BES register! */
240 		moi->hsrcendv *= si->overlay.h_ifactor;
241 		/* now subtract this value from the last used pixel in (zoomed) inputbuffer, aligned to BES */
242 		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16) - moi->hsrcendv;
243 	}
244 	else
245 	{
246 		/* set last contributing pixel to last used pixel in (zoomed) inputbuffer, aligned to BES */
247 		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16);
248 	}
249 	/* AND below required by hardware */
250 	moi->hsrcendv &= 0x03fffffc;
251 	LOG(4,("Overlay: last horizontal (sub)pixel of input bitmap contributing %f\n", moi->hsrcendv / (float)65536));
252 
253 
254 	/*******************************
255 	 *** setup vertical clipping ***
256 	 *******************************/
257 
258 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
259 	/* Note: this exists of two parts:
260 	 * 1. setup fractional part (sign is always 'positive');
261 	 * 2. setup relative base_adress, taking clipping on top (and zoom) into account.
262 	 * Both parts are done intertwined below. */
263 	/* Note:
264 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
265 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
266 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
267 	/* Note also:
268 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
269 
270 	/* calculate relative base_adress and 'vertical weight fractional part' */
271 	moi->v1srcstv = 0;
272 	/* calculate origin adress */
273 	moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
274 	moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer);
275 	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
276 	/* check for destination vertical clipping at top side */
277 	if (si->overlay.ow.v_start < crtc_vstart)
278 	{
279 		/* check if entire destination picture is clipping at top:
280 		 * (2 pixels will be clamped onscreen at least) */
281 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
282 		{
283 			/* increase source buffer origin with 'fixed value':
284 			 * (integer part of ('total height - 2' of dest. picture in pixels * inverse scaling factor)) *
285 			 * bytes per row source picture */
286 			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
287 			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
288 		}
289 		else
290 		{
291 			/* increase source buffer origin with:
292 			 * (integer part of (number of destination picture clipping pixels * inverse scaling factor)) *
293 			 * bytes per row source picture */
294 			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
295 			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
296 		}
297 		LOG(4,("Overlay: clipping at top...\n"));
298 	}
299 	/* take zoom into account */
300 	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
301 	moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
302 	LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
303 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
304 
305 	/* Note:
306 	 * Because all > G200 overlay units will ignore b0-3 of the calculated adress,
307 	 * we do not use the above way for horizontal source positioning.
308 	 * (G200 cards ignore b0-2.)
309 	 * If we did, 8 source-image pixel jumps (in 4:2:2 colorspace) will occur if the picture
310 	 * is shifted horizontally during left clipping on all > G200 cards, while G200 cards
311 	 * will have 4 source-image pixel jumps occuring. */
312 
313 	/* AND below is required by G200-G550 hardware. > G200 cards can have max. 32Mb RAM on board
314 	 * (16Mb on G200 cards). Compatible setting used (between G200 and the rest), this has no
315 	 * downside consequences here. */
316 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
317 	moi->a1orgv &= 0x01fffff0;
318 
319 	/* field 1 weight: AND below required by hardware, also make sure 'sign' is always 'positive' */
320 	moi->v1srcstv &= 0x0000fffc;
321 }
322 
323 static void gx00_bes_program_move_overlay(move_overlay_info moi)
324 {
325 	/*************************************
326 	 *** sync to BES (Back End Scaler) ***
327 	 *************************************/
328 
329 	/* Make sure reprogramming the BES completes before the next retrace occurs,
330 	 * to prevent register-update glitches (double buffer feature). */
331 
332 	LOG(3,("Overlay: starting register programming beyond Vcount %d\n", CR1R(VCOUNT)));
333 	/* Even at 1600x1200x90Hz, a single line still takes about 9uS to complete:
334 	 * this resolution will generate about 180Mhz pixelclock while we can do
335 	 * upto 360Mhz. So snooze about 4uS to prevent bus-congestion...
336 	 * Appr. 200 lines time will provide enough room even on a 100Mhz CPU if it's
337 	 * screen is set to the highest refreshrate/resolution possible. */
338 	while ((uint16)CR1R(VCOUNT) > (si->dm.timing.v_total - 200)) snooze(4);
339 
340 
341 	/**************************************
342 	 *** actually program the registers ***
343 	 **************************************/
344 
345 	BESW(HCOORD, moi.hcoordv);
346 	BESW(VCOORD, moi.vcoordv);
347 	BESW(HSRCST, moi.hsrcstv);
348 	BESW(HSRCEND, moi.hsrcendv);
349 	BESW(A1ORG, moi.a1orgv);
350 	BESW(V1WGHT, moi.v1srcstv);
351 
352 	/* on a 500Mhz P3 CPU just logging a line costs 400uS (18-19 vcounts at 1024x768x60Hz)!
353 	 * programming the registers above actually costs 180uS here */
354 	LOG(3,("Overlay: completed at Vcount %d\n", CR1R(VCOUNT)));
355 }
356 
357 status_t gx00_configure_bes
358 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
359 {
360 	/* yuy2 (4:2:2) colorspace calculations */
361 	/* Note: Some calculations will have to be modified for other colorspaces if they are incorporated. */
362 
363 	/* Note:
364 	 * in BeOS R5.0.3 and DANO:
365 	 * 'ow->offset_xxx' is always 0, so not used;
366 	 * 'ow->width' and 'ow->height' are the output window size: does not change
367 	 * if window is clipping;
368 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
369 	 * window. These values can be negative: this means the window is clipping
370 	 * at the left or the top of the display, respectively. */
371 
372 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
373 	 * displayed on screen. This is used for the 'hardware zoom' function. */
374 
375 	/* output window position and clipping info for source buffer */
376 	move_overlay_info moi;
377 	/* calculated BES register values */
378 	uint32 	hiscalv, hsrclstv, viscalv, v1srclstv, globctlv, ctlv;
379 	/* interval representation, used for scaling calculations */
380 	uint16 intrep;
381 	/* inverse scaling factor, used for source positioning */
382 	uint32 ifactor;
383 	/* copy of overlay view which has checked valid values */
384 	overlay_view my_ov;
385 
386 	/* Slowdown the G200-G550 BES if the pixelclock is too high for it to cope.
387 	 * This will in fact half the horizontal resolution of the BES with high
388 	 * pixelclocks (by setting a BES hardware 'zoom' = 2x).
389 	 * If you want optimal output quality better make sure you set the refreshrate/resolution
390 	 * of your monitor not too high ... */
391 	uint16 acczoom = 1;
392 	LOG(4,("Overlay: pixelclock is %dkHz, ", si->dm.timing.pixel_clock));
393 	if (si->dm.timing.pixel_clock > BESMAXSPEED)
394 	{
395 		/* BES running at half speed and resolution */
396 		/* This is how it works (BES slowing down):
397 		 * - Activate BES internal horizontal hardware scaling = 4x (in GLOBCTL below),
398 		 * - This also sets up BES only getting half the amount of pixels per line from
399 		 *   the input picture buffer (in effect half-ing the BES pixelclock input speed).
400 		 * Now in order to get the picture back to original size, we need to also double
401 		 * the inverse horizontal scaling factor here (x4 /2 /2 = 1x again).
402 		 * Note that every other pixel is now doubled or interpolated, according to another
403 		 * GLOBCTL bit. */
404 		acczoom = 2;
405 		LOG(4,("slowing down BES!\n"));
406 	}
407 	else
408 	{
409 		/* BES running at full speed and resolution */
410 		LOG(4,("BES is running at full speed\n"));
411 	}
412 
413 
414 	/**************************************************************************************
415 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
416 	 **************************************************************************************/
417 	my_ov = *ov;
418 	/* check for valid 'coordinates' */
419 	if (my_ov.width == 0) my_ov.width++;
420 	if (my_ov.height == 0) my_ov.height++;
421 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
422 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
423 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
424 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
425 	if (my_ov.v_start > (ob->height - 1))
426 		my_ov.v_start = (ob->height - 1);
427 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
428 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
429 
430 	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
431 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
432 
433 	/* save for nv_bes_calc_move_overlay() */
434 	si->overlay.ow = *ow;
435 	si->overlay.ob = *ob;
436 	si->overlay.my_ov = my_ov;
437 
438 
439 	/********************************
440 	 *** setup horizontal scaling ***
441 	 ********************************/
442 
443 	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
444 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
445 	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
446 
447 	/* determine interval representation value, taking zoom into account */
448 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
449 	{
450 		/* horizontal filtering is ON */
451 		if ((my_ov.width == ow->width) | (ow->width < 2))
452 		{
453 			/* no horizontal scaling used, OR destination width < 2 */
454 			intrep = 0;
455 		}
456 		else
457 		{
458 			intrep = 1;
459 		}
460 	}
461 	else
462 	{
463 		/* horizontal filtering is OFF */
464 		if ((ow->width < my_ov.width) & (ow->width >= 2))
465 		{
466 			/* horizontal downscaling used AND destination width >= 2 */
467 			intrep = 1;
468 		}
469 		else
470 		{
471 			intrep = 0;
472 		}
473 	}
474 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
475 
476 	/* calculate inverse horizontal scaling factor, taking zoom into account */
477 	/* standard scaling formula: */
478 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
479 
480 	/* correct factor to prevent most-right visible 'line' from distorting */
481 	ifactor -= (1 << 2);
482 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
483 
484 	/* compensate for accelerated 2x zoom (slowdown BES if pixelclock is too high) */
485 	hiscalv = ifactor * acczoom;
486 	/* save for gx00_bes_calc_move_overlay() */
487 	si->overlay.h_ifactor = ifactor;
488 	LOG(4,("Overlay: horizontal speed compensated factor is %f\n", (float)65536 / hiscalv));
489 
490 	/* check scaling factor (and modify if needed) to be within scaling limits */
491 	if (((((uint32)my_ov.width) << 16) / 16384) > hiscalv)
492 	{
493 		/* (non-inverse) factor too large, set factor to max. valid value */
494 		hiscalv = ((((uint32)my_ov.width) << 16) / 16384);
495 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
496 	}
497 	if (hiscalv >= (32 << 16))
498 	{
499 		/* (non-inverse) factor too small, set factor to min. valid value */
500 		hiscalv = 0x1ffffc;
501 		LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
502 	}
503 	/* AND below is required by hardware */
504 	hiscalv &= 0x001ffffc;
505 
506 
507 	/******************************
508 	 *** setup vertical scaling ***
509 	 ******************************/
510 
511 	/* determine interval representation value, taking zoom into account */
512 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
513 	{
514 		/* vertical filtering is ON */
515 		if ((my_ov.height == ow->height) | (ow->height < 2))
516 		{
517 			/* no vertical scaling used, OR destination height < 2 */
518 			intrep = 0;
519 		}
520 		else
521 		{
522 			intrep = 1;
523 		}
524 	}
525 	else
526 	{
527 		/* vertical filtering is OFF */
528 		if ((ow->height < my_ov.height) & (ow->height >= 2))
529 		{
530 			/* vertical downscaling used AND destination height >= 2 */
531 			intrep = 1;
532 		}
533 		else
534 		{
535 			intrep = 0;
536 		}
537 	}
538 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
539 
540 	/* calculate inverse vertical scaling factor, taking zoom into account */
541 	/* standard scaling formula: */
542 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
543 
544 	/* correct factor to prevent lowest visible line from distorting */
545 	ifactor -= (1 << 2);
546 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
547 
548 	/* preserve ifactor for source positioning calculations later on */
549 	viscalv = ifactor;
550 	/* save for gx00_bes_calc_move_overlay() */
551 	si->overlay.v_ifactor = ifactor;
552 
553 	/* check scaling factor (and modify if needed) to be within scaling limits */
554 	if (((((uint32)my_ov.height) << 16) / 16384) > viscalv)
555 	{
556 		/* (non-inverse) factor too large, set factor to max. valid value */
557 		viscalv = ((((uint32)my_ov.height) << 16) / 16384);
558 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
559 	}
560 	if (viscalv >= (32 << 16))
561 	{
562 		/* (non-inverse) factor too small, set factor to min. valid value */
563 		viscalv = 0x1ffffc;
564 		LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
565 	}
566 	/* AND below is required by hardware */
567 	viscalv &= 0x001ffffc;
568 
569 
570 	/********************************************************************************
571 	 *** setup all edges of output window, setup horizontal and vertical clipping ***
572 	 ********************************************************************************/
573 	gx00_bes_calc_move_overlay(&moi);
574 
575 
576 	/***************************************
577 	 *** setup misc. source bitmap stuff ***
578 	 ***************************************/
579 
580 	/* setup horizontal source last position excluding slopspace:
581 	 * this is the last pixel that will be used for calculating interpolated pixels */
582 	hsrclstv = ((ob->width - 1) - si->overlay.myBufInfo[offset].slopspace) << 16;
583 	/* AND below required by hardware */
584 	hsrclstv &= 0x03ff0000;
585 
586 	/* setup field 1 (is our complete frame) vertical source last position.
587 	 * this is the last pixel that will be used for calculating interpolated pixels */
588 	v1srclstv = (ob->height - 1);
589 	/* AND below required by hardware */
590 	v1srclstv &= 0x000003ff;
591 
592 
593 	/*****************************
594 	 *** log color keying info ***
595 	 *****************************/
596 
597 	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
598 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
599 	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
600 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
601 
602 
603 	/*************************
604 	 *** setup BES control ***
605 	 *************************/
606 
607 	/* BES global control: setup functions */
608 	globctlv = 0;
609 
610 	/* slowdown BES if nessesary */
611 	if (acczoom == 1)
612 	{
613 		/* run at full speed and resolution */
614 		globctlv |= 0 << 0;
615 		/* disable filtering for half speed interpolation */
616 		globctlv |= 0 << 1;
617 	}
618 	else
619 	{
620 		/* run at half speed and resolution */
621 		globctlv |= 1 << 0;
622 		/* enable filtering for half speed interpolation */
623 		globctlv |= 1 << 1;
624 	}
625 
626 	/* 4:2:0 specific setup: not needed here */
627 	globctlv |= 0 << 3;
628 	/* BES testregister: keep zero */
629 	globctlv |= 0 << 4;
630 	/* the following bits marked (> G200) *must* be zero on G200: */
631 	/* 4:2:0 specific setup: not needed here (> G200) */
632 	globctlv |= 0 << 5;
633 	/* select yuy2 byte-order to B_YCbCr422 (> G200) */
634 	globctlv |= 0 << 6;
635 	/* BES internal contrast and brighness controls are not used, disabled (> G200) */
636 	globctlv |= 0 << 7;
637 	/* RGB specific setup: not needed here, so disabled (> G200) */
638 	globctlv |= 0 << 8;
639 	globctlv |= 0 << 9;
640 	/* 4:2:0 specific setup: not needed here (> G200) */
641 	globctlv |= 0 << 10;
642 	/* Tell BES when to copy the new register values to the actual active registers.
643 	 * bits 16-27 (12 bits) are the CRTC vert. count value at which copying takes
644 	 * place.
645 	 * (This is the double buffering feature: programming must be completed *before*
646 	 *  the CRTC vert count value set here!) */
647 	/* CRTC vert count for copying = $000, so during retrace, line 0. */
648 	globctlv |= 0x000 << 16;
649 
650 	/* BES control: enable scaler and setup functions */
651 	/* pre-reset all bits */
652 	ctlv = 0;
653 	/* enable BES */
654 	ctlv |= 1 << 0;
655 	/* we start displaying at an even startline (zero) in 'field 1' (no hardware de-interlacing is used) */
656 	ctlv |= 0 << 6;
657 	/* we don't use field 2, so its startline is not important */
658 	ctlv |= 0 << 7;
659 
660 	LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags));
661 	/* enable horizontal filtering on scaling if asked for: if we *are* actually scaling */
662 	if ((ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) && (hiscalv != (0x01 << 16)))
663 	{
664 		ctlv |= 1 << 10;
665 		LOG(6,("Overlay: using horizontal interpolation on scaling\n"));
666 	}
667 	else
668 	{
669 		ctlv |= 0 << 10;
670 		LOG(6,("Overlay: using horizontal dropping or replication on scaling\n"));
671 	}
672 	/* enable vertical filtering on scaling if asked for: if we are *upscaling* only */
673 	if ((ow->flags & B_OVERLAY_VERTICAL_FILTERING) && (viscalv < (0x01 << 16)))
674 	{
675 		ctlv |= 1 << 11;
676 		LOG(6,("Overlay: using vertical interpolation on scaling\n"));
677 	}
678 	else
679 	{
680 		ctlv |= 0 << 11;
681 		LOG(6,("Overlay: using vertical dropping or replication on scaling\n"));
682 	}
683 
684 	/* use actual calculated weight for horizontal interpolation */
685 	ctlv |= 0 << 12;
686 	/* use horizontal chroma interpolation upsampling on BES input picture */
687 	ctlv |= 1 << 16;
688 	/* select 4:2:2 BES input format */
689 	ctlv |= 0 << 17;
690 	/* dithering is enabled */
691 	ctlv |= 1 << 18;
692 	/* horizontal mirroring is not used */
693 	ctlv |= 0 << 19;
694 	/* BES output should be in color */
695 	ctlv |= 0 << 20;
696 	/* BES output blanking is disabled: we want a picture, no 'black box'! */
697 	ctlv |= 0 << 21;
698 	/* we do software field select (field select is not used) */
699 	ctlv |= 0 << 24;
700 	/* we always display field 1 in buffer A, this contains our full frames */
701 	/* select field 1 */
702 	ctlv |= 0 << 25;
703 	/* select buffer A */
704 	ctlv |= 0 << 26;
705 
706 
707 	/*************************************
708 	 *** sync to BES (Back End Scaler) ***
709 	 *************************************/
710 
711 	/* Make sure reprogramming the BES completes before the next retrace occurs,
712 	 * to prevent register-update glitches (double buffer feature). */
713 
714 	LOG(3,("Overlay: starting register programming beyond Vcount %d\n", CR1R(VCOUNT)));
715 	/* Even at 1600x1200x90Hz, a single line still takes about 9uS to complete:
716 	 * this resolution will generate about 180Mhz pixelclock while we can do
717 	 * upto 360Mhz. So snooze about 4uS to prevent bus-congestion...
718 	 * Appr. 200 lines time will provide enough room even on a 100Mhz CPU if it's
719 	 * screen is set to the highest refreshrate/resolution possible. */
720 	while ((uint16)CR1R(VCOUNT) > (si->dm.timing.v_total - 200)) snooze(4);
721 
722 
723 	/**************************************
724 	 *** actually program the registers ***
725 	 **************************************/
726 
727 	BESW(HCOORD, moi.hcoordv);
728 	BESW(VCOORD, moi.vcoordv);
729 	BESW(HISCAL, hiscalv);
730 	BESW(HSRCST, moi.hsrcstv);
731 	BESW(HSRCEND, moi.hsrcendv);
732 	BESW(HSRCLST, hsrclstv);
733 	BESW(VISCAL, viscalv);
734 	BESW(A1ORG, moi.a1orgv);
735 	BESW(V1WGHT, moi.v1srcstv);
736 	BESW(V1SRCLST, v1srclstv);
737 	BESW(GLOBCTL, globctlv);
738 	BESW(CTL, ctlv);
739 
740 
741 	/**************************
742 	 *** setup color keying ***
743 	 **************************/
744 
745 	/* setup colorkeying */
746 	DXIW(COLKEY, (ow->alpha.value & ow->alpha.mask));
747 
748 	DXIW(COLKEY0RED, (ow->red.value & ow->red.mask));
749 	DXIW(COLKEY0GREEN, (ow->green.value & ow->green.mask));
750 	DXIW(COLKEY0BLUE, (ow->blue.value & ow->blue.mask));
751 
752 	DXIW(COLMSK, ow->alpha.mask);
753 
754 	DXIW(COLMSK0RED, ow->red.mask);
755 	DXIW(COLMSK0GREEN, ow->green.mask);
756 	DXIW(COLMSK0BLUE, ow->blue.mask);
757 
758 	/* setup colorkeying */
759 	if (ow->flags & B_OVERLAY_COLOR_KEY)
760 		DXIW(KEYOPMODE,0x01);
761 	else
762 		DXIW(KEYOPMODE,0x00);
763 
764 
765 	/*************************
766 	 *** setup misc. stuff ***
767 	 *************************/
768 
769 	/* setup brightness and contrast to be 'neutral' (this is not implemented on G200) */
770 	BESW(LUMACTL, 0x00000080);
771 
772 	/* setup source pitch including slopspace (in pixels); AND is required by hardware */
773 	BESW(PITCH, (ob->width & 0x00000fff));
774 
775 	/* on a 500Mhz P3 CPU just logging a line costs 400uS (18-19 vcounts at 1024x768x60Hz)!
776 	 * programming the registers above actually costs 180uS here */
777 	LOG(3,("Overlay: completed at Vcount %d\n", CR1R(VCOUNT)));
778 
779 	/* note that overlay is in use (for gx00_bes_move_overlay()) */
780 	si->overlay.active = true;
781 
782 	return B_OK;
783 }
784 
785 status_t gx00_release_bes()
786 {
787 	/* setup BES control: disable scaler */
788 	BESW(CTL, 0x00000000);
789 
790 	/* note that overlay is not in use (for gx00_bes_move_overlay()) */
791 	si->overlay.active = false;
792 
793 	return B_OK;
794 }
795