xref: /haiku/src/add-ons/accelerants/matrox/engine/mga_bes.c (revision 13581b3d2a71545960b98fefebc5225b5bf29072)
1 /* G200-G550 Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-11/2009 */
3 
4 #define MODULE_BIT 0x00000200
5 
6 #include "mga_std.h"
7 
8 typedef struct move_overlay_info move_overlay_info;
9 
10 struct move_overlay_info
11 {
12 	uint32 hcoordv;		/* left and right edges of video output window */
13 	uint32 vcoordv;		/* top and bottom edges of video output window */
14 	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15 	uint32 hsrcendv;	/* horizontal source end in source buffer (clipping) */
16 	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
17 	uintptr_t a1orgv;	/* alternate source clipping via startadress of source buffer */
18 };
19 
20 static void gx00_bes_calc_move_overlay(move_overlay_info *moi);
21 static void gx00_bes_program_move_overlay(move_overlay_info moi);
22 
23 /* move the overlay output window in virtualscreens */
24 /* Note:
25  * si->dm.h_display_start and si->dm.v_display_start determine where the new
26  * output window is located! */
27 void gx00_bes_move_overlay()
28 {
29 	move_overlay_info moi;
30 
31 	/* abort if overlay is not active */
32 	if (!si->overlay.active) return;
33 
34 	gx00_bes_calc_move_overlay(&moi);
35 	gx00_bes_program_move_overlay(moi);
36 }
37 
38 static void gx00_bes_calc_move_overlay(move_overlay_info *moi)
39 {
40 	/* misc used variables */
41 	uint16 temp1, temp2;
42 	/* visible screen window in virtual workspaces */
43 	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
44 
45 	/* the BES does not respect virtual_workspaces, but adheres to CRTC
46 	 * constraints only */
47 	crtc_hstart = si->dm.h_display_start;
48 	/* make dualhead switch mode with TVout enabled work while we're at it.. */
49 	if (si->switched_crtcs)
50 	{
51 		crtc_hstart += si->dm.timing.h_display;
52 	}
53 	/* horizontal end is the first position beyond the displayed range on the CRTC */
54 	crtc_hend = crtc_hstart + si->dm.timing.h_display;
55 	crtc_vstart = si->dm.v_display_start;
56 	/* vertical end is the first position beyond the displayed range on the CRTC */
57 	crtc_vend = crtc_vstart + si->dm.timing.v_display;
58 
59 
60 	/****************************************
61 	 *** setup all edges of output window ***
62 	 ****************************************/
63 
64 	/* setup left and right edges of output window */
65 	moi->hcoordv = 0;
66 	/* left edge coordinate of output window, must be inside desktop */
67 	/* clipping on the left side */
68 	if (si->overlay.ow.h_start < crtc_hstart)
69 	{
70 		temp1 = 0;
71 	}
72 	else
73 	{
74 		/* clipping on the right side */
75 		if (si->overlay.ow.h_start >= (crtc_hend - 1))
76 		{
77 			/* width < 2 is not allowed */
78 			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
79 		}
80 		else
81 		/* no clipping here */
82 		{
83 			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
84 		}
85 	}
86 	moi->hcoordv |= temp1 << 16;
87 	/* right edge coordinate of output window, must be inside desktop */
88 	/* width < 2 is not allowed */
89 	if (si->overlay.ow.width < 2)
90 	{
91 		temp2 = (temp1 + 1) & 0x7ff;
92 	}
93 	else
94 	{
95 		/* clipping on the right side */
96 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
97 		{
98 			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
99 		}
100 		else
101 		{
102 			/* clipping on the left side */
103 			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
104 			{
105 				/* width < 2 is not allowed */
106 				temp2 = 1;
107 			}
108 			else
109 			/* no clipping here */
110 			{
111 				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
112 			}
113 		}
114 	}
115 	moi->hcoordv |= temp2 << 0;
116 	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
117 
118 	/* setup top and bottom edges of output window */
119 	moi->vcoordv = 0;
120 	/* top edge coordinate of output window, must be inside desktop */
121 	/* clipping on the top side */
122 	if (si->overlay.ow.v_start < crtc_vstart)
123 	{
124 		temp1 = 0;
125 	}
126 	else
127 	{
128 		/* clipping on the bottom side */
129 		if (si->overlay.ow.v_start >= (crtc_vend - 1))
130 		{
131 			/* height < 2 is not allowed */
132 			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
133 		}
134 		else
135 		/* no clipping here */
136 		{
137 			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
138 		}
139 	}
140 	moi->vcoordv |= temp1 << 16;
141 	/* bottom edge coordinate of output window, must be inside desktop */
142 	/* height < 2 is not allowed */
143 	if (si->overlay.ow.height < 2)
144 	{
145 		temp2 = (temp1 + 1) & 0x7ff;
146 	}
147 	else
148 	{
149 		/* clipping on the bottom side */
150 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
151 		{
152 			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
153 		}
154 		else
155 		{
156 			/* clipping on the top side */
157 			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
158 			{
159 				/* height < 2 is not allowed */
160 				temp2 = 1;
161 			}
162 			else
163 			/* no clipping here */
164 			{
165 				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
166 			}
167 		}
168 	}
169 	moi->vcoordv |= temp2 << 0;
170 	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
171 
172 
173 	/*********************************
174 	 *** setup horizontal clipping ***
175 	 *********************************/
176 
177 	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
178 	/* Note:
179 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
180 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
181 	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
182 	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
183 	/* Note also:
184 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
185 	moi->hsrcstv = 0;
186 	/* check for destination horizontal clipping at left side */
187 	if (si->overlay.ow.h_start < crtc_hstart)
188 	{
189 		/* check if entire destination picture is clipping left:
190 		 * (2 pixels will be clamped onscreen at least) */
191 		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
192 		{
193 			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
194 			moi->hsrcstv += (si->overlay.ow.width - 2);
195 		}
196 		else
197 		{
198 			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
199 			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
200 		}
201 		LOG(4,("Overlay: clipping left...\n"));
202 
203 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
204 		 * Note that this also already takes care of aligning the value to the BES register! */
205 		moi->hsrcstv *= si->overlay.h_ifactor;
206 	}
207 	/* take zoom into account */
208 	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
209 	/* AND below required by hardware */
210 	moi->hsrcstv &= 0x07fffffc;
211 	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
212 
213 	/* Setup horizontal source end: last (sub)pixel contributing to output picture */
214 	/* Note:
215 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
216 	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
217 	 * Then add the right ending position of the bitmap's view (zoom function) to get the final value needed. */
218 	/* Note also:
219 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source end pos.! */
220 	moi->hsrcendv = 0;
221 	/* check for destination horizontal clipping at right side */
222 	if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
223 	{
224 		/* check if entire destination picture is clipping right:
225 		 * (2 pixels will be clamped onscreen at least) */
226 		if (si->overlay.ow.h_start > (crtc_hend - 2))
227 		{
228 			/* increase 'number of clipping pixels' with 'fixed value': (total dest. width - 2) */
229 			moi->hsrcendv += (si->overlay.ow.width - 2);
230 		}
231 		else
232 		{
233 			/* increase 'number of clipping pixels' with actual number of dest. clipping pixels */
234 			moi->hsrcendv += ((si->overlay.ow.h_start + si->overlay.ow.width - 1) - (crtc_hend - 1));
235 		}
236 		LOG(4,("Overlay: clipping right...\n"));
237 
238 		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
239 		 * Note that this also already takes care of aligning the value to the BES register! */
240 		moi->hsrcendv *= si->overlay.h_ifactor;
241 		/* now subtract this value from the last used pixel in (zoomed) inputbuffer, aligned to BES */
242 		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16) - moi->hsrcendv;
243 	}
244 	else
245 	{
246 		/* set last contributing pixel to last used pixel in (zoomed) inputbuffer, aligned to BES */
247 		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16);
248 	}
249 	/* AND below required by hardware (confirmed G200 can do upto 1024 pixels, G450 and G550 can do above.) */
250 	moi->hsrcendv &= 0x07fffffc;
251 	LOG(4,("Overlay: last horizontal (sub)pixel of input bitmap contributing %f\n", moi->hsrcendv / (float)65536));
252 
253 
254 	/*******************************
255 	 *** setup vertical clipping ***
256 	 *******************************/
257 
258 	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
259 	/* Note: this exists of two parts:
260 	 * 1. setup fractional part (sign is always 'positive');
261 	 * 2. setup relative base_adress, taking clipping on top (and zoom) into account.
262 	 * Both parts are done intertwined below. */
263 	/* Note:
264 	 * The method is to calculate, based on 1:1 scaling, based on the output window.
265 	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
266 	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
267 	/* Note also:
268 	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
269 
270 	/* calculate relative base_adress and 'vertical weight fractional part' */
271 	moi->v1srcstv = 0;
272 	/* calculate origin adress */
273 	moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
274 	moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
275 	LOG(4, ("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
276 	/* check for destination vertical clipping at top side */
277 	if (si->overlay.ow.v_start < crtc_vstart)
278 	{
279 		/* check if entire destination picture is clipping at top:
280 		 * (2 pixels will be clamped onscreen at least) */
281 		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
282 		{
283 			/* increase source buffer origin with 'fixed value':
284 			 * (integer part of ('total height - 2' of dest. picture in pixels * inverse scaling factor)) *
285 			 * bytes per row source picture */
286 			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
287 			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
288 		} else {
289 			/* increase source buffer origin with:
290 			 * (integer part of (number of destination picture clipping pixels * inverse scaling factor)) *
291 			 * bytes per row source picture */
292 			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
293 			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
294 		}
295 		LOG(4,("Overlay: clipping at top...\n"));
296 	}
297 	/* take zoom into account */
298 	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
299 	moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
300 	LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
301 	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
302 
303 	/* Note:
304 	 * Because all > G200 overlay units will ignore b0-3 of the calculated adress,
305 	 * we do not use the above way for horizontal source positioning.
306 	 * (G200 cards ignore b0-2.)
307 	 * If we did, 8 source-image pixel jumps (in 4:2:2 colorspace) will occur if the picture
308 	 * is shifted horizontally during left clipping on all > G200 cards, while G200 cards
309 	 * will have 4 source-image pixel jumps occuring. */
310 
311 	/* AND below is required by G200-G550 hardware. > G200 cards can have max. 32Mb RAM on board
312 	 * (16Mb on G200 cards). Compatible setting used (between G200 and the rest), this has no
313 	 * downside consequences here. */
314 	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
315 	moi->a1orgv &= 0x01fffff0;
316 
317 	/* field 1 weight: AND below required by hardware, also make sure 'sign' is always 'positive' */
318 	moi->v1srcstv &= 0x0000fffc;
319 }
320 
321 static void gx00_bes_program_move_overlay(move_overlay_info moi)
322 {
323 	/*************************************
324 	 *** sync to BES (Back End Scaler) ***
325 	 *************************************/
326 
327 	/* Make sure reprogramming the BES completes before the next retrace occurs,
328 	 * to prevent register-update glitches (double buffer feature). */
329 
330 	LOG(3,("Overlay: starting register programming beyond Vcount %d\n", CR1R(VCOUNT)));
331 	/* Even at 1600x1200x90Hz, a single line still takes about 9uS to complete:
332 	 * this resolution will generate about 180Mhz pixelclock while we can do
333 	 * upto 360Mhz. So snooze about 4uS to prevent bus-congestion...
334 	 * Appr. 200 lines time will provide enough room even on a 100Mhz CPU if it's
335 	 * screen is set to the highest refreshrate/resolution possible. */
336 	while ((uint16)CR1R(VCOUNT) > (si->dm.timing.v_total - 200)) snooze(4);
337 
338 
339 	/**************************************
340 	 *** actually program the registers ***
341 	 **************************************/
342 
343 	BESW(HCOORD, moi.hcoordv);
344 	BESW(VCOORD, moi.vcoordv);
345 	BESW(HSRCST, moi.hsrcstv);
346 	BESW(HSRCEND, moi.hsrcendv);
347 	BESW(A1ORG, moi.a1orgv);
348 	BESW(V1WGHT, moi.v1srcstv);
349 
350 	/* on a 500Mhz P3 CPU just logging a line costs 400uS (18-19 vcounts at 1024x768x60Hz)!
351 	 * programming the registers above actually costs 180uS here */
352 	LOG(3,("Overlay: completed at Vcount %d\n", CR1R(VCOUNT)));
353 }
354 
355 status_t gx00_configure_bes
356 	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
357 {
358 	/* yuy2 (4:2:2) colorspace calculations */
359 	/* Note: Some calculations will have to be modified for other colorspaces if they are incorporated. */
360 
361 	/* Note:
362 	 * in BeOS R5.0.3 and DANO:
363 	 * 'ow->offset_xxx' is always 0, so not used;
364 	 * 'ow->width' and 'ow->height' are the output window size: does not change
365 	 * if window is clipping;
366 	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
367 	 * window. These values can be negative: this means the window is clipping
368 	 * at the left or the top of the display, respectively. */
369 
370 	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
371 	 * displayed on screen. This is used for the 'hardware zoom' function. */
372 
373 	/* output window position and clipping info for source buffer */
374 	move_overlay_info moi;
375 	/* calculated BES register values */
376 	uint32 	hiscalv, hsrclstv, viscalv, v1srclstv, globctlv, ctlv;
377 	/* interval representation, used for scaling calculations */
378 	uint16 intrep;
379 	/* inverse scaling factor, used for source positioning */
380 	uint32 ifactor;
381 	/* copy of overlay view which has checked valid values */
382 	overlay_view my_ov;
383 
384 	/* Slowdown the G200-G550 BES if the pixelclock is too high for it to cope.
385 	 * This will in fact half the horizontal resolution of the BES with high
386 	 * pixelclocks (by setting a BES hardware 'zoom' = 2x).
387 	 * If you want optimal output quality better make sure you set the refreshrate/resolution
388 	 * of your monitor not too high ... */
389 	uint16 acczoom = 1;
390 	LOG(4,("Overlay: pixelclock is %dkHz, ", si->dm.timing.pixel_clock));
391 	if (si->dm.timing.pixel_clock > BESMAXSPEED)
392 	{
393 		/* BES running at half speed and resolution */
394 		/* This is how it works (BES slowing down):
395 		 * - Activate BES internal horizontal hardware scaling = 4x (in GLOBCTL below),
396 		 * - This also sets up BES only getting half the amount of pixels per line from
397 		 *   the input picture buffer (in effect half-ing the BES pixelclock input speed).
398 		 * Now in order to get the picture back to original size, we need to also double
399 		 * the inverse horizontal scaling factor here (x4 /2 /2 = 1x again).
400 		 * Note that every other pixel is now doubled or interpolated, according to another
401 		 * GLOBCTL bit. */
402 		acczoom = 2;
403 		LOG(4,("slowing down BES!\n"));
404 	}
405 	else
406 	{
407 		/* BES running at full speed and resolution */
408 		LOG(4,("BES is running at full speed\n"));
409 	}
410 
411 
412 	/**************************************************************************************
413 	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
414 	 **************************************************************************************/
415 	my_ov = *ov;
416 	/* check for valid 'coordinates' */
417 	if (my_ov.width == 0) my_ov.width++;
418 	if (my_ov.height == 0) my_ov.height++;
419 	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
420 		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
421 	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
422 		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
423 	if (my_ov.v_start > (ob->height - 1))
424 		my_ov.v_start = (ob->height - 1);
425 	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
426 		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
427 
428 	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
429 		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
430 
431 	/* save for nv_bes_calc_move_overlay() */
432 	si->overlay.ow = *ow;
433 	si->overlay.ob = *ob;
434 	si->overlay.my_ov = my_ov;
435 
436 
437 	/********************************
438 	 *** setup horizontal scaling ***
439 	 ********************************/
440 
441 	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
442 			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
443 	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
444 
445 	/* determine interval representation value, taking zoom into account */
446 	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
447 	{
448 		/* horizontal filtering is ON */
449 		if ((my_ov.width == ow->width) | (ow->width < 2))
450 		{
451 			/* no horizontal scaling used, OR destination width < 2 */
452 			intrep = 0;
453 		}
454 		else
455 		{
456 			intrep = 1;
457 		}
458 	}
459 	else
460 	{
461 		/* horizontal filtering is OFF */
462 		if ((ow->width < my_ov.width) & (ow->width >= 2))
463 		{
464 			/* horizontal downscaling used AND destination width >= 2 */
465 			intrep = 1;
466 		}
467 		else
468 		{
469 			intrep = 0;
470 		}
471 	}
472 	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
473 
474 	/* calculate inverse horizontal scaling factor, taking zoom into account */
475 	/* standard scaling formula: */
476 	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
477 
478 	/* correct factor to prevent most-right visible 'line' from distorting */
479 	ifactor -= (1 << 2);
480 	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
481 
482 	/* compensate for accelerated 2x zoom (slowdown BES if pixelclock is too high) */
483 	hiscalv = ifactor * acczoom;
484 	/* save for gx00_bes_calc_move_overlay() */
485 	si->overlay.h_ifactor = ifactor;
486 	LOG(4,("Overlay: horizontal speed compensated factor is %f\n", (float)65536 / hiscalv));
487 
488 	/* check scaling factor (and modify if needed) to be within scaling limits */
489 	if (((((uint32)my_ov.width) << 16) / 16384) > hiscalv)
490 	{
491 		/* (non-inverse) factor too large, set factor to max. valid value */
492 		hiscalv = ((((uint32)my_ov.width) << 16) / 16384);
493 		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
494 	}
495 	if (hiscalv >= (32 << 16))
496 	{
497 		/* (non-inverse) factor too small, set factor to min. valid value */
498 		hiscalv = 0x1ffffc;
499 		LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
500 	}
501 	/* AND below is required by hardware */
502 	hiscalv &= 0x001ffffc;
503 
504 
505 	/******************************
506 	 *** setup vertical scaling ***
507 	 ******************************/
508 
509 	/* determine interval representation value, taking zoom into account */
510 	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
511 	{
512 		/* vertical filtering is ON */
513 		if ((my_ov.height == ow->height) | (ow->height < 2))
514 		{
515 			/* no vertical scaling used, OR destination height < 2 */
516 			intrep = 0;
517 		}
518 		else
519 		{
520 			intrep = 1;
521 		}
522 	}
523 	else
524 	{
525 		/* vertical filtering is OFF */
526 		if ((ow->height < my_ov.height) & (ow->height >= 2))
527 		{
528 			/* vertical downscaling used AND destination height >= 2 */
529 			intrep = 1;
530 		}
531 		else
532 		{
533 			intrep = 0;
534 		}
535 	}
536 	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
537 
538 	/* calculate inverse vertical scaling factor, taking zoom into account */
539 	/* standard scaling formula: */
540 	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
541 
542 	/* correct factor to prevent lowest visible line from distorting */
543 	ifactor -= (1 << 2);
544 	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
545 
546 	/* preserve ifactor for source positioning calculations later on */
547 	viscalv = ifactor;
548 	/* save for gx00_bes_calc_move_overlay() */
549 	si->overlay.v_ifactor = ifactor;
550 
551 	/* check scaling factor (and modify if needed) to be within scaling limits */
552 	if (((((uint32)my_ov.height) << 16) / 16384) > viscalv)
553 	{
554 		/* (non-inverse) factor too large, set factor to max. valid value */
555 		viscalv = ((((uint32)my_ov.height) << 16) / 16384);
556 		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
557 	}
558 	if (viscalv >= (32 << 16))
559 	{
560 		/* (non-inverse) factor too small, set factor to min. valid value */
561 		viscalv = 0x1ffffc;
562 		LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
563 	}
564 	/* AND below is required by hardware */
565 	viscalv &= 0x001ffffc;
566 
567 
568 	/********************************************************************************
569 	 *** setup all edges of output window, setup horizontal and vertical clipping ***
570 	 ********************************************************************************/
571 	gx00_bes_calc_move_overlay(&moi);
572 
573 
574 	/***************************************
575 	 *** setup misc. source bitmap stuff ***
576 	 ***************************************/
577 
578 	/* setup horizontal source last position excluding slopspace:
579 	 * this is the last pixel that will be used for calculating interpolated pixels */
580 	hsrclstv = ((ob->width - 1) - si->overlay.myBufInfo[offset].slopspace) << 16;
581 	/* AND below required by hardware */
582 	hsrclstv &= 0x07ff0000;
583 
584 	/* setup field 1 (is our complete frame) vertical source last position.
585 	 * this is the last pixel that will be used for calculating interpolated pixels */
586 	v1srclstv = (ob->height - 1);
587 	/* AND below required by hardware */
588 	v1srclstv &= 0x000007ff;
589 
590 
591 	/*****************************
592 	 *** log color keying info ***
593 	 *****************************/
594 
595 	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
596 		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
597 	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
598 		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
599 
600 
601 	/*************************
602 	 *** setup BES control ***
603 	 *************************/
604 
605 	/* BES global control: setup functions */
606 	globctlv = 0;
607 
608 	/* slowdown BES if nessesary */
609 	if (acczoom == 1)
610 	{
611 		/* run at full speed and resolution */
612 		globctlv |= 0 << 0;
613 		/* disable filtering for half speed interpolation */
614 		globctlv |= 0 << 1;
615 	}
616 	else
617 	{
618 		/* run at half speed and resolution */
619 		globctlv |= 1 << 0;
620 		/* enable filtering for half speed interpolation */
621 		globctlv |= 1 << 1;
622 	}
623 
624 	/* 4:2:0 specific setup: not needed here */
625 	globctlv |= 0 << 3;
626 	/* BES testregister: keep zero */
627 	globctlv |= 0 << 4;
628 	/* the following bits marked (> G200) *must* be zero on G200: */
629 	/* 4:2:0 specific setup: not needed here (> G200) */
630 	globctlv |= 0 << 5;
631 	/* select yuy2 byte-order to B_YCbCr422 (> G200) */
632 	globctlv |= 0 << 6;
633 	/* BES internal contrast and brighness controls are not used, disabled (> G200) */
634 	globctlv |= 0 << 7;
635 	/* RGB specific setup: not needed here, so disabled (> G200) */
636 	globctlv |= 0 << 8;
637 	globctlv |= 0 << 9;
638 	/* 4:2:0 specific setup: not needed here (> G200) */
639 	globctlv |= 0 << 10;
640 	/* Tell BES when to copy the new register values to the actual active registers.
641 	 * bits 16-27 (12 bits) are the CRTC vert. count value at which copying takes
642 	 * place.
643 	 * (This is the double buffering feature: programming must be completed *before*
644 	 *  the CRTC vert count value set here!) */
645 	/* CRTC vert count for copying = $000, so during retrace, line 0. */
646 	globctlv |= 0x000 << 16;
647 
648 	/* BES control: enable scaler and setup functions */
649 	/* pre-reset all bits */
650 	ctlv = 0;
651 	/* enable BES */
652 	ctlv |= 1 << 0;
653 	/* we start displaying at an even startline (zero) in 'field 1' (no hardware de-interlacing is used) */
654 	ctlv |= 0 << 6;
655 	/* we don't use field 2, so its startline is not important */
656 	ctlv |= 0 << 7;
657 
658 	LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags));
659 	/* enable horizontal filtering on scaling if asked for: if we *are* actually scaling */
660 	if ((ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) && (hiscalv != (0x01 << 16)))
661 	{
662 		ctlv |= 1 << 10;
663 		LOG(6,("Overlay: using horizontal interpolation on scaling\n"));
664 	}
665 	else
666 	{
667 		ctlv |= 0 << 10;
668 		LOG(6,("Overlay: using horizontal dropping or replication on scaling\n"));
669 	}
670 	/* enable vertical filtering on scaling if asked for: if we are *upscaling* only */
671 	if ((ow->flags & B_OVERLAY_VERTICAL_FILTERING) && (viscalv < (0x01 << 16)) && (ob->width <= 1024))	{
672 		ctlv |= 1 << 11;
673 		LOG(6,("Overlay: using vertical interpolation on scaling\n"));
674 	} else {
675 		ctlv |= 0 << 11;
676 		LOG(6,("Overlay: using vertical dropping or replication on scaling\n"));
677 	}
678 
679 	/* use actual calculated weight for horizontal interpolation */
680 	ctlv |= 0 << 12;
681 	/* use horizontal chroma interpolation upsampling on BES input picture */
682 	ctlv |= 1 << 16;
683 	/* select 4:2:2 BES input format */
684 	ctlv |= 0 << 17;
685 	/* dithering is enabled */
686 	ctlv |= 1 << 18;
687 	/* horizontal mirroring is not used */
688 	ctlv |= 0 << 19;
689 	/* BES output should be in color */
690 	ctlv |= 0 << 20;
691 	/* BES output blanking is disabled: we want a picture, no 'black box'! */
692 	ctlv |= 0 << 21;
693 	/* we do software field select (field select is not used) */
694 	ctlv |= 0 << 24;
695 	/* we always display field 1 in buffer A, this contains our full frames */
696 	/* select field 1 */
697 	ctlv |= 0 << 25;
698 	/* select buffer A */
699 	ctlv |= 0 << 26;
700 
701 
702 	/*************************************
703 	 *** sync to BES (Back End Scaler) ***
704 	 *************************************/
705 
706 	/* Make sure reprogramming the BES completes before the next retrace occurs,
707 	 * to prevent register-update glitches (double buffer feature). */
708 
709 	LOG(3,("Overlay: starting register programming beyond Vcount %d\n", CR1R(VCOUNT)));
710 	/* Even at 1600x1200x90Hz, a single line still takes about 9uS to complete:
711 	 * this resolution will generate about 180Mhz pixelclock while we can do
712 	 * upto 360Mhz. So snooze about 4uS to prevent bus-congestion...
713 	 * Appr. 200 lines time will provide enough room even on a 100Mhz CPU if it's
714 	 * screen is set to the highest refreshrate/resolution possible. */
715 	while ((uint16)CR1R(VCOUNT) > (si->dm.timing.v_total - 200)) snooze(4);
716 
717 
718 	/**************************************
719 	 *** actually program the registers ***
720 	 **************************************/
721 
722 	BESW(HCOORD, moi.hcoordv);
723 	BESW(VCOORD, moi.vcoordv);
724 	BESW(HISCAL, hiscalv);
725 	BESW(HSRCST, moi.hsrcstv);
726 	BESW(HSRCEND, moi.hsrcendv);
727 	BESW(HSRCLST, hsrclstv);
728 	BESW(VISCAL, viscalv);
729 	BESW(A1ORG, moi.a1orgv);
730 	BESW(V1WGHT, moi.v1srcstv);
731 	BESW(V1SRCLST, v1srclstv);
732 	BESW(GLOBCTL, globctlv);
733 	BESW(CTL, ctlv);
734 
735 
736 	/**************************
737 	 *** setup color keying ***
738 	 **************************/
739 
740 	/* setup colorkeying */
741 	DXIW(COLKEY, (ow->alpha.value & ow->alpha.mask));
742 
743 	DXIW(COLKEY0RED, (ow->red.value & ow->red.mask));
744 	DXIW(COLKEY0GREEN, (ow->green.value & ow->green.mask));
745 	DXIW(COLKEY0BLUE, (ow->blue.value & ow->blue.mask));
746 
747 	DXIW(COLMSK, ow->alpha.mask);
748 
749 	DXIW(COLMSK0RED, ow->red.mask);
750 	DXIW(COLMSK0GREEN, ow->green.mask);
751 	DXIW(COLMSK0BLUE, ow->blue.mask);
752 
753 	/* setup colorkeying */
754 	if (ow->flags & B_OVERLAY_COLOR_KEY)
755 		DXIW(KEYOPMODE,0x01);
756 	else
757 		DXIW(KEYOPMODE,0x00);
758 
759 
760 	/*************************
761 	 *** setup misc. stuff ***
762 	 *************************/
763 
764 	/* setup brightness and contrast to be 'neutral' (this is not implemented on G200) */
765 	BESW(LUMACTL, 0x00000080);
766 
767 	/* setup source pitch including slopspace (in pixels); AND is required by hardware */
768 	BESW(PITCH, (ob->width & 0x00000fff));
769 
770 	/* on a 500Mhz P3 CPU just logging a line costs 400uS (18-19 vcounts at 1024x768x60Hz)!
771 	 * programming the registers above actually costs 180uS here */
772 	LOG(3,("Overlay: completed at Vcount %d\n", CR1R(VCOUNT)));
773 
774 	/* note that overlay is in use (for gx00_bes_move_overlay()) */
775 	si->overlay.active = true;
776 
777 	return B_OK;
778 }
779 
780 status_t gx00_release_bes()
781 {
782 	/* setup BES control: disable scaler */
783 	BESW(CTL, 0x00000000);
784 
785 	/* note that overlay is not in use (for gx00_bes_move_overlay()) */
786 	si->overlay.active = false;
787 
788 	return B_OK;
789 }
790