xref: /haiku/src/add-ons/accelerants/intel_extreme/overlay.cpp (revision 529cd177b573aaba391c8adc9c9f5ad76a14bf81)
1 /*
2  * Copyright 2006-2009, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *
8  * The phase coefficient computation was taken from the X driver written by
9  * Alan Hourihane and David Dawes.
10  */
11 
12 
13 #include "accelerant.h"
14 #include "accelerant_protos.h"
15 #include "commands.h"
16 
17 #include <Debug.h>
18 #include <math.h>
19 #include <stdlib.h>
20 #include <string.h>
21 
22 #include <AGP.h>
23 
24 
// Debug output helpers. TRACE() only produces output when TRACE_OVERLAY is
// defined; ERROR() always prints. Both prepend the same prefix, including
// the separating space, so that the message does not run into it.
#undef TRACE
//#define TRACE_OVERLAY
#ifdef TRACE_OVERLAY
#	define TRACE(x...) _sPrintf("intel_extreme accelerant: " x)
#else
#	define TRACE(x...)
#endif

#define ERROR(x...) _sPrintf("intel_extreme accelerant: " x)
// Traces the name of the calling function; any arguments are ignored.
#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
35 
36 
37 #define NUM_HORIZONTAL_TAPS		5
38 #define NUM_VERTICAL_TAPS		3
39 #define NUM_HORIZONTAL_UV_TAPS	3
40 #define NUM_VERTICAL_UV_TAPS	3
41 #define NUM_PHASES				17
42 #define MAX_TAPS				5
43 
// One filter coefficient, split into the three fields that are later packed
// into a single coefficient register entry (see split_coefficient()).
struct phase_coefficient {
	uint8	sign;		// 1 if the coefficient is negative, 0 otherwise
	uint8	exponent;	// scaling selector, 0 to 3 (3 is the finest one)
	uint16	mantissa;	// rounded magnitude, shifted up into a 12 bit field
};
49 
50 
51 /*!	Splits the coefficient floating point value into the 3 components
52 	sign, mantissa, and exponent.
53 */
54 static bool
55 split_coefficient(double &coefficient, int32 mantissaSize,
56 	phase_coefficient &splitCoefficient)
57 {
58 	double absCoefficient = fabs(coefficient);
59 
60 	int sign;
61 	if (coefficient < 0.0)
62 		sign = 1;
63 	else
64 		sign = 0;
65 
66 	int32 intCoefficient, res;
67 	int32 maxValue = 1 << mantissaSize;
68 	res = 12 - mantissaSize;
69 
70 	if ((intCoefficient = (int)(absCoefficient * 4 * maxValue + 0.5))
71 			< maxValue) {
72 		splitCoefficient.exponent = 3;
73 		splitCoefficient.mantissa = intCoefficient << res;
74 		coefficient = (double)intCoefficient / (double)(4 * maxValue);
75 	} else if ((intCoefficient = (int)(absCoefficient * 2 * maxValue + 0.5))
76 			< maxValue) {
77 		splitCoefficient.exponent = 2;
78 		splitCoefficient.mantissa = intCoefficient << res;
79 		coefficient = (double)intCoefficient / (double)(2 * maxValue);
80 	} else if ((intCoefficient = (int)(absCoefficient * maxValue + 0.5))
81 			< maxValue) {
82 		splitCoefficient.exponent = 1;
83 		splitCoefficient.mantissa = intCoefficient << res;
84 		coefficient = (double)intCoefficient / (double)maxValue;
85 	} else if ((intCoefficient = (int)(absCoefficient * maxValue * 0.5 + 0.5))
86 			< maxValue) {
87 		splitCoefficient.exponent = 0;
88 		splitCoefficient.mantissa = intCoefficient << res;
89 		coefficient = (double)intCoefficient / (double)(maxValue / 2);
90 	} else {
91 		// coefficient out of range
92 		return false;
93 	}
94 
95 	splitCoefficient.sign = sign;
96 	if (sign)
97 		coefficient = -coefficient;
98 
99 	return true;
100 }
101 
102 
103 static void
104 update_coefficients(int32 taps, double filterCutOff, bool horizontal, bool isY,
105 	phase_coefficient* splitCoefficients)
106 {
107 	if (filterCutOff < 1)
108 		filterCutOff = 1;
109 	if (filterCutOff > 3)
110 		filterCutOff = 3;
111 
112 	bool isVerticalUV = !horizontal && !isY;
113 	int32 mantissaSize = horizontal ? 7 : 6;
114 
115 	double rawCoefficients[MAX_TAPS * 32], coefficients[NUM_PHASES][MAX_TAPS];
116 
117 	int32 num = taps * 16;
118 	for (int32 i = 0; i < num * 2; i++) {
119 		double sinc;
120 		double value = (1.0 / filterCutOff) * taps * M_PI * (i - num)
121 			/ (2 * num);
122 		if (value == 0.0)
123 			sinc = 1.0;
124 		else
125 			sinc = sin(value) / value;
126 
127 		// Hamming window
128 		double window = (0.5 - 0.5 * cos(i * M_PI / num));
129 		rawCoefficients[i] = sinc * window;
130 	}
131 
132 	for (int32 i = 0; i < NUM_PHASES; i++) {
133 		// Normalise the coefficients
134 		double sum = 0.0;
135 		int32 pos;
136 		for (int32 j = 0; j < taps; j++) {
137 			pos = i + j * 32;
138 			sum += rawCoefficients[pos];
139 		}
140 		for (int32 j = 0; j < taps; j++) {
141 			pos = i + j * 32;
142 			coefficients[i][j] = rawCoefficients[pos] / sum;
143 		}
144 
145 		// split them into sign/mantissa/exponent
146 		for (int32 j = 0; j < taps; j++) {
147 			pos = j + i * taps;
148 
149 			split_coefficient(coefficients[i][j], mantissaSize
150 				+ (((j == (taps - 1) / 2) && !isVerticalUV) ? 2 : 0),
151 				splitCoefficients[pos]);
152 		}
153 
154 		int32 tapAdjust[MAX_TAPS];
155 		tapAdjust[0] = (taps - 1) / 2;
156 		for (int32 j = 1, k = 1; j <= tapAdjust[0]; j++, k++) {
157 			tapAdjust[k] = tapAdjust[0] - j;
158 			tapAdjust[++k] = tapAdjust[0] + j;
159 		}
160 
161 		// Adjust the coefficients
162 		sum = 0.0;
163 		for (int32 j = 0; j < taps; j++) {
164 			sum += coefficients[i][j];
165 		}
166 
167 		if (sum != 1.0) {
168 			for (int32 k = 0; k < taps; k++) {
169 				int32 tap2Fix = tapAdjust[k];
170 				double diff = 1.0 - sum;
171 
172 				coefficients[i][tap2Fix] += diff;
173 				pos = tap2Fix + i * taps;
174 
175 				split_coefficient(coefficients[i][tap2Fix], mantissaSize
176 					+ (((tap2Fix == (taps - 1) / 2) && !isVerticalUV) ? 2 : 0),
177 					splitCoefficients[pos]);
178 
179 				sum = 0.0;
180 				for (int32 j = 0; j < taps; j++) {
181 					sum += coefficients[i][j];
182 				}
183 				if (sum == 1.0)
184 					break;
185 			}
186 		}
187 	}
188 }
189 
190 
191 static void
192 set_color_key(uint8 red, uint8 green, uint8 blue, uint8 redMask,
193 	uint8 greenMask, uint8 blueMask)
194 {
195 	overlay_registers* registers = gInfo->overlay_registers;
196 
197 	registers->color_key_red = red;
198 	registers->color_key_green = green;
199 	registers->color_key_blue = blue;
200 	registers->color_key_mask_red = ~redMask;
201 	registers->color_key_mask_green = ~greenMask;
202 	registers->color_key_mask_blue = ~blueMask;
203 	registers->color_key_enabled = true;
204 }
205 
206 
207 static void
208 set_color_key(const overlay_window* window)
209 {
210 	switch (gInfo->shared_info->current_mode.space) {
211 		case B_CMAP8:
212 			set_color_key(0, 0, window->blue.value, 0x0, 0x0, 0xff);
213 			break;
214 		case B_RGB15:
215 			set_color_key(window->red.value << 3, window->green.value << 3,
216 				window->blue.value << 3, window->red.mask << 3,
217 				window->green.mask << 3, window->blue.mask << 3);
218 			break;
219 		case B_RGB16:
220 			set_color_key(window->red.value << 3, window->green.value << 2,
221 				window->blue.value << 3, window->red.mask << 3,
222 				window->green.mask << 2, window->blue.mask << 3);
223 			break;
224 
225 		default:
226 			set_color_key(window->red.value, window->green.value,
227 				window->blue.value, window->red.mask, window->green.mask,
228 				window->blue.mask);
229 			break;
230 	}
231 }
232 
233 
234 static void
235 update_overlay(bool updateCoefficients)
236 {
237 	if (!gInfo->shared_info->overlay_active
238 		|| gInfo->shared_info->device_type.InGroup(INTEL_TYPE_965))
239 		return;
240 
241 	QueueCommands queue(gInfo->shared_info->primary_ring_buffer);
242 	queue.PutFlush();
243 	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);
244 	queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, updateCoefficients);
245 
246 	// make sure the flip is done now
247 	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);
248 	queue.PutFlush();
249 
250 	TRACE("%s: UP: %lx, TST: %lx, ST: %lx, CMD: %lx (%lx), ERR: %lx\n",
251 		__func__, read32(INTEL_OVERLAY_UPDATE),
252 		read32(INTEL_OVERLAY_TEST), read32(INTEL_OVERLAY_STATUS),
253 		*(((uint32*)gInfo->overlay_registers) + 0x68/4), read32(0x30168),
254 		read32(0x2024));
255 }
256 
257 
258 static void
259 show_overlay(void)
260 {
261 	if (gInfo->shared_info->overlay_active
262 		|| gInfo->shared_info->device_type.InGroup(INTEL_TYPE_965))
263 		return;
264 
265 	gInfo->shared_info->overlay_active = true;
266 	gInfo->overlay_registers->overlay_enabled = true;
267 
268 	QueueCommands queue(gInfo->shared_info->primary_ring_buffer);
269 	queue.PutOverlayFlip(COMMAND_OVERLAY_ON, true);
270 	queue.PutFlush();
271 
272 	TRACE("%s: UP: %lx, TST: %lx, ST: %lx, CMD: %lx (%lx), ERR: %lx\n",
273 		__func__, read32(INTEL_OVERLAY_UPDATE),
274 		read32(INTEL_OVERLAY_TEST), read32(INTEL_OVERLAY_STATUS),
275 		*(((uint32*)gInfo->overlay_registers) + 0x68/4),
276 		read32(0x30168), read32(0x2024));
277 }
278 
279 
280 static void
281 hide_overlay(void)
282 {
283 	if (!gInfo->shared_info->overlay_active
284 		|| gInfo->shared_info->device_type.InGroup(INTEL_TYPE_965))
285 		return;
286 
287 	overlay_registers* registers = gInfo->overlay_registers;
288 
289 	gInfo->shared_info->overlay_active = false;
290 	registers->overlay_enabled = false;
291 
292 	QueueCommands queue(gInfo->shared_info->primary_ring_buffer);
293 
294 	// flush pending commands
295 	queue.PutFlush();
296 	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);
297 
298 	// clear overlay enabled bit
299 	queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, false);
300 	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);
301 
302 	// turn off overlay engine
303 	queue.PutOverlayFlip(COMMAND_OVERLAY_OFF, false);
304 	queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP);
305 
306 	gInfo->current_overlay = NULL;
307 }
308 
309 
310 //	#pragma mark -
311 
312 
313 uint32
314 intel_overlay_count(const display_mode* mode)
315 {
316 	// TODO: make this depending on the amount of RAM and the screen mode
317 	// (and we could even have more than one when using 3D as well)
318 	return 1;
319 }
320 
321 
322 const uint32*
323 intel_overlay_supported_spaces(const display_mode* mode)
324 {
325 	static const uint32 kSupportedSpaces[] = {B_RGB15, B_RGB16, B_RGB32,
326 		B_YCbCr422, 0};
327 	static const uint32 kSupportedi965Spaces[] = {B_YCbCr422, 0};
328 	intel_shared_info &sharedInfo = *gInfo->shared_info;
329 
330 	if (sharedInfo.device_type.InGroup(INTEL_TYPE_96x))
331 		return kSupportedi965Spaces;
332 
333 	return kSupportedSpaces;
334 }
335 
336 
337 uint32
338 intel_overlay_supported_features(uint32 colorSpace)
339 {
340 	return B_OVERLAY_COLOR_KEY
341 		| B_OVERLAY_HORIZONTAL_FILTERING
342 		| B_OVERLAY_VERTICAL_FILTERING
343 		| B_OVERLAY_HORIZONTAL_MIRRORING;
344 }
345 
346 
347 const overlay_buffer*
348 intel_allocate_overlay_buffer(color_space colorSpace, uint16 width,
349 	uint16 height)
350 {
351 	TRACE("%s(width %u, height %u, colorSpace %lu)\n", __func__, width,
352 		height, colorSpace);
353 
354 	intel_shared_info &sharedInfo = *gInfo->shared_info;
355 	uint32 bytesPerPixel;
356 
357 	switch (colorSpace) {
358 		case B_RGB15:
359 			bytesPerPixel = 2;
360 			break;
361 		case B_RGB16:
362 			bytesPerPixel = 2;
363 			break;
364 		case B_RGB32:
365 			bytesPerPixel = 4;
366 			break;
367 		case B_YCbCr422:
368 			bytesPerPixel = 2;
369 			break;
370 		default:
371 			return NULL;
372 	}
373 
374 	struct overlay* overlay = (struct overlay*)malloc(sizeof(struct overlay));
375 	if (overlay == NULL)
376 		return NULL;
377 
378 	// TODO: locking!
379 
380 	// alloc graphics mem
381 
382 	int32 alignment = 0x3f;
383 	if (sharedInfo.device_type.InGroup(INTEL_TYPE_965))
384 		alignment = 0xff;
385 
386 	overlay_buffer* buffer = &overlay->buffer;
387 	buffer->space = colorSpace;
388 	buffer->width = width;
389 	buffer->height = height;
390 	buffer->bytes_per_row = (width * bytesPerPixel + alignment) & ~alignment;
391 
392 	status_t status = intel_allocate_memory(buffer->bytes_per_row * height,
393 		0, overlay->buffer_base);
394 	if (status < B_OK) {
395 		free(overlay);
396 		return NULL;
397 	}
398 
399 	if (sharedInfo.device_type.InGroup(INTEL_TYPE_965)) {
400 		status = intel_allocate_memory(INTEL_i965_OVERLAY_STATE_SIZE,
401 			B_APERTURE_NON_RESERVED, overlay->state_base);
402 		if (status < B_OK) {
403 			intel_free_memory(overlay->buffer_base);
404 			free(overlay);
405 			return NULL;
406 		}
407 
408 		overlay->state_offset = overlay->state_base
409 			- (addr_t)gInfo->shared_info->graphics_memory;
410 	}
411 
412 	overlay->buffer_offset = overlay->buffer_base
413 		- (addr_t)gInfo->shared_info->graphics_memory;
414 
415 	buffer->buffer = (uint8*)overlay->buffer_base;
416 	buffer->buffer_dma = (uint8*)gInfo->shared_info->physical_graphics_memory
417 		+ overlay->buffer_offset;
418 
419 	TRACE("%s: base=%x, offset=%x, address=%x, physical address=%x\n",
420 		__func__, overlay->buffer_base, overlay->buffer_offset,
421 		buffer->buffer, buffer->buffer_dma);
422 
423 	return buffer;
424 }
425 
426 
427 status_t
428 intel_release_overlay_buffer(const overlay_buffer* buffer)
429 {
430 	CALLED();
431 
432 	struct overlay* overlay = (struct overlay*)buffer;
433 
434 	// TODO: locking!
435 
436 	if (gInfo->current_overlay == overlay)
437 		hide_overlay();
438 
439 	intel_free_memory(overlay->buffer_base);
440 	if (gInfo->shared_info->device_type.InGroup(INTEL_TYPE_965))
441 		intel_free_memory(overlay->state_base);
442 	free(overlay);
443 
444 	return B_OK;
445 }
446 
447 
448 status_t
449 intel_get_overlay_constraints(const display_mode* mode,
450 	const overlay_buffer* buffer, overlay_constraints* constraints)
451 {
452 	CALLED();
453 
454 	// taken from the Radeon driver...
455 
456 	// scaler input restrictions
457 	// TODO: check all these values; most of them are probably too restrictive
458 
459 	// position
460 	constraints->view.h_alignment = 0;
461 	constraints->view.v_alignment = 0;
462 
463 	// alignment
464 	switch (buffer->space) {
465 		case B_RGB15:
466 			constraints->view.width_alignment = 7;
467 			break;
468 		case B_RGB16:
469 			constraints->view.width_alignment = 7;
470 			break;
471 		case B_RGB32:
472 			constraints->view.width_alignment = 3;
473 			break;
474 		case B_YCbCr422:
475 			constraints->view.width_alignment = 7;
476 			break;
477 		case B_YUV12:
478 			constraints->view.width_alignment = 7;
479 			break;
480 		default:
481 			return B_BAD_VALUE;
482 	}
483 	constraints->view.height_alignment = 0;
484 
485 	// size
486 	constraints->view.width.min = 4;		// make 4-tap filter happy
487 	constraints->view.height.min = 4;
488 	constraints->view.width.max = buffer->width;
489 	constraints->view.height.max = buffer->height;
490 
491 	// scaler output restrictions
492 	constraints->window.h_alignment = 0;
493 	constraints->window.v_alignment = 0;
494 	constraints->window.width_alignment = 0;
495 	constraints->window.height_alignment = 0;
496 	constraints->window.width.min = 2;
497 	constraints->window.width.max = mode->virtual_width;
498 	constraints->window.height.min = 2;
499 	constraints->window.height.max = mode->virtual_height;
500 
501 	// TODO: the minimum values are not tested
502 	constraints->h_scale.min = 1.0f / (1 << 4);
503 	constraints->h_scale.max = buffer->width * 7;
504 	constraints->v_scale.min = 1.0f / (1 << 4);
505 	constraints->v_scale.max = buffer->height * 7;
506 
507 	return B_OK;
508 }
509 
510 
511 overlay_token
512 intel_allocate_overlay(void)
513 {
514 	CALLED();
515 
516 	// we only have a single overlay channel
517 	if (atomic_or(&gInfo->shared_info->overlay_channel_used, 1) != 0)
518 		return NULL;
519 
520 	return (overlay_token)++gInfo->shared_info->overlay_token;
521 }
522 
523 
524 status_t
525 intel_release_overlay(overlay_token overlayToken)
526 {
527 	CALLED();
528 
529 	// we only have a single token, which simplifies this
530 	if (overlayToken != (overlay_token)gInfo->shared_info->overlay_token)
531 		return B_BAD_VALUE;
532 
533 	atomic_and(&gInfo->shared_info->overlay_channel_used, 0);
534 
535 	return B_OK;
536 }
537 
538 
539 status_t
540 intel_configure_overlay(overlay_token overlayToken,
541 	const overlay_buffer* buffer, const overlay_window* window,
542 	const overlay_view* view)
543 {
544 	CALLED();
545 
546 	if (overlayToken != (overlay_token)gInfo->shared_info->overlay_token)
547 		return B_BAD_VALUE;
548 
549 	if (window == NULL || view == NULL) {
550 		hide_overlay();
551 		return B_OK;
552 	}
553 
554 	struct overlay* overlay = (struct overlay*)buffer;
555 	overlay_registers* registers = gInfo->overlay_registers;
556 	bool updateCoefficients = false;
557 	uint32 bytesPerPixel = 2;
558 
559 	switch (buffer->space) {
560 		case B_RGB15:
561 			registers->source_format = OVERLAY_FORMAT_RGB15;
562 			break;
563 		case B_RGB16:
564 			registers->source_format = OVERLAY_FORMAT_RGB16;
565 			break;
566 		case B_RGB32:
567 			registers->source_format = OVERLAY_FORMAT_RGB32;
568 			bytesPerPixel = 4;
569 			break;
570 		case B_YCbCr422:
571 			registers->source_format = OVERLAY_FORMAT_YCbCr422;
572 			break;
573 	}
574 
575 	if (!gInfo->shared_info->overlay_active
576 		|| memcmp(&gInfo->last_overlay_view, view, sizeof(overlay_view))
577 		|| memcmp(&gInfo->last_overlay_frame, window, sizeof(overlay_frame))) {
578 		// scaling has changed, program window and scaling factor
579 
580 		// clip the window to on screen bounds
581 		// TODO: this is not yet complete or correct - especially if we start
582 		// to support moving the display!
583 		int32 left, top, right, bottom;
584 		left = window->h_start;
585 		right = window->h_start + window->width;
586 		top = window->v_start;
587 		bottom = window->v_start + window->height;
588 		if (left < 0)
589 			left = 0;
590 		if (top < 0)
591 			top = 0;
592 		if (right > gInfo->shared_info->current_mode.timing.h_display)
593 			right = gInfo->shared_info->current_mode.timing.h_display;
594 		if (bottom > gInfo->shared_info->current_mode.timing.v_display)
595 			bottom = gInfo->shared_info->current_mode.timing.v_display;
596 		if (left >= right || top >= bottom) {
597 			// overlay is not within visible bounds
598 			hide_overlay();
599 			return B_OK;
600 		}
601 
602 		registers->window_left = left;
603 		registers->window_top = top;
604 		registers->window_width = right - left;
605 		registers->window_height = bottom - top;
606 
607 		uint32 horizontalScale = (view->width << 12) / window->width;
608 		uint32 verticalScale = (view->height << 12) / window->height;
609 		uint32 horizontalScaleUV = horizontalScale >> 1;
610 		uint32 verticalScaleUV = verticalScale >> 1;
611 		horizontalScale = horizontalScaleUV << 1;
612 		verticalScale = verticalScaleUV << 1;
613 
614 		// we need to offset the overlay view to adapt it to the clipping
615 		// (in addition to whatever offset is desired already)
616 		left = view->h_start - (int32)((window->h_start - left)
617 			* (horizontalScale / 4096.0) + 0.5);
618 		top = view->v_start - (int32)((window->v_start - top)
619 			* (verticalScale / 4096.0) + 0.5);
620 		right = view->h_start + view->width;
621 		bottom = view->v_start + view->height;
622 
623 		gInfo->overlay_position_buffer_offset = buffer->bytes_per_row * top
624 			+ left * bytesPerPixel;
625 
626 		// Note: in non-planar mode, you *must* not program the source
627 		// width/height UV registers - they must stay cleared, or the chip is
628 		// doing strange stuff.
629 		// On the other hand, you have to program the UV scaling registers, or
630 		// the result will be wrong, too.
631 		registers->source_width_rgb = right - left;
632 		registers->source_height_rgb = bottom - top;
633 		if (gInfo->shared_info->device_type.InFamily(INTEL_TYPE_8xx)) {
634 			registers->source_bytes_per_row_rgb = (((overlay->buffer_offset
635 				+ (view->width << 1) + 0x1f) >> 5)
636 				- (overlay->buffer_offset >> 5) - 1) << 2;
637 		} else {
638 			int yaddress = overlay->buffer_offset;
639 			int yswidth = view->width << 1;
640 			registers->source_bytes_per_row_rgb = (((((yaddress
641 				+ yswidth + 0x3f) >> 6) - (yaddress >> 6)) << 1) - 1) << 2;
642 		}
643 
644 		// horizontal scaling
645 		registers->scale_rgb.horizontal_downscale_factor
646 			= horizontalScale >> 12;
647 		registers->scale_rgb.horizontal_scale_fraction
648 			= horizontalScale & 0xfff;
649 		registers->scale_uv.horizontal_downscale_factor
650 			= horizontalScaleUV >> 12;
651 		registers->scale_uv.horizontal_scale_fraction
652 			= horizontalScaleUV & 0xfff;
653 
654 		// vertical scaling
655 		registers->scale_rgb.vertical_scale_fraction = verticalScale & 0xfff;
656 		registers->scale_uv.vertical_scale_fraction = verticalScaleUV & 0xfff;
657 		registers->vertical_scale_rgb = verticalScale >> 12;
658 		registers->vertical_scale_uv = verticalScaleUV >> 12;
659 
660 		TRACE("scale: h = %ld.%ld, v = %ld.%ld\n", horizontalScale >> 12,
661 			horizontalScale & 0xfff, verticalScale >> 12,
662 			verticalScale & 0xfff);
663 
664 		if (verticalScale != gInfo->last_vertical_overlay_scale
665 			|| horizontalScale != gInfo->last_horizontal_overlay_scale) {
666 			// Recompute phase coefficients (taken from X driver)
667 			updateCoefficients = true;
668 
669 			phase_coefficient coefficients[NUM_HORIZONTAL_TAPS * NUM_PHASES];
670 			update_coefficients(NUM_HORIZONTAL_TAPS, horizontalScale / 4096.0,
671 				true, true, coefficients);
672 
673 			phase_coefficient coefficientsUV[
674 				NUM_HORIZONTAL_UV_TAPS * NUM_PHASES];
675 			update_coefficients(NUM_HORIZONTAL_UV_TAPS,
676 				horizontalScaleUV / 4096.0, true, false, coefficientsUV);
677 
678 			int32 pos = 0;
679 			for (int32 i = 0; i < NUM_PHASES; i++) {
680 				for (int32 j = 0; j < NUM_HORIZONTAL_TAPS; j++) {
681 					registers->horizontal_coefficients_rgb[pos]
682 						= coefficients[pos].sign << 15
683 							| coefficients[pos].exponent << 12
684 							| coefficients[pos].mantissa;
685 					pos++;
686 				}
687 			}
688 
689 			pos = 0;
690 			for (int32 i = 0; i < NUM_PHASES; i++) {
691 				for (int32 j = 0; j < NUM_HORIZONTAL_UV_TAPS; j++) {
692 					registers->horizontal_coefficients_uv[pos]
693 						= coefficientsUV[pos].sign << 15
694 							| coefficientsUV[pos].exponent << 12
695 							| coefficientsUV[pos].mantissa;
696 					pos++;
697 				}
698 			}
699 
700 			gInfo->last_vertical_overlay_scale = verticalScale;
701 			gInfo->last_horizontal_overlay_scale = horizontalScale;
702 		}
703 
704 		gInfo->last_overlay_view = *view;
705 		gInfo->last_overlay_frame = *(overlay_frame*)window;
706 	}
707 
708 	registers->color_control_output_mode = true;
709 	registers->select_pipe = 0;
710 
711 	// program buffer
712 
713 	registers->buffer_rgb0
714 		= overlay->buffer_offset + gInfo->overlay_position_buffer_offset;
715 	registers->stride_rgb = buffer->bytes_per_row;
716 
717 	registers->mirroring_mode
718 		= (window->flags & B_OVERLAY_HORIZONTAL_MIRRORING) != 0
719 			? OVERLAY_MIRROR_HORIZONTAL : OVERLAY_MIRROR_NORMAL;
720 	registers->ycbcr422_order = 0;
721 
722 	if (!gInfo->shared_info->overlay_active) {
723 		// overlay is shown for the first time
724 		set_color_key(window);
725 		show_overlay();
726 	} else
727 		update_overlay(updateCoefficients);
728 
729 	gInfo->current_overlay = overlay;
730 	return B_OK;
731 }
732 
733