1 /* 2 * Copyright 2006, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Axel Dörfler, axeld@pinc-software.de 7 * 8 * The phase coefficient computation was taken from the X driver written by 9 * Alan Hourihane and David Dawes. 10 */ 11 12 13 #include "accelerant.h" 14 #include "accelerant_protos.h" 15 #include "commands.h" 16 17 #include <math.h> 18 #include <stdlib.h> 19 20 21 //#define TRACE_OVERLAY 22 #ifdef TRACE_OVERLAY 23 extern "C" void _sPrintf(const char *format, ...); 24 # define TRACE(x) _sPrintf x 25 #else 26 # define TRACE(x) ; 27 #endif 28 29 30 #define NUM_HORIZONTAL_TAPS 5 31 #define NUM_VERTICAL_TAPS 3 32 #define NUM_HORIZONTAL_UV_TAPS 3 33 #define NUM_VERTICAL_UV_TAPS 3 34 #define NUM_PHASES 17 35 #define MAX_TAPS 5 36 37 struct phase_coefficient { 38 uint8 sign; 39 uint8 exponent; 40 uint16 mantissa; 41 }; 42 43 44 /*! 45 Splits the coefficient floating point value into the 3 components 46 sign, mantissa, and exponent. 47 */ 48 static bool 49 split_coefficient(double &coefficient, int32 mantissaSize, 50 phase_coefficient &splitCoefficient) 51 { 52 double absCoefficient = fabs(coefficient); 53 54 int sign; 55 if (coefficient < 0.0) 56 sign = 1; 57 else 58 sign = 0; 59 60 int32 intCoefficient, res; 61 int32 maxValue = 1 << mantissaSize; 62 res = 12 - mantissaSize; 63 64 if ((intCoefficient = (int)(absCoefficient * 4 * maxValue + 0.5)) < maxValue) { 65 splitCoefficient.exponent = 3; 66 splitCoefficient.mantissa = intCoefficient << res; 67 coefficient = (double)intCoefficient / (double)(4 * maxValue); 68 } else if ((intCoefficient = (int)(absCoefficient * 2 * maxValue + 0.5)) < maxValue) { 69 splitCoefficient.exponent = 2; 70 splitCoefficient.mantissa = intCoefficient << res; 71 coefficient = (double)intCoefficient / (double)(2 * maxValue); 72 } else if ((intCoefficient = (int)(absCoefficient * maxValue + 0.5)) < maxValue) { 73 splitCoefficient.exponent = 1; 74 splitCoefficient.mantissa = intCoefficient << res; 75 coefficient = (double)intCoefficient / (double)maxValue; 76 } else if ((intCoefficient = (int)(absCoefficient * maxValue * 0.5 + 0.5)) < maxValue) { 77 splitCoefficient.exponent = 0; 78 splitCoefficient.mantissa = intCoefficient << res; 79 coefficient = (double)intCoefficient / (double)(maxValue / 2); 80 } else { 81 // coefficient out of range 82 return false; 83 } 84 85 splitCoefficient.sign = sign; 86 if (sign) 87 coefficient = -coefficient; 88 89 return true; 90 } 91 92 93 static void 94 update_coefficients(int32 taps, double filterCutOff, bool horizontal, bool isY, 95 phase_coefficient *splitCoefficients) 96 { 97 if (filterCutOff < 1) 98 filterCutOff = 1; 99 if (filterCutOff > 3) 100 filterCutOff = 3; 101 102 bool isVerticalUV = !horizontal && !isY; 103 int32 mantissaSize = horizontal ? 7 : 6; 104 105 double rawCoefficients[MAX_TAPS * 32], coefficients[NUM_PHASES][MAX_TAPS]; 106 107 int32 num = taps * 16; 108 for (int32 i = 0; i < num * 2; i++) { 109 double sinc; 110 double value = (1.0 / filterCutOff) * taps * PI * (i - num) / (2 * num); 111 if (value == 0.0) 112 sinc = 1.0; 113 else 114 sinc = sin(value) / value; 115 116 // Hamming window 117 double window = (0.5 - 0.5 * cos(i * PI / num)); 118 rawCoefficients[i] = sinc * window; 119 } 120 121 for (int32 i = 0; i < NUM_PHASES; i++) { 122 // Normalise the coefficients 123 double sum = 0.0; 124 int32 pos; 125 for (int32 j = 0; j < taps; j++) { 126 pos = i + j * 32; 127 sum += rawCoefficients[pos]; 128 } 129 for (int32 j = 0; j < taps; j++) { 130 pos = i + j * 32; 131 coefficients[i][j] = rawCoefficients[pos] / sum; 132 } 133 134 // split them into sign/mantissa/exponent 135 for (int32 j = 0; j < taps; j++) { 136 pos = j + i * taps; 137 138 split_coefficient(coefficients[i][j], mantissaSize 139 + (((j == (taps - 1) / 2) && !isVerticalUV) ? 2 : 0), 140 splitCoefficients[pos]); 141 } 142 143 int32 tapAdjust[MAX_TAPS]; 144 tapAdjust[0] = (taps - 1) / 2; 145 for (int32 j = 1, k = 1; j <= tapAdjust[0]; j++, k++) { 146 tapAdjust[k] = tapAdjust[0] - j; 147 tapAdjust[++k] = tapAdjust[0] + j; 148 } 149 150 // Adjust the coefficients 151 sum = 0.0; 152 for (int32 j = 0; j < taps; j++) { 153 sum += coefficients[i][j]; 154 } 155 156 if (sum != 1.0) { 157 for (int32 k = 0; k < taps; k++) { 158 int32 tap2Fix = tapAdjust[k]; 159 double diff = 1.0 - sum; 160 161 coefficients[i][tap2Fix] += diff; 162 pos = tap2Fix + i * taps; 163 164 split_coefficient(coefficients[i][tap2Fix], mantissaSize 165 + (((tap2Fix == (taps - 1) / 2) && !isVerticalUV) ? 2 : 0), 166 splitCoefficients[pos]); 167 168 sum = 0.0; 169 for (int32 j = 0; j < taps; j++) { 170 sum += coefficients[i][j]; 171 } 172 if (sum == 1.0) 173 break; 174 } 175 } 176 } 177 } 178 179 180 static void 181 set_color_key(uint8 red, uint8 green, uint8 blue, 182 uint8 redMask, uint8 greenMask, uint8 blueMask) 183 { 184 overlay_registers *registers = gInfo->overlay_registers; 185 186 registers->color_key_red = red; 187 registers->color_key_green = green; 188 registers->color_key_blue = blue; 189 registers->color_key_mask_red = ~redMask; 190 registers->color_key_mask_green = ~greenMask; 191 registers->color_key_mask_blue = ~blueMask; 192 registers->color_key_enabled = true; 193 } 194 195 196 static void 197 set_color_key(const overlay_window *window) 198 { 199 switch (gInfo->shared_info->current_mode.space) { 200 case B_CMAP8: 201 set_color_key(0, 0, window->blue.value, 0x0, 0x0, 0xff); 202 break; 203 case B_RGB15: 204 set_color_key(window->red.value << 3, window->green.value << 3, 205 window->blue.value << 3, window->red.mask << 3, window->green.mask << 3, 206 window->blue.mask << 3); 207 break; 208 case B_RGB16: 209 set_color_key(window->red.value << 3, window->green.value << 2, 210 window->blue.value << 3, window->red.mask << 3, window->green.mask << 2, 211 window->blue.mask << 3); 212 break; 213 214 default: 215 set_color_key(window->red.value, window->green.value, 216 window->blue.value, window->red.mask, window->green.mask, 217 window->blue.mask); 218 break; 219 } 220 } 221 222 223 static void 224 update_overlay(bool updateCoefficients) 225 { 226 if (!gInfo->shared_info->overlay_active) 227 return; 228 229 QueueCommands queue(gInfo->shared_info->secondary_ring_buffer); 230 queue.PutFlush(); 231 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 232 queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, updateCoefficients); 233 234 // TRACE(("update overlay: UPDATE: %lx, TEST: %lx, STATUS: %lx, EXTENDED_STATUS: %lx\n", 235 // read32(INTEL_OVERLAY_UPDATE), read32(INTEL_OVERLAY_TEST), read32(INTEL_OVERLAY_STATUS), 236 // read32(INTEL_OVERLAY_EXTENDED_STATUS))); 237 } 238 239 240 static void 241 show_overlay(void) 242 { 243 if (gInfo->shared_info->overlay_active) 244 return; 245 246 overlay_registers *registers = gInfo->overlay_registers; 247 248 gInfo->shared_info->overlay_active = true; 249 registers->overlay_enabled = true; 250 251 QueueCommands queue(gInfo->shared_info->secondary_ring_buffer); 252 queue.PutFlush(); 253 queue.PutOverlayFlip(COMMAND_OVERLAY_ON, true); 254 } 255 256 257 static void 258 hide_overlay(void) 259 { 260 if (!gInfo->shared_info->overlay_active) 261 return; 262 263 overlay_registers *registers = gInfo->overlay_registers; 264 265 gInfo->shared_info->overlay_active = false; 266 registers->overlay_enabled = false; 267 268 QueueCommands queue(gInfo->shared_info->secondary_ring_buffer); 269 270 // flush pending commands 271 queue.PutFlush(); 272 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 273 274 // clear overlay enabled bit 275 queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, false); 276 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 277 278 // turn off overlay engine 279 queue.PutOverlayFlip(COMMAND_OVERLAY_OFF, false); 280 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 281 282 gInfo->current_overlay = NULL; 283 } 284 285 286 // #pragma mark - 287 288 289 uint32 290 intel_overlay_count(const display_mode *mode) 291 { 292 // TODO: make this depending on the amount of RAM and the screen mode 293 return 1; 294 } 295 296 297 const uint32 * 298 intel_overlay_supported_spaces(const display_mode *mode) 299 { 300 static const uint32 kSupportedSpaces[] = {B_RGB15, B_RGB16, B_RGB32, B_YCbCr422, 0}; 301 302 return kSupportedSpaces; 303 } 304 305 306 uint32 307 intel_overlay_supported_features(uint32 colorSpace) 308 { 309 return B_OVERLAY_COLOR_KEY 310 | B_OVERLAY_HORIZONTAL_FILTERING 311 | B_OVERLAY_VERTICAL_FILTERING 312 | B_OVERLAY_HORIZONTAL_MIRRORING; 313 } 314 315 316 const overlay_buffer * 317 intel_allocate_overlay_buffer(color_space colorSpace, uint16 width, 318 uint16 height) 319 { 320 TRACE(("intel_allocate_overlay_buffer(width %u, height %u, colorSpace %lu)\n", 321 width, height, colorSpace)); 322 323 uint32 bytesPerPixel; 324 325 switch (colorSpace) { 326 case B_RGB15: 327 bytesPerPixel = 2; 328 break; 329 case B_RGB16: 330 bytesPerPixel = 2; 331 break; 332 case B_RGB32: 333 bytesPerPixel = 4; 334 break; 335 case B_YCbCr422: 336 bytesPerPixel = 2; 337 break; 338 default: 339 return NULL; 340 } 341 342 struct overlay *overlay = (struct overlay *)malloc(sizeof(struct overlay)); 343 if (overlay == NULL) 344 return NULL; 345 346 // TODO: locking! 347 348 // alloc graphics mem 349 overlay_buffer *buffer = &overlay->buffer; 350 351 buffer->space = colorSpace; 352 buffer->width = width; 353 buffer->height = height; 354 buffer->bytes_per_row = (width * bytesPerPixel + 0x3f) & ~0x3f; 355 356 status_t status = intel_allocate_memory(buffer->bytes_per_row * height, 357 overlay->buffer_handle, overlay->buffer_offset); 358 if (status < B_OK) { 359 free(overlay); 360 return NULL; 361 } 362 363 buffer->buffer = gInfo->shared_info->graphics_memory + overlay->buffer_offset; 364 buffer->buffer_dma = gInfo->shared_info->physical_graphics_memory + overlay->buffer_offset; 365 366 TRACE(("allocated overlay buffer: handle=%x, offset=%x, address=%x, physical address=%x\n", 367 overlay->buffer_handle, overlay->buffer_offset, buffer->buffer, buffer->buffer_dma)); 368 369 return buffer; 370 } 371 372 373 status_t 374 intel_release_overlay_buffer(const overlay_buffer *buffer) 375 { 376 TRACE(("intel_release_overlay_buffer(buffer %p)\n", buffer)); 377 378 struct overlay *overlay = (struct overlay *)buffer; 379 380 // TODO: locking! 381 382 if (gInfo->current_overlay == overlay) 383 hide_overlay(); 384 385 intel_free_memory(overlay->buffer_handle); 386 free(overlay); 387 388 return B_OK; 389 } 390 391 392 status_t 393 intel_get_overlay_constraints(const display_mode *mode, const overlay_buffer *buffer, 394 overlay_constraints *constraints) 395 { 396 TRACE(("intel_get_overlay_constraints(buffer %p)\n", buffer)); 397 398 // taken from the Radeon driver... 399 400 // scaler input restrictions 401 // TODO: check all these values; most of them are probably too restrictive 402 403 // position 404 constraints->view.h_alignment = 0; 405 constraints->view.v_alignment = 0; 406 407 // alignment 408 switch (buffer->space) { 409 case B_RGB15: 410 constraints->view.width_alignment = 7; 411 break; 412 case B_RGB16: 413 constraints->view.width_alignment = 7; 414 break; 415 case B_RGB32: 416 constraints->view.width_alignment = 3; 417 break; 418 case B_YCbCr422: 419 constraints->view.width_alignment = 7; 420 break; 421 case B_YUV12: 422 constraints->view.width_alignment = 7; 423 default: 424 return B_BAD_VALUE; 425 } 426 constraints->view.height_alignment = 0; 427 428 // size 429 constraints->view.width.min = 4; // make 4-tap filter happy 430 constraints->view.height.min = 4; 431 constraints->view.width.max = buffer->width; 432 constraints->view.height.max = buffer->height; 433 434 // scaler output restrictions 435 constraints->window.h_alignment = 0; 436 constraints->window.v_alignment = 0; 437 constraints->window.width_alignment = 0; 438 constraints->window.height_alignment = 0; 439 constraints->window.width.min = 2; 440 constraints->window.width.max = mode->virtual_width; 441 constraints->window.height.min = 2; 442 constraints->window.height.max = mode->virtual_height; 443 444 // TODO: the minimum values are not tested 445 constraints->h_scale.min = 1.0f / (1 << 4); 446 constraints->h_scale.max = buffer->width * 7; 447 constraints->v_scale.min = 1.0f / (1 << 4); 448 constraints->v_scale.max = buffer->height * 7; 449 450 return B_OK; 451 } 452 453 454 overlay_token 455 intel_allocate_overlay(void) 456 { 457 TRACE(("intel_allocate_overlay()\n")); 458 459 // we only have a single overlay channel 460 if (atomic_or(&gInfo->shared_info->overlay_channel_used, 1) != 0) 461 return NULL; 462 463 return (overlay_token)++gInfo->shared_info->overlay_token; 464 } 465 466 467 status_t 468 intel_release_overlay(overlay_token overlayToken) 469 { 470 TRACE(("intel_allocate_overlay(token %ld)\n", (uint32)overlayToken)); 471 472 // we only have a single token, which simplifies this 473 if (overlayToken != (overlay_token)gInfo->shared_info->overlay_token) 474 return B_BAD_VALUE; 475 476 atomic_and(&gInfo->shared_info->overlay_channel_used, 0); 477 478 return B_OK; 479 } 480 481 482 status_t 483 intel_configure_overlay(overlay_token overlayToken, const overlay_buffer *buffer, 484 const overlay_window *window, const overlay_view *view) 485 { 486 TRACE(("intel_configure_overlay: buffer %p, window %p, view %p\n", 487 buffer, window, view)); 488 489 if (overlayToken != (overlay_token)gInfo->shared_info->overlay_token) 490 return B_BAD_VALUE; 491 492 if (window == NULL && view == NULL) { 493 hide_overlay(); 494 return B_OK; 495 } 496 497 struct overlay *overlay = (struct overlay *)buffer; 498 overlay_registers *registers = gInfo->overlay_registers; 499 bool updateCoefficients = false; 500 uint32 bytesPerPixel = 2; 501 502 switch (buffer->space) { 503 case B_RGB15: 504 registers->source_format = OVERLAY_FORMAT_RGB15; 505 break; 506 case B_RGB16: 507 registers->source_format = OVERLAY_FORMAT_RGB16; 508 break; 509 case B_RGB32: 510 registers->source_format = OVERLAY_FORMAT_RGB32; 511 bytesPerPixel = 4; 512 break; 513 case B_YCbCr422: 514 registers->source_format = OVERLAY_FORMAT_YCbCr422; 515 break; 516 } 517 518 if (!gInfo->shared_info->overlay_active 519 || memcmp(&gInfo->last_overlay_view, view, sizeof(overlay_view)) 520 || memcmp(&gInfo->last_overlay_frame, window, sizeof(overlay_frame))) { 521 // scaling has changed, program window and scaling factor 522 523 // clip the window to on screen bounds 524 // TODO: this is not yet complete or correct - especially if we start 525 // to support moving the display! 526 int32 left, top, right, bottom; 527 left = window->h_start; 528 right = window->h_start + window->width; 529 top = window->v_start; 530 bottom = window->v_start + window->height; 531 if (left < 0) 532 left = 0; 533 if (top < 0) 534 top = 0; 535 if (right > gInfo->shared_info->current_mode.timing.h_display) 536 right = gInfo->shared_info->current_mode.timing.h_display; 537 if (bottom > gInfo->shared_info->current_mode.timing.v_display) 538 bottom = gInfo->shared_info->current_mode.timing.v_display; 539 if (left >= right || top >= bottom) { 540 // overlay is not within visible bounds 541 hide_overlay(); 542 return B_OK; 543 } 544 545 registers->window_left = left; 546 registers->window_top = top; 547 registers->window_width = right - left; 548 registers->window_height = bottom - top; 549 550 uint32 horizontalScale = (view->width << 12) / window->width; 551 uint32 verticalScale = (view->height << 12) / window->height; 552 uint32 horizontalScaleUV = horizontalScale >> 1; 553 uint32 verticalScaleUV = verticalScale >> 1; 554 horizontalScale = horizontalScaleUV << 1; 555 verticalScale = verticalScaleUV << 1; 556 557 // we need to offset the overlay view to adapt it to the clipping 558 // (in addition to whatever offset is desired already) 559 left = view->h_start - (int32)((window->h_start - left) * (horizontalScale / 4096.0) + 0.5); 560 top = view->v_start - (int32)((window->v_start - top) * (verticalScale / 4096.0) + 0.5); 561 right = view->h_start + view->width; 562 bottom = view->v_start + view->height; 563 564 gInfo->overlay_position_buffer_offset = buffer->bytes_per_row * top 565 + left * bytesPerPixel; 566 567 // Note: in non-planar mode, you *must* not program the source width/height 568 // UV registers - they must stay cleared, or the chip is doing strange stuff. 569 // On the other hand, you have to program the UV scaling registers, or the 570 // result will be wrong, too. 571 registers->source_width_rgb = right - left; 572 registers->source_height_rgb = bottom - top; 573 registers->source_bytes_per_row_rgb = (((overlay->buffer_offset + (view->width << 1) 574 + 0x1f) >> 5) - (overlay->buffer_offset >> 5) - 1) << 2; 575 576 // horizontal scaling 577 registers->scale_rgb.horizontal_downscale_factor = horizontalScale >> 12; 578 registers->scale_rgb.horizontal_scale_fraction = horizontalScale & 0xfff; 579 registers->scale_uv.horizontal_downscale_factor = horizontalScaleUV >> 12; 580 registers->scale_uv.horizontal_scale_fraction = horizontalScaleUV & 0xfff; 581 582 // vertical scaling 583 registers->scale_rgb.vertical_scale_fraction = verticalScale & 0xfff; 584 registers->scale_uv.vertical_scale_fraction = verticalScaleUV & 0xfff; 585 registers->vertical_scale_rgb = verticalScale >> 12; 586 registers->vertical_scale_uv = verticalScaleUV >> 12; 587 588 TRACE(("scale: h = %ld.%ld, v = %ld.%ld\n", horizontalScale >> 12, 589 horizontalScale & 0xfff, verticalScale >> 12, verticalScale & 0xfff)); 590 591 if (verticalScale != gInfo->last_vertical_overlay_scale 592 || horizontalScale != gInfo->last_horizontal_overlay_scale) { 593 // Recompute phase coefficients (taken from X driver) 594 updateCoefficients = true; 595 596 phase_coefficient coefficients[NUM_HORIZONTAL_TAPS * NUM_PHASES]; 597 update_coefficients(NUM_HORIZONTAL_TAPS, horizontalScale / 4096.0, 598 true, true, coefficients); 599 600 phase_coefficient coefficientsUV[NUM_HORIZONTAL_UV_TAPS * NUM_PHASES]; 601 update_coefficients(NUM_HORIZONTAL_UV_TAPS, horizontalScaleUV / 4096.0, 602 true, false, coefficientsUV); 603 604 int32 pos = 0; 605 for (int32 i = 0; i < NUM_PHASES; i++) { 606 for (int32 j = 0; j < NUM_HORIZONTAL_TAPS; j++) { 607 registers->horizontal_coefficients_rgb[pos] = coefficients[pos].sign << 15 608 | coefficients[pos].exponent << 12 609 | coefficients[pos].mantissa; 610 pos++; 611 } 612 } 613 614 pos = 0; 615 for (int32 i = 0; i < NUM_PHASES; i++) { 616 for (int32 j = 0; j < NUM_HORIZONTAL_UV_TAPS; j++) { 617 registers->horizontal_coefficients_uv[pos] = coefficientsUV[pos].sign << 15 618 | coefficientsUV[pos].exponent << 12 619 | coefficientsUV[pos].mantissa; 620 pos++; 621 } 622 } 623 624 gInfo->last_vertical_overlay_scale = verticalScale; 625 gInfo->last_horizontal_overlay_scale = horizontalScale; 626 } 627 628 gInfo->last_overlay_view = *view; 629 gInfo->last_overlay_frame = *(overlay_frame *)window; 630 } 631 632 registers->color_control_output_mode = true; 633 registers->select_pipe = 0; 634 635 // program buffer 636 637 registers->buffer_rgb0 = overlay->buffer_offset + gInfo->overlay_position_buffer_offset; 638 registers->stride_rgb = buffer->bytes_per_row; 639 640 registers->mirroring_mode = (window->flags & B_OVERLAY_HORIZONTAL_MIRRORING) != 0 641 ? OVERLAY_MIRROR_HORIZONTAL : OVERLAY_MIRROR_NORMAL; 642 registers->ycbcr422_order = 0; 643 644 if (!gInfo->shared_info->overlay_active) { 645 // overlay is shown for the first time 646 set_color_key(window); 647 show_overlay(); 648 } else 649 update_overlay(updateCoefficients); 650 651 gInfo->current_overlay = overlay; 652 return B_OK; 653 } 654 655