1 /* 2 * Copyright 2006, Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Axel Dörfler, axeld@pinc-software.de 7 * 8 * The phase coefficient computation was taken from the X driver written by 9 * Alan Hourihane and David Dawes. 10 */ 11 12 13 #include "accelerant.h" 14 #include "accelerant_protos.h" 15 #include "commands.h" 16 17 #include <math.h> 18 #include <stdlib.h> 19 #include <string.h> 20 21 22 //#define TRACE_OVERLAY 23 #ifdef TRACE_OVERLAY 24 extern "C" void _sPrintf(const char *format, ...); 25 # define TRACE(x) _sPrintf x 26 #else 27 # define TRACE(x) ; 28 #endif 29 30 31 #define NUM_HORIZONTAL_TAPS 5 32 #define NUM_VERTICAL_TAPS 3 33 #define NUM_HORIZONTAL_UV_TAPS 3 34 #define NUM_VERTICAL_UV_TAPS 3 35 #define NUM_PHASES 17 36 #define MAX_TAPS 5 37 38 struct phase_coefficient { 39 uint8 sign; 40 uint8 exponent; 41 uint16 mantissa; 42 }; 43 44 45 /*! 46 Splits the coefficient floating point value into the 3 components 47 sign, mantissa, and exponent. 48 */ 49 static bool 50 split_coefficient(double &coefficient, int32 mantissaSize, 51 phase_coefficient &splitCoefficient) 52 { 53 double absCoefficient = fabs(coefficient); 54 55 int sign; 56 if (coefficient < 0.0) 57 sign = 1; 58 else 59 sign = 0; 60 61 int32 intCoefficient, res; 62 int32 maxValue = 1 << mantissaSize; 63 res = 12 - mantissaSize; 64 65 if ((intCoefficient = (int)(absCoefficient * 4 * maxValue + 0.5)) < maxValue) { 66 splitCoefficient.exponent = 3; 67 splitCoefficient.mantissa = intCoefficient << res; 68 coefficient = (double)intCoefficient / (double)(4 * maxValue); 69 } else if ((intCoefficient = (int)(absCoefficient * 2 * maxValue + 0.5)) < maxValue) { 70 splitCoefficient.exponent = 2; 71 splitCoefficient.mantissa = intCoefficient << res; 72 coefficient = (double)intCoefficient / (double)(2 * maxValue); 73 } else if ((intCoefficient = (int)(absCoefficient * maxValue + 0.5)) < maxValue) { 74 splitCoefficient.exponent = 1; 75 splitCoefficient.mantissa = intCoefficient << res; 76 coefficient = (double)intCoefficient / (double)maxValue; 77 } else if ((intCoefficient = (int)(absCoefficient * maxValue * 0.5 + 0.5)) < maxValue) { 78 splitCoefficient.exponent = 0; 79 splitCoefficient.mantissa = intCoefficient << res; 80 coefficient = (double)intCoefficient / (double)(maxValue / 2); 81 } else { 82 // coefficient out of range 83 return false; 84 } 85 86 splitCoefficient.sign = sign; 87 if (sign) 88 coefficient = -coefficient; 89 90 return true; 91 } 92 93 94 static void 95 update_coefficients(int32 taps, double filterCutOff, bool horizontal, bool isY, 96 phase_coefficient *splitCoefficients) 97 { 98 if (filterCutOff < 1) 99 filterCutOff = 1; 100 if (filterCutOff > 3) 101 filterCutOff = 3; 102 103 bool isVerticalUV = !horizontal && !isY; 104 int32 mantissaSize = horizontal ? 7 : 6; 105 106 double rawCoefficients[MAX_TAPS * 32], coefficients[NUM_PHASES][MAX_TAPS]; 107 108 int32 num = taps * 16; 109 for (int32 i = 0; i < num * 2; i++) { 110 double sinc; 111 double value = (1.0 / filterCutOff) * taps * PI * (i - num) / (2 * num); 112 if (value == 0.0) 113 sinc = 1.0; 114 else 115 sinc = sin(value) / value; 116 117 // Hamming window 118 double window = (0.5 - 0.5 * cos(i * PI / num)); 119 rawCoefficients[i] = sinc * window; 120 } 121 122 for (int32 i = 0; i < NUM_PHASES; i++) { 123 // Normalise the coefficients 124 double sum = 0.0; 125 int32 pos; 126 for (int32 j = 0; j < taps; j++) { 127 pos = i + j * 32; 128 sum += rawCoefficients[pos]; 129 } 130 for (int32 j = 0; j < taps; j++) { 131 pos = i + j * 32; 132 coefficients[i][j] = rawCoefficients[pos] / sum; 133 } 134 135 // split them into sign/mantissa/exponent 136 for (int32 j = 0; j < taps; j++) { 137 pos = j + i * taps; 138 139 split_coefficient(coefficients[i][j], mantissaSize 140 + (((j == (taps - 1) / 2) && !isVerticalUV) ? 2 : 0), 141 splitCoefficients[pos]); 142 } 143 144 int32 tapAdjust[MAX_TAPS]; 145 tapAdjust[0] = (taps - 1) / 2; 146 for (int32 j = 1, k = 1; j <= tapAdjust[0]; j++, k++) { 147 tapAdjust[k] = tapAdjust[0] - j; 148 tapAdjust[++k] = tapAdjust[0] + j; 149 } 150 151 // Adjust the coefficients 152 sum = 0.0; 153 for (int32 j = 0; j < taps; j++) { 154 sum += coefficients[i][j]; 155 } 156 157 if (sum != 1.0) { 158 for (int32 k = 0; k < taps; k++) { 159 int32 tap2Fix = tapAdjust[k]; 160 double diff = 1.0 - sum; 161 162 coefficients[i][tap2Fix] += diff; 163 pos = tap2Fix + i * taps; 164 165 split_coefficient(coefficients[i][tap2Fix], mantissaSize 166 + (((tap2Fix == (taps - 1) / 2) && !isVerticalUV) ? 2 : 0), 167 splitCoefficients[pos]); 168 169 sum = 0.0; 170 for (int32 j = 0; j < taps; j++) { 171 sum += coefficients[i][j]; 172 } 173 if (sum == 1.0) 174 break; 175 } 176 } 177 } 178 } 179 180 181 static void 182 set_color_key(uint8 red, uint8 green, uint8 blue, 183 uint8 redMask, uint8 greenMask, uint8 blueMask) 184 { 185 overlay_registers *registers = gInfo->overlay_registers; 186 187 registers->color_key_red = red; 188 registers->color_key_green = green; 189 registers->color_key_blue = blue; 190 registers->color_key_mask_red = ~redMask; 191 registers->color_key_mask_green = ~greenMask; 192 registers->color_key_mask_blue = ~blueMask; 193 registers->color_key_enabled = true; 194 } 195 196 197 static void 198 set_color_key(const overlay_window *window) 199 { 200 switch (gInfo->shared_info->current_mode.space) { 201 case B_CMAP8: 202 set_color_key(0, 0, window->blue.value, 0x0, 0x0, 0xff); 203 break; 204 case B_RGB15: 205 set_color_key(window->red.value << 3, window->green.value << 3, 206 window->blue.value << 3, window->red.mask << 3, window->green.mask << 3, 207 window->blue.mask << 3); 208 break; 209 case B_RGB16: 210 set_color_key(window->red.value << 3, window->green.value << 2, 211 window->blue.value << 3, window->red.mask << 3, window->green.mask << 2, 212 window->blue.mask << 3); 213 break; 214 215 default: 216 set_color_key(window->red.value, window->green.value, 217 window->blue.value, window->red.mask, window->green.mask, 218 window->blue.mask); 219 break; 220 } 221 } 222 223 224 static void 225 update_overlay(bool updateCoefficients) 226 { 227 if (!gInfo->shared_info->overlay_active) 228 return; 229 230 QueueCommands queue(gInfo->shared_info->secondary_ring_buffer); 231 queue.PutFlush(); 232 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 233 queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, updateCoefficients); 234 235 // make sure the flip is done now 236 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 237 queue.PutFlush(); 238 239 // TRACE(("update overlay: UPDATE: %lx, TEST: %lx, STATUS: %lx, EXTENDED_STATUS: %lx\n", 240 // read32(INTEL_OVERLAY_UPDATE), read32(INTEL_OVERLAY_TEST), read32(INTEL_OVERLAY_STATUS), 241 // read32(INTEL_OVERLAY_EXTENDED_STATUS))); 242 } 243 244 245 static void 246 show_overlay(void) 247 { 248 if (gInfo->shared_info->overlay_active) 249 return; 250 251 gInfo->shared_info->overlay_active = true; 252 gInfo->overlay_registers->overlay_enabled = true; 253 254 QueueCommands queue(gInfo->shared_info->secondary_ring_buffer); 255 queue.PutOverlayFlip(COMMAND_OVERLAY_ON, true); 256 queue.PutFlush(); 257 } 258 259 260 static void 261 hide_overlay(void) 262 { 263 if (!gInfo->shared_info->overlay_active) 264 return; 265 266 overlay_registers *registers = gInfo->overlay_registers; 267 268 gInfo->shared_info->overlay_active = false; 269 registers->overlay_enabled = false; 270 271 QueueCommands queue(gInfo->shared_info->secondary_ring_buffer); 272 273 // flush pending commands 274 queue.PutFlush(); 275 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 276 277 // clear overlay enabled bit 278 queue.PutOverlayFlip(COMMAND_OVERLAY_CONTINUE, false); 279 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 280 281 // turn off overlay engine 282 queue.PutOverlayFlip(COMMAND_OVERLAY_OFF, false); 283 queue.PutWaitFor(COMMAND_WAIT_FOR_OVERLAY_FLIP); 284 285 gInfo->current_overlay = NULL; 286 } 287 288 289 // #pragma mark - 290 291 292 uint32 293 intel_overlay_count(const display_mode *mode) 294 { 295 // TODO: make this depending on the amount of RAM and the screen mode 296 return 1; 297 } 298 299 300 const uint32 * 301 intel_overlay_supported_spaces(const display_mode *mode) 302 { 303 static const uint32 kSupportedSpaces[] = {B_RGB15, B_RGB16, B_RGB32, B_YCbCr422, 0}; 304 305 return kSupportedSpaces; 306 } 307 308 309 uint32 310 intel_overlay_supported_features(uint32 colorSpace) 311 { 312 return B_OVERLAY_COLOR_KEY 313 | B_OVERLAY_HORIZONTAL_FILTERING 314 | B_OVERLAY_VERTICAL_FILTERING 315 | B_OVERLAY_HORIZONTAL_MIRRORING; 316 } 317 318 319 const overlay_buffer * 320 intel_allocate_overlay_buffer(color_space colorSpace, uint16 width, 321 uint16 height) 322 { 323 TRACE(("intel_allocate_overlay_buffer(width %u, height %u, colorSpace %lu)\n", 324 width, height, colorSpace)); 325 326 uint32 bytesPerPixel; 327 328 switch (colorSpace) { 329 case B_RGB15: 330 bytesPerPixel = 2; 331 break; 332 case B_RGB16: 333 bytesPerPixel = 2; 334 break; 335 case B_RGB32: 336 bytesPerPixel = 4; 337 break; 338 case B_YCbCr422: 339 bytesPerPixel = 2; 340 break; 341 default: 342 return NULL; 343 } 344 345 struct overlay *overlay = (struct overlay *)malloc(sizeof(struct overlay)); 346 if (overlay == NULL) 347 return NULL; 348 349 // TODO: locking! 350 351 // alloc graphics mem 352 overlay_buffer *buffer = &overlay->buffer; 353 354 buffer->space = colorSpace; 355 buffer->width = width; 356 buffer->height = height; 357 buffer->bytes_per_row = (width * bytesPerPixel + 0x3f) & ~0x3f; 358 359 status_t status = intel_allocate_memory(buffer->bytes_per_row * height, 360 overlay->buffer_handle, overlay->buffer_offset); 361 if (status < B_OK) { 362 free(overlay); 363 return NULL; 364 } 365 366 buffer->buffer = gInfo->shared_info->graphics_memory + overlay->buffer_offset; 367 buffer->buffer_dma = gInfo->shared_info->physical_graphics_memory + overlay->buffer_offset; 368 369 TRACE(("allocated overlay buffer: handle=%x, offset=%x, address=%x, physical address=%x\n", 370 overlay->buffer_handle, overlay->buffer_offset, buffer->buffer, buffer->buffer_dma)); 371 372 return buffer; 373 } 374 375 376 status_t 377 intel_release_overlay_buffer(const overlay_buffer *buffer) 378 { 379 TRACE(("intel_release_overlay_buffer(buffer %p)\n", buffer)); 380 381 struct overlay *overlay = (struct overlay *)buffer; 382 383 // TODO: locking! 384 385 if (gInfo->current_overlay == overlay) 386 hide_overlay(); 387 388 intel_free_memory(overlay->buffer_handle); 389 free(overlay); 390 391 return B_OK; 392 } 393 394 395 status_t 396 intel_get_overlay_constraints(const display_mode *mode, const overlay_buffer *buffer, 397 overlay_constraints *constraints) 398 { 399 TRACE(("intel_get_overlay_constraints(buffer %p)\n", buffer)); 400 401 // taken from the Radeon driver... 402 403 // scaler input restrictions 404 // TODO: check all these values; most of them are probably too restrictive 405 406 // position 407 constraints->view.h_alignment = 0; 408 constraints->view.v_alignment = 0; 409 410 // alignment 411 switch (buffer->space) { 412 case B_RGB15: 413 constraints->view.width_alignment = 7; 414 break; 415 case B_RGB16: 416 constraints->view.width_alignment = 7; 417 break; 418 case B_RGB32: 419 constraints->view.width_alignment = 3; 420 break; 421 case B_YCbCr422: 422 constraints->view.width_alignment = 7; 423 break; 424 case B_YUV12: 425 constraints->view.width_alignment = 7; 426 default: 427 return B_BAD_VALUE; 428 } 429 constraints->view.height_alignment = 0; 430 431 // size 432 constraints->view.width.min = 4; // make 4-tap filter happy 433 constraints->view.height.min = 4; 434 constraints->view.width.max = buffer->width; 435 constraints->view.height.max = buffer->height; 436 437 // scaler output restrictions 438 constraints->window.h_alignment = 0; 439 constraints->window.v_alignment = 0; 440 constraints->window.width_alignment = 0; 441 constraints->window.height_alignment = 0; 442 constraints->window.width.min = 2; 443 constraints->window.width.max = mode->virtual_width; 444 constraints->window.height.min = 2; 445 constraints->window.height.max = mode->virtual_height; 446 447 // TODO: the minimum values are not tested 448 constraints->h_scale.min = 1.0f / (1 << 4); 449 constraints->h_scale.max = buffer->width * 7; 450 constraints->v_scale.min = 1.0f / (1 << 4); 451 constraints->v_scale.max = buffer->height * 7; 452 453 return B_OK; 454 } 455 456 457 overlay_token 458 intel_allocate_overlay(void) 459 { 460 TRACE(("intel_allocate_overlay()\n")); 461 462 // we only have a single overlay channel 463 if (atomic_or(&gInfo->shared_info->overlay_channel_used, 1) != 0) 464 return NULL; 465 466 return (overlay_token)++gInfo->shared_info->overlay_token; 467 } 468 469 470 status_t 471 intel_release_overlay(overlay_token overlayToken) 472 { 473 TRACE(("intel_allocate_overlay(token %ld)\n", (uint32)overlayToken)); 474 475 // we only have a single token, which simplifies this 476 if (overlayToken != (overlay_token)gInfo->shared_info->overlay_token) 477 return B_BAD_VALUE; 478 479 atomic_and(&gInfo->shared_info->overlay_channel_used, 0); 480 481 return B_OK; 482 } 483 484 485 status_t 486 intel_configure_overlay(overlay_token overlayToken, const overlay_buffer *buffer, 487 const overlay_window *window, const overlay_view *view) 488 { 489 TRACE(("intel_configure_overlay: buffer %p, window %p, view %p\n", 490 buffer, window, view)); 491 492 if (overlayToken != (overlay_token)gInfo->shared_info->overlay_token) 493 return B_BAD_VALUE; 494 495 if (window == NULL && view == NULL) { 496 hide_overlay(); 497 return B_OK; 498 } 499 500 struct overlay *overlay = (struct overlay *)buffer; 501 overlay_registers *registers = gInfo->overlay_registers; 502 bool updateCoefficients = false; 503 uint32 bytesPerPixel = 2; 504 505 switch (buffer->space) { 506 case B_RGB15: 507 registers->source_format = OVERLAY_FORMAT_RGB15; 508 break; 509 case B_RGB16: 510 registers->source_format = OVERLAY_FORMAT_RGB16; 511 break; 512 case B_RGB32: 513 registers->source_format = OVERLAY_FORMAT_RGB32; 514 bytesPerPixel = 4; 515 break; 516 case B_YCbCr422: 517 registers->source_format = OVERLAY_FORMAT_YCbCr422; 518 break; 519 } 520 521 if (!gInfo->shared_info->overlay_active 522 || memcmp(&gInfo->last_overlay_view, view, sizeof(overlay_view)) 523 || memcmp(&gInfo->last_overlay_frame, window, sizeof(overlay_frame))) { 524 // scaling has changed, program window and scaling factor 525 526 // clip the window to on screen bounds 527 // TODO: this is not yet complete or correct - especially if we start 528 // to support moving the display! 529 int32 left, top, right, bottom; 530 left = window->h_start; 531 right = window->h_start + window->width; 532 top = window->v_start; 533 bottom = window->v_start + window->height; 534 if (left < 0) 535 left = 0; 536 if (top < 0) 537 top = 0; 538 if (right > gInfo->shared_info->current_mode.timing.h_display) 539 right = gInfo->shared_info->current_mode.timing.h_display; 540 if (bottom > gInfo->shared_info->current_mode.timing.v_display) 541 bottom = gInfo->shared_info->current_mode.timing.v_display; 542 if (left >= right || top >= bottom) { 543 // overlay is not within visible bounds 544 hide_overlay(); 545 return B_OK; 546 } 547 548 registers->window_left = left; 549 registers->window_top = top; 550 registers->window_width = right - left; 551 registers->window_height = bottom - top; 552 553 uint32 horizontalScale = (view->width << 12) / window->width; 554 uint32 verticalScale = (view->height << 12) / window->height; 555 uint32 horizontalScaleUV = horizontalScale >> 1; 556 uint32 verticalScaleUV = verticalScale >> 1; 557 horizontalScale = horizontalScaleUV << 1; 558 verticalScale = verticalScaleUV << 1; 559 560 // we need to offset the overlay view to adapt it to the clipping 561 // (in addition to whatever offset is desired already) 562 left = view->h_start - (int32)((window->h_start - left) * (horizontalScale / 4096.0) + 0.5); 563 top = view->v_start - (int32)((window->v_start - top) * (verticalScale / 4096.0) + 0.5); 564 right = view->h_start + view->width; 565 bottom = view->v_start + view->height; 566 567 gInfo->overlay_position_buffer_offset = buffer->bytes_per_row * top 568 + left * bytesPerPixel; 569 570 // Note: in non-planar mode, you *must* not program the source width/height 571 // UV registers - they must stay cleared, or the chip is doing strange stuff. 572 // On the other hand, you have to program the UV scaling registers, or the 573 // result will be wrong, too. 574 registers->source_width_rgb = right - left; 575 registers->source_height_rgb = bottom - top; 576 registers->source_bytes_per_row_rgb = (((overlay->buffer_offset + (view->width << 1) 577 + 0x1f) >> 5) - (overlay->buffer_offset >> 5) - 1) << 2; 578 579 // horizontal scaling 580 registers->scale_rgb.horizontal_downscale_factor = horizontalScale >> 12; 581 registers->scale_rgb.horizontal_scale_fraction = horizontalScale & 0xfff; 582 registers->scale_uv.horizontal_downscale_factor = horizontalScaleUV >> 12; 583 registers->scale_uv.horizontal_scale_fraction = horizontalScaleUV & 0xfff; 584 585 // vertical scaling 586 registers->scale_rgb.vertical_scale_fraction = verticalScale & 0xfff; 587 registers->scale_uv.vertical_scale_fraction = verticalScaleUV & 0xfff; 588 registers->vertical_scale_rgb = verticalScale >> 12; 589 registers->vertical_scale_uv = verticalScaleUV >> 12; 590 591 TRACE(("scale: h = %ld.%ld, v = %ld.%ld\n", horizontalScale >> 12, 592 horizontalScale & 0xfff, verticalScale >> 12, verticalScale & 0xfff)); 593 594 if (verticalScale != gInfo->last_vertical_overlay_scale 595 || horizontalScale != gInfo->last_horizontal_overlay_scale) { 596 // Recompute phase coefficients (taken from X driver) 597 updateCoefficients = true; 598 599 phase_coefficient coefficients[NUM_HORIZONTAL_TAPS * NUM_PHASES]; 600 update_coefficients(NUM_HORIZONTAL_TAPS, horizontalScale / 4096.0, 601 true, true, coefficients); 602 603 phase_coefficient coefficientsUV[NUM_HORIZONTAL_UV_TAPS * NUM_PHASES]; 604 update_coefficients(NUM_HORIZONTAL_UV_TAPS, horizontalScaleUV / 4096.0, 605 true, false, coefficientsUV); 606 607 int32 pos = 0; 608 for (int32 i = 0; i < NUM_PHASES; i++) { 609 for (int32 j = 0; j < NUM_HORIZONTAL_TAPS; j++) { 610 registers->horizontal_coefficients_rgb[pos] = coefficients[pos].sign << 15 611 | coefficients[pos].exponent << 12 612 | coefficients[pos].mantissa; 613 pos++; 614 } 615 } 616 617 pos = 0; 618 for (int32 i = 0; i < NUM_PHASES; i++) { 619 for (int32 j = 0; j < NUM_HORIZONTAL_UV_TAPS; j++) { 620 registers->horizontal_coefficients_uv[pos] = coefficientsUV[pos].sign << 15 621 | coefficientsUV[pos].exponent << 12 622 | coefficientsUV[pos].mantissa; 623 pos++; 624 } 625 } 626 627 gInfo->last_vertical_overlay_scale = verticalScale; 628 gInfo->last_horizontal_overlay_scale = horizontalScale; 629 } 630 631 gInfo->last_overlay_view = *view; 632 gInfo->last_overlay_frame = *(overlay_frame *)window; 633 } 634 635 registers->color_control_output_mode = true; 636 registers->select_pipe = 0; 637 638 // program buffer 639 640 registers->buffer_rgb0 = overlay->buffer_offset + gInfo->overlay_position_buffer_offset; 641 registers->stride_rgb = buffer->bytes_per_row; 642 643 registers->mirroring_mode = (window->flags & B_OVERLAY_HORIZONTAL_MIRRORING) != 0 644 ? OVERLAY_MIRROR_HORIZONTAL : OVERLAY_MIRROR_NORMAL; 645 registers->ycbcr422_order = 0; 646 647 if (!gInfo->shared_info->overlay_active) { 648 // overlay is shown for the first time 649 set_color_key(window); 650 show_overlay(); 651 } else 652 update_overlay(updateCoefficients); 653 654 gInfo->current_overlay = overlay; 655 return B_OK; 656 } 657 658