/*
 * Copyright (C) 2001 Carlos Hasan
 * Copyright (C) 2001 François Revol
 * Copyright (C) 2001 Axel Dörfler
 * Copyright (C) 2004 Marcus Overhagen
 * Copyright (C) 2009 Stephan Amßus <superstippi@gmx.de>
 * Copyright (C) 2014 Colin Günther <coling@gmx.de>
 * Copyright (C) 2015 Adrien Destugues <pulkomandy@pulkomandy.tk>
 *
 * All rights reserved. Distributed under the terms of the MIT License.
 */

//! libavcodec based decoder for Haiku


#include "AVCodecDecoder.h"

#include <new>

#include <assert.h>
#include <string.h>

#include <Bitmap.h>
#include <Debug.h>

#include "Utilities.h"


// Tracing: uncomment TRACE_AV_CODEC to route the TRACE*() macros to printf();
// otherwise they expand to nothing.
#undef TRACE
//#define TRACE_AV_CODEC
#ifdef TRACE_AV_CODEC
#	define TRACE(x...) printf(x)
#	define TRACE_AUDIO(x...) printf(x)
#	define TRACE_VIDEO(x...) printf(x)
#else
#	define TRACE(x...)
#	define TRACE_AUDIO(x...)
#	define TRACE_VIDEO(x...)
#endif

// Debugging aid: uncomment LOG_STREAM_TO_FILE to create the two raw dump files
// declared below.
//#define LOG_STREAM_TO_FILE
#ifdef LOG_STREAM_TO_FILE
#	include <File.h>
	static BFile sAudioStreamLogFile(
		"/boot/home/Desktop/AVCodecDebugAudioStream.raw",
		B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
	static BFile sVideoStreamLogFile(
		"/boot/home/Desktop/AVCodecDebugVideoStream.raw",
		B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
	static int sDumpedPackets = 0;
#endif


// Compatibility shims so the code below can use one set of names across the
// libavcodec versions selected by the version checks.
#if LIBAVCODEC_VERSION_INT > ((54 << 16) | (50 << 8))
typedef AVCodecID CodecID;
#endif
#if LIBAVCODEC_VERSION_INT < ((55 << 16) | (45 << 8))
#define av_frame_alloc avcodec_alloc_frame
#define av_frame_unref avcodec_get_frame_defaults
#define av_frame_free avcodec_free_frame
#endif


// Header that precedes the codec extra data in the info buffer handed to
// Setup() for B_WAV_FORMAT_FAMILY streams. Setup() reads block_align and
// extra_size from it; the extra data itself directly follows the structure.
struct wave_format_ex {
	uint16 format_tag;
	uint16 channels;
	uint32 frames_per_sec;
	uint32 avg_bytes_per_sec;
	uint16 block_align;
	uint16 bits_per_sample;
	uint16 extra_size;
	// extra_data[extra_size]
} _PACKED;

// Stored behind fRawDecodedAudio->opaque: remembers the sample rate and the
// channel count of the first audio frame that was moved into fDecodedData.
struct avformat_codec_context {
	int sample_rate;
	int channels;
};


// 
profiling related globals 82 #define DO_PROFILING 0 83 84 static bigtime_t decodingTime = 0; 85 static bigtime_t conversionTime = 0; 86 static long profileCounter = 0; 87 88 89 AVCodecDecoder::AVCodecDecoder() 90 : 91 fHeader(), 92 fInputFormat(), 93 fFrame(0), 94 fIsAudio(false), 95 fCodec(NULL), 96 fContext(avcodec_alloc_context3(NULL)), 97 fResampleContext(NULL), 98 fDecodedData(NULL), 99 fDecodedDataSizeInBytes(0), 100 fPostProcessedDecodedPicture(av_frame_alloc()), 101 fRawDecodedPicture(av_frame_alloc()), 102 fRawDecodedAudio(av_frame_alloc()), 103 104 fCodecInitDone(false), 105 106 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 107 fSwsContext(NULL), 108 #else 109 fFormatConversionFunc(NULL), 110 #endif 111 112 fExtraData(NULL), 113 fExtraDataSize(0), 114 fBlockAlign(0), 115 116 fOutputColorSpace(B_NO_COLOR_SPACE), 117 fOutputFrameCount(0), 118 fOutputFrameRate(1.0), 119 fOutputFrameSize(0), 120 fInputFrameSize(0), 121 122 fChunkBuffer(NULL), 123 fChunkBufferSize(0), 124 fAudioDecodeError(false), 125 126 fDecodedDataBuffer(av_frame_alloc()), 127 fDecodedDataBufferOffset(0), 128 fDecodedDataBufferSize(0) 129 { 130 TRACE("AVCodecDecoder::AVCodecDecoder()\n"); 131 132 system_info info; 133 get_system_info(&info); 134 135 fContext->err_recognition = AV_EF_CAREFUL; 136 fContext->error_concealment = 3; 137 fContext->thread_count = info.cpu_count; 138 } 139 140 141 AVCodecDecoder::~AVCodecDecoder() 142 { 143 TRACE("[%c] AVCodecDecoder::~AVCodecDecoder()\n", fIsAudio?('a'):('v')); 144 145 #if DO_PROFILING 146 if (profileCounter > 0) { 147 printf("[%c] profile: d1 = %lld, d2 = %lld (%Ld)\n", 148 fIsAudio?('a'):('v'), decodingTime / profileCounter, 149 conversionTime / profileCounter, fFrame); 150 } 151 #endif 152 153 if (fCodecInitDone) 154 avcodec_close(fContext); 155 156 swr_free(&fResampleContext); 157 free(fChunkBuffer); 158 free(fDecodedData); 159 160 av_free(fPostProcessedDecodedPicture); 161 av_free(fRawDecodedPicture); 162 av_free(fRawDecodedAudio->opaque); 163 
av_free(fRawDecodedAudio); 164 av_free(fContext); 165 av_free(fDecodedDataBuffer); 166 167 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 168 if (fSwsContext != NULL) 169 sws_freeContext(fSwsContext); 170 #endif 171 172 delete[] fExtraData; 173 } 174 175 176 void 177 AVCodecDecoder::GetCodecInfo(media_codec_info* mci) 178 { 179 snprintf(mci->short_name, 32, "%s", fCodec->name); 180 snprintf(mci->pretty_name, 96, "%s", fCodec->long_name); 181 mci->id = 0; 182 mci->sub_id = fCodec->id; 183 } 184 185 186 status_t 187 AVCodecDecoder::Setup(media_format* ioEncodedFormat, const void* infoBuffer, 188 size_t infoSize) 189 { 190 if (ioEncodedFormat->type != B_MEDIA_ENCODED_AUDIO 191 && ioEncodedFormat->type != B_MEDIA_ENCODED_VIDEO) 192 return B_ERROR; 193 194 fIsAudio = (ioEncodedFormat->type == B_MEDIA_ENCODED_AUDIO); 195 TRACE("[%c] AVCodecDecoder::Setup()\n", fIsAudio?('a'):('v')); 196 197 #ifdef TRACE_AV_CODEC 198 char buffer[1024]; 199 string_for_format(*ioEncodedFormat, buffer, sizeof(buffer)); 200 TRACE("[%c] input_format = %s\n", fIsAudio?('a'):('v'), buffer); 201 TRACE("[%c] infoSize = %ld\n", fIsAudio?('a'):('v'), infoSize); 202 TRACE("[%c] user_data_type = %08lx\n", fIsAudio?('a'):('v'), 203 ioEncodedFormat->user_data_type); 204 TRACE("[%c] meta_data_size = %ld\n", fIsAudio?('a'):('v'), 205 ioEncodedFormat->MetaDataSize()); 206 #endif 207 208 media_format_description description; 209 if (BMediaFormats().GetCodeFor(*ioEncodedFormat, 210 B_MISC_FORMAT_FAMILY, &description) == B_OK) { 211 if (description.u.misc.file_format != 'ffmp') 212 return B_NOT_SUPPORTED; 213 fCodec = avcodec_find_decoder(static_cast<CodecID>( 214 description.u.misc.codec)); 215 if (fCodec == NULL) { 216 TRACE(" unable to find the correct FFmpeg " 217 "decoder (id = %lu)\n", description.u.misc.codec); 218 return B_ERROR; 219 } 220 TRACE(" found decoder %s\n", fCodec->name); 221 222 const void* extraData = infoBuffer; 223 fExtraDataSize = infoSize; 224 if (description.family == B_WAV_FORMAT_FAMILY 225 
&& infoSize >= sizeof(wave_format_ex)) { 226 TRACE(" trying to use wave_format_ex\n"); 227 // Special case extra data in B_WAV_FORMAT_FAMILY 228 const wave_format_ex* waveFormatData 229 = (const wave_format_ex*)infoBuffer; 230 231 size_t waveFormatSize = infoSize; 232 if (waveFormatData != NULL && waveFormatSize > 0) { 233 fBlockAlign = waveFormatData->block_align; 234 TRACE(" found block align: %d\n", fBlockAlign); 235 fExtraDataSize = waveFormatData->extra_size; 236 // skip the wave_format_ex from the extra data. 237 extraData = waveFormatData + 1; 238 } 239 } else { 240 if (fIsAudio) { 241 fBlockAlign 242 = ioEncodedFormat->u.encoded_audio.output.buffer_size; 243 TRACE(" using buffer_size as block align: %d\n", 244 fBlockAlign); 245 } 246 } 247 if (extraData != NULL && fExtraDataSize > 0) { 248 TRACE("AVCodecDecoder: extra data size %ld\n", infoSize); 249 delete[] fExtraData; 250 fExtraData = new(std::nothrow) char[fExtraDataSize]; 251 if (fExtraData != NULL) 252 memcpy(fExtraData, infoBuffer, fExtraDataSize); 253 else 254 fExtraDataSize = 0; 255 } 256 257 fInputFormat = *ioEncodedFormat; 258 return B_OK; 259 } else { 260 TRACE("AVCodecDecoder: BMediaFormats().GetCodeFor() failed.\n"); 261 } 262 263 printf("AVCodecDecoder::Setup failed!\n"); 264 return B_ERROR; 265 } 266 267 268 status_t 269 AVCodecDecoder::SeekedTo(int64 frame, bigtime_t time) 270 { 271 status_t ret = B_OK; 272 // Reset the FFmpeg codec to flush buffers, so we keep the sync 273 if (fCodecInitDone) { 274 avcodec_flush_buffers(fContext); 275 _ResetTempPacket(); 276 } 277 278 // Flush internal buffers as well. 
279 free(fChunkBuffer); 280 fChunkBuffer = NULL; 281 fChunkBufferSize = 0; 282 fDecodedDataBufferOffset = 0; 283 fDecodedDataBufferSize = 0; 284 fDecodedDataSizeInBytes = 0; 285 286 fFrame = frame; 287 288 return ret; 289 } 290 291 292 status_t 293 AVCodecDecoder::NegotiateOutputFormat(media_format* inOutFormat) 294 { 295 TRACE("AVCodecDecoder::NegotiateOutputFormat() [%c] \n", 296 fIsAudio?('a'):('v')); 297 298 #ifdef TRACE_AV_CODEC 299 char buffer[1024]; 300 string_for_format(*inOutFormat, buffer, sizeof(buffer)); 301 TRACE(" [%c] requested format = %s\n", fIsAudio?('a'):('v'), buffer); 302 #endif 303 304 if (fIsAudio) 305 return _NegotiateAudioOutputFormat(inOutFormat); 306 else 307 return _NegotiateVideoOutputFormat(inOutFormat); 308 } 309 310 311 status_t 312 AVCodecDecoder::Decode(void* outBuffer, int64* outFrameCount, 313 media_header* mediaHeader, media_decode_info* info) 314 { 315 if (!fCodecInitDone) 316 return B_NO_INIT; 317 318 status_t ret; 319 if (fIsAudio) 320 ret = _DecodeAudio(outBuffer, outFrameCount, mediaHeader, info); 321 else 322 ret = _DecodeVideo(outBuffer, outFrameCount, mediaHeader, info); 323 324 return ret; 325 } 326 327 328 // #pragma mark - 329 330 331 void 332 AVCodecDecoder::_ResetTempPacket() 333 { 334 av_init_packet(&fTempPacket); 335 fTempPacket.size = 0; 336 fTempPacket.data = NULL; 337 } 338 339 340 status_t 341 AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat) 342 { 343 TRACE("AVCodecDecoder::_NegotiateAudioOutputFormat()\n"); 344 345 _ApplyEssentialAudioContainerPropertiesToContext(); 346 // This makes audio formats play that encode the audio properties in 347 // the audio container (e.g. WMA) and not in the audio frames 348 // themself (e.g. MP3). 349 // Note: Doing this step unconditionally is OK, because the first call 350 // to _DecodeNextAudioFrameChunk() will update the essential audio 351 // format properties accordingly regardless of the settings here. 
352 353 // close any previous instance 354 if (fCodecInitDone) { 355 fCodecInitDone = false; 356 avcodec_close(fContext); 357 } 358 359 if (avcodec_open2(fContext, fCodec, NULL) >= 0) 360 fCodecInitDone = true; 361 else { 362 TRACE("avcodec_open() failed to init codec!\n"); 363 return B_ERROR; 364 } 365 366 free(fChunkBuffer); 367 fChunkBuffer = NULL; 368 fChunkBufferSize = 0; 369 fAudioDecodeError = false; 370 fDecodedDataBufferOffset = 0; 371 fDecodedDataBufferSize = 0; 372 373 _ResetTempPacket(); 374 375 status_t statusOfDecodingFirstFrameChunk = _DecodeNextAudioFrameChunk(); 376 if (statusOfDecodingFirstFrameChunk != B_OK) { 377 TRACE("[a] decoding first audio frame chunk failed\n"); 378 return B_ERROR; 379 } 380 381 media_multi_audio_format outputAudioFormat; 382 outputAudioFormat = media_raw_audio_format::wildcard; 383 outputAudioFormat.byte_order = B_MEDIA_HOST_ENDIAN; 384 outputAudioFormat.frame_rate = fContext->sample_rate; 385 outputAudioFormat.channel_count = fContext->channels; 386 ConvertAVSampleFormatToRawAudioFormat(fContext->sample_fmt, 387 outputAudioFormat.format); 388 // Check that format is not still a wild card! 
389 if (outputAudioFormat.format == 0) { 390 TRACE(" format still a wild-card, assuming B_AUDIO_SHORT.\n"); 391 outputAudioFormat.format = media_raw_audio_format::B_AUDIO_SHORT; 392 } 393 outputAudioFormat.buffer_size = inOutFormat->u.raw_audio.buffer_size; 394 // Check that buffer_size has a sane value 395 size_t sampleSize = outputAudioFormat.format 396 & media_raw_audio_format::B_AUDIO_SIZE_MASK; 397 if (outputAudioFormat.buffer_size == 0) { 398 outputAudioFormat.buffer_size = 512 * sampleSize 399 * outputAudioFormat.channel_count; 400 } 401 402 inOutFormat->type = B_MEDIA_RAW_AUDIO; 403 inOutFormat->u.raw_audio = outputAudioFormat; 404 inOutFormat->require_flags = 0; 405 inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS; 406 407 // Initialize variables needed to manage decoding as much audio frames as 408 // needed to fill the buffer_size. 409 fOutputFrameSize = sampleSize * outputAudioFormat.channel_count; 410 fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize; 411 fOutputFrameRate = outputAudioFormat.frame_rate; 412 if (av_sample_fmt_is_planar(fContext->sample_fmt)) 413 fInputFrameSize = sampleSize; 414 else 415 fInputFrameSize = fOutputFrameSize; 416 417 fRawDecodedAudio->opaque 418 = av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context)); 419 if (fRawDecodedAudio->opaque == NULL) 420 return B_NO_MEMORY; 421 422 if (av_sample_fmt_is_planar(fContext->sample_fmt)) { 423 fResampleContext = swr_alloc_set_opts(NULL, 424 fContext->channel_layout, fContext->request_sample_fmt, 425 fContext->sample_rate, 426 fContext->channel_layout, fContext->sample_fmt, fContext->sample_rate, 427 0, NULL); 428 swr_init(fResampleContext); 429 } 430 431 TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, " 432 "output frame size: %d, count: %ld, rate: %.2f\n", 433 fContext->bit_rate, fContext->sample_rate, fContext->channels, 434 fOutputFrameSize, fOutputFrameCount, fOutputFrameRate); 435 436 return B_OK; 437 } 438 439 440 status_t 441 
AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat) 442 { 443 TRACE("AVCodecDecoder::_NegotiateVideoOutputFormat()\n"); 444 445 TRACE(" requested video format 0x%x\n", 446 inOutFormat->u.raw_video.display.format); 447 448 _ApplyEssentialVideoContainerPropertiesToContext(); 449 // This makes video formats play that encode the video properties in 450 // the video container (e.g. WMV) and not in the video frames 451 // themself (e.g. MPEG2). 452 // Note: Doing this step unconditionally is OK, because the first call 453 // to _DecodeNextVideoFrame() will update the essential video format 454 // properties accordingly regardless of the settings here. 455 456 bool codecCanHandleIncompleteFrames 457 = (fCodec->capabilities & CODEC_CAP_TRUNCATED) != 0; 458 if (codecCanHandleIncompleteFrames) { 459 // Expect and handle video frames to be splitted across consecutive 460 // data chunks. 461 fContext->flags |= CODEC_FLAG_TRUNCATED; 462 } 463 464 // close any previous instance 465 if (fCodecInitDone) { 466 fCodecInitDone = false; 467 avcodec_close(fContext); 468 } 469 470 if (avcodec_open2(fContext, fCodec, NULL) >= 0) 471 fCodecInitDone = true; 472 else { 473 TRACE("avcodec_open() failed to init codec!\n"); 474 return B_ERROR; 475 } 476 477 // Make MediaPlayer happy (if not in rgb32 screen depth and no overlay, 478 // it will only ask for YCbCr, which DrawBitmap doesn't handle, so the 479 // default colordepth is RGB32). 
480 if (inOutFormat->u.raw_video.display.format == B_YCbCr422) 481 fOutputColorSpace = B_YCbCr422; 482 else 483 fOutputColorSpace = B_RGB32; 484 485 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 486 if (fSwsContext != NULL) 487 sws_freeContext(fSwsContext); 488 fSwsContext = NULL; 489 #else 490 fFormatConversionFunc = 0; 491 #endif 492 493 free(fChunkBuffer); 494 fChunkBuffer = NULL; 495 fChunkBufferSize = 0; 496 497 _ResetTempPacket(); 498 499 status_t statusOfDecodingFirstFrame = _DecodeNextVideoFrame(); 500 if (statusOfDecodingFirstFrame != B_OK) { 501 TRACE("[v] decoding first video frame failed\n"); 502 return B_ERROR; 503 } 504 505 // Note: fSwsContext / fFormatConversionFunc should have been initialized 506 // by first call to _DecodeNextVideoFrame() above. 507 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 508 if (fSwsContext == NULL) { 509 TRACE("No SWS Scale context or decoder has not set the pixel format " 510 "yet!\n"); 511 } 512 #else 513 if (fFormatConversionFunc == NULL) { 514 TRACE("no pixel format conversion function found or decoder has " 515 "not set the pixel format yet!\n"); 516 } 517 #endif 518 519 inOutFormat->type = B_MEDIA_RAW_VIDEO; 520 inOutFormat->require_flags = 0; 521 inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS; 522 inOutFormat->u.raw_video = fInputFormat.u.encoded_video.output; 523 inOutFormat->u.raw_video.interlace = 1; 524 // Progressive (non-interlaced) video frames are delivered 525 inOutFormat->u.raw_video.first_active 526 = fHeader.u.raw_video.first_active_line; 527 inOutFormat->u.raw_video.last_active = fHeader.u.raw_video.line_count; 528 inOutFormat->u.raw_video.pixel_width_aspect 529 = fHeader.u.raw_video.pixel_width_aspect; 530 inOutFormat->u.raw_video.pixel_height_aspect 531 = fHeader.u.raw_video.pixel_height_aspect; 532 #if 0 533 // This was added by Colin Günther in order to handle streams with a 534 // variable frame rate. 
fOutputFrameRate is computed from the stream 535 // time_base, but it actually assumes a timebase equal to the FPS. As far 536 // as I can see, a stream with a variable frame rate would have a higher 537 // resolution time_base and increment the pts (presentation time) of each 538 // frame by a value bigger than one. 539 // 540 // Fixed rate stream: 541 // time_base = 1/50s, frame PTS = 1, 2, 3... (for 50Hz) 542 // 543 // Variable rate stream: 544 // time_base = 1/300s, frame PTS = 6, 12, 18, ... (for 50Hz) 545 // time_base = 1/300s, frame PTS = 5, 10, 15, ... (for 60Hz) 546 // 547 // The fOutputFrameRate currently does not take this into account and 548 // ignores the PTS. This results in playing the above sample at 300Hz 549 // instead of 50 or 60. 550 // 551 // However, comparing the PTS for two consecutive implies we have already 552 // decoded 2 frames, which may not be the case when this method is first 553 // called. 554 inOutFormat->u.raw_video.field_rate = fOutputFrameRate; 555 // Was calculated by first call to _DecodeNextVideoFrame() 556 #endif 557 inOutFormat->u.raw_video.display.format = fOutputColorSpace; 558 inOutFormat->u.raw_video.display.line_width 559 = fHeader.u.raw_video.display_line_width; 560 inOutFormat->u.raw_video.display.line_count 561 = fHeader.u.raw_video.display_line_count; 562 inOutFormat->u.raw_video.display.bytes_per_row 563 = fHeader.u.raw_video.bytes_per_row; 564 565 #ifdef TRACE_AV_CODEC 566 char buffer[1024]; 567 string_for_format(*inOutFormat, buffer, sizeof(buffer)); 568 TRACE("[v] outFormat = %s\n", buffer); 569 TRACE(" returned video format 0x%x\n", 570 inOutFormat->u.raw_video.display.format); 571 #endif 572 573 return B_OK; 574 } 575 576 577 /*! \brief Fills the outBuffer with one or more already decoded audio frames. 578 579 Besides the main duty described above, this method also fills out the other 580 output parameters as documented below. 
581 582 \param outBuffer Pointer to the output buffer to copy the decoded audio 583 frames to. 584 \param outFrameCount Pointer to the output variable to assign the number of 585 copied audio frames (usually several audio frames at once). 586 \param mediaHeader Pointer to the output media header that contains the 587 properties of the decoded audio frame being the first in the outBuffer. 588 \param info Specifies additional decoding parameters. (Note: unused). 589 590 \returns B_OK Decoding audio frames succeeded. 591 \returns B_LAST_BUFFER_ERROR There are no more audio frames available. 592 \returns Other error codes 593 */ 594 status_t 595 AVCodecDecoder::_DecodeAudio(void* outBuffer, int64* outFrameCount, 596 media_header* mediaHeader, media_decode_info* info) 597 { 598 TRACE_AUDIO("AVCodecDecoder::_DecodeAudio(audio start_time %.6fs)\n", 599 mediaHeader->start_time / 1000000.0); 600 601 status_t audioDecodingStatus 602 = fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextAudioFrame(); 603 604 if (audioDecodingStatus != B_OK) 605 return audioDecodingStatus; 606 607 *outFrameCount = fDecodedDataSizeInBytes / fOutputFrameSize; 608 *mediaHeader = fHeader; 609 memcpy(outBuffer, fDecodedData, fDecodedDataSizeInBytes); 610 611 fDecodedDataSizeInBytes = 0; 612 613 return B_OK; 614 } 615 616 617 /*! \brief Fills the outBuffer with an already decoded video frame. 618 619 Besides the main duty described above, this method also fills out the other 620 output parameters as documented below. 621 622 \param outBuffer Pointer to the output buffer to copy the decoded video 623 frame to. 624 \param outFrameCount Pointer to the output variable to assign the number of 625 copied video frames (usually one video frame). 626 \param mediaHeader Pointer to the output media header that contains the 627 decoded video frame properties. 628 \param info Specifies additional decoding parameters. (Note: unused). 629 630 \returns B_OK Decoding a video frame succeeded. 
631 \returns B_LAST_BUFFER_ERROR There are no more video frames available. 632 \returns Other error codes 633 */ 634 status_t 635 AVCodecDecoder::_DecodeVideo(void* outBuffer, int64* outFrameCount, 636 media_header* mediaHeader, media_decode_info* info) 637 { 638 status_t videoDecodingStatus 639 = fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextVideoFrame(); 640 641 if (videoDecodingStatus != B_OK) 642 return videoDecodingStatus; 643 644 *outFrameCount = 1; 645 *mediaHeader = fHeader; 646 memcpy(outBuffer, fDecodedData, mediaHeader->size_used); 647 648 fDecodedDataSizeInBytes = 0; 649 650 return B_OK; 651 } 652 653 654 /*! \brief Decodes next audio frame. 655 656 We decode at least one audio frame into fDecodedData. To achieve this goal, 657 we might need to request several chunks of encoded data resulting in a 658 variable execution time of this function. 659 660 The length of the decoded audio frame(s) is stored in 661 fDecodedDataSizeInBytes. If this variable is greater than zero you can 662 assert that all audio frames in fDecodedData are valid. 663 664 It is assumed that the number of expected audio frames is stored in 665 fOutputFrameCount. So _DecodeNextAudioFrame() must be called only after 666 fOutputFrameCount has been set. 667 668 Note: fOutputFrameCount contains the maximum number of frames a caller 669 of BMediaDecoder::Decode() expects to receive. There is a direct 670 relationship between fOutputFrameCount and the buffer size a caller of 671 BMediaDecoder::Decode() will provide so we make sure to respect this limit 672 for fDecodedDataSizeInBytes. 673 674 On return with status code B_OK the following conditions hold true: 675 1. fDecodedData contains as much audio frames as the caller of 676 BMediaDecoder::Decode() expects. 677 2. fDecodedData contains lesser audio frames as the caller of 678 BMediaDecoder::Decode() expects only when one of the following 679 conditions hold true: 680 i No more audio frames left. 
Consecutive calls to 681 _DecodeNextAudioFrame() will then result in the return of 682 status code B_LAST_BUFFER_ERROR. 683 ii TODO: A change in the size of the audio frames. 684 3. fHeader is populated with the audio frame properties of the first 685 audio frame in fDecodedData. Especially the start_time field of 686 fHeader relates to that first audio frame. Start times of 687 consecutive audio frames in fDecodedData have to be calculated 688 manually (using the frame rate and the frame duration) if the 689 caller needs them. 690 691 TODO: Handle change of channel_count. Such a change results in a change of 692 the audio frame size and thus has different buffer requirements. 693 The most sane approach for implementing this is to return the audio frames 694 that were still decoded with the previous channel_count and inform the 695 client of BMediaDecoder::Decode() about the change so that it can adapt to 696 it. Furthermore we need to adapt our fDecodedData to the new buffer size 697 requirements accordingly. 698 699 \returns B_OK when we successfully decoded enough audio frames 700 \returns B_LAST_BUFFER_ERROR when there are no more audio frames available. 701 \returns Other Errors 702 */ 703 status_t 704 AVCodecDecoder::_DecodeNextAudioFrame() 705 { 706 assert(fTempPacket.size >= 0); 707 assert(fDecodedDataSizeInBytes == 0); 708 // _DecodeNextAudioFrame needs to be called on empty fDecodedData only! 709 // If this assert holds wrong we have a bug somewhere. 
710 711 status_t resetStatus = _ResetRawDecodedAudio(); 712 if (resetStatus != B_OK) 713 return resetStatus; 714 715 while (fRawDecodedAudio->nb_samples < fOutputFrameCount) { 716 _CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow(); 717 718 bool decodedDataBufferHasData = fDecodedDataBufferSize > 0; 719 if (decodedDataBufferHasData) { 720 _MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes(); 721 continue; 722 } 723 724 status_t decodeAudioChunkStatus = _DecodeNextAudioFrameChunk(); 725 if (decodeAudioChunkStatus != B_OK) 726 return decodeAudioChunkStatus; 727 } 728 729 fFrame += fRawDecodedAudio->nb_samples; 730 fDecodedDataSizeInBytes = fRawDecodedAudio->linesize[0]; 731 732 _UpdateMediaHeaderForAudioFrame(); 733 734 #ifdef DEBUG 735 dump_ffframe_audio(fRawDecodedAudio, "ffaudi"); 736 #endif 737 738 TRACE_AUDIO(" frame count: %ld current: %lld\n", 739 fRawDecodedAudio->nb_samples, fFrame); 740 741 return B_OK; 742 } 743 744 745 /*! \brief Applies all essential audio input properties to fContext that were 746 passed to AVCodecDecoder when Setup() was called. 747 748 Note: This function must be called before the AVCodec is opened via 749 avcodec_open2(). Otherwise the behaviour of FFMPEG's audio decoding 750 function avcodec_decode_audio4() is undefined. 751 752 Essential properties applied from fInputFormat.u.encoded_audio: 753 - bit_rate copied to fContext->bit_rate 754 - frame_size copied to fContext->frame_size 755 - output.format converted to fContext->sample_fmt 756 - output.frame_rate copied to fContext->sample_rate 757 - output.channel_count copied to fContext->channels 758 759 Other essential properties being applied: 760 - fBlockAlign to fContext->block_align 761 - fExtraData to fContext->extradata 762 - fExtraDataSize to fContext->extradata_size 763 764 TODO: Either the following documentation section should be removed or this 765 TODO when it is clear whether fInputFormat.MetaData() and 766 fInputFormat.MetaDataSize() have to be applied to fContext. 
See the related 767 TODO in the method implementation. 768 Only applied when fInputFormat.MetaDataSize() is greater than zero: 769 - fInputFormat.MetaData() to fContext->extradata 770 - fInputFormat.MetaDataSize() to fContext->extradata_size 771 */ 772 void 773 AVCodecDecoder::_ApplyEssentialAudioContainerPropertiesToContext() 774 { 775 media_encoded_audio_format containerProperties 776 = fInputFormat.u.encoded_audio; 777 778 fContext->bit_rate 779 = static_cast<int>(containerProperties.bit_rate); 780 fContext->frame_size 781 = static_cast<int>(containerProperties.frame_size); 782 ConvertRawAudioFormatToAVSampleFormat( 783 containerProperties.output.format, fContext->sample_fmt); 784 #if LIBAVCODEC_VERSION_INT > ((52 << 16) | (114 << 8)) 785 ConvertRawAudioFormatToAVSampleFormat( 786 containerProperties.output.format, fContext->request_sample_fmt); 787 #endif 788 fContext->sample_rate 789 = static_cast<int>(containerProperties.output.frame_rate); 790 fContext->channels 791 = static_cast<int>(containerProperties.output.channel_count); 792 // Check that channel count is not still a wild card! 793 if (fContext->channels == 0) { 794 TRACE(" channel_count still a wild-card, assuming stereo.\n"); 795 fContext->channels = 2; 796 } 797 798 fContext->block_align = fBlockAlign; 799 fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData); 800 fContext->extradata_size = fExtraDataSize; 801 802 // TODO: This probably needs to go away, there is some misconception 803 // about extra data / info buffer and meta data. See 804 // Reader::GetStreamInfo(). The AVFormatReader puts extradata and 805 // extradata_size into media_format::MetaData(), but used to ignore 806 // the infoBuffer passed to GetStreamInfo(). I think this may be why 807 // the code below was added. 
808 if (fInputFormat.MetaDataSize() > 0) { 809 fContext->extradata = static_cast<uint8_t*>( 810 const_cast<void*>(fInputFormat.MetaData())); 811 fContext->extradata_size = fInputFormat.MetaDataSize(); 812 } 813 814 TRACE(" bit_rate %d, sample_rate %d, channels %d, block_align %d, " 815 "extradata_size %d\n", fContext->bit_rate, fContext->sample_rate, 816 fContext->channels, fContext->block_align, fContext->extradata_size); 817 } 818 819 820 /*! \brief Resets important fields in fRawDecodedVideo to their default values. 821 822 Note: Also initializes fDecodedData if not done already. 823 824 \returns B_OK Resetting successfully completed. 825 \returns B_NO_MEMORY No memory left for correct operation. 826 */ 827 status_t 828 AVCodecDecoder::_ResetRawDecodedAudio() 829 { 830 if (fDecodedData == NULL) { 831 size_t maximumSizeOfDecodedData = fOutputFrameCount * fOutputFrameSize; 832 fDecodedData 833 = static_cast<uint8_t*>(malloc(maximumSizeOfDecodedData)); 834 } 835 if (fDecodedData == NULL) 836 return B_NO_MEMORY; 837 838 fRawDecodedAudio->data[0] = fDecodedData; 839 fRawDecodedAudio->linesize[0] = 0; 840 fRawDecodedAudio->format = AV_SAMPLE_FMT_NONE; 841 fRawDecodedAudio->pkt_dts = AV_NOPTS_VALUE; 842 fRawDecodedAudio->nb_samples = 0; 843 memset(fRawDecodedAudio->opaque, 0, sizeof(avformat_codec_context)); 844 845 return B_OK; 846 } 847 848 849 /*! \brief Checks fDecodedDataBufferSize and fTempPacket for invalid values, 850 reports them and assigns valid values. 851 852 Note: This method is intended to be called before any code is executed that 853 deals with moving, loading or decoding any audio frames. 854 */ 855 void 856 AVCodecDecoder::_CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow() 857 { 858 if (fDecodedDataBufferSize < 0) { 859 fprintf(stderr, "Decoding read past the end of the decoded data " 860 "buffer! 
%" B_PRId32 "\n", fDecodedDataBufferSize); 861 fDecodedDataBufferSize = 0; 862 } 863 if (fTempPacket.size < 0) { 864 fprintf(stderr, "Decoding read past the end of the temp packet! %d\n", 865 fTempPacket.size); 866 fTempPacket.size = 0; 867 } 868 } 869 870 871 /*! \brief Moves audio frames from fDecodedDataBuffer to fRawDecodedAudio (and 872 thus to fDecodedData) and updates the start times of fRawDecodedAudio, 873 fDecodedDataBuffer and fTempPacket accordingly. 874 875 When moving audio frames to fRawDecodedAudio this method also makes sure 876 that the following important fields of fRawDecodedAudio are populated and 877 updated with correct values: 878 - fRawDecodedAudio->data[0]: Points to first free byte of fDecodedData 879 - fRawDecodedAudio->linesize[0]: Total size of frames in fDecodedData 880 - fRawDecodedAudio->format: Format of first audio frame 881 - fRawDecodedAudio->pkt_dts: Start time of first audio frame 882 - fRawDecodedAudio->nb_samples: Number of audio frames 883 - fRawDecodedAudio->opaque: Contains the following fields for the first 884 audio frame: 885 - channels: Channel count of first audio frame 886 - sample_rate: Frame rate of first audio frame 887 888 This function assumes to be called only when the following assumptions 889 hold true: 890 1. There are decoded audio frames available in fDecodedDataBuffer 891 meaning that fDecodedDataBufferSize is greater than zero. 892 2. There is space left in fRawDecodedAudio to move some audio frames 893 in. This means that fRawDecodedAudio has lesser audio frames than 894 the maximum allowed (specified by fOutputFrameCount). 895 3. The audio frame rate is known so that we can calculate the time 896 range (covered by the moved audio frames) to update the start times 897 accordingly. 898 4. The field fRawDecodedAudio->opaque points to a memory block 899 representing a structure of type avformat_codec_context. 
900 901 After this function returns the caller can safely make the following 902 assumptions: 903 1. The number of decoded audio frames in fDecodedDataBuffer is 904 decreased though it may still be greater then zero. 905 2. The number of frames in fRawDecodedAudio has increased and all 906 important fields are updated (see listing above). 907 3. Start times of fDecodedDataBuffer and fTempPacket were increased 908 with the time range covered by the moved audio frames. 909 910 Note: This function raises an exception (by calling the debugger), when 911 fDecodedDataBufferSize is not a multiple of fOutputFrameSize. 912 */ 913 void 914 AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes() 915 { 916 assert(fDecodedDataBufferSize > 0); 917 assert(fRawDecodedAudio->nb_samples < fOutputFrameCount); 918 assert(fOutputFrameRate > 0); 919 920 int32 outFrames = fOutputFrameCount - fRawDecodedAudio->nb_samples; 921 int32 inFrames = fDecodedDataBufferSize; 922 923 int32 frames = min_c(outFrames, inFrames); 924 if (frames == 0) 925 debugger("fDecodedDataBufferSize not multiple of frame size!"); 926 927 // Some decoders do not support format conversion on themselves, or use 928 // "planar" audio (each channel separated instead of interleaved samples). 929 // In that case, we use swresample to convert the data 930 if (av_sample_fmt_is_planar(fContext->sample_fmt)) { 931 const uint8_t* ptr[8]; 932 for (int i = 0; i < 8; i++) { 933 if (fDecodedDataBuffer->data[i] == NULL) 934 ptr[i] = NULL; 935 else 936 ptr[i] = fDecodedDataBuffer->data[i] + fDecodedDataBufferOffset; 937 } 938 939 // When there are more input frames than space in the output buffer, 940 // we could feed everything to swr and it would buffer the extra data. 941 // However, there is no easy way to flush that data without feeding more 942 // input, and it makes our timestamp computations fail. 943 // So, we feed only as much frames as we can get out, and handle the 944 // buffering ourselves. 
945 // TODO Ideally, we should try to size our output buffer so that it can 946 // always hold all the output (swr provides helper functions for this) 947 inFrames = frames; 948 frames = swr_convert(fResampleContext, fRawDecodedAudio->data, 949 outFrames, ptr, inFrames); 950 951 if (frames < 0) 952 debugger("resampling failed"); 953 } else { 954 memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0] 955 + fDecodedDataBufferOffset, frames * fOutputFrameSize); 956 outFrames = frames; 957 inFrames = frames; 958 } 959 960 size_t remainingSize = inFrames * fInputFrameSize; 961 size_t decodedSize = outFrames * fOutputFrameSize; 962 fDecodedDataBufferSize -= inFrames; 963 964 bool firstAudioFramesCopiedToRawDecodedAudio 965 = fRawDecodedAudio->data[0] != fDecodedData; 966 if (!firstAudioFramesCopiedToRawDecodedAudio) { 967 fRawDecodedAudio->format = fDecodedDataBuffer->format; 968 fRawDecodedAudio->pkt_dts = fDecodedDataBuffer->pkt_dts; 969 970 avformat_codec_context* codecContext 971 = static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque); 972 codecContext->channels = fContext->channels; 973 codecContext->sample_rate = fContext->sample_rate; 974 } 975 976 fRawDecodedAudio->data[0] += decodedSize; 977 fRawDecodedAudio->linesize[0] += decodedSize; 978 fRawDecodedAudio->nb_samples += outFrames; 979 980 fDecodedDataBufferOffset += remainingSize; 981 982 // Update start times accordingly 983 bigtime_t framesTimeInterval = static_cast<bigtime_t>( 984 (1000000LL * frames) / fOutputFrameRate); 985 fDecodedDataBuffer->pkt_dts += framesTimeInterval; 986 // Start time of buffer is updated in case that it contains 987 // more audio frames to move. 988 fTempPacket.dts += framesTimeInterval; 989 // Start time of fTempPacket is updated in case the fTempPacket 990 // contains more audio frames to decode. 991 } 992 993 994 /*! \brief Decodes next chunk of audio frames. 
995 996 This method handles all the details of loading the input buffer 997 (fChunkBuffer) at the right time and of calling FFMPEG often engouh until 998 some audio frames have been decoded. 999 1000 FFMPEG decides how much audio frames belong to a chunk. Because of that 1001 it is very likely that _DecodeNextAudioFrameChunk has to be called several 1002 times to decode enough audio frames to please the caller of 1003 BMediaDecoder::Decode(). 1004 1005 This function assumes to be called only when the following assumptions 1006 hold true: 1007 1. fDecodedDataBufferSize equals zero. 1008 1009 After this function returns successfully the caller can safely make the 1010 following assumptions: 1011 1. fDecodedDataBufferSize is greater than zero. 1012 2. fDecodedDataBufferOffset is set to zero. 1013 3. fDecodedDataBuffer contains audio frames. 1014 1015 1016 \returns B_OK on successfully decoding one audio frame chunk. 1017 \returns B_LAST_BUFFER_ERROR No more audio frame chunks available. From 1018 this point on further calls will return this same error. 1019 \returns B_ERROR Decoding failed 1020 */ 1021 status_t 1022 AVCodecDecoder::_DecodeNextAudioFrameChunk() 1023 { 1024 assert(fDecodedDataBufferSize == 0); 1025 1026 while (fDecodedDataBufferSize == 0) { 1027 status_t loadingChunkStatus 1028 = _LoadNextChunkIfNeededAndAssignStartTime(); 1029 if (loadingChunkStatus != B_OK) 1030 return loadingChunkStatus; 1031 1032 status_t decodingStatus 1033 = _DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer(); 1034 if (decodingStatus != B_OK) { 1035 // Assume the audio decoded until now is broken so replace it with 1036 // some silence. 
1037 memset(fDecodedData, 0, fRawDecodedAudio->linesize[0]); 1038 1039 if (!fAudioDecodeError) { 1040 // Report failure if not done already 1041 int32 chunkBufferOffset = fTempPacket.data - fChunkBuffer; 1042 printf("########### audio decode error, " 1043 "fTempPacket.size %d, fChunkBuffer data offset %" B_PRId32 1044 "\n", fTempPacket.size, chunkBufferOffset); 1045 fAudioDecodeError = true; 1046 } 1047 1048 // Assume that next audio chunk can be decoded so keep decoding. 1049 continue; 1050 } 1051 1052 fAudioDecodeError = false; 1053 } 1054 1055 return B_OK; 1056 } 1057 1058 1059 /*! \brief Tries to decode at least one audio frame and store it in the 1060 fDecodedDataBuffer. 1061 1062 This function assumes to be called only when the following assumptions 1063 hold true: 1064 1. fDecodedDataBufferSize equals zero. 1065 2. fTempPacket.size is greater than zero. 1066 1067 After this function returns successfully the caller can safely make the 1068 following assumptions: 1069 1. fDecodedDataBufferSize is greater than zero in the common case. 1070 Also see "Note" below. 1071 2. fTempPacket was updated to exclude the data chunk that was consumed 1072 by avcodec_decode_audio4(). 1073 3. fDecodedDataBufferOffset is set to zero. 1074 1075 When this function failed to decode at least one audio frame due to a 1076 decoding error the caller can safely make the following assumptions: 1077 1. fDecodedDataBufferSize equals zero. 1078 2. fTempPacket.size equals zero. 1079 1080 Note: It is possible that there wasn't any audio frame decoded into 1081 fDecodedDataBuffer after calling this function. This is normal and can 1082 happen when there was either a decoding error or there is some decoding 1083 delay in FFMPEGs audio decoder. Another call to this method is totally 1084 safe and is even expected as long as the calling assumptions hold true. 1085 1086 \returns B_OK Decoding successful. 
fDecodedDataBuffer contains decoded 1087 audio frames only when fDecodedDataBufferSize is greater than zero. 1088 fDecodedDataBuffer is empty, when avcodec_decode_audio4() didn't return 1089 audio frames due to delayed decoding or incomplete audio frames. 1090 \returns B_ERROR Decoding failed thus fDecodedDataBuffer contains no audio 1091 frames. 1092 */ 1093 status_t 1094 AVCodecDecoder::_DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer() 1095 { 1096 assert(fDecodedDataBufferSize == 0); 1097 assert(fTempPacket.size > 0); 1098 1099 memset(fDecodedDataBuffer, 0, sizeof(AVFrame)); 1100 av_frame_unref(fDecodedDataBuffer); 1101 fDecodedDataBufferOffset = 0; 1102 int gotAudioFrame = 0; 1103 1104 int encodedDataSizeInBytes = avcodec_decode_audio4(fContext, 1105 fDecodedDataBuffer, &gotAudioFrame, &fTempPacket); 1106 if (encodedDataSizeInBytes <= 0) { 1107 // Error or failure to produce decompressed output. 1108 // Skip the temp packet data entirely. 1109 fTempPacket.size = 0; 1110 return B_ERROR; 1111 } 1112 1113 fTempPacket.data += encodedDataSizeInBytes; 1114 fTempPacket.size -= encodedDataSizeInBytes; 1115 1116 bool gotNoAudioFrame = gotAudioFrame == 0; 1117 if (gotNoAudioFrame) 1118 return B_OK; 1119 1120 fDecodedDataBufferSize = fDecodedDataBuffer->nb_samples; 1121 if (fDecodedDataBufferSize < 0) 1122 fDecodedDataBufferSize = 0; 1123 1124 return B_OK; 1125 } 1126 1127 1128 /*! \brief Updates relevant fields of the class member fHeader with the 1129 properties of the most recently decoded audio frame. 1130 1131 The following fields of fHeader are updated: 1132 - fHeader.type 1133 - fHeader.file_pos 1134 - fHeader.orig_size 1135 - fHeader.start_time 1136 - fHeader.size_used 1137 - fHeader.u.raw_audio.frame_rate 1138 - fHeader.u.raw_audio.channel_count 1139 1140 It is assumed that this function is called only when the following asserts 1141 hold true: 1142 1. We actually got a new audio frame decoded by the audio decoder. 1143 2. 
fHeader wasn't updated for the new audio frame yet. You MUST call 1144 this method only once per decoded audio frame. 1145 3. fRawDecodedAudio's fields relate to the first audio frame contained 1146 in fDecodedData. Especially the following fields are of importance: 1147 - fRawDecodedAudio->pkt_dts: Start time of first audio frame 1148 - fRawDecodedAudio->opaque: Contains the following fields for 1149 the first audio frame: 1150 - channels: Channel count of first audio frame 1151 - sample_rate: Frame rate of first audio frame 1152 */ 1153 void 1154 AVCodecDecoder::_UpdateMediaHeaderForAudioFrame() 1155 { 1156 fHeader.type = B_MEDIA_RAW_AUDIO; 1157 fHeader.file_pos = 0; 1158 fHeader.orig_size = 0; 1159 fHeader.start_time = fRawDecodedAudio->pkt_dts; 1160 fHeader.size_used = fRawDecodedAudio->linesize[0]; 1161 1162 avformat_codec_context* codecContext 1163 = static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque); 1164 fHeader.u.raw_audio.channel_count = codecContext->channels; 1165 fHeader.u.raw_audio.frame_rate = codecContext->sample_rate; 1166 } 1167 1168 1169 /*! \brief Decodes next video frame. 1170 1171 We decode exactly one video frame into fDecodedData. To achieve this goal, 1172 we might need to request several chunks of encoded data resulting in a 1173 variable execution time of this function. 1174 1175 The length of the decoded video frame is stored in 1176 fDecodedDataSizeInBytes. If this variable is greater than zero, you can 1177 assert that there is a valid video frame available in fDecodedData. 1178 1179 The decoded video frame in fDecodedData has color space conversion and 1180 deinterlacing already applied. 1181 1182 To every decoded video frame there is a media_header populated in 1183 fHeader, containing the corresponding video frame properties. 1184 1185 Normally every decoded video frame has a start_time field populated in the 1186 associated fHeader, that determines the presentation time of the frame. 
1187 This relationship will only hold true, when each data chunk that is 1188 provided via GetNextChunk() contains data for exactly one encoded video 1189 frame (one complete frame) - not more and not less. 1190 1191 We can decode data chunks that contain partial video frame data, too. In 1192 that case, you cannot trust the value of the start_time field in fHeader. 1193 We simply have no logic in place to establish a meaningful relationship 1194 between an incomplete frame and the start time it should be presented. 1195 Though this might change in the future. 1196 1197 We can decode data chunks that contain more than one video frame, too. In 1198 that case, you cannot trust the value of the start_time field in fHeader. 1199 We simply have no logic in place to track the start_time across multiple 1200 video frames. So a meaningful relationship between the 2nd, 3rd, ... frame 1201 and the start time it should be presented isn't established at the moment. 1202 Though this might change in the future. 1203 1204 More over the fOutputFrameRate variable is updated for every decoded video 1205 frame. 1206 1207 On first call the member variables fSwsContext / fFormatConversionFunc are 1208 initialized. 1209 1210 \returns B_OK when we successfully decoded one video frame 1211 \returns B_LAST_BUFFER_ERROR when there are no more video frames available. 1212 \returns B_NO_MEMORY when we have no memory left for correct operation. 1213 \returns Other Errors 1214 */ 1215 status_t 1216 AVCodecDecoder::_DecodeNextVideoFrame() 1217 { 1218 #if 0 1219 // Well, I heard this was not supposed to happen, but it does 1220 // (for example with http://thud.us/videos/misc/xvid-samples/flyby-divx.avi 1221 // see #11409). Since that video otherwise plays fine when removing the 1222 // assert, I'm assuming we are being overcautious here and commenting it 1223 // out. 
1224 assert(fTempPacket.size >= 0); 1225 #endif 1226 1227 while (true) { 1228 status_t loadingChunkStatus 1229 = _LoadNextChunkIfNeededAndAssignStartTime(); 1230 if (loadingChunkStatus == B_LAST_BUFFER_ERROR) 1231 return _FlushOneVideoFrameFromDecoderBuffer(); 1232 if (loadingChunkStatus != B_OK) { 1233 TRACE("AVCodecDecoder::_DecodeNextVideoFrame(): error from " 1234 "GetNextChunk(): %s\n", strerror(loadingChunkStatus)); 1235 return loadingChunkStatus; 1236 } 1237 1238 #if DO_PROFILING 1239 bigtime_t startTime = system_time(); 1240 #endif 1241 1242 // NOTE: In the FFMPEG 0.10.2 code example decoding_encoding.c, the 1243 // length returned by avcodec_decode_video2() is used to update the 1244 // packet buffer size (here it is fTempPacket.size). This way the 1245 // packet buffer is allowed to contain incomplete frames so we are 1246 // required to buffer the packets between different calls to 1247 // _DecodeNextVideoFrame(). 1248 int gotVideoFrame = 0; 1249 int encodedDataSizeInBytes = avcodec_decode_video2(fContext, 1250 fRawDecodedPicture, &gotVideoFrame, &fTempPacket); 1251 if (encodedDataSizeInBytes < 0) { 1252 TRACE("[v] AVCodecDecoder: ignoring error in decoding frame %lld:" 1253 " %d\n", fFrame, encodedDataSizeInBytes); 1254 // NOTE: An error from avcodec_decode_video2() is ignored by the 1255 // FFMPEG 0.10.2 example decoding_encoding.c. 
Only the packet 1256 // buffers are flushed accordingly 1257 fTempPacket.data = NULL; 1258 fTempPacket.size = 0; 1259 continue; 1260 } 1261 1262 fTempPacket.size -= encodedDataSizeInBytes; 1263 fTempPacket.data += encodedDataSizeInBytes; 1264 1265 bool gotNoVideoFrame = gotVideoFrame == 0; 1266 if (gotNoVideoFrame) { 1267 TRACE("frame %lld - no picture yet, encodedDataSizeInBytes: %d, " 1268 "chunk size: %ld\n", fFrame, encodedDataSizeInBytes, 1269 fChunkBufferSize); 1270 continue; 1271 } 1272 1273 #if DO_PROFILING 1274 bigtime_t formatConversionStart = system_time(); 1275 #endif 1276 1277 status_t handleStatus = _HandleNewVideoFrameAndUpdateSystemState(); 1278 if (handleStatus != B_OK) 1279 return handleStatus; 1280 1281 #if DO_PROFILING 1282 bigtime_t doneTime = system_time(); 1283 decodingTime += formatConversionStart - startTime; 1284 conversionTime += doneTime - formatConversionStart; 1285 profileCounter++; 1286 if (!(fFrame % 5)) { 1287 printf("[v] profile: d1 = %lld, d2 = %lld (%lld) required %Ld\n", 1288 decodingTime / profileCounter, conversionTime / profileCounter, 1289 fFrame, bigtime_t(1000000LL / fOutputFrameRate)); 1290 decodingTime = 0; 1291 conversionTime = 0; 1292 profileCounter = 0; 1293 } 1294 #endif 1295 return B_OK; 1296 } 1297 } 1298 1299 1300 /*! \brief Applies all essential video input properties to fContext that were 1301 passed to AVCodecDecoder when Setup() was called. 1302 1303 Note: This function must be called before the AVCodec is opened via 1304 avcodec_open2(). Otherwise the behaviour of FFMPEG's video decoding 1305 function avcodec_decode_video2() is undefined. 
1306 1307 Essential properties applied from fInputFormat.u.encoded_video.output: 1308 - display.line_width copied to fContext->width 1309 - display.line_count copied to fContext->height 1310 - pixel_width_aspect and pixel_height_aspect converted to 1311 fContext->sample_aspect_ratio 1312 - field_rate converted to fContext->time_base and 1313 fContext->ticks_per_frame 1314 1315 Other essential properties being applied: 1316 - fExtraData to fContext->extradata 1317 - fExtraDataSize to fContext->extradata_size 1318 */ 1319 void 1320 AVCodecDecoder::_ApplyEssentialVideoContainerPropertiesToContext() 1321 { 1322 media_raw_video_format containerProperties 1323 = fInputFormat.u.encoded_video.output; 1324 1325 fContext->width = containerProperties.display.line_width; 1326 fContext->height = containerProperties.display.line_count; 1327 1328 if (containerProperties.pixel_width_aspect > 0 1329 && containerProperties.pixel_height_aspect > 0) { 1330 ConvertVideoAspectWidthAndHeightToAVCodecContext( 1331 containerProperties.pixel_width_aspect, 1332 containerProperties.pixel_height_aspect, *fContext); 1333 } 1334 1335 if (containerProperties.field_rate > 0.0) { 1336 ConvertVideoFrameRateToAVCodecContext(containerProperties.field_rate, 1337 *fContext); 1338 } 1339 1340 fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData); 1341 fContext->extradata_size = fExtraDataSize; 1342 } 1343 1344 1345 /*! \brief Loads the next chunk into fChunkBuffer and assigns it (including 1346 the start time) to fTempPacket but only if fTempPacket is empty. 1347 1348 \returns B_OK 1349 1. meaning: Next chunk is loaded. 1350 2. meaning: No need to load and assign anything. Proceed as usual. 1351 \returns B_LAST_BUFFER_ERROR No more chunks available. fChunkBuffer and 1352 fTempPacket are left untouched. 1353 \returns Other errors Caller should bail out because fChunkBuffer and 1354 fTempPacket are in unknown states. Normal operation cannot be 1355 guaranteed. 
1356 */ 1357 status_t 1358 AVCodecDecoder::_LoadNextChunkIfNeededAndAssignStartTime() 1359 { 1360 if (fTempPacket.size > 0) 1361 return B_OK; 1362 1363 const void* chunkBuffer = NULL; 1364 size_t chunkBufferSize = 0; 1365 // In the case that GetNextChunk() returns an error fChunkBufferSize 1366 // should be left untouched. 1367 media_header chunkMediaHeader; 1368 1369 status_t getNextChunkStatus = GetNextChunk(&chunkBuffer, &chunkBufferSize, 1370 &chunkMediaHeader); 1371 if (getNextChunkStatus != B_OK) 1372 return getNextChunkStatus; 1373 1374 status_t chunkBufferPaddingStatus 1375 = _CopyChunkToChunkBufferAndAddPadding(chunkBuffer, chunkBufferSize); 1376 if (chunkBufferPaddingStatus != B_OK) 1377 return chunkBufferPaddingStatus; 1378 1379 fTempPacket.data = fChunkBuffer; 1380 fTempPacket.size = fChunkBufferSize; 1381 fTempPacket.dts = chunkMediaHeader.start_time; 1382 // Let FFMPEG handle the correct relationship between start_time and 1383 // decoded a/v frame. By doing so we are simply copying the way how it 1384 // is implemented in ffplay.c for video frames (for audio frames it 1385 // works, too, but isn't used by ffplay.c). 1386 // \see http://git.videolan.org/?p=ffmpeg.git;a=blob;f=ffplay.c;h=09623db374e5289ed20b7cc28c262c4375a8b2e4;hb=9153b33a742c4e2a85ff6230aea0e75f5a8b26c2#l1502 1387 // 1388 // FIXME: Research how to establish a meaningful relationship between 1389 // start_time and decoded a/v frame when the received chunk buffer 1390 // contains partial a/v frames. Maybe some data formats do contain time 1391 // stamps (ake pts / dts fields) that can be evaluated by FFMPEG. But 1392 // as long as I don't have such video data to test it, it makes no 1393 // sense trying to implement it. 1394 // 1395 // FIXME: Implement tracking start_time of video frames originating in 1396 // data chunks that encode more than one video frame at a time. 
In that 1397 // case on would increment the start_time for each consecutive frame of 1398 // such a data chunk (like it is done for audio frame decoding). But as 1399 // long as I don't have such video data to test it, it makes no sense 1400 // to implement it. 1401 1402 #ifdef LOG_STREAM_TO_FILE 1403 BFile* logFile = fIsAudio ? &sAudioStreamLogFile : &sVideoStreamLogFile; 1404 if (sDumpedPackets < 100) { 1405 logFile->Write(chunkBuffer, fChunkBufferSize); 1406 printf("wrote %ld bytes\n", fChunkBufferSize); 1407 sDumpedPackets++; 1408 } else if (sDumpedPackets == 100) 1409 logFile->Unset(); 1410 #endif 1411 1412 return B_OK; 1413 } 1414 1415 1416 /*! \brief Copies a chunk into fChunkBuffer and adds a "safety net" of 1417 additional memory as required by FFMPEG for input buffers to video 1418 decoders. 1419 1420 This is needed so that some decoders can read safely a predefined number of 1421 bytes at a time for performance optimization purposes. 1422 1423 The additional memory has a size of FF_INPUT_BUFFER_PADDING_SIZE as defined 1424 in avcodec.h. 1425 1426 Ownership of fChunkBuffer memory is with the class so it needs to be freed 1427 at the right times (on destruction, on seeking). 1428 1429 Also update fChunkBufferSize to reflect the size of the contained data 1430 (leaving out the padding). 1431 1432 \param chunk The chunk to copy. 1433 \param chunkSize Size of the chunk in bytes 1434 1435 \returns B_OK Padding was successful. You are responsible for releasing the 1436 allocated memory. fChunkBufferSize is set to chunkSize. 1437 \returns B_NO_MEMORY Padding failed. 1438 fChunkBuffer is set to NULL making it safe to call free() on it. 1439 fChunkBufferSize is set to 0 to reflect the size of fChunkBuffer. 
1440 */ 1441 status_t 1442 AVCodecDecoder::_CopyChunkToChunkBufferAndAddPadding(const void* chunk, 1443 size_t chunkSize) 1444 { 1445 fChunkBuffer = static_cast<uint8_t*>(realloc(fChunkBuffer, 1446 chunkSize + FF_INPUT_BUFFER_PADDING_SIZE)); 1447 if (fChunkBuffer == NULL) { 1448 fChunkBufferSize = 0; 1449 return B_NO_MEMORY; 1450 } 1451 1452 memcpy(fChunkBuffer, chunk, chunkSize); 1453 memset(fChunkBuffer + chunkSize, 0, FF_INPUT_BUFFER_PADDING_SIZE); 1454 // Establish safety net, by zero'ing the padding area. 1455 1456 fChunkBufferSize = chunkSize; 1457 1458 return B_OK; 1459 } 1460 1461 1462 /*! \brief Executes all steps needed for a freshly decoded video frame. 1463 1464 \see _UpdateMediaHeaderForVideoFrame() and 1465 \see _DeinterlaceAndColorConvertVideoFrame() for when you are allowed to 1466 call this method. 1467 1468 \returns B_OK when video frame was handled successfully 1469 \returnb B_NO_MEMORY when no memory is left for correct operation. 1470 */ 1471 status_t 1472 AVCodecDecoder::_HandleNewVideoFrameAndUpdateSystemState() 1473 { 1474 _UpdateMediaHeaderForVideoFrame(); 1475 status_t postProcessStatus = _DeinterlaceAndColorConvertVideoFrame(); 1476 if (postProcessStatus != B_OK) 1477 return postProcessStatus; 1478 1479 ConvertAVCodecContextToVideoFrameRate(*fContext, fOutputFrameRate); 1480 1481 #ifdef DEBUG 1482 dump_ffframe_video(fRawDecodedPicture, "ffpict"); 1483 #endif 1484 1485 fFrame++; 1486 1487 return B_OK; 1488 } 1489 1490 1491 /*! \brief Flushes one video frame - if any - still buffered by the decoder. 1492 1493 Some FFMPEG decoder are buffering video frames. To retrieve those buffered 1494 frames the decoder needs to be told so. 1495 1496 The intended use of this method is to call it, once there are no more data 1497 chunks for decoding left. Reframed in other words: Once GetNextChunk() 1498 returns with status B_LAST_BUFFER_ERROR it is time to start flushing. 
1499 1500 \returns B_OK Retrieved one video frame, handled it accordingly and updated 1501 the system state accordingly. 1502 There maybe more video frames left. So it is valid for the client of 1503 AVCodecDecoder to call it one more time. 1504 1505 \returns B_LAST_BUFFER_ERROR No video frame left. 1506 The client of the AVCodecDecoder should stop calling it now. 1507 1508 \returns B_NO_MEMORY No memory left for correct operation. 1509 */ 1510 status_t 1511 AVCodecDecoder::_FlushOneVideoFrameFromDecoderBuffer() 1512 { 1513 // Create empty fTempPacket to tell the video decoder it is time to flush 1514 fTempPacket.data = NULL; 1515 fTempPacket.size = 0; 1516 1517 int gotVideoFrame = 0; 1518 avcodec_decode_video2(fContext, fRawDecodedPicture, &gotVideoFrame, 1519 &fTempPacket); 1520 // We are only interested in complete frames now, so ignore the return 1521 // value. 1522 1523 bool gotNoVideoFrame = gotVideoFrame == 0; 1524 if (gotNoVideoFrame) { 1525 // video buffer is flushed successfully 1526 return B_LAST_BUFFER_ERROR; 1527 } 1528 1529 return _HandleNewVideoFrameAndUpdateSystemState(); 1530 } 1531 1532 1533 /*! \brief Updates relevant fields of the class member fHeader with the 1534 properties of the most recently decoded video frame. 1535 1536 It is assumed that this function is called only when the following asserts 1537 hold true: 1538 1. We actually got a new picture decoded by the video decoder. 1539 2. fHeader wasn't updated for the new picture yet. You MUST call this 1540 method only once per decoded video frame. 1541 3. This function MUST be called after 1542 _DeinterlaceAndColorConvertVideoFrame() as it relys on an updated 1543 fDecodedDataSizeInBytes. 1544 4. There will be at maximumn only one decoded video frame in our cache 1545 at any single point in time. Otherwise you couldn't tell to which 1546 cached decoded video frame the properties in fHeader relate to. 1547 5. 
AVCodecContext is still valid for this video frame (This is the case 1548 when this function is called after avcodec_decode_video2() and 1549 before the next call to avcodec_decode_video2(). 1550 */ 1551 void 1552 AVCodecDecoder::_UpdateMediaHeaderForVideoFrame() 1553 { 1554 fHeader.type = B_MEDIA_RAW_VIDEO; 1555 fHeader.file_pos = 0; 1556 fHeader.orig_size = 0; 1557 fHeader.start_time = fRawDecodedPicture->pkt_dts; 1558 fHeader.size_used = avpicture_get_size( 1559 colorspace_to_pixfmt(fOutputColorSpace), fRawDecodedPicture->width, 1560 fRawDecodedPicture->height); 1561 fHeader.u.raw_video.display_line_width = fRawDecodedPicture->width; 1562 fHeader.u.raw_video.display_line_count = fRawDecodedPicture->height; 1563 fHeader.u.raw_video.bytes_per_row 1564 = CalculateBytesPerRowWithColorSpaceAndVideoWidth(fOutputColorSpace, 1565 fRawDecodedPicture->width); 1566 fHeader.u.raw_video.field_gamma = 1.0; 1567 fHeader.u.raw_video.field_sequence = fFrame; 1568 fHeader.u.raw_video.field_number = 0; 1569 fHeader.u.raw_video.pulldown_number = 0; 1570 fHeader.u.raw_video.first_active_line = 1; 1571 fHeader.u.raw_video.line_count = fRawDecodedPicture->height; 1572 1573 ConvertAVCodecContextToVideoAspectWidthAndHeight(*fContext, 1574 fHeader.u.raw_video.pixel_width_aspect, 1575 fHeader.u.raw_video.pixel_height_aspect); 1576 1577 TRACE("[v] start_time=%02d:%02d.%02d field_sequence=%lu\n", 1578 int((fHeader.start_time / 60000000) % 60), 1579 int((fHeader.start_time / 1000000) % 60), 1580 int((fHeader.start_time / 10000) % 100), 1581 fHeader.u.raw_video.field_sequence); 1582 } 1583 1584 1585 /*! \brief This function applies deinterlacing (only if needed) and color 1586 conversion to the video frame in fRawDecodedPicture. 1587 1588 It is assumed that fRawDecodedPicture wasn't deinterlaced and color 1589 converted yet (otherwise this function behaves in unknown manners). 
1590 1591 This function MUST be called after _UpdateMediaHeaderForVideoFrame() as it 1592 relys on the fHeader.size_used and fHeader.u.raw_video.bytes_per_row fields 1593 for correct operation 1594 1595 You should only call this function when you got a new picture decoded by 1596 the video decoder. 1597 1598 When this function finishes the postprocessed video frame will be available 1599 in fPostProcessedDecodedPicture and fDecodedData (fDecodedDataSizeInBytes 1600 will be set accordingly). 1601 1602 \returns B_OK video frame successfully deinterlaced and color converted. 1603 \returns B_NO_MEMORY Not enough memory available for correct operation. 1604 */ 1605 status_t 1606 AVCodecDecoder::_DeinterlaceAndColorConvertVideoFrame() 1607 { 1608 int displayWidth = fRawDecodedPicture->width; 1609 int displayHeight = fRawDecodedPicture->height; 1610 AVPicture deinterlacedPicture; 1611 bool useDeinterlacedPicture = false; 1612 1613 if (fRawDecodedPicture->interlaced_frame) { 1614 AVPicture rawPicture; 1615 rawPicture.data[0] = fRawDecodedPicture->data[0]; 1616 rawPicture.data[1] = fRawDecodedPicture->data[1]; 1617 rawPicture.data[2] = fRawDecodedPicture->data[2]; 1618 rawPicture.data[3] = fRawDecodedPicture->data[3]; 1619 rawPicture.linesize[0] = fRawDecodedPicture->linesize[0]; 1620 rawPicture.linesize[1] = fRawDecodedPicture->linesize[1]; 1621 rawPicture.linesize[2] = fRawDecodedPicture->linesize[2]; 1622 rawPicture.linesize[3] = fRawDecodedPicture->linesize[3]; 1623 1624 avpicture_alloc(&deinterlacedPicture, fContext->pix_fmt, displayWidth, 1625 displayHeight); 1626 1627 #if LIBAVCODEC_VERSION_INT < ((57 << 16) | (0 << 8)) 1628 if (avpicture_deinterlace(&deinterlacedPicture, &rawPicture, 1629 fContext->pix_fmt, displayWidth, displayHeight) < 0) { 1630 TRACE("[v] avpicture_deinterlace() - error\n"); 1631 } else 1632 useDeinterlacedPicture = true; 1633 #else 1634 // avpicture_deinterlace is gone 1635 // TODO: implement alternate deinterlace using avfilter 1636 TRACE("[v] 
avpicture_deinterlace() - not implemented\n"); 1637 #endif 1638 } 1639 1640 // Some decoders do not set pix_fmt until they have decoded 1 frame 1641 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1642 if (fSwsContext == NULL) { 1643 fSwsContext = sws_getContext(displayWidth, displayHeight, 1644 fContext->pix_fmt, displayWidth, displayHeight, 1645 colorspace_to_pixfmt(fOutputColorSpace), 1646 SWS_FAST_BILINEAR, NULL, NULL, NULL); 1647 } 1648 #else 1649 if (fFormatConversionFunc == NULL) { 1650 fFormatConversionFunc = resolve_colorspace(fOutputColorSpace, 1651 fContext->pix_fmt, displayWidth, displayHeight); 1652 } 1653 #endif 1654 1655 fDecodedDataSizeInBytes = fHeader.size_used; 1656 1657 if (fDecodedData == NULL) { 1658 const size_t kOptimalAlignmentForColorConversion = 32; 1659 posix_memalign(reinterpret_cast<void**>(&fDecodedData), 1660 kOptimalAlignmentForColorConversion, fDecodedDataSizeInBytes); 1661 } 1662 if (fDecodedData == NULL) 1663 return B_NO_MEMORY; 1664 1665 fPostProcessedDecodedPicture->data[0] = fDecodedData; 1666 fPostProcessedDecodedPicture->linesize[0] 1667 = fHeader.u.raw_video.bytes_per_row; 1668 1669 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1670 if (fSwsContext != NULL) { 1671 #else 1672 if (fFormatConversionFunc != NULL) { 1673 #endif 1674 if (useDeinterlacedPicture) { 1675 AVFrame deinterlacedFrame; 1676 deinterlacedFrame.data[0] = deinterlacedPicture.data[0]; 1677 deinterlacedFrame.data[1] = deinterlacedPicture.data[1]; 1678 deinterlacedFrame.data[2] = deinterlacedPicture.data[2]; 1679 deinterlacedFrame.data[3] = deinterlacedPicture.data[3]; 1680 deinterlacedFrame.linesize[0] 1681 = deinterlacedPicture.linesize[0]; 1682 deinterlacedFrame.linesize[1] 1683 = deinterlacedPicture.linesize[1]; 1684 deinterlacedFrame.linesize[2] 1685 = deinterlacedPicture.linesize[2]; 1686 deinterlacedFrame.linesize[3] 1687 = deinterlacedPicture.linesize[3]; 1688 1689 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1690 sws_scale(fSwsContext, deinterlacedFrame.data, 1691 
deinterlacedFrame.linesize, 0, displayHeight, 1692 fPostProcessedDecodedPicture->data, 1693 fPostProcessedDecodedPicture->linesize); 1694 #else 1695 (*fFormatConversionFunc)(&deinterlacedFrame, 1696 fPostProcessedDecodedPicture, displayWidth, displayHeight); 1697 #endif 1698 } else { 1699 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1700 sws_scale(fSwsContext, fRawDecodedPicture->data, 1701 fRawDecodedPicture->linesize, 0, displayHeight, 1702 fPostProcessedDecodedPicture->data, 1703 fPostProcessedDecodedPicture->linesize); 1704 #else 1705 (*fFormatConversionFunc)(fRawDecodedPicture, 1706 fPostProcessedDecodedPicture, displayWidth, displayHeight); 1707 #endif 1708 } 1709 } 1710 1711 if (fRawDecodedPicture->interlaced_frame) 1712 avpicture_free(&deinterlacedPicture); 1713 1714 return B_OK; 1715 } 1716