/*
 * Copyright (C) 2001 Carlos Hasan
 * Copyright (C) 2001 François Revol
 * Copyright (C) 2001 Axel Dörfler
 * Copyright (C) 2004 Marcus Overhagen
 * Copyright (C) 2009 Stephan Amßus <superstippi@gmx.de>
 * Copyright (C) 2014 Colin Günther <coling@gmx.de>
 * Copyright (C) 2015 Adrien Destugues <pulkomandy@pulkomandy.tk>
 *
 * All rights reserved. Distributed under the terms of the MIT License.
 */

//! libavcodec based decoder for Haiku


#include "AVCodecDecoder.h"

#include <new>

#include <assert.h>
#include <string.h>

#include <Bitmap.h>
#include <Debug.h>

#include "Utilities.h"


// Tracing: all TRACE*() macros expand to nothing unless TRACE_AV_CODEC is
// defined (uncomment the line below), in which case they forward to printf().
#undef TRACE
//#define TRACE_AV_CODEC
#ifdef TRACE_AV_CODEC
#	define TRACE(x...)	printf(x)
#	define TRACE_AUDIO(x...)	printf(x)
#	define TRACE_VIDEO(x...)	printf(x)
#else
#	define TRACE(x...)
#	define TRACE_AUDIO(x...)
#	define TRACE_VIDEO(x...)
#endif

// Optional debugging aid: defining LOG_STREAM_TO_FILE creates two raw dump
// files on the Desktop and a packet counter.
// NOTE(review): the code writing to them is not in this part of the file.
//#define LOG_STREAM_TO_FILE
#ifdef LOG_STREAM_TO_FILE
#	include <File.h>
	static BFile sAudioStreamLogFile(
		"/boot/home/Desktop/AVCodecDebugAudioStream.raw",
		B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
	static BFile sVideoStreamLogFile(
		"/boot/home/Desktop/AVCodecDebugVideoStream.raw",
		B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
	static int sDumpedPackets = 0;
#endif


// Compatibility shims so the rest of the file can use one spelling
// regardless of the libavcodec version it is built against.
#if LIBAVCODEC_VERSION_INT > ((54 << 16) | (50 << 8))
// Newer libavcodec names the codec id enum AVCodecID.
typedef AVCodecID CodecID;
#endif
#if LIBAVCODEC_VERSION_INT < ((55 << 16) | (45 << 8))
// Older libavcodec only has the avcodec_*_frame() spellings.
#define av_frame_alloc avcodec_alloc_frame
#define av_frame_unref avcodec_get_frame_defaults
#define av_frame_free avcodec_free_frame
#endif


// Header that Setup() expects at the start of the info buffer for formats of
// the B_WAV_FORMAT_FAMILY; the codec extra data follows it directly.
// Packed, because it is read straight out of that buffer.
struct wave_format_ex {
	uint16 format_tag;
	uint16 channels;
	uint32 frames_per_sec;
	uint32 avg_bytes_per_sec;
	uint16 block_align;
		// copied to fBlockAlign by Setup()
	uint16 bits_per_sample;
	uint16 extra_size;
		// number of extra data bytes following this header
	// extra_data[extra_size]
} _PACKED;

// Audio properties of the first frame collected in fRawDecodedAudio; an
// instance of this struct is stored in fRawDecodedAudio->opaque.
struct avformat_codec_context {
	int sample_rate;
	int channels;
};


// 
profiling related globals 82 #define DO_PROFILING 0 83 84 static bigtime_t decodingTime = 0; 85 static bigtime_t conversionTime = 0; 86 static long profileCounter = 0; 87 88 89 AVCodecDecoder::AVCodecDecoder() 90 : 91 fHeader(), 92 fInputFormat(), 93 fFrame(0), 94 fIsAudio(false), 95 fCodec(NULL), 96 fContext(avcodec_alloc_context3(NULL)), 97 fResampleContext(NULL), 98 fDecodedData(NULL), 99 fDecodedDataSizeInBytes(0), 100 fPostProcessedDecodedPicture(av_frame_alloc()), 101 fRawDecodedPicture(av_frame_alloc()), 102 fRawDecodedAudio(av_frame_alloc()), 103 104 fCodecInitDone(false), 105 106 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 107 fSwsContext(NULL), 108 #else 109 fFormatConversionFunc(NULL), 110 #endif 111 112 fExtraData(NULL), 113 fExtraDataSize(0), 114 fBlockAlign(0), 115 116 fOutputColorSpace(B_NO_COLOR_SPACE), 117 fOutputFrameCount(0), 118 fOutputFrameRate(1.0), 119 fOutputFrameSize(0), 120 fInputFrameSize(0), 121 122 fChunkBuffer(NULL), 123 fChunkBufferSize(0), 124 fAudioDecodeError(false), 125 126 fDecodedDataBuffer(av_frame_alloc()), 127 fDecodedDataBufferOffset(0), 128 fDecodedDataBufferSize(0) 129 { 130 TRACE("AVCodecDecoder::AVCodecDecoder()\n"); 131 132 system_info info; 133 get_system_info(&info); 134 135 fContext->err_recognition = AV_EF_CAREFUL; 136 fContext->error_concealment = 3; 137 fContext->thread_count = info.cpu_count; 138 } 139 140 141 AVCodecDecoder::~AVCodecDecoder() 142 { 143 TRACE("[%c] AVCodecDecoder::~AVCodecDecoder()\n", fIsAudio?('a'):('v')); 144 145 #if DO_PROFILING 146 if (profileCounter > 0) { 147 printf("[%c] profile: d1 = %lld, d2 = %lld (%Ld)\n", 148 fIsAudio?('a'):('v'), decodingTime / profileCounter, 149 conversionTime / profileCounter, fFrame); 150 } 151 #endif 152 153 if (fCodecInitDone) 154 avcodec_close(fContext); 155 156 swr_free(&fResampleContext); 157 free(fChunkBuffer); 158 free(fDecodedData); 159 160 av_free(fPostProcessedDecodedPicture); 161 av_free(fRawDecodedPicture); 162 av_free(fRawDecodedAudio->opaque); 163 
av_free(fRawDecodedAudio); 164 av_free(fContext); 165 av_free(fDecodedDataBuffer); 166 167 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 168 if (fSwsContext != NULL) 169 sws_freeContext(fSwsContext); 170 #endif 171 172 delete[] fExtraData; 173 } 174 175 176 void 177 AVCodecDecoder::GetCodecInfo(media_codec_info* mci) 178 { 179 snprintf(mci->short_name, 32, "%s", fCodec->name); 180 snprintf(mci->pretty_name, 96, "%s", fCodec->long_name); 181 mci->id = 0; 182 mci->sub_id = fCodec->id; 183 } 184 185 186 status_t 187 AVCodecDecoder::Setup(media_format* ioEncodedFormat, const void* infoBuffer, 188 size_t infoSize) 189 { 190 if (ioEncodedFormat->type != B_MEDIA_ENCODED_AUDIO 191 && ioEncodedFormat->type != B_MEDIA_ENCODED_VIDEO) 192 return B_ERROR; 193 194 fIsAudio = (ioEncodedFormat->type == B_MEDIA_ENCODED_AUDIO); 195 TRACE("[%c] AVCodecDecoder::Setup()\n", fIsAudio?('a'):('v')); 196 197 #ifdef TRACE_AV_CODEC 198 char buffer[1024]; 199 string_for_format(*ioEncodedFormat, buffer, sizeof(buffer)); 200 TRACE("[%c] input_format = %s\n", fIsAudio?('a'):('v'), buffer); 201 TRACE("[%c] infoSize = %ld\n", fIsAudio?('a'):('v'), infoSize); 202 TRACE("[%c] user_data_type = %08lx\n", fIsAudio?('a'):('v'), 203 ioEncodedFormat->user_data_type); 204 TRACE("[%c] meta_data_size = %ld\n", fIsAudio?('a'):('v'), 205 ioEncodedFormat->MetaDataSize()); 206 #endif 207 208 media_format_description description; 209 if (BMediaFormats().GetCodeFor(*ioEncodedFormat, 210 B_MISC_FORMAT_FAMILY, &description) == B_OK) { 211 if (description.u.misc.file_format != 'ffmp') 212 return B_NOT_SUPPORTED; 213 fCodec = avcodec_find_decoder(static_cast<CodecID>( 214 description.u.misc.codec)); 215 if (fCodec == NULL) { 216 TRACE(" unable to find the correct FFmpeg " 217 "decoder (id = %lu)\n", description.u.misc.codec); 218 return B_ERROR; 219 } 220 TRACE(" found decoder %s\n", fCodec->name); 221 222 const void* extraData = infoBuffer; 223 fExtraDataSize = infoSize; 224 if (description.family == B_WAV_FORMAT_FAMILY 225 
&& infoSize >= sizeof(wave_format_ex)) { 226 TRACE(" trying to use wave_format_ex\n"); 227 // Special case extra data in B_WAV_FORMAT_FAMILY 228 const wave_format_ex* waveFormatData 229 = (const wave_format_ex*)infoBuffer; 230 231 size_t waveFormatSize = infoSize; 232 if (waveFormatData != NULL && waveFormatSize > 0) { 233 fBlockAlign = waveFormatData->block_align; 234 TRACE(" found block align: %d\n", fBlockAlign); 235 fExtraDataSize = waveFormatData->extra_size; 236 // skip the wave_format_ex from the extra data. 237 extraData = waveFormatData + 1; 238 } 239 } else { 240 if (fIsAudio) { 241 fBlockAlign 242 = ioEncodedFormat->u.encoded_audio.output.buffer_size; 243 TRACE(" using buffer_size as block align: %d\n", 244 fBlockAlign); 245 } 246 } 247 if (extraData != NULL && fExtraDataSize > 0) { 248 TRACE("AVCodecDecoder: extra data size %ld\n", infoSize); 249 delete[] fExtraData; 250 fExtraData = new(std::nothrow) char[fExtraDataSize]; 251 if (fExtraData != NULL) 252 memcpy(fExtraData, infoBuffer, fExtraDataSize); 253 else 254 fExtraDataSize = 0; 255 } 256 257 fInputFormat = *ioEncodedFormat; 258 return B_OK; 259 } else { 260 TRACE("AVCodecDecoder: BMediaFormats().GetCodeFor() failed.\n"); 261 } 262 263 printf("AVCodecDecoder::Setup failed!\n"); 264 return B_ERROR; 265 } 266 267 268 status_t 269 AVCodecDecoder::SeekedTo(int64 frame, bigtime_t time) 270 { 271 status_t ret = B_OK; 272 // Reset the FFmpeg codec to flush buffers, so we keep the sync 273 if (fCodecInitDone) { 274 avcodec_flush_buffers(fContext); 275 _ResetTempPacket(); 276 } 277 278 // Flush internal buffers as well. 
279 free(fChunkBuffer); 280 fChunkBuffer = NULL; 281 fChunkBufferSize = 0; 282 fDecodedDataBufferOffset = 0; 283 fDecodedDataBufferSize = 0; 284 fDecodedDataSizeInBytes = 0; 285 286 fFrame = frame; 287 288 return ret; 289 } 290 291 292 status_t 293 AVCodecDecoder::NegotiateOutputFormat(media_format* inOutFormat) 294 { 295 TRACE("AVCodecDecoder::NegotiateOutputFormat() [%c] \n", 296 fIsAudio?('a'):('v')); 297 298 #ifdef TRACE_AV_CODEC 299 char buffer[1024]; 300 string_for_format(*inOutFormat, buffer, sizeof(buffer)); 301 TRACE(" [%c] requested format = %s\n", fIsAudio?('a'):('v'), buffer); 302 #endif 303 304 if (fIsAudio) 305 return _NegotiateAudioOutputFormat(inOutFormat); 306 else 307 return _NegotiateVideoOutputFormat(inOutFormat); 308 } 309 310 311 status_t 312 AVCodecDecoder::Decode(void* outBuffer, int64* outFrameCount, 313 media_header* mediaHeader, media_decode_info* info) 314 { 315 if (!fCodecInitDone) 316 return B_NO_INIT; 317 318 status_t ret; 319 if (fIsAudio) 320 ret = _DecodeAudio(outBuffer, outFrameCount, mediaHeader, info); 321 else 322 ret = _DecodeVideo(outBuffer, outFrameCount, mediaHeader, info); 323 324 return ret; 325 } 326 327 328 // #pragma mark - 329 330 331 void 332 AVCodecDecoder::_ResetTempPacket() 333 { 334 av_init_packet(&fTempPacket); 335 fTempPacket.size = 0; 336 fTempPacket.data = NULL; 337 } 338 339 340 status_t 341 AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat) 342 { 343 TRACE("AVCodecDecoder::_NegotiateAudioOutputFormat()\n"); 344 345 _ApplyEssentialAudioContainerPropertiesToContext(); 346 // This makes audio formats play that encode the audio properties in 347 // the audio container (e.g. WMA) and not in the audio frames 348 // themself (e.g. MP3). 349 // Note: Doing this step unconditionally is OK, because the first call 350 // to _DecodeNextAudioFrameChunk() will update the essential audio 351 // format properties accordingly regardless of the settings here. 
352 353 // close any previous instance 354 if (fCodecInitDone) { 355 fCodecInitDone = false; 356 avcodec_close(fContext); 357 } 358 359 if (avcodec_open2(fContext, fCodec, NULL) >= 0) 360 fCodecInitDone = true; 361 else { 362 TRACE("avcodec_open() failed to init codec!\n"); 363 return B_ERROR; 364 } 365 366 free(fChunkBuffer); 367 fChunkBuffer = NULL; 368 fChunkBufferSize = 0; 369 fAudioDecodeError = false; 370 fDecodedDataBufferOffset = 0; 371 fDecodedDataBufferSize = 0; 372 373 _ResetTempPacket(); 374 375 status_t statusOfDecodingFirstFrameChunk = _DecodeNextAudioFrameChunk(); 376 if (statusOfDecodingFirstFrameChunk != B_OK) { 377 TRACE("[a] decoding first audio frame chunk failed\n"); 378 return B_ERROR; 379 } 380 381 media_multi_audio_format outputAudioFormat; 382 outputAudioFormat = media_raw_audio_format::wildcard; 383 outputAudioFormat.byte_order = B_MEDIA_HOST_ENDIAN; 384 outputAudioFormat.frame_rate = fContext->sample_rate; 385 outputAudioFormat.channel_count = fContext->channels; 386 ConvertAVSampleFormatToRawAudioFormat(fContext->sample_fmt, 387 outputAudioFormat.format); 388 // Check that format is not still a wild card! 
389 if (outputAudioFormat.format == 0) { 390 TRACE(" format still a wild-card, assuming B_AUDIO_SHORT.\n"); 391 outputAudioFormat.format = media_raw_audio_format::B_AUDIO_SHORT; 392 } 393 outputAudioFormat.buffer_size = inOutFormat->u.raw_audio.buffer_size; 394 // Check that buffer_size has a sane value 395 size_t sampleSize = outputAudioFormat.format 396 & media_raw_audio_format::B_AUDIO_SIZE_MASK; 397 if (outputAudioFormat.buffer_size == 0) { 398 outputAudioFormat.buffer_size = 512 * sampleSize 399 * outputAudioFormat.channel_count; 400 } 401 402 inOutFormat->type = B_MEDIA_RAW_AUDIO; 403 inOutFormat->u.raw_audio = outputAudioFormat; 404 inOutFormat->require_flags = 0; 405 inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS; 406 407 // Initialize variables needed to manage decoding as much audio frames as 408 // needed to fill the buffer_size. 409 fOutputFrameSize = sampleSize * outputAudioFormat.channel_count; 410 fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize; 411 fOutputFrameRate = outputAudioFormat.frame_rate; 412 if (av_sample_fmt_is_planar(fContext->sample_fmt)) 413 fInputFrameSize = sampleSize; 414 else 415 fInputFrameSize = fOutputFrameSize; 416 417 fRawDecodedAudio->opaque 418 = av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context)); 419 if (fRawDecodedAudio->opaque == NULL) 420 return B_NO_MEMORY; 421 422 if (av_sample_fmt_is_planar(fContext->sample_fmt)) { 423 fResampleContext = swr_alloc_set_opts(NULL, 424 fContext->channel_layout, fContext->request_sample_fmt, 425 fContext->sample_rate, 426 fContext->channel_layout, fContext->sample_fmt, fContext->sample_rate, 427 0, NULL); 428 swr_init(fResampleContext); 429 } 430 431 TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, " 432 "output frame size: %d, count: %ld, rate: %.2f\n", 433 fContext->bit_rate, fContext->sample_rate, fContext->channels, 434 fOutputFrameSize, fOutputFrameCount, fOutputFrameRate); 435 436 return B_OK; 437 } 438 439 440 status_t 441 
AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat) 442 { 443 TRACE("AVCodecDecoder::_NegotiateVideoOutputFormat()\n"); 444 445 TRACE(" requested video format 0x%x\n", 446 inOutFormat->u.raw_video.display.format); 447 448 _ApplyEssentialVideoContainerPropertiesToContext(); 449 // This makes video formats play that encode the video properties in 450 // the video container (e.g. WMV) and not in the video frames 451 // themself (e.g. MPEG2). 452 // Note: Doing this step unconditionally is OK, because the first call 453 // to _DecodeNextVideoFrame() will update the essential video format 454 // properties accordingly regardless of the settings here. 455 456 bool codecCanHandleIncompleteFrames 457 = (fCodec->capabilities & CODEC_CAP_TRUNCATED) != 0; 458 if (codecCanHandleIncompleteFrames) { 459 // Expect and handle video frames to be splitted across consecutive 460 // data chunks. 461 fContext->flags |= CODEC_FLAG_TRUNCATED; 462 } 463 464 // close any previous instance 465 if (fCodecInitDone) { 466 fCodecInitDone = false; 467 avcodec_close(fContext); 468 } 469 470 if (avcodec_open2(fContext, fCodec, NULL) >= 0) 471 fCodecInitDone = true; 472 else { 473 TRACE("avcodec_open() failed to init codec!\n"); 474 return B_ERROR; 475 } 476 477 // Make MediaPlayer happy (if not in rgb32 screen depth and no overlay, 478 // it will only ask for YCbCr, which DrawBitmap doesn't handle, so the 479 // default colordepth is RGB32). 
480 if (inOutFormat->u.raw_video.display.format == B_YCbCr422) 481 fOutputColorSpace = B_YCbCr422; 482 else 483 fOutputColorSpace = B_RGB32; 484 485 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 486 if (fSwsContext != NULL) 487 sws_freeContext(fSwsContext); 488 fSwsContext = NULL; 489 #else 490 fFormatConversionFunc = 0; 491 #endif 492 493 free(fChunkBuffer); 494 fChunkBuffer = NULL; 495 fChunkBufferSize = 0; 496 497 _ResetTempPacket(); 498 499 status_t statusOfDecodingFirstFrame = _DecodeNextVideoFrame(); 500 if (statusOfDecodingFirstFrame != B_OK) { 501 TRACE("[v] decoding first video frame failed\n"); 502 return B_ERROR; 503 } 504 505 // Note: fSwsContext / fFormatConversionFunc should have been initialized 506 // by first call to _DecodeNextVideoFrame() above. 507 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 508 if (fSwsContext == NULL) { 509 TRACE("No SWS Scale context or decoder has not set the pixel format " 510 "yet!\n"); 511 } 512 #else 513 if (fFormatConversionFunc == NULL) { 514 TRACE("no pixel format conversion function found or decoder has " 515 "not set the pixel format yet!\n"); 516 } 517 #endif 518 519 inOutFormat->type = B_MEDIA_RAW_VIDEO; 520 inOutFormat->require_flags = 0; 521 inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS; 522 inOutFormat->u.raw_video = fInputFormat.u.encoded_video.output; 523 inOutFormat->u.raw_video.interlace = 1; 524 // Progressive (non-interlaced) video frames are delivered 525 inOutFormat->u.raw_video.first_active 526 = fHeader.u.raw_video.first_active_line; 527 inOutFormat->u.raw_video.last_active = fHeader.u.raw_video.line_count; 528 inOutFormat->u.raw_video.pixel_width_aspect 529 = fHeader.u.raw_video.pixel_width_aspect; 530 inOutFormat->u.raw_video.pixel_height_aspect 531 = fHeader.u.raw_video.pixel_height_aspect; 532 #if 0 533 // This was added by Colin Günther in order to handle streams with a 534 // variable frame rate. 
fOutputFrameRate is computed from the stream 535 // time_base, but it actually assumes a timebase equal to the FPS. As far 536 // as I can see, a stream with a variable frame rate would have a higher 537 // resolution time_base and increment the pts (presentation time) of each 538 // frame by a value bigger than one. 539 // 540 // Fixed rate stream: 541 // time_base = 1/50s, frame PTS = 1, 2, 3... (for 50Hz) 542 // 543 // Variable rate stream: 544 // time_base = 1/300s, frame PTS = 6, 12, 18, ... (for 50Hz) 545 // time_base = 1/300s, frame PTS = 5, 10, 15, ... (for 60Hz) 546 // 547 // The fOutputFrameRate currently does not take this into account and 548 // ignores the PTS. This results in playing the above sample at 300Hz 549 // instead of 50 or 60. 550 // 551 // However, comparing the PTS for two consecutive implies we have already 552 // decoded 2 frames, which may not be the case when this method is first 553 // called. 554 inOutFormat->u.raw_video.field_rate = fOutputFrameRate; 555 // Was calculated by first call to _DecodeNextVideoFrame() 556 #endif 557 inOutFormat->u.raw_video.display.format = fOutputColorSpace; 558 inOutFormat->u.raw_video.display.line_width 559 = fHeader.u.raw_video.display_line_width; 560 inOutFormat->u.raw_video.display.line_count 561 = fHeader.u.raw_video.display_line_count; 562 inOutFormat->u.raw_video.display.bytes_per_row 563 = fHeader.u.raw_video.bytes_per_row; 564 565 #ifdef TRACE_AV_CODEC 566 char buffer[1024]; 567 string_for_format(*inOutFormat, buffer, sizeof(buffer)); 568 TRACE("[v] outFormat = %s\n", buffer); 569 TRACE(" returned video format 0x%x\n", 570 inOutFormat->u.raw_video.display.format); 571 #endif 572 573 return B_OK; 574 } 575 576 577 /*! \brief Fills the outBuffer with one or more already decoded audio frames. 578 579 Besides the main duty described above, this method also fills out the other 580 output parameters as documented below. 
581 582 \param outBuffer Pointer to the output buffer to copy the decoded audio 583 frames to. 584 \param outFrameCount Pointer to the output variable to assign the number of 585 copied audio frames (usually several audio frames at once). 586 \param mediaHeader Pointer to the output media header that contains the 587 properties of the decoded audio frame being the first in the outBuffer. 588 \param info Specifies additional decoding parameters. (Note: unused). 589 590 \returns B_OK Decoding audio frames succeeded. 591 \returns B_LAST_BUFFER_ERROR There are no more audio frames available. 592 \returns Other error codes 593 */ 594 status_t 595 AVCodecDecoder::_DecodeAudio(void* outBuffer, int64* outFrameCount, 596 media_header* mediaHeader, media_decode_info* info) 597 { 598 TRACE_AUDIO("AVCodecDecoder::_DecodeAudio(audio start_time %.6fs)\n", 599 mediaHeader->start_time / 1000000.0); 600 601 status_t audioDecodingStatus 602 = fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextAudioFrame(); 603 604 if (audioDecodingStatus != B_OK) 605 return audioDecodingStatus; 606 607 *outFrameCount = fDecodedDataSizeInBytes / fOutputFrameSize; 608 *mediaHeader = fHeader; 609 memcpy(outBuffer, fDecodedData, fDecodedDataSizeInBytes); 610 611 fDecodedDataSizeInBytes = 0; 612 613 return B_OK; 614 } 615 616 617 /*! \brief Fills the outBuffer with an already decoded video frame. 618 619 Besides the main duty described above, this method also fills out the other 620 output parameters as documented below. 621 622 \param outBuffer Pointer to the output buffer to copy the decoded video 623 frame to. 624 \param outFrameCount Pointer to the output variable to assign the number of 625 copied video frames (usually one video frame). 626 \param mediaHeader Pointer to the output media header that contains the 627 decoded video frame properties. 628 \param info Specifies additional decoding parameters. (Note: unused). 629 630 \returns B_OK Decoding a video frame succeeded. 
631 \returns B_LAST_BUFFER_ERROR There are no more video frames available. 632 \returns Other error codes 633 */ 634 status_t 635 AVCodecDecoder::_DecodeVideo(void* outBuffer, int64* outFrameCount, 636 media_header* mediaHeader, media_decode_info* info) 637 { 638 status_t videoDecodingStatus 639 = fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextVideoFrame(); 640 641 if (videoDecodingStatus != B_OK) 642 return videoDecodingStatus; 643 644 *outFrameCount = 1; 645 *mediaHeader = fHeader; 646 memcpy(outBuffer, fDecodedData, mediaHeader->size_used); 647 648 fDecodedDataSizeInBytes = 0; 649 650 return B_OK; 651 } 652 653 654 /*! \brief Decodes next audio frame. 655 656 We decode at least one audio frame into fDecodedData. To achieve this goal, 657 we might need to request several chunks of encoded data resulting in a 658 variable execution time of this function. 659 660 The length of the decoded audio frame(s) is stored in 661 fDecodedDataSizeInBytes. If this variable is greater than zero you can 662 assert that all audio frames in fDecodedData are valid. 663 664 It is assumed that the number of expected audio frames is stored in 665 fOutputFrameCount. So _DecodeNextAudioFrame() must be called only after 666 fOutputFrameCount has been set. 667 668 Note: fOutputFrameCount contains the maximum number of frames a caller 669 of BMediaDecoder::Decode() expects to receive. There is a direct 670 relationship between fOutputFrameCount and the buffer size a caller of 671 BMediaDecoder::Decode() will provide so we make sure to respect this limit 672 for fDecodedDataSizeInBytes. 673 674 On return with status code B_OK the following conditions hold true: 675 1. fDecodedData contains as much audio frames as the caller of 676 BMediaDecoder::Decode() expects. 677 2. fDecodedData contains lesser audio frames as the caller of 678 BMediaDecoder::Decode() expects only when one of the following 679 conditions hold true: 680 i No more audio frames left. 
Consecutive calls to 681 _DecodeNextAudioFrame() will then result in the return of 682 status code B_LAST_BUFFER_ERROR. 683 ii TODO: A change in the size of the audio frames. 684 3. fHeader is populated with the audio frame properties of the first 685 audio frame in fDecodedData. Especially the start_time field of 686 fHeader relates to that first audio frame. Start times of 687 consecutive audio frames in fDecodedData have to be calculated 688 manually (using the frame rate and the frame duration) if the 689 caller needs them. 690 691 TODO: Handle change of channel_count. Such a change results in a change of 692 the audio frame size and thus has different buffer requirements. 693 The most sane approach for implementing this is to return the audio frames 694 that were still decoded with the previous channel_count and inform the 695 client of BMediaDecoder::Decode() about the change so that it can adapt to 696 it. Furthermore we need to adapt our fDecodedData to the new buffer size 697 requirements accordingly. 698 699 \returns B_OK when we successfully decoded enough audio frames 700 \returns B_LAST_BUFFER_ERROR when there are no more audio frames available. 701 \returns Other Errors 702 */ 703 status_t 704 AVCodecDecoder::_DecodeNextAudioFrame() 705 { 706 assert(fTempPacket.size >= 0); 707 assert(fDecodedDataSizeInBytes == 0); 708 // _DecodeNextAudioFrame needs to be called on empty fDecodedData only! 709 // If this assert holds wrong we have a bug somewhere. 
710 711 status_t resetStatus = _ResetRawDecodedAudio(); 712 if (resetStatus != B_OK) 713 return resetStatus; 714 715 while (fRawDecodedAudio->nb_samples < fOutputFrameCount) { 716 _CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow(); 717 718 bool decodedDataBufferHasData = fDecodedDataBufferSize > 0; 719 if (decodedDataBufferHasData) { 720 _MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes(); 721 continue; 722 } 723 724 status_t decodeAudioChunkStatus = _DecodeNextAudioFrameChunk(); 725 if (decodeAudioChunkStatus != B_OK) 726 return decodeAudioChunkStatus; 727 } 728 729 fFrame += fRawDecodedAudio->nb_samples; 730 fDecodedDataSizeInBytes = fRawDecodedAudio->linesize[0]; 731 732 _UpdateMediaHeaderForAudioFrame(); 733 734 #ifdef DEBUG 735 dump_ffframe_audio(fRawDecodedAudio, "ffaudi"); 736 #endif 737 738 TRACE_AUDIO(" frame count: %ld current: %lld\n", 739 fRawDecodedAudio->nb_samples, fFrame); 740 741 return B_OK; 742 } 743 744 745 /*! \brief Applies all essential audio input properties to fContext that were 746 passed to AVCodecDecoder when Setup() was called. 747 748 Note: This function must be called before the AVCodec is opened via 749 avcodec_open2(). Otherwise the behaviour of FFMPEG's audio decoding 750 function avcodec_decode_audio4() is undefined. 751 752 Essential properties applied from fInputFormat.u.encoded_audio: 753 - bit_rate copied to fContext->bit_rate 754 - frame_size copied to fContext->frame_size 755 - output.format converted to fContext->sample_fmt 756 - output.frame_rate copied to fContext->sample_rate 757 - output.channel_count copied to fContext->channels 758 759 Other essential properties being applied: 760 - fBlockAlign to fContext->block_align 761 - fExtraData to fContext->extradata 762 - fExtraDataSize to fContext->extradata_size 763 764 TODO: Either the following documentation section should be removed or this 765 TODO when it is clear whether fInputFormat.MetaData() and 766 fInputFormat.MetaDataSize() have to be applied to fContext. 
See the related 767 TODO in the method implementation. 768 Only applied when fInputFormat.MetaDataSize() is greater than zero: 769 - fInputFormat.MetaData() to fContext->extradata 770 - fInputFormat.MetaDataSize() to fContext->extradata_size 771 */ 772 void 773 AVCodecDecoder::_ApplyEssentialAudioContainerPropertiesToContext() 774 { 775 media_encoded_audio_format containerProperties 776 = fInputFormat.u.encoded_audio; 777 778 fContext->bit_rate 779 = static_cast<int>(containerProperties.bit_rate); 780 fContext->frame_size 781 = static_cast<int>(containerProperties.frame_size); 782 ConvertRawAudioFormatToAVSampleFormat( 783 containerProperties.output.format, fContext->sample_fmt); 784 #if LIBAVCODEC_VERSION_INT > ((52 << 16) | (114 << 8)) 785 ConvertRawAudioFormatToAVSampleFormat( 786 containerProperties.output.format, fContext->request_sample_fmt); 787 #endif 788 fContext->sample_rate 789 = static_cast<int>(containerProperties.output.frame_rate); 790 fContext->channels 791 = static_cast<int>(containerProperties.output.channel_count); 792 // Check that channel count is not still a wild card! 793 if (fContext->channels == 0) { 794 TRACE(" channel_count still a wild-card, assuming stereo.\n"); 795 fContext->channels = 2; 796 } 797 798 fContext->block_align = fBlockAlign; 799 fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData); 800 fContext->extradata_size = fExtraDataSize; 801 802 // TODO: This probably needs to go away, there is some misconception 803 // about extra data / info buffer and meta data. See 804 // Reader::GetStreamInfo(). The AVFormatReader puts extradata and 805 // extradata_size into media_format::MetaData(), but used to ignore 806 // the infoBuffer passed to GetStreamInfo(). I think this may be why 807 // the code below was added. 
808 if (fInputFormat.MetaDataSize() > 0) { 809 fContext->extradata = static_cast<uint8_t*>( 810 const_cast<void*>(fInputFormat.MetaData())); 811 fContext->extradata_size = fInputFormat.MetaDataSize(); 812 } 813 814 TRACE(" bit_rate %d, sample_rate %d, channels %d, block_align %d, " 815 "extradata_size %d\n", fContext->bit_rate, fContext->sample_rate, 816 fContext->channels, fContext->block_align, fContext->extradata_size); 817 } 818 819 820 /*! \brief Resets important fields in fRawDecodedVideo to their default values. 821 822 Note: Also initializes fDecodedData if not done already. 823 824 \returns B_OK Resetting successfully completed. 825 \returns B_NO_MEMORY No memory left for correct operation. 826 */ 827 status_t 828 AVCodecDecoder::_ResetRawDecodedAudio() 829 { 830 if (fDecodedData == NULL) { 831 size_t maximumSizeOfDecodedData = fOutputFrameCount * fOutputFrameSize; 832 fDecodedData 833 = static_cast<uint8_t*>(malloc(maximumSizeOfDecodedData)); 834 } 835 if (fDecodedData == NULL) 836 return B_NO_MEMORY; 837 838 fRawDecodedAudio->data[0] = fDecodedData; 839 fRawDecodedAudio->linesize[0] = 0; 840 fRawDecodedAudio->format = AV_SAMPLE_FMT_NONE; 841 fRawDecodedAudio->pkt_dts = AV_NOPTS_VALUE; 842 fRawDecodedAudio->nb_samples = 0; 843 memset(fRawDecodedAudio->opaque, 0, sizeof(avformat_codec_context)); 844 845 return B_OK; 846 } 847 848 849 /*! \brief Checks fDecodedDataBufferSize and fTempPacket for invalid values, 850 reports them and assigns valid values. 851 852 Note: This method is intended to be called before any code is executed that 853 deals with moving, loading or decoding any audio frames. 854 */ 855 void 856 AVCodecDecoder::_CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow() 857 { 858 if (fDecodedDataBufferSize < 0) { 859 fprintf(stderr, "Decoding read past the end of the decoded data " 860 "buffer! 
%" B_PRId32 "\n", fDecodedDataBufferSize); 861 fDecodedDataBufferSize = 0; 862 } 863 if (fTempPacket.size < 0) { 864 fprintf(stderr, "Decoding read past the end of the temp packet! %d\n", 865 fTempPacket.size); 866 fTempPacket.size = 0; 867 } 868 } 869 870 871 /*! \brief Moves audio frames from fDecodedDataBuffer to fRawDecodedAudio (and 872 thus to fDecodedData) and updates the start times of fRawDecodedAudio, 873 fDecodedDataBuffer and fTempPacket accordingly. 874 875 When moving audio frames to fRawDecodedAudio this method also makes sure 876 that the following important fields of fRawDecodedAudio are populated and 877 updated with correct values: 878 - fRawDecodedAudio->data[0]: Points to first free byte of fDecodedData 879 - fRawDecodedAudio->linesize[0]: Total size of frames in fDecodedData 880 - fRawDecodedAudio->format: Format of first audio frame 881 - fRawDecodedAudio->pkt_dts: Start time of first audio frame 882 - fRawDecodedAudio->nb_samples: Number of audio frames 883 - fRawDecodedAudio->opaque: Contains the following fields for the first 884 audio frame: 885 - channels: Channel count of first audio frame 886 - sample_rate: Frame rate of first audio frame 887 888 This function assumes to be called only when the following assumptions 889 hold true: 890 1. There are decoded audio frames available in fDecodedDataBuffer 891 meaning that fDecodedDataBufferSize is greater than zero. 892 2. There is space left in fRawDecodedAudio to move some audio frames 893 in. This means that fRawDecodedAudio has lesser audio frames than 894 the maximum allowed (specified by fOutputFrameCount). 895 3. The audio frame rate is known so that we can calculate the time 896 range (covered by the moved audio frames) to update the start times 897 accordingly. 898 4. The field fRawDecodedAudio->opaque points to a memory block 899 representing a structure of type avformat_codec_context. 
900 901 After this function returns the caller can safely make the following 902 assumptions: 903 1. The number of decoded audio frames in fDecodedDataBuffer is 904 decreased though it may still be greater then zero. 905 2. The number of frames in fRawDecodedAudio has increased and all 906 important fields are updated (see listing above). 907 3. Start times of fDecodedDataBuffer and fTempPacket were increased 908 with the time range covered by the moved audio frames. 909 910 Note: This function raises an exception (by calling the debugger), when 911 fDecodedDataBufferSize is not a multiple of fOutputFrameSize. 912 */ 913 void 914 AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes() 915 { 916 assert(fDecodedDataBufferSize > 0); 917 assert(fRawDecodedAudio->nb_samples < fOutputFrameCount); 918 assert(fOutputFrameRate > 0); 919 920 int32 outFrames = fOutputFrameCount - fRawDecodedAudio->nb_samples; 921 int32 inFrames = fDecodedDataBufferSize; 922 923 int32 frames = min_c(outFrames, inFrames); 924 if (frames == 0) 925 debugger("fDecodedDataBufferSize not multiple of frame size!"); 926 927 // Some decoders do not support format conversion on themselves, or use 928 // "planar" audio (each channel separated instead of interleaved samples). 929 // In that case, we use swresample to convert the data 930 if (av_sample_fmt_is_planar(fContext->sample_fmt)) { 931 #if 0 932 const uint8_t* ptr[8]; 933 for (int i = 0; i < 8; i++) { 934 if (fDecodedDataBuffer->data[i] == NULL) 935 ptr[i] = NULL; 936 else 937 ptr[i] = fDecodedDataBuffer->data[i] + fDecodedDataBufferOffset; 938 } 939 940 // When there are more input frames than space in the output buffer, 941 // we could feed everything to swr and it would buffer the extra data. 942 // However, there is no easy way to flush that data without feeding more 943 // input, and it makes our timestamp computations fail. 944 // So, we feed only as much frames as we can get out, and handle the 945 // buffering ourselves. 
946 // TODO Ideally, we should try to size our output buffer so that it can 947 // always hold all the output (swr provides helper functions for this) 948 inFrames = frames; 949 frames = swr_convert(fResampleContext, fRawDecodedAudio->data, 950 outFrames, ptr, inFrames); 951 952 if (frames < 0) 953 debugger("resampling failed"); 954 #else 955 // interleave planar audio with same format 956 uintptr_t out = (uintptr_t)fRawDecodedAudio->data[0]; 957 int32 offset = fDecodedDataBufferOffset; 958 for (int i = 0; i < frames; i++) { 959 for (int j = 0; j < fContext->channels; j++) { 960 memcpy((void*)out, fDecodedDataBuffer->data[j] 961 + offset, fInputFrameSize); 962 out += fInputFrameSize; 963 } 964 offset += fInputFrameSize; 965 } 966 outFrames = frames; 967 inFrames = frames; 968 #endif 969 } else { 970 memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0] 971 + fDecodedDataBufferOffset, frames * fOutputFrameSize); 972 outFrames = frames; 973 inFrames = frames; 974 } 975 976 size_t remainingSize = inFrames * fInputFrameSize; 977 size_t decodedSize = outFrames * fOutputFrameSize; 978 fDecodedDataBufferSize -= inFrames; 979 980 bool firstAudioFramesCopiedToRawDecodedAudio 981 = fRawDecodedAudio->data[0] != fDecodedData; 982 if (!firstAudioFramesCopiedToRawDecodedAudio) { 983 fRawDecodedAudio->format = fDecodedDataBuffer->format; 984 fRawDecodedAudio->pkt_dts = fDecodedDataBuffer->pkt_dts; 985 986 avformat_codec_context* codecContext 987 = static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque); 988 codecContext->channels = fContext->channels; 989 codecContext->sample_rate = fContext->sample_rate; 990 } 991 992 fRawDecodedAudio->data[0] += decodedSize; 993 fRawDecodedAudio->linesize[0] += decodedSize; 994 fRawDecodedAudio->nb_samples += outFrames; 995 996 fDecodedDataBufferOffset += remainingSize; 997 998 // Update start times accordingly 999 bigtime_t framesTimeInterval = static_cast<bigtime_t>( 1000 (1000000LL * frames) / fOutputFrameRate); 1001 
fDecodedDataBuffer->pkt_dts += framesTimeInterval; 1002 // Start time of buffer is updated in case that it contains 1003 // more audio frames to move. 1004 fTempPacket.dts += framesTimeInterval; 1005 // Start time of fTempPacket is updated in case the fTempPacket 1006 // contains more audio frames to decode. 1007 } 1008 1009 1010 /*! \brief Decodes next chunk of audio frames. 1011 1012 This method handles all the details of loading the input buffer 1013 (fChunkBuffer) at the right time and of calling FFMPEG often engouh until 1014 some audio frames have been decoded. 1015 1016 FFMPEG decides how much audio frames belong to a chunk. Because of that 1017 it is very likely that _DecodeNextAudioFrameChunk has to be called several 1018 times to decode enough audio frames to please the caller of 1019 BMediaDecoder::Decode(). 1020 1021 This function assumes to be called only when the following assumptions 1022 hold true: 1023 1. fDecodedDataBufferSize equals zero. 1024 1025 After this function returns successfully the caller can safely make the 1026 following assumptions: 1027 1. fDecodedDataBufferSize is greater than zero. 1028 2. fDecodedDataBufferOffset is set to zero. 1029 3. fDecodedDataBuffer contains audio frames. 1030 1031 1032 \returns B_OK on successfully decoding one audio frame chunk. 1033 \returns B_LAST_BUFFER_ERROR No more audio frame chunks available. From 1034 this point on further calls will return this same error. 
1035 \returns B_ERROR Decoding failed 1036 */ 1037 status_t 1038 AVCodecDecoder::_DecodeNextAudioFrameChunk() 1039 { 1040 assert(fDecodedDataBufferSize == 0); 1041 1042 while (fDecodedDataBufferSize == 0) { 1043 status_t loadingChunkStatus 1044 = _LoadNextChunkIfNeededAndAssignStartTime(); 1045 if (loadingChunkStatus != B_OK) 1046 return loadingChunkStatus; 1047 1048 status_t decodingStatus 1049 = _DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer(); 1050 if (decodingStatus != B_OK) { 1051 // Assume the audio decoded until now is broken so replace it with 1052 // some silence. 1053 memset(fDecodedData, 0, fRawDecodedAudio->linesize[0]); 1054 1055 if (!fAudioDecodeError) { 1056 // Report failure if not done already 1057 int32 chunkBufferOffset = fTempPacket.data - fChunkBuffer; 1058 printf("########### audio decode error, " 1059 "fTempPacket.size %d, fChunkBuffer data offset %" B_PRId32 1060 "\n", fTempPacket.size, chunkBufferOffset); 1061 fAudioDecodeError = true; 1062 } 1063 1064 // Assume that next audio chunk can be decoded so keep decoding. 1065 continue; 1066 } 1067 1068 fAudioDecodeError = false; 1069 } 1070 1071 return B_OK; 1072 } 1073 1074 1075 /*! \brief Tries to decode at least one audio frame and store it in the 1076 fDecodedDataBuffer. 1077 1078 This function assumes to be called only when the following assumptions 1079 hold true: 1080 1. fDecodedDataBufferSize equals zero. 1081 2. fTempPacket.size is greater than zero. 1082 1083 After this function returns successfully the caller can safely make the 1084 following assumptions: 1085 1. fDecodedDataBufferSize is greater than zero in the common case. 1086 Also see "Note" below. 1087 2. fTempPacket was updated to exclude the data chunk that was consumed 1088 by avcodec_decode_audio4(). 1089 3. fDecodedDataBufferOffset is set to zero. 1090 1091 When this function failed to decode at least one audio frame due to a 1092 decoding error the caller can safely make the following assumptions: 1093 1. 
fDecodedDataBufferSize equals zero. 1094 2. fTempPacket.size equals zero. 1095 1096 Note: It is possible that there wasn't any audio frame decoded into 1097 fDecodedDataBuffer after calling this function. This is normal and can 1098 happen when there was either a decoding error or there is some decoding 1099 delay in FFMPEGs audio decoder. Another call to this method is totally 1100 safe and is even expected as long as the calling assumptions hold true. 1101 1102 \returns B_OK Decoding successful. fDecodedDataBuffer contains decoded 1103 audio frames only when fDecodedDataBufferSize is greater than zero. 1104 fDecodedDataBuffer is empty, when avcodec_decode_audio4() didn't return 1105 audio frames due to delayed decoding or incomplete audio frames. 1106 \returns B_ERROR Decoding failed thus fDecodedDataBuffer contains no audio 1107 frames. 1108 */ 1109 status_t 1110 AVCodecDecoder::_DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer() 1111 { 1112 assert(fDecodedDataBufferSize == 0); 1113 1114 memset(fDecodedDataBuffer, 0, sizeof(AVFrame)); 1115 av_frame_unref(fDecodedDataBuffer); 1116 fDecodedDataBufferOffset = 0; 1117 int gotAudioFrame = 0; 1118 1119 int encodedDataSizeInBytes = avcodec_decode_audio4(fContext, 1120 fDecodedDataBuffer, &gotAudioFrame, &fTempPacket); 1121 if (encodedDataSizeInBytes <= 0) { 1122 // Error or failure to produce decompressed output. 1123 // Skip the temp packet data entirely. 1124 fTempPacket.size = 0; 1125 return B_ERROR; 1126 } 1127 1128 fTempPacket.data += encodedDataSizeInBytes; 1129 fTempPacket.size -= encodedDataSizeInBytes; 1130 1131 bool gotNoAudioFrame = gotAudioFrame == 0; 1132 if (gotNoAudioFrame) 1133 return B_OK; 1134 1135 fDecodedDataBufferSize = fDecodedDataBuffer->nb_samples; 1136 if (fDecodedDataBufferSize < 0) 1137 fDecodedDataBufferSize = 0; 1138 1139 return B_OK; 1140 } 1141 1142 1143 /*! \brief Updates relevant fields of the class member fHeader with the 1144 properties of the most recently decoded audio frame. 
1145 1146 The following fields of fHeader are updated: 1147 - fHeader.type 1148 - fHeader.file_pos 1149 - fHeader.orig_size 1150 - fHeader.start_time 1151 - fHeader.size_used 1152 - fHeader.u.raw_audio.frame_rate 1153 - fHeader.u.raw_audio.channel_count 1154 1155 It is assumed that this function is called only when the following asserts 1156 hold true: 1157 1. We actually got a new audio frame decoded by the audio decoder. 1158 2. fHeader wasn't updated for the new audio frame yet. You MUST call 1159 this method only once per decoded audio frame. 1160 3. fRawDecodedAudio's fields relate to the first audio frame contained 1161 in fDecodedData. Especially the following fields are of importance: 1162 - fRawDecodedAudio->pkt_dts: Start time of first audio frame 1163 - fRawDecodedAudio->opaque: Contains the following fields for 1164 the first audio frame: 1165 - channels: Channel count of first audio frame 1166 - sample_rate: Frame rate of first audio frame 1167 */ 1168 void 1169 AVCodecDecoder::_UpdateMediaHeaderForAudioFrame() 1170 { 1171 fHeader.type = B_MEDIA_RAW_AUDIO; 1172 fHeader.file_pos = 0; 1173 fHeader.orig_size = 0; 1174 fHeader.start_time = fRawDecodedAudio->pkt_dts; 1175 fHeader.size_used = fRawDecodedAudio->linesize[0]; 1176 1177 avformat_codec_context* codecContext 1178 = static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque); 1179 fHeader.u.raw_audio.channel_count = codecContext->channels; 1180 fHeader.u.raw_audio.frame_rate = codecContext->sample_rate; 1181 } 1182 1183 1184 /*! \brief Decodes next video frame. 1185 1186 We decode exactly one video frame into fDecodedData. To achieve this goal, 1187 we might need to request several chunks of encoded data resulting in a 1188 variable execution time of this function. 1189 1190 The length of the decoded video frame is stored in 1191 fDecodedDataSizeInBytes. If this variable is greater than zero, you can 1192 assert that there is a valid video frame available in fDecodedData. 
The decoded video frame in fDecodedData has color space conversion and
	deinterlacing already applied.

	To every decoded video frame there is a media_header populated in
	fHeader, containing the corresponding video frame properties.

	Normally every decoded video frame has a start_time field populated in the
	associated fHeader, that determines the presentation time of the frame.
	This relationship will only hold true, when each data chunk that is
	provided via GetNextChunk() contains data for exactly one encoded video
	frame (one complete frame) - not more and not less.

	We can decode data chunks that contain partial video frame data, too. In
	that case, you cannot trust the value of the start_time field in fHeader.
	We simply have no logic in place to establish a meaningful relationship
	between an incomplete frame and the start time it should be presented.
	Though this might change in the future.

	We can decode data chunks that contain more than one video frame, too. In
	that case, you cannot trust the value of the start_time field in fHeader.
	We simply have no logic in place to track the start_time across multiple
	video frames. So a meaningful relationship between the 2nd, 3rd, ... frame
	and the start time it should be presented isn't established at the moment.
	Though this might change in the future.

	Moreover the fOutputFrameRate variable is updated for every decoded video
	frame.

	On first call the member variables fSwsContext / fFormatConversionFunc are
	initialized.

	\returns B_OK when we successfully decoded one video frame
	\returns B_LAST_BUFFER_ERROR when there are no more video frames available.
	\returns B_NO_MEMORY when we have no memory left for correct operation.
1228 \returns Other Errors 1229 */ 1230 status_t 1231 AVCodecDecoder::_DecodeNextVideoFrame() 1232 { 1233 while (true) { 1234 status_t loadingChunkStatus 1235 = _LoadNextChunkIfNeededAndAssignStartTime(); 1236 if (loadingChunkStatus == B_LAST_BUFFER_ERROR) 1237 return _FlushOneVideoFrameFromDecoderBuffer(); 1238 if (loadingChunkStatus != B_OK) { 1239 TRACE("AVCodecDecoder::_DecodeNextVideoFrame(): error from " 1240 "GetNextChunk(): %s\n", strerror(loadingChunkStatus)); 1241 return loadingChunkStatus; 1242 } 1243 1244 #if DO_PROFILING 1245 bigtime_t startTime = system_time(); 1246 #endif 1247 1248 // NOTE: In the FFMPEG 0.10.2 code example decoding_encoding.c, the 1249 // length returned by avcodec_decode_video2() is used to update the 1250 // packet buffer size (here it is fTempPacket.size). This way the 1251 // packet buffer is allowed to contain incomplete frames so we are 1252 // required to buffer the packets between different calls to 1253 // _DecodeNextVideoFrame(). 1254 int gotVideoFrame = 0; 1255 int encodedDataSizeInBytes = avcodec_decode_video2(fContext, 1256 fRawDecodedPicture, &gotVideoFrame, &fTempPacket); 1257 if (encodedDataSizeInBytes < 0) { 1258 TRACE("[v] AVCodecDecoder: ignoring error in decoding frame %lld:" 1259 " %d\n", fFrame, encodedDataSizeInBytes); 1260 // NOTE: An error from avcodec_decode_video2() is ignored by the 1261 // FFMPEG 0.10.2 example decoding_encoding.c. 
Only the packet 1262 // buffers are flushed accordingly 1263 fTempPacket.data = NULL; 1264 fTempPacket.size = 0; 1265 continue; 1266 } 1267 1268 fTempPacket.size -= encodedDataSizeInBytes; 1269 fTempPacket.data += encodedDataSizeInBytes; 1270 1271 bool gotNoVideoFrame = gotVideoFrame == 0; 1272 if (gotNoVideoFrame) { 1273 TRACE("frame %lld - no picture yet, encodedDataSizeInBytes: %d, " 1274 "chunk size: %ld\n", fFrame, encodedDataSizeInBytes, 1275 fChunkBufferSize); 1276 continue; 1277 } 1278 1279 #if DO_PROFILING 1280 bigtime_t formatConversionStart = system_time(); 1281 #endif 1282 1283 status_t handleStatus = _HandleNewVideoFrameAndUpdateSystemState(); 1284 if (handleStatus != B_OK) 1285 return handleStatus; 1286 1287 #if DO_PROFILING 1288 bigtime_t doneTime = system_time(); 1289 decodingTime += formatConversionStart - startTime; 1290 conversionTime += doneTime - formatConversionStart; 1291 profileCounter++; 1292 if (!(fFrame % 5)) { 1293 printf("[v] profile: d1 = %lld, d2 = %lld (%lld) required %Ld\n", 1294 decodingTime / profileCounter, conversionTime / profileCounter, 1295 fFrame, bigtime_t(1000000LL / fOutputFrameRate)); 1296 decodingTime = 0; 1297 conversionTime = 0; 1298 profileCounter = 0; 1299 } 1300 #endif 1301 return B_OK; 1302 } 1303 } 1304 1305 1306 /*! \brief Applies all essential video input properties to fContext that were 1307 passed to AVCodecDecoder when Setup() was called. 1308 1309 Note: This function must be called before the AVCodec is opened via 1310 avcodec_open2(). Otherwise the behaviour of FFMPEG's video decoding 1311 function avcodec_decode_video2() is undefined. 
1312 1313 Essential properties applied from fInputFormat.u.encoded_video.output: 1314 - display.line_width copied to fContext->width 1315 - display.line_count copied to fContext->height 1316 - pixel_width_aspect and pixel_height_aspect converted to 1317 fContext->sample_aspect_ratio 1318 - field_rate converted to fContext->time_base and 1319 fContext->ticks_per_frame 1320 1321 Other essential properties being applied: 1322 - fExtraData to fContext->extradata 1323 - fExtraDataSize to fContext->extradata_size 1324 */ 1325 void 1326 AVCodecDecoder::_ApplyEssentialVideoContainerPropertiesToContext() 1327 { 1328 media_raw_video_format containerProperties 1329 = fInputFormat.u.encoded_video.output; 1330 1331 fContext->width = containerProperties.display.line_width; 1332 fContext->height = containerProperties.display.line_count; 1333 1334 if (containerProperties.pixel_width_aspect > 0 1335 && containerProperties.pixel_height_aspect > 0) { 1336 ConvertVideoAspectWidthAndHeightToAVCodecContext( 1337 containerProperties.pixel_width_aspect, 1338 containerProperties.pixel_height_aspect, *fContext); 1339 } 1340 1341 if (containerProperties.field_rate > 0.0) { 1342 ConvertVideoFrameRateToAVCodecContext(containerProperties.field_rate, 1343 *fContext); 1344 } 1345 1346 fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData); 1347 fContext->extradata_size = fExtraDataSize; 1348 } 1349 1350 1351 /*! \brief Loads the next chunk into fChunkBuffer and assigns it (including 1352 the start time) to fTempPacket but only if fTempPacket is empty. 1353 1354 \returns B_OK 1355 1. meaning: Next chunk is loaded. 1356 2. meaning: No need to load and assign anything. Proceed as usual. 1357 \returns B_LAST_BUFFER_ERROR No more chunks available. fChunkBuffer and 1358 fTempPacket are left untouched. 1359 \returns Other errors Caller should bail out because fChunkBuffer and 1360 fTempPacket are in unknown states. Normal operation cannot be 1361 guaranteed. 
1362 */ 1363 status_t 1364 AVCodecDecoder::_LoadNextChunkIfNeededAndAssignStartTime() 1365 { 1366 if (fTempPacket.size > 0) 1367 return B_OK; 1368 1369 const void* chunkBuffer = NULL; 1370 size_t chunkBufferSize = 0; 1371 // In the case that GetNextChunk() returns an error fChunkBufferSize 1372 // should be left untouched. 1373 media_header chunkMediaHeader; 1374 1375 status_t getNextChunkStatus = GetNextChunk(&chunkBuffer, &chunkBufferSize, 1376 &chunkMediaHeader); 1377 if (getNextChunkStatus != B_OK) 1378 return getNextChunkStatus; 1379 1380 status_t chunkBufferPaddingStatus 1381 = _CopyChunkToChunkBufferAndAddPadding(chunkBuffer, chunkBufferSize); 1382 if (chunkBufferPaddingStatus != B_OK) 1383 return chunkBufferPaddingStatus; 1384 1385 fTempPacket.data = fChunkBuffer; 1386 fTempPacket.size = fChunkBufferSize; 1387 fTempPacket.dts = chunkMediaHeader.start_time; 1388 // Let FFMPEG handle the correct relationship between start_time and 1389 // decoded a/v frame. By doing so we are simply copying the way how it 1390 // is implemented in ffplay.c for video frames (for audio frames it 1391 // works, too, but isn't used by ffplay.c). 1392 // \see http://git.videolan.org/?p=ffmpeg.git;a=blob;f=ffplay.c;h=09623db374e5289ed20b7cc28c262c4375a8b2e4;hb=9153b33a742c4e2a85ff6230aea0e75f5a8b26c2#l1502 1393 // 1394 // FIXME: Research how to establish a meaningful relationship between 1395 // start_time and decoded a/v frame when the received chunk buffer 1396 // contains partial a/v frames. Maybe some data formats do contain time 1397 // stamps (ake pts / dts fields) that can be evaluated by FFMPEG. But 1398 // as long as I don't have such video data to test it, it makes no 1399 // sense trying to implement it. 1400 // 1401 // FIXME: Implement tracking start_time of video frames originating in 1402 // data chunks that encode more than one video frame at a time. 
In that 1403 // case on would increment the start_time for each consecutive frame of 1404 // such a data chunk (like it is done for audio frame decoding). But as 1405 // long as I don't have such video data to test it, it makes no sense 1406 // to implement it. 1407 1408 #ifdef LOG_STREAM_TO_FILE 1409 BFile* logFile = fIsAudio ? &sAudioStreamLogFile : &sVideoStreamLogFile; 1410 if (sDumpedPackets < 100) { 1411 logFile->Write(chunkBuffer, fChunkBufferSize); 1412 printf("wrote %ld bytes\n", fChunkBufferSize); 1413 sDumpedPackets++; 1414 } else if (sDumpedPackets == 100) 1415 logFile->Unset(); 1416 #endif 1417 1418 return B_OK; 1419 } 1420 1421 1422 /*! \brief Copies a chunk into fChunkBuffer and adds a "safety net" of 1423 additional memory as required by FFMPEG for input buffers to video 1424 decoders. 1425 1426 This is needed so that some decoders can read safely a predefined number of 1427 bytes at a time for performance optimization purposes. 1428 1429 The additional memory has a size of FF_INPUT_BUFFER_PADDING_SIZE as defined 1430 in avcodec.h. 1431 1432 Ownership of fChunkBuffer memory is with the class so it needs to be freed 1433 at the right times (on destruction, on seeking). 1434 1435 Also update fChunkBufferSize to reflect the size of the contained data 1436 (leaving out the padding). 1437 1438 \param chunk The chunk to copy. 1439 \param chunkSize Size of the chunk in bytes 1440 1441 \returns B_OK Padding was successful. You are responsible for releasing the 1442 allocated memory. fChunkBufferSize is set to chunkSize. 1443 \returns B_NO_MEMORY Padding failed. 1444 fChunkBuffer is set to NULL making it safe to call free() on it. 1445 fChunkBufferSize is set to 0 to reflect the size of fChunkBuffer. 
1446 */ 1447 status_t 1448 AVCodecDecoder::_CopyChunkToChunkBufferAndAddPadding(const void* chunk, 1449 size_t chunkSize) 1450 { 1451 fChunkBuffer = static_cast<uint8_t*>(realloc(fChunkBuffer, 1452 chunkSize + FF_INPUT_BUFFER_PADDING_SIZE)); 1453 if (fChunkBuffer == NULL) { 1454 fChunkBufferSize = 0; 1455 return B_NO_MEMORY; 1456 } 1457 1458 memcpy(fChunkBuffer, chunk, chunkSize); 1459 memset(fChunkBuffer + chunkSize, 0, FF_INPUT_BUFFER_PADDING_SIZE); 1460 // Establish safety net, by zero'ing the padding area. 1461 1462 fChunkBufferSize = chunkSize; 1463 1464 return B_OK; 1465 } 1466 1467 1468 /*! \brief Executes all steps needed for a freshly decoded video frame. 1469 1470 \see _UpdateMediaHeaderForVideoFrame() and 1471 \see _DeinterlaceAndColorConvertVideoFrame() for when you are allowed to 1472 call this method. 1473 1474 \returns B_OK when video frame was handled successfully 1475 \returnb B_NO_MEMORY when no memory is left for correct operation. 1476 */ 1477 status_t 1478 AVCodecDecoder::_HandleNewVideoFrameAndUpdateSystemState() 1479 { 1480 _UpdateMediaHeaderForVideoFrame(); 1481 status_t postProcessStatus = _DeinterlaceAndColorConvertVideoFrame(); 1482 if (postProcessStatus != B_OK) 1483 return postProcessStatus; 1484 1485 ConvertAVCodecContextToVideoFrameRate(*fContext, fOutputFrameRate); 1486 1487 #ifdef DEBUG 1488 dump_ffframe_video(fRawDecodedPicture, "ffpict"); 1489 #endif 1490 1491 fFrame++; 1492 1493 return B_OK; 1494 } 1495 1496 1497 /*! \brief Flushes one video frame - if any - still buffered by the decoder. 1498 1499 Some FFMPEG decoder are buffering video frames. To retrieve those buffered 1500 frames the decoder needs to be told so. 1501 1502 The intended use of this method is to call it, once there are no more data 1503 chunks for decoding left. Reframed in other words: Once GetNextChunk() 1504 returns with status B_LAST_BUFFER_ERROR it is time to start flushing. 
1505 1506 \returns B_OK Retrieved one video frame, handled it accordingly and updated 1507 the system state accordingly. 1508 There maybe more video frames left. So it is valid for the client of 1509 AVCodecDecoder to call it one more time. 1510 1511 \returns B_LAST_BUFFER_ERROR No video frame left. 1512 The client of the AVCodecDecoder should stop calling it now. 1513 1514 \returns B_NO_MEMORY No memory left for correct operation. 1515 */ 1516 status_t 1517 AVCodecDecoder::_FlushOneVideoFrameFromDecoderBuffer() 1518 { 1519 // Create empty fTempPacket to tell the video decoder it is time to flush 1520 fTempPacket.data = NULL; 1521 fTempPacket.size = 0; 1522 1523 int gotVideoFrame = 0; 1524 avcodec_decode_video2(fContext, fRawDecodedPicture, &gotVideoFrame, 1525 &fTempPacket); 1526 // We are only interested in complete frames now, so ignore the return 1527 // value. 1528 1529 bool gotNoVideoFrame = gotVideoFrame == 0; 1530 if (gotNoVideoFrame) { 1531 // video buffer is flushed successfully 1532 return B_LAST_BUFFER_ERROR; 1533 } 1534 1535 return _HandleNewVideoFrameAndUpdateSystemState(); 1536 } 1537 1538 1539 /*! \brief Updates relevant fields of the class member fHeader with the 1540 properties of the most recently decoded video frame. 1541 1542 It is assumed that this function is called only when the following asserts 1543 hold true: 1544 1. We actually got a new picture decoded by the video decoder. 1545 2. fHeader wasn't updated for the new picture yet. You MUST call this 1546 method only once per decoded video frame. 1547 3. This function MUST be called after 1548 _DeinterlaceAndColorConvertVideoFrame() as it relys on an updated 1549 fDecodedDataSizeInBytes. 1550 4. There will be at maximumn only one decoded video frame in our cache 1551 at any single point in time. Otherwise you couldn't tell to which 1552 cached decoded video frame the properties in fHeader relate to. 1553 5. 
AVCodecContext is still valid for this video frame (This is the case
		   when this function is called after avcodec_decode_video2() and
		   before the next call to avcodec_decode_video2()).
*/
void
AVCodecDecoder::_UpdateMediaHeaderForVideoFrame()
{
	// Dimensions of the picture that was just decoded
	const int frameWidth = fRawDecodedPicture->width;
	const int frameHeight = fRawDecodedPicture->height;

	// Generic part of the header
	fHeader.type = B_MEDIA_RAW_VIDEO;
	fHeader.file_pos = 0;
	fHeader.orig_size = 0;
	fHeader.start_time = fRawDecodedPicture->pkt_dts;
	fHeader.size_used = avpicture_get_size(
		colorspace_to_pixfmt(fOutputColorSpace), frameWidth, frameHeight);
		// Size of the frame once converted to the output color space

	// Raw video specific part of the header
	fHeader.u.raw_video.display_line_width = frameWidth;
	fHeader.u.raw_video.display_line_count = frameHeight;
	fHeader.u.raw_video.bytes_per_row
		= CalculateBytesPerRowWithColorSpaceAndVideoWidth(fOutputColorSpace,
			frameWidth);
	fHeader.u.raw_video.field_gamma = 1.0;
	fHeader.u.raw_video.field_sequence = fFrame;
	fHeader.u.raw_video.field_number = 0;
	fHeader.u.raw_video.pulldown_number = 0;
	fHeader.u.raw_video.first_active_line = 1;
	fHeader.u.raw_video.line_count = frameHeight;

	ConvertAVCodecContextToVideoAspectWidthAndHeight(*fContext,
		fHeader.u.raw_video.pixel_width_aspect,
		fHeader.u.raw_video.pixel_height_aspect);

	TRACE("[v] start_time=%02d:%02d.%02d field_sequence=%lu\n",
		int((fHeader.start_time / 60000000) % 60),
		int((fHeader.start_time / 1000000) % 60),
		int((fHeader.start_time / 10000) % 100),
		fHeader.u.raw_video.field_sequence);
}


/*! \brief This function applies deinterlacing (only if needed) and color
		conversion to the video frame in fRawDecodedPicture.

	It is assumed that fRawDecodedPicture wasn't deinterlaced and color
	converted yet (otherwise this function behaves in unknown manners).
1596 1597 This function MUST be called after _UpdateMediaHeaderForVideoFrame() as it 1598 relys on the fHeader.size_used and fHeader.u.raw_video.bytes_per_row fields 1599 for correct operation 1600 1601 You should only call this function when you got a new picture decoded by 1602 the video decoder. 1603 1604 When this function finishes the postprocessed video frame will be available 1605 in fPostProcessedDecodedPicture and fDecodedData (fDecodedDataSizeInBytes 1606 will be set accordingly). 1607 1608 \returns B_OK video frame successfully deinterlaced and color converted. 1609 \returns B_NO_MEMORY Not enough memory available for correct operation. 1610 */ 1611 status_t 1612 AVCodecDecoder::_DeinterlaceAndColorConvertVideoFrame() 1613 { 1614 int displayWidth = fRawDecodedPicture->width; 1615 int displayHeight = fRawDecodedPicture->height; 1616 AVPicture deinterlacedPicture; 1617 bool useDeinterlacedPicture = false; 1618 1619 if (fRawDecodedPicture->interlaced_frame) { 1620 AVPicture rawPicture; 1621 rawPicture.data[0] = fRawDecodedPicture->data[0]; 1622 rawPicture.data[1] = fRawDecodedPicture->data[1]; 1623 rawPicture.data[2] = fRawDecodedPicture->data[2]; 1624 rawPicture.data[3] = fRawDecodedPicture->data[3]; 1625 rawPicture.linesize[0] = fRawDecodedPicture->linesize[0]; 1626 rawPicture.linesize[1] = fRawDecodedPicture->linesize[1]; 1627 rawPicture.linesize[2] = fRawDecodedPicture->linesize[2]; 1628 rawPicture.linesize[3] = fRawDecodedPicture->linesize[3]; 1629 1630 avpicture_alloc(&deinterlacedPicture, fContext->pix_fmt, displayWidth, 1631 displayHeight); 1632 1633 #if LIBAVCODEC_VERSION_INT < ((57 << 16) | (0 << 8)) 1634 if (avpicture_deinterlace(&deinterlacedPicture, &rawPicture, 1635 fContext->pix_fmt, displayWidth, displayHeight) < 0) { 1636 TRACE("[v] avpicture_deinterlace() - error\n"); 1637 } else 1638 useDeinterlacedPicture = true; 1639 #else 1640 // avpicture_deinterlace is gone 1641 // TODO: implement alternate deinterlace using avfilter 1642 TRACE("[v] 
avpicture_deinterlace() - not implemented\n"); 1643 #endif 1644 } 1645 1646 // Some decoders do not set pix_fmt until they have decoded 1 frame 1647 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1648 if (fSwsContext == NULL) { 1649 fSwsContext = sws_getContext(displayWidth, displayHeight, 1650 fContext->pix_fmt, displayWidth, displayHeight, 1651 colorspace_to_pixfmt(fOutputColorSpace), 1652 SWS_FAST_BILINEAR, NULL, NULL, NULL); 1653 } 1654 #else 1655 if (fFormatConversionFunc == NULL) { 1656 fFormatConversionFunc = resolve_colorspace(fOutputColorSpace, 1657 fContext->pix_fmt, displayWidth, displayHeight); 1658 } 1659 #endif 1660 1661 fDecodedDataSizeInBytes = fHeader.size_used; 1662 1663 if (fDecodedData == NULL) { 1664 const size_t kOptimalAlignmentForColorConversion = 32; 1665 posix_memalign(reinterpret_cast<void**>(&fDecodedData), 1666 kOptimalAlignmentForColorConversion, fDecodedDataSizeInBytes); 1667 } 1668 if (fDecodedData == NULL) 1669 return B_NO_MEMORY; 1670 1671 fPostProcessedDecodedPicture->data[0] = fDecodedData; 1672 fPostProcessedDecodedPicture->linesize[0] 1673 = fHeader.u.raw_video.bytes_per_row; 1674 1675 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1676 if (fSwsContext != NULL) { 1677 #else 1678 if (fFormatConversionFunc != NULL) { 1679 #endif 1680 if (useDeinterlacedPicture) { 1681 AVFrame deinterlacedFrame; 1682 deinterlacedFrame.data[0] = deinterlacedPicture.data[0]; 1683 deinterlacedFrame.data[1] = deinterlacedPicture.data[1]; 1684 deinterlacedFrame.data[2] = deinterlacedPicture.data[2]; 1685 deinterlacedFrame.data[3] = deinterlacedPicture.data[3]; 1686 deinterlacedFrame.linesize[0] 1687 = deinterlacedPicture.linesize[0]; 1688 deinterlacedFrame.linesize[1] 1689 = deinterlacedPicture.linesize[1]; 1690 deinterlacedFrame.linesize[2] 1691 = deinterlacedPicture.linesize[2]; 1692 deinterlacedFrame.linesize[3] 1693 = deinterlacedPicture.linesize[3]; 1694 1695 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1696 sws_scale(fSwsContext, deinterlacedFrame.data, 1697 
deinterlacedFrame.linesize, 0, displayHeight, 1698 fPostProcessedDecodedPicture->data, 1699 fPostProcessedDecodedPicture->linesize); 1700 #else 1701 (*fFormatConversionFunc)(&deinterlacedFrame, 1702 fPostProcessedDecodedPicture, displayWidth, displayHeight); 1703 #endif 1704 } else { 1705 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 1706 sws_scale(fSwsContext, fRawDecodedPicture->data, 1707 fRawDecodedPicture->linesize, 0, displayHeight, 1708 fPostProcessedDecodedPicture->data, 1709 fPostProcessedDecodedPicture->linesize); 1710 #else 1711 (*fFormatConversionFunc)(fRawDecodedPicture, 1712 fPostProcessedDecodedPicture, displayWidth, displayHeight); 1713 #endif 1714 } 1715 } 1716 1717 if (fRawDecodedPicture->interlaced_frame) 1718 avpicture_free(&deinterlacedPicture); 1719 1720 return B_OK; 1721 } 1722