/*
 * Copyright (C) 2001 Carlos Hasan
 * Copyright (C) 2001 François Revol
 * Copyright (C) 2001 Axel Dörfler
 * Copyright (C) 2004 Marcus Overhagen
 * Copyright (C) 2009 Stephan Amßus <superstippi@gmx.de>
 * Copyright (C) 2014 Colin Günther <coling@gmx.de>
 * Copyright (C) 2015 Adrien Destugues <pulkomandy@pulkomandy.tk>
 *
 * All rights reserved. Distributed under the terms of the MIT License.
 */

//! libavcodec based decoder for Haiku


#include "AVCodecDecoder.h"

#include <new>

#include <assert.h>
#include <string.h>

#include <Bitmap.h>
#include <Debug.h>

#include "Utilities.h"


#undef TRACE
//#define TRACE_AV_CODEC
#ifdef TRACE_AV_CODEC
#	define TRACE(x...)	printf(x)
#	define TRACE_AUDIO(x...)	printf(x)
#	define TRACE_VIDEO(x...)	printf(x)
#else
#	define TRACE(x...)
#	define TRACE_AUDIO(x...)
#	define TRACE_VIDEO(x...)
#endif

//#define LOG_STREAM_TO_FILE
#ifdef LOG_STREAM_TO_FILE
#	include <File.h>
static BFile sAudioStreamLogFile(
	"/boot/home/Desktop/AVCodecDebugAudioStream.raw",
	B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
static BFile sVideoStreamLogFile(
	"/boot/home/Desktop/AVCodecDebugVideoStream.raw",
	B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
static int sDumpedPackets = 0;
#endif


// Compatibility shims so this file builds against a range of libavcodec
// versions: newer releases renamed CodecID to AVCodecID, older ones lack
// the av_frame_*() allocation API.
#if LIBAVCODEC_VERSION_INT > ((54 << 16) | (50 << 8))
typedef AVCodecID CodecID;
#endif
#if LIBAVCODEC_VERSION_INT < ((55 << 16) | (45 << 8))
#define av_frame_alloc	avcodec_alloc_frame
#define av_frame_unref	avcodec_get_frame_defaults
#define av_frame_free	avcodec_free_frame
#endif


// Mirrors the on-disk WAVEFORMATEX layout found at the start of
// B_WAV_FORMAT_FAMILY info buffers; extra_size bytes of codec-specific
// extra data follow directly after this header.
struct wave_format_ex {
	uint16 format_tag;
	uint16 channels;
	uint32 frames_per_sec;
	uint32 avg_bytes_per_sec;
	uint16 block_align;
	uint16 bits_per_sample;
	uint16 extra_size;
	// extra_data[extra_size]
} _PACKED;

// Snapshot of selected codec context properties belonging to the first
// audio frame in fDecodedData; stored behind fRawDecodedAudio->opaque.
struct avformat_codec_context {
	int sample_rate;
	int channels;
};


//
profiling related globals 82 #define DO_PROFILING 0 83 84 static bigtime_t decodingTime = 0; 85 static bigtime_t conversionTime = 0; 86 static long profileCounter = 0; 87 88 89 AVCodecDecoder::AVCodecDecoder() 90 : 91 fHeader(), 92 fInputFormat(), 93 fFrame(0), 94 fIsAudio(false), 95 fCodec(NULL), 96 fContext(avcodec_alloc_context3(NULL)), 97 fResampleContext(NULL), 98 fDecodedData(NULL), 99 fDecodedDataSizeInBytes(0), 100 fPostProcessedDecodedPicture(av_frame_alloc()), 101 fRawDecodedPicture(av_frame_alloc()), 102 fRawDecodedAudio(av_frame_alloc()), 103 104 fCodecInitDone(false), 105 106 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 107 fSwsContext(NULL), 108 #else 109 fFormatConversionFunc(NULL), 110 #endif 111 112 fExtraData(NULL), 113 fExtraDataSize(0), 114 fBlockAlign(0), 115 116 fOutputColorSpace(B_NO_COLOR_SPACE), 117 fOutputFrameCount(0), 118 fOutputFrameRate(1.0), 119 fOutputFrameSize(0), 120 fInputFrameSize(0), 121 122 fChunkBuffer(NULL), 123 fChunkBufferSize(0), 124 fAudioDecodeError(false), 125 126 fDecodedDataBuffer(av_frame_alloc()), 127 fDecodedDataBufferOffset(0), 128 fDecodedDataBufferSize(0) 129 { 130 TRACE("AVCodecDecoder::AVCodecDecoder()\n"); 131 132 system_info info; 133 get_system_info(&info); 134 135 fContext->err_recognition = AV_EF_CAREFUL; 136 fContext->error_concealment = 3; 137 fContext->thread_count = info.cpu_count; 138 } 139 140 141 AVCodecDecoder::~AVCodecDecoder() 142 { 143 TRACE("[%c] AVCodecDecoder::~AVCodecDecoder()\n", fIsAudio?('a'):('v')); 144 145 #if DO_PROFILING 146 if (profileCounter > 0) { 147 printf("[%c] profile: d1 = %lld, d2 = %lld (%Ld)\n", 148 fIsAudio?('a'):('v'), decodingTime / profileCounter, 149 conversionTime / profileCounter, fFrame); 150 } 151 #endif 152 153 if (fCodecInitDone) 154 avcodec_close(fContext); 155 156 swr_free(&fResampleContext); 157 free(fChunkBuffer); 158 free(fDecodedData); 159 160 av_free(fPostProcessedDecodedPicture); 161 av_free(fRawDecodedPicture); 162 av_free(fRawDecodedAudio->opaque); 163 
av_free(fRawDecodedAudio); 164 av_free(fContext); 165 av_free(fDecodedDataBuffer); 166 167 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 168 if (fSwsContext != NULL) 169 sws_freeContext(fSwsContext); 170 #endif 171 172 delete[] fExtraData; 173 } 174 175 176 void 177 AVCodecDecoder::GetCodecInfo(media_codec_info* mci) 178 { 179 snprintf(mci->short_name, 32, "%s", fCodec->name); 180 snprintf(mci->pretty_name, 96, "%s", fCodec->long_name); 181 mci->id = 0; 182 mci->sub_id = fCodec->id; 183 } 184 185 186 status_t 187 AVCodecDecoder::Setup(media_format* ioEncodedFormat, const void* infoBuffer, 188 size_t infoSize) 189 { 190 if (ioEncodedFormat->type != B_MEDIA_ENCODED_AUDIO 191 && ioEncodedFormat->type != B_MEDIA_ENCODED_VIDEO) 192 return B_ERROR; 193 194 fIsAudio = (ioEncodedFormat->type == B_MEDIA_ENCODED_AUDIO); 195 TRACE("[%c] AVCodecDecoder::Setup()\n", fIsAudio?('a'):('v')); 196 197 #ifdef TRACE_AV_CODEC 198 char buffer[1024]; 199 string_for_format(*ioEncodedFormat, buffer, sizeof(buffer)); 200 TRACE("[%c] input_format = %s\n", fIsAudio?('a'):('v'), buffer); 201 TRACE("[%c] infoSize = %ld\n", fIsAudio?('a'):('v'), infoSize); 202 TRACE("[%c] user_data_type = %08lx\n", fIsAudio?('a'):('v'), 203 ioEncodedFormat->user_data_type); 204 TRACE("[%c] meta_data_size = %ld\n", fIsAudio?('a'):('v'), 205 ioEncodedFormat->MetaDataSize()); 206 #endif 207 208 media_format_description description; 209 if (BMediaFormats().GetCodeFor(*ioEncodedFormat, 210 B_MISC_FORMAT_FAMILY, &description) == B_OK) { 211 if (description.u.misc.file_format != 'ffmp') 212 return B_NOT_SUPPORTED; 213 fCodec = avcodec_find_decoder(static_cast<CodecID>( 214 description.u.misc.codec)); 215 if (fCodec == NULL) { 216 TRACE(" unable to find the correct FFmpeg " 217 "decoder (id = %lu)\n", description.u.misc.codec); 218 return B_ERROR; 219 } 220 TRACE(" found decoder %s\n", fCodec->name); 221 222 const void* extraData = infoBuffer; 223 fExtraDataSize = infoSize; 224 if (description.family == B_WAV_FORMAT_FAMILY 225 
&& infoSize >= sizeof(wave_format_ex)) { 226 TRACE(" trying to use wave_format_ex\n"); 227 // Special case extra data in B_WAV_FORMAT_FAMILY 228 const wave_format_ex* waveFormatData 229 = (const wave_format_ex*)infoBuffer; 230 231 size_t waveFormatSize = infoSize; 232 if (waveFormatData != NULL && waveFormatSize > 0) { 233 fBlockAlign = waveFormatData->block_align; 234 TRACE(" found block align: %d\n", fBlockAlign); 235 fExtraDataSize = waveFormatData->extra_size; 236 // skip the wave_format_ex from the extra data. 237 extraData = waveFormatData + 1; 238 } 239 } else { 240 if (fIsAudio) { 241 fBlockAlign 242 = ioEncodedFormat->u.encoded_audio.output.buffer_size; 243 TRACE(" using buffer_size as block align: %d\n", 244 fBlockAlign); 245 } 246 } 247 if (extraData != NULL && fExtraDataSize > 0) { 248 TRACE("AVCodecDecoder: extra data size %ld\n", infoSize); 249 delete[] fExtraData; 250 fExtraData = new(std::nothrow) char[fExtraDataSize]; 251 if (fExtraData != NULL) 252 memcpy(fExtraData, infoBuffer, fExtraDataSize); 253 else 254 fExtraDataSize = 0; 255 } 256 257 fInputFormat = *ioEncodedFormat; 258 return B_OK; 259 } else { 260 TRACE("AVCodecDecoder: BMediaFormats().GetCodeFor() failed.\n"); 261 } 262 263 printf("AVCodecDecoder::Setup failed!\n"); 264 return B_ERROR; 265 } 266 267 268 status_t 269 AVCodecDecoder::SeekedTo(int64 frame, bigtime_t time) 270 { 271 status_t ret = B_OK; 272 // Reset the FFmpeg codec to flush buffers, so we keep the sync 273 if (fCodecInitDone) { 274 avcodec_flush_buffers(fContext); 275 _ResetTempPacket(); 276 } 277 278 // Flush internal buffers as well. 
279 free(fChunkBuffer); 280 fChunkBuffer = NULL; 281 fChunkBufferSize = 0; 282 fDecodedDataBufferOffset = 0; 283 fDecodedDataBufferSize = 0; 284 fDecodedDataSizeInBytes = 0; 285 286 fFrame = frame; 287 288 return ret; 289 } 290 291 292 status_t 293 AVCodecDecoder::NegotiateOutputFormat(media_format* inOutFormat) 294 { 295 TRACE("AVCodecDecoder::NegotiateOutputFormat() [%c] \n", 296 fIsAudio?('a'):('v')); 297 298 #ifdef TRACE_AV_CODEC 299 char buffer[1024]; 300 string_for_format(*inOutFormat, buffer, sizeof(buffer)); 301 TRACE(" [%c] requested format = %s\n", fIsAudio?('a'):('v'), buffer); 302 #endif 303 304 if (fIsAudio) 305 return _NegotiateAudioOutputFormat(inOutFormat); 306 else 307 return _NegotiateVideoOutputFormat(inOutFormat); 308 } 309 310 311 status_t 312 AVCodecDecoder::Decode(void* outBuffer, int64* outFrameCount, 313 media_header* mediaHeader, media_decode_info* info) 314 { 315 if (!fCodecInitDone) 316 return B_NO_INIT; 317 318 status_t ret; 319 if (fIsAudio) 320 ret = _DecodeAudio(outBuffer, outFrameCount, mediaHeader, info); 321 else 322 ret = _DecodeVideo(outBuffer, outFrameCount, mediaHeader, info); 323 324 return ret; 325 } 326 327 328 // #pragma mark - 329 330 331 void 332 AVCodecDecoder::_ResetTempPacket() 333 { 334 av_init_packet(&fTempPacket); 335 fTempPacket.size = 0; 336 fTempPacket.data = NULL; 337 } 338 339 340 status_t 341 AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat) 342 { 343 TRACE("AVCodecDecoder::_NegotiateAudioOutputFormat()\n"); 344 345 _ApplyEssentialAudioContainerPropertiesToContext(); 346 // This makes audio formats play that encode the audio properties in 347 // the audio container (e.g. WMA) and not in the audio frames 348 // themself (e.g. MP3). 349 // Note: Doing this step unconditionally is OK, because the first call 350 // to _DecodeNextAudioFrameChunk() will update the essential audio 351 // format properties accordingly regardless of the settings here. 
352 353 // close any previous instance 354 if (fCodecInitDone) { 355 fCodecInitDone = false; 356 avcodec_close(fContext); 357 } 358 359 if (avcodec_open2(fContext, fCodec, NULL) >= 0) 360 fCodecInitDone = true; 361 else { 362 TRACE("avcodec_open() failed to init codec!\n"); 363 return B_ERROR; 364 } 365 366 free(fChunkBuffer); 367 fChunkBuffer = NULL; 368 fChunkBufferSize = 0; 369 fAudioDecodeError = false; 370 fDecodedDataBufferOffset = 0; 371 fDecodedDataBufferSize = 0; 372 373 _ResetTempPacket(); 374 375 status_t statusOfDecodingFirstFrameChunk = _DecodeNextAudioFrameChunk(); 376 if (statusOfDecodingFirstFrameChunk != B_OK) { 377 TRACE("[a] decoding first audio frame chunk failed\n"); 378 return B_ERROR; 379 } 380 381 media_multi_audio_format outputAudioFormat; 382 outputAudioFormat = media_raw_audio_format::wildcard; 383 outputAudioFormat.byte_order = B_MEDIA_HOST_ENDIAN; 384 outputAudioFormat.frame_rate = fContext->sample_rate; 385 outputAudioFormat.channel_count = fContext->channels; 386 ConvertAVSampleFormatToRawAudioFormat(fContext->sample_fmt, 387 outputAudioFormat.format); 388 // Check that format is not still a wild card! 
389 if (outputAudioFormat.format == 0) { 390 TRACE(" format still a wild-card, assuming B_AUDIO_SHORT.\n"); 391 outputAudioFormat.format = media_raw_audio_format::B_AUDIO_SHORT; 392 } 393 outputAudioFormat.buffer_size = inOutFormat->u.raw_audio.buffer_size; 394 // Check that buffer_size has a sane value 395 size_t sampleSize = outputAudioFormat.format 396 & media_raw_audio_format::B_AUDIO_SIZE_MASK; 397 if (outputAudioFormat.buffer_size == 0) { 398 outputAudioFormat.buffer_size = 512 * sampleSize 399 * outputAudioFormat.channel_count; 400 } 401 402 inOutFormat->type = B_MEDIA_RAW_AUDIO; 403 inOutFormat->u.raw_audio = outputAudioFormat; 404 inOutFormat->require_flags = 0; 405 inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS; 406 407 // Initialize variables needed to manage decoding as much audio frames as 408 // needed to fill the buffer_size. 409 fOutputFrameSize = sampleSize * outputAudioFormat.channel_count; 410 fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize; 411 fOutputFrameRate = outputAudioFormat.frame_rate; 412 if (av_sample_fmt_is_planar(fContext->sample_fmt)) 413 fInputFrameSize = sampleSize; 414 else 415 fInputFrameSize = fOutputFrameSize; 416 417 fRawDecodedAudio->opaque 418 = av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context)); 419 if (fRawDecodedAudio->opaque == NULL) 420 return B_NO_MEMORY; 421 422 if (av_sample_fmt_is_planar(fContext->sample_fmt)) { 423 fResampleContext = swr_alloc_set_opts(NULL, 424 fContext->channel_layout, fContext->request_sample_fmt, 425 fContext->sample_rate, 426 fContext->channel_layout, fContext->sample_fmt, fContext->sample_rate, 427 0, NULL); 428 swr_init(fResampleContext); 429 } 430 431 TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, " 432 "output frame size: %d, count: %ld, rate: %.2f\n", 433 fContext->bit_rate, fContext->sample_rate, fContext->channels, 434 fOutputFrameSize, fOutputFrameCount, fOutputFrameRate); 435 436 return B_OK; 437 } 438 439 440 status_t 441 
AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat) 442 { 443 TRACE("AVCodecDecoder::_NegotiateVideoOutputFormat()\n"); 444 445 TRACE(" requested video format 0x%x\n", 446 inOutFormat->u.raw_video.display.format); 447 448 _ApplyEssentialVideoContainerPropertiesToContext(); 449 // This makes video formats play that encode the video properties in 450 // the video container (e.g. WMV) and not in the video frames 451 // themself (e.g. MPEG2). 452 // Note: Doing this step unconditionally is OK, because the first call 453 // to _DecodeNextVideoFrame() will update the essential video format 454 // properties accordingly regardless of the settings here. 455 456 bool codecCanHandleIncompleteFrames 457 = (fCodec->capabilities & CODEC_CAP_TRUNCATED) != 0; 458 if (codecCanHandleIncompleteFrames) { 459 // Expect and handle video frames to be splitted across consecutive 460 // data chunks. 461 fContext->flags |= CODEC_FLAG_TRUNCATED; 462 } 463 464 // close any previous instance 465 if (fCodecInitDone) { 466 fCodecInitDone = false; 467 avcodec_close(fContext); 468 } 469 470 if (avcodec_open2(fContext, fCodec, NULL) >= 0) 471 fCodecInitDone = true; 472 else { 473 TRACE("avcodec_open() failed to init codec!\n"); 474 return B_ERROR; 475 } 476 477 // Make MediaPlayer happy (if not in rgb32 screen depth and no overlay, 478 // it will only ask for YCbCr, which DrawBitmap doesn't handle, so the 479 // default colordepth is RGB32). 
480 if (inOutFormat->u.raw_video.display.format == B_YCbCr422) 481 fOutputColorSpace = B_YCbCr422; 482 else 483 fOutputColorSpace = B_RGB32; 484 485 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 486 if (fSwsContext != NULL) 487 sws_freeContext(fSwsContext); 488 fSwsContext = NULL; 489 #else 490 fFormatConversionFunc = 0; 491 #endif 492 493 free(fChunkBuffer); 494 fChunkBuffer = NULL; 495 fChunkBufferSize = 0; 496 497 _ResetTempPacket(); 498 499 status_t statusOfDecodingFirstFrame = _DecodeNextVideoFrame(); 500 if (statusOfDecodingFirstFrame != B_OK) { 501 TRACE("[v] decoding first video frame failed\n"); 502 return B_ERROR; 503 } 504 505 // Note: fSwsContext / fFormatConversionFunc should have been initialized 506 // by first call to _DecodeNextVideoFrame() above. 507 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 508 if (fSwsContext == NULL) { 509 TRACE("No SWS Scale context or decoder has not set the pixel format " 510 "yet!\n"); 511 } 512 #else 513 if (fFormatConversionFunc == NULL) { 514 TRACE("no pixel format conversion function found or decoder has " 515 "not set the pixel format yet!\n"); 516 } 517 #endif 518 519 inOutFormat->type = B_MEDIA_RAW_VIDEO; 520 inOutFormat->require_flags = 0; 521 inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS; 522 inOutFormat->u.raw_video = fInputFormat.u.encoded_video.output; 523 inOutFormat->u.raw_video.interlace = 1; 524 // Progressive (non-interlaced) video frames are delivered 525 inOutFormat->u.raw_video.first_active 526 = fHeader.u.raw_video.first_active_line; 527 inOutFormat->u.raw_video.last_active = fHeader.u.raw_video.line_count; 528 inOutFormat->u.raw_video.pixel_width_aspect 529 = fHeader.u.raw_video.pixel_width_aspect; 530 inOutFormat->u.raw_video.pixel_height_aspect 531 = fHeader.u.raw_video.pixel_height_aspect; 532 #if 0 533 // This was added by Colin Günther in order to handle streams with a 534 // variable frame rate. 
fOutputFrameRate is computed from the stream 535 // time_base, but it actually assumes a timebase equal to the FPS. As far 536 // as I can see, a stream with a variable frame rate would have a higher 537 // resolution time_base and increment the pts (presentation time) of each 538 // frame by a value bigger than one. 539 // 540 // Fixed rate stream: 541 // time_base = 1/50s, frame PTS = 1, 2, 3... (for 50Hz) 542 // 543 // Variable rate stream: 544 // time_base = 1/300s, frame PTS = 6, 12, 18, ... (for 50Hz) 545 // time_base = 1/300s, frame PTS = 5, 10, 15, ... (for 60Hz) 546 // 547 // The fOutputFrameRate currently does not take this into account and 548 // ignores the PTS. This results in playing the above sample at 300Hz 549 // instead of 50 or 60. 550 // 551 // However, comparing the PTS for two consecutive implies we have already 552 // decoded 2 frames, which may not be the case when this method is first 553 // called. 554 inOutFormat->u.raw_video.field_rate = fOutputFrameRate; 555 // Was calculated by first call to _DecodeNextVideoFrame() 556 #endif 557 inOutFormat->u.raw_video.display.format = fOutputColorSpace; 558 inOutFormat->u.raw_video.display.line_width 559 = fHeader.u.raw_video.display_line_width; 560 inOutFormat->u.raw_video.display.line_count 561 = fHeader.u.raw_video.display_line_count; 562 inOutFormat->u.raw_video.display.bytes_per_row 563 = fHeader.u.raw_video.bytes_per_row; 564 565 #ifdef TRACE_AV_CODEC 566 char buffer[1024]; 567 string_for_format(*inOutFormat, buffer, sizeof(buffer)); 568 TRACE("[v] outFormat = %s\n", buffer); 569 TRACE(" returned video format 0x%x\n", 570 inOutFormat->u.raw_video.display.format); 571 #endif 572 573 return B_OK; 574 } 575 576 577 /*! \brief Fills the outBuffer with one or more already decoded audio frames. 578 579 Besides the main duty described above, this method also fills out the other 580 output parameters as documented below. 
581 582 \param outBuffer Pointer to the output buffer to copy the decoded audio 583 frames to. 584 \param outFrameCount Pointer to the output variable to assign the number of 585 copied audio frames (usually several audio frames at once). 586 \param mediaHeader Pointer to the output media header that contains the 587 properties of the decoded audio frame being the first in the outBuffer. 588 \param info Specifies additional decoding parameters. (Note: unused). 589 590 \returns B_OK Decoding audio frames succeeded. 591 \returns B_LAST_BUFFER_ERROR There are no more audio frames available. 592 \returns Other error codes 593 */ 594 status_t 595 AVCodecDecoder::_DecodeAudio(void* outBuffer, int64* outFrameCount, 596 media_header* mediaHeader, media_decode_info* info) 597 { 598 TRACE_AUDIO("AVCodecDecoder::_DecodeAudio(audio start_time %.6fs)\n", 599 mediaHeader->start_time / 1000000.0); 600 601 status_t audioDecodingStatus 602 = fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextAudioFrame(); 603 604 if (audioDecodingStatus != B_OK) 605 return audioDecodingStatus; 606 607 *outFrameCount = fDecodedDataSizeInBytes / fOutputFrameSize; 608 *mediaHeader = fHeader; 609 memcpy(outBuffer, fDecodedData, fDecodedDataSizeInBytes); 610 611 fDecodedDataSizeInBytes = 0; 612 613 return B_OK; 614 } 615 616 617 /*! \brief Fills the outBuffer with an already decoded video frame. 618 619 Besides the main duty described above, this method also fills out the other 620 output parameters as documented below. 621 622 \param outBuffer Pointer to the output buffer to copy the decoded video 623 frame to. 624 \param outFrameCount Pointer to the output variable to assign the number of 625 copied video frames (usually one video frame). 626 \param mediaHeader Pointer to the output media header that contains the 627 decoded video frame properties. 628 \param info Specifies additional decoding parameters. (Note: unused). 629 630 \returns B_OK Decoding a video frame succeeded. 
631 \returns B_LAST_BUFFER_ERROR There are no more video frames available. 632 \returns Other error codes 633 */ 634 status_t 635 AVCodecDecoder::_DecodeVideo(void* outBuffer, int64* outFrameCount, 636 media_header* mediaHeader, media_decode_info* info) 637 { 638 status_t videoDecodingStatus 639 = fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextVideoFrame(); 640 641 if (videoDecodingStatus != B_OK) 642 return videoDecodingStatus; 643 644 *outFrameCount = 1; 645 *mediaHeader = fHeader; 646 memcpy(outBuffer, fDecodedData, mediaHeader->size_used); 647 648 fDecodedDataSizeInBytes = 0; 649 650 return B_OK; 651 } 652 653 654 /*! \brief Decodes next audio frame. 655 656 We decode at least one audio frame into fDecodedData. To achieve this goal, 657 we might need to request several chunks of encoded data resulting in a 658 variable execution time of this function. 659 660 The length of the decoded audio frame(s) is stored in 661 fDecodedDataSizeInBytes. If this variable is greater than zero you can 662 assert that all audio frames in fDecodedData are valid. 663 664 It is assumed that the number of expected audio frames is stored in 665 fOutputFrameCount. So _DecodeNextAudioFrame() must be called only after 666 fOutputFrameCount has been set. 667 668 Note: fOutputFrameCount contains the maximum number of frames a caller 669 of BMediaDecoder::Decode() expects to receive. There is a direct 670 relationship between fOutputFrameCount and the buffer size a caller of 671 BMediaDecoder::Decode() will provide so we make sure to respect this limit 672 for fDecodedDataSizeInBytes. 673 674 On return with status code B_OK the following conditions hold true: 675 1. fDecodedData contains as much audio frames as the caller of 676 BMediaDecoder::Decode() expects. 677 2. fDecodedData contains lesser audio frames as the caller of 678 BMediaDecoder::Decode() expects only when one of the following 679 conditions hold true: 680 i No more audio frames left. 
Consecutive calls to 681 _DecodeNextAudioFrame() will then result in the return of 682 status code B_LAST_BUFFER_ERROR. 683 ii TODO: A change in the size of the audio frames. 684 3. fHeader is populated with the audio frame properties of the first 685 audio frame in fDecodedData. Especially the start_time field of 686 fHeader relates to that first audio frame. Start times of 687 consecutive audio frames in fDecodedData have to be calculated 688 manually (using the frame rate and the frame duration) if the 689 caller needs them. 690 691 TODO: Handle change of channel_count. Such a change results in a change of 692 the audio frame size and thus has different buffer requirements. 693 The most sane approach for implementing this is to return the audio frames 694 that were still decoded with the previous channel_count and inform the 695 client of BMediaDecoder::Decode() about the change so that it can adapt to 696 it. Furthermore we need to adapt our fDecodedData to the new buffer size 697 requirements accordingly. 698 699 \returns B_OK when we successfully decoded enough audio frames 700 \returns B_LAST_BUFFER_ERROR when there are no more audio frames available. 701 \returns Other Errors 702 */ 703 status_t 704 AVCodecDecoder::_DecodeNextAudioFrame() 705 { 706 assert(fTempPacket.size >= 0); 707 assert(fDecodedDataSizeInBytes == 0); 708 // _DecodeNextAudioFrame needs to be called on empty fDecodedData only! 709 // If this assert holds wrong we have a bug somewhere. 
710 711 status_t resetStatus = _ResetRawDecodedAudio(); 712 if (resetStatus != B_OK) 713 return resetStatus; 714 715 while (fRawDecodedAudio->nb_samples < fOutputFrameCount) { 716 _CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow(); 717 718 bool decodedDataBufferHasData = fDecodedDataBufferSize > 0; 719 if (decodedDataBufferHasData) { 720 _MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes(); 721 continue; 722 } 723 724 status_t decodeAudioChunkStatus = _DecodeNextAudioFrameChunk(); 725 if (decodeAudioChunkStatus != B_OK) 726 return decodeAudioChunkStatus; 727 } 728 729 fFrame += fRawDecodedAudio->nb_samples; 730 fDecodedDataSizeInBytes = fRawDecodedAudio->linesize[0]; 731 732 _UpdateMediaHeaderForAudioFrame(); 733 734 #ifdef DEBUG 735 dump_ffframe_audio(fRawDecodedAudio, "ffaudi"); 736 #endif 737 738 TRACE_AUDIO(" frame count: %ld current: %lld\n", 739 fRawDecodedAudio->nb_samples, fFrame); 740 741 return B_OK; 742 } 743 744 745 /*! \brief Applies all essential audio input properties to fContext that were 746 passed to AVCodecDecoder when Setup() was called. 747 748 Note: This function must be called before the AVCodec is opened via 749 avcodec_open2(). Otherwise the behaviour of FFMPEG's audio decoding 750 function avcodec_decode_audio4() is undefined. 751 752 Essential properties applied from fInputFormat.u.encoded_audio: 753 - bit_rate copied to fContext->bit_rate 754 - frame_size copied to fContext->frame_size 755 - output.format converted to fContext->sample_fmt 756 - output.frame_rate copied to fContext->sample_rate 757 - output.channel_count copied to fContext->channels 758 759 Other essential properties being applied: 760 - fBlockAlign to fContext->block_align 761 - fExtraData to fContext->extradata 762 - fExtraDataSize to fContext->extradata_size 763 764 TODO: Either the following documentation section should be removed or this 765 TODO when it is clear whether fInputFormat.MetaData() and 766 fInputFormat.MetaDataSize() have to be applied to fContext. 
See the related 767 TODO in the method implementation. 768 Only applied when fInputFormat.MetaDataSize() is greater than zero: 769 - fInputFormat.MetaData() to fContext->extradata 770 - fInputFormat.MetaDataSize() to fContext->extradata_size 771 */ 772 void 773 AVCodecDecoder::_ApplyEssentialAudioContainerPropertiesToContext() 774 { 775 media_encoded_audio_format containerProperties 776 = fInputFormat.u.encoded_audio; 777 778 fContext->bit_rate 779 = static_cast<int>(containerProperties.bit_rate); 780 fContext->frame_size 781 = static_cast<int>(containerProperties.frame_size); 782 ConvertRawAudioFormatToAVSampleFormat( 783 containerProperties.output.format, fContext->sample_fmt); 784 #if LIBAVCODEC_VERSION_INT > ((52 << 16) | (114 << 8)) 785 ConvertRawAudioFormatToAVSampleFormat( 786 containerProperties.output.format, fContext->request_sample_fmt); 787 #endif 788 fContext->sample_rate 789 = static_cast<int>(containerProperties.output.frame_rate); 790 fContext->channels 791 = static_cast<int>(containerProperties.output.channel_count); 792 // Check that channel count is not still a wild card! 793 if (fContext->channels == 0) { 794 TRACE(" channel_count still a wild-card, assuming stereo.\n"); 795 fContext->channels = 2; 796 } 797 798 fContext->block_align = fBlockAlign; 799 fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData); 800 fContext->extradata_size = fExtraDataSize; 801 802 // TODO: This probably needs to go away, there is some misconception 803 // about extra data / info buffer and meta data. See 804 // Reader::GetStreamInfo(). The AVFormatReader puts extradata and 805 // extradata_size into media_format::MetaData(), but used to ignore 806 // the infoBuffer passed to GetStreamInfo(). I think this may be why 807 // the code below was added. 
808 if (fInputFormat.MetaDataSize() > 0) { 809 fContext->extradata = static_cast<uint8_t*>( 810 const_cast<void*>(fInputFormat.MetaData())); 811 fContext->extradata_size = fInputFormat.MetaDataSize(); 812 } 813 814 TRACE(" bit_rate %d, sample_rate %d, channels %d, block_align %d, " 815 "extradata_size %d\n", fContext->bit_rate, fContext->sample_rate, 816 fContext->channels, fContext->block_align, fContext->extradata_size); 817 } 818 819 820 /*! \brief Resets important fields in fRawDecodedVideo to their default values. 821 822 Note: Also initializes fDecodedData if not done already. 823 824 \returns B_OK Resetting successfully completed. 825 \returns B_NO_MEMORY No memory left for correct operation. 826 */ 827 status_t 828 AVCodecDecoder::_ResetRawDecodedAudio() 829 { 830 if (fDecodedData == NULL) { 831 size_t maximumSizeOfDecodedData = fOutputFrameCount * fOutputFrameSize; 832 fDecodedData 833 = static_cast<uint8_t*>(malloc(maximumSizeOfDecodedData)); 834 } 835 if (fDecodedData == NULL) 836 return B_NO_MEMORY; 837 838 fRawDecodedAudio->data[0] = fDecodedData; 839 fRawDecodedAudio->linesize[0] = 0; 840 fRawDecodedAudio->format = AV_SAMPLE_FMT_NONE; 841 fRawDecodedAudio->pkt_dts = AV_NOPTS_VALUE; 842 fRawDecodedAudio->nb_samples = 0; 843 memset(fRawDecodedAudio->opaque, 0, sizeof(avformat_codec_context)); 844 845 return B_OK; 846 } 847 848 849 /*! \brief Checks fDecodedDataBufferSize and fTempPacket for invalid values, 850 reports them and assigns valid values. 851 852 Note: This method is intended to be called before any code is executed that 853 deals with moving, loading or decoding any audio frames. 854 */ 855 void 856 AVCodecDecoder::_CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow() 857 { 858 if (fDecodedDataBufferSize < 0) { 859 fprintf(stderr, "Decoding read past the end of the decoded data " 860 "buffer! 
%" B_PRId32 "\n", fDecodedDataBufferSize); 861 fDecodedDataBufferSize = 0; 862 } 863 if (fTempPacket.size < 0) { 864 fprintf(stderr, "Decoding read past the end of the temp packet! %d\n", 865 fTempPacket.size); 866 fTempPacket.size = 0; 867 } 868 } 869 870 871 /*! \brief Moves audio frames from fDecodedDataBuffer to fRawDecodedAudio (and 872 thus to fDecodedData) and updates the start times of fRawDecodedAudio, 873 fDecodedDataBuffer and fTempPacket accordingly. 874 875 When moving audio frames to fRawDecodedAudio this method also makes sure 876 that the following important fields of fRawDecodedAudio are populated and 877 updated with correct values: 878 - fRawDecodedAudio->data[0]: Points to first free byte of fDecodedData 879 - fRawDecodedAudio->linesize[0]: Total size of frames in fDecodedData 880 - fRawDecodedAudio->format: Format of first audio frame 881 - fRawDecodedAudio->pkt_dts: Start time of first audio frame 882 - fRawDecodedAudio->nb_samples: Number of audio frames 883 - fRawDecodedAudio->opaque: Contains the following fields for the first 884 audio frame: 885 - channels: Channel count of first audio frame 886 - sample_rate: Frame rate of first audio frame 887 888 This function assumes to be called only when the following assumptions 889 hold true: 890 1. There are decoded audio frames available in fDecodedDataBuffer 891 meaning that fDecodedDataBufferSize is greater than zero. 892 2. There is space left in fRawDecodedAudio to move some audio frames 893 in. This means that fRawDecodedAudio has lesser audio frames than 894 the maximum allowed (specified by fOutputFrameCount). 895 3. The audio frame rate is known so that we can calculate the time 896 range (covered by the moved audio frames) to update the start times 897 accordingly. 898 4. The field fRawDecodedAudio->opaque points to a memory block 899 representing a structure of type avformat_codec_context. 
	After this function returns the caller can safely make the following
	assumptions:
		1. The number of decoded audio frames in fDecodedDataBuffer is
		   decreased though it may still be greater than zero.
		2. The number of frames in fRawDecodedAudio has increased and all
		   important fields are updated (see listing above).
		3. Start times of fDecodedDataBuffer and fTempPacket were increased
		   with the time range covered by the moved audio frames.

	Note: This function raises an exception (by calling the debugger), when
	fDecodedDataBufferSize is not a multiple of fOutputFrameSize.
*/
void
AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
{
	assert(fDecodedDataBufferSize > 0);
	assert(fRawDecodedAudio->nb_samples < fOutputFrameCount);
	assert(fOutputFrameRate > 0);

	// Move at most as many frames as fit into the remaining output space.
	int32 outFrames = fOutputFrameCount - fRawDecodedAudio->nb_samples;
	int32 inFrames = fDecodedDataBufferSize;

	int32 frames = min_c(outFrames, inFrames);
	if (frames == 0)
		debugger("fDecodedDataBufferSize not multiple of frame size!");

	// Some decoders do not support format conversion on themselves, or use
	// "planar" audio (each channel separated instead of interleaved samples).
	// In that case, we use swresample to convert the data
	if (av_sample_fmt_is_planar(fContext->sample_fmt)) {
#if 0
		// Disabled swresample-based conversion path, kept for reference.
		const uint8_t* ptr[8];
		for (int i = 0; i < 8; i++) {
			if (fDecodedDataBuffer->data[i] == NULL)
				ptr[i] = NULL;
			else
				ptr[i] = fDecodedDataBuffer->data[i] + fDecodedDataBufferOffset;
		}

		// When there are more input frames than space in the output buffer,
		// we could feed everything to swr and it would buffer the extra data.
		// However, there is no easy way to flush that data without feeding more
		// input, and it makes our timestamp computations fail.
		// So, we feed only as much frames as we can get out, and handle the
		// buffering ourselves.
		// TODO Ideally, we should try to size our output buffer so that it can
		// always hold all the output (swr provides helper functions for this)
		inFrames = frames;
		frames = swr_convert(fResampleContext, fRawDecodedAudio->data,
			outFrames, ptr, inFrames);

		if (frames < 0)
			debugger("resampling failed");
#else
		// interleave planar audio with same format
		// NOTE(review): fInputFrameSize is used here as the per-channel
		// sample size (bytes per sample of one channel) — confirm this
		// matches how it is computed in the audio setup code.
		uintptr_t out = (uintptr_t)fRawDecodedAudio->data[0];
		int32 offset = fDecodedDataBufferOffset;
		for (int i = 0; i < frames; i++) {
			for (int j = 0; j < fContext->channels; j++) {
				memcpy((void*)out, fDecodedDataBuffer->data[j]
					+ offset, fInputFrameSize);
				out += fInputFrameSize;
			}
			offset += fInputFrameSize;
		}
		outFrames = frames;
		inFrames = frames;
#endif
	} else {
		// Interleaved data with the correct format already: plain copy.
		memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0]
			+ fDecodedDataBufferOffset, frames * fOutputFrameSize);
		outFrames = frames;
		inFrames = frames;
	}

	// Bytes consumed from fDecodedDataBuffer and bytes produced into
	// fDecodedData, respectively.
	size_t remainingSize = inFrames * fInputFrameSize;
	size_t decodedSize = outFrames * fOutputFrameSize;
	fDecodedDataBufferSize -= inFrames;

	// Only the very first move into fDecodedData establishes format, start
	// time and the per-frame properties stored in the opaque context.
	bool firstAudioFramesCopiedToRawDecodedAudio
		= fRawDecodedAudio->data[0] != fDecodedData;
	if (!firstAudioFramesCopiedToRawDecodedAudio) {
		fRawDecodedAudio->format = fDecodedDataBuffer->format;
		fRawDecodedAudio->pkt_dts = fDecodedDataBuffer->pkt_dts;

		avformat_codec_context* codecContext
			= static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque);
		codecContext->channels = fContext->channels;
		codecContext->sample_rate = fContext->sample_rate;
	}

	fRawDecodedAudio->data[0] += decodedSize;
	fRawDecodedAudio->linesize[0] += decodedSize;
	fRawDecodedAudio->nb_samples += outFrames;

	fDecodedDataBufferOffset += remainingSize;

	// Update start times accordingly
	bigtime_t framesTimeInterval = static_cast<bigtime_t>(
		(1000000LL * frames) / fOutputFrameRate);
	fDecodedDataBuffer->pkt_dts += framesTimeInterval;
		// Start time of buffer is updated in case that it contains
		// more audio frames to move.
	fTempPacket.dts += framesTimeInterval;
		// Start time of fTempPacket is updated in case the fTempPacket
		// contains more audio frames to decode.
}


/*!	\brief Decodes next chunk of audio frames.

	This method handles all the details of loading the input buffer
	(fChunkBuffer) at the right time and of calling FFMPEG often enough until
	some audio frames have been decoded.

	FFMPEG decides how many audio frames belong to a chunk. Because of that
	it is very likely that _DecodeNextAudioFrameChunk has to be called several
	times to decode enough audio frames to please the caller of
	BMediaDecoder::Decode().

	This function assumes to be called only when the following assumptions
	hold true:
		1. fDecodedDataBufferSize equals zero.

	After this function returns successfully the caller can safely make the
	following assumptions:
		1. fDecodedDataBufferSize is greater than zero.
		2. fDecodedDataBufferOffset is set to zero.
		3. fDecodedDataBuffer contains audio frames.


	\returns B_OK on successfully decoding one audio frame chunk.
	\returns B_LAST_BUFFER_ERROR No more audio frame chunks available. From
		this point on further calls will return this same error.
1035 \returns B_ERROR Decoding failed 1036 */ 1037 status_t 1038 AVCodecDecoder::_DecodeNextAudioFrameChunk() 1039 { 1040 assert(fDecodedDataBufferSize == 0); 1041 1042 while (fDecodedDataBufferSize == 0) { 1043 status_t loadingChunkStatus 1044 = _LoadNextChunkIfNeededAndAssignStartTime(); 1045 if (loadingChunkStatus != B_OK) 1046 return loadingChunkStatus; 1047 1048 status_t decodingStatus 1049 = _DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer(); 1050 if (decodingStatus != B_OK) { 1051 // Assume the audio decoded until now is broken so replace it with 1052 // some silence. 1053 memset(fDecodedData, 0, fRawDecodedAudio->linesize[0]); 1054 1055 if (!fAudioDecodeError) { 1056 // Report failure if not done already 1057 int32 chunkBufferOffset = fTempPacket.data - fChunkBuffer; 1058 printf("########### audio decode error, " 1059 "fTempPacket.size %d, fChunkBuffer data offset %" B_PRId32 1060 "\n", fTempPacket.size, chunkBufferOffset); 1061 fAudioDecodeError = true; 1062 } 1063 1064 // Assume that next audio chunk can be decoded so keep decoding. 1065 continue; 1066 } 1067 1068 fAudioDecodeError = false; 1069 } 1070 1071 return B_OK; 1072 } 1073 1074 1075 /*! \brief Tries to decode at least one audio frame and store it in the 1076 fDecodedDataBuffer. 1077 1078 This function assumes to be called only when the following assumptions 1079 hold true: 1080 1. fDecodedDataBufferSize equals zero. 1081 2. fTempPacket.size is greater than zero. 1082 1083 After this function returns successfully the caller can safely make the 1084 following assumptions: 1085 1. fDecodedDataBufferSize is greater than zero in the common case. 1086 Also see "Note" below. 1087 2. fTempPacket was updated to exclude the data chunk that was consumed 1088 by avcodec_decode_audio4(). 1089 3. fDecodedDataBufferOffset is set to zero. 1090 1091 When this function failed to decode at least one audio frame due to a 1092 decoding error the caller can safely make the following assumptions: 1093 1. 
fDecodedDataBufferSize equals zero. 1094 2. fTempPacket.size equals zero. 1095 1096 Note: It is possible that there wasn't any audio frame decoded into 1097 fDecodedDataBuffer after calling this function. This is normal and can 1098 happen when there was either a decoding error or there is some decoding 1099 delay in FFMPEGs audio decoder. Another call to this method is totally 1100 safe and is even expected as long as the calling assumptions hold true. 1101 1102 \returns B_OK Decoding successful. fDecodedDataBuffer contains decoded 1103 audio frames only when fDecodedDataBufferSize is greater than zero. 1104 fDecodedDataBuffer is empty, when avcodec_decode_audio4() didn't return 1105 audio frames due to delayed decoding or incomplete audio frames. 1106 \returns B_ERROR Decoding failed thus fDecodedDataBuffer contains no audio 1107 frames. 1108 */ 1109 status_t 1110 AVCodecDecoder::_DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer() 1111 { 1112 assert(fDecodedDataBufferSize == 0); 1113 assert(fTempPacket.size > 0); 1114 1115 memset(fDecodedDataBuffer, 0, sizeof(AVFrame)); 1116 av_frame_unref(fDecodedDataBuffer); 1117 fDecodedDataBufferOffset = 0; 1118 int gotAudioFrame = 0; 1119 1120 int encodedDataSizeInBytes = avcodec_decode_audio4(fContext, 1121 fDecodedDataBuffer, &gotAudioFrame, &fTempPacket); 1122 if (encodedDataSizeInBytes <= 0) { 1123 // Error or failure to produce decompressed output. 1124 // Skip the temp packet data entirely. 1125 fTempPacket.size = 0; 1126 return B_ERROR; 1127 } 1128 1129 fTempPacket.data += encodedDataSizeInBytes; 1130 fTempPacket.size -= encodedDataSizeInBytes; 1131 1132 bool gotNoAudioFrame = gotAudioFrame == 0; 1133 if (gotNoAudioFrame) 1134 return B_OK; 1135 1136 fDecodedDataBufferSize = fDecodedDataBuffer->nb_samples; 1137 if (fDecodedDataBufferSize < 0) 1138 fDecodedDataBufferSize = 0; 1139 1140 return B_OK; 1141 } 1142 1143 1144 /*! 
\brief Updates relevant fields of the class member fHeader with the 1145 properties of the most recently decoded audio frame. 1146 1147 The following fields of fHeader are updated: 1148 - fHeader.type 1149 - fHeader.file_pos 1150 - fHeader.orig_size 1151 - fHeader.start_time 1152 - fHeader.size_used 1153 - fHeader.u.raw_audio.frame_rate 1154 - fHeader.u.raw_audio.channel_count 1155 1156 It is assumed that this function is called only when the following asserts 1157 hold true: 1158 1. We actually got a new audio frame decoded by the audio decoder. 1159 2. fHeader wasn't updated for the new audio frame yet. You MUST call 1160 this method only once per decoded audio frame. 1161 3. fRawDecodedAudio's fields relate to the first audio frame contained 1162 in fDecodedData. Especially the following fields are of importance: 1163 - fRawDecodedAudio->pkt_dts: Start time of first audio frame 1164 - fRawDecodedAudio->opaque: Contains the following fields for 1165 the first audio frame: 1166 - channels: Channel count of first audio frame 1167 - sample_rate: Frame rate of first audio frame 1168 */ 1169 void 1170 AVCodecDecoder::_UpdateMediaHeaderForAudioFrame() 1171 { 1172 fHeader.type = B_MEDIA_RAW_AUDIO; 1173 fHeader.file_pos = 0; 1174 fHeader.orig_size = 0; 1175 fHeader.start_time = fRawDecodedAudio->pkt_dts; 1176 fHeader.size_used = fRawDecodedAudio->linesize[0]; 1177 1178 avformat_codec_context* codecContext 1179 = static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque); 1180 fHeader.u.raw_audio.channel_count = codecContext->channels; 1181 fHeader.u.raw_audio.frame_rate = codecContext->sample_rate; 1182 } 1183 1184 1185 /*! \brief Decodes next video frame. 1186 1187 We decode exactly one video frame into fDecodedData. To achieve this goal, 1188 we might need to request several chunks of encoded data resulting in a 1189 variable execution time of this function. 1190 1191 The length of the decoded video frame is stored in 1192 fDecodedDataSizeInBytes. 
If this variable is greater than zero, you can
	assert that there is a valid video frame available in fDecodedData.

	The decoded video frame in fDecodedData has color space conversion and
	deinterlacing already applied.

	To every decoded video frame there is a media_header populated in
	fHeader, containing the corresponding video frame properties.

	Normally every decoded video frame has a start_time field populated in the
	associated fHeader, that determines the presentation time of the frame.
	This relationship will only hold true, when each data chunk that is
	provided via GetNextChunk() contains data for exactly one encoded video
	frame (one complete frame) - not more and not less.

	We can decode data chunks that contain partial video frame data, too. In
	that case, you cannot trust the value of the start_time field in fHeader.
	We simply have no logic in place to establish a meaningful relationship
	between an incomplete frame and the start time it should be presented.
	Though this might change in the future.

	We can decode data chunks that contain more than one video frame, too. In
	that case, you cannot trust the value of the start_time field in fHeader.
	We simply have no logic in place to track the start_time across multiple
	video frames. So a meaningful relationship between the 2nd, 3rd, ... frame
	and the start time it should be presented isn't established at the moment.
	Though this might change in the future.

	More over the fOutputFrameRate variable is updated for every decoded video
	frame.

	On first call the member variables fSwsContext / fFormatConversionFunc are
	initialized.

	\returns B_OK when we successfully decoded one video frame
	\returns B_LAST_BUFFER_ERROR when there are no more video frames available.
	\returns B_NO_MEMORY when we have no memory left for correct operation.
	\returns Other Errors
*/
status_t
AVCodecDecoder::_DecodeNextVideoFrame()
{
#if 0
	// Well, I heard this was not supposed to happen, but it does
	// (for example with http://thud.us/videos/misc/xvid-samples/flyby-divx.avi
	// see #11409). Since that video otherwise plays fine when removing the
	// assert, I'm assuming we are being overcautious here and commenting it
	// out.
	assert(fTempPacket.size >= 0);
#endif

	while (true) {
		status_t loadingChunkStatus
			= _LoadNextChunkIfNeededAndAssignStartTime();
		if (loadingChunkStatus == B_LAST_BUFFER_ERROR)
			// No more input data: drain frames still buffered inside the
			// decoder before reporting the end of the stream.
			return _FlushOneVideoFrameFromDecoderBuffer();
		if (loadingChunkStatus != B_OK) {
			TRACE("AVCodecDecoder::_DecodeNextVideoFrame(): error from "
				"GetNextChunk(): %s\n", strerror(loadingChunkStatus));
			return loadingChunkStatus;
		}

#if DO_PROFILING
		bigtime_t startTime = system_time();
#endif

		// NOTE: In the FFMPEG 0.10.2 code example decoding_encoding.c, the
		// length returned by avcodec_decode_video2() is used to update the
		// packet buffer size (here it is fTempPacket.size). This way the
		// packet buffer is allowed to contain incomplete frames so we are
		// required to buffer the packets between different calls to
		// _DecodeNextVideoFrame().
		int gotVideoFrame = 0;
		int encodedDataSizeInBytes = avcodec_decode_video2(fContext,
			fRawDecodedPicture, &gotVideoFrame, &fTempPacket);
		if (encodedDataSizeInBytes < 0) {
			TRACE("[v] AVCodecDecoder: ignoring error in decoding frame %lld:"
				" %d\n", fFrame, encodedDataSizeInBytes);
			// NOTE: An error from avcodec_decode_video2() is ignored by the
			// FFMPEG 0.10.2 example decoding_encoding.c. Only the packet
			// buffers are flushed accordingly
			fTempPacket.data = NULL;
			fTempPacket.size = 0;
			continue;
		}

		// Keep whatever input the decoder did not consume for the next call.
		fTempPacket.size -= encodedDataSizeInBytes;
		fTempPacket.data += encodedDataSizeInBytes;

		bool gotNoVideoFrame = gotVideoFrame == 0;
		if (gotNoVideoFrame) {
			// The decoder needs more input before it can emit a picture.
			TRACE("frame %lld - no picture yet, encodedDataSizeInBytes: %d, "
				"chunk size: %ld\n", fFrame, encodedDataSizeInBytes,
				fChunkBufferSize);
			continue;
		}

#if DO_PROFILING
		bigtime_t formatConversionStart = system_time();
#endif

		status_t handleStatus = _HandleNewVideoFrameAndUpdateSystemState();
		if (handleStatus != B_OK)
			return handleStatus;

#if DO_PROFILING
		bigtime_t doneTime = system_time();
		decodingTime += formatConversionStart - startTime;
		conversionTime += doneTime - formatConversionStart;
		profileCounter++;
		if (!(fFrame % 5)) {
			printf("[v] profile: d1 = %lld, d2 = %lld (%lld) required %Ld\n",
				decodingTime / profileCounter, conversionTime / profileCounter,
				fFrame, bigtime_t(1000000LL / fOutputFrameRate));
			decodingTime = 0;
			conversionTime = 0;
			profileCounter = 0;
		}
#endif
		return B_OK;
	}
}


/*!	\brief Applies all essential video input properties to fContext that were
	passed to AVCodecDecoder when Setup() was called.

	Note: This function must be called before the AVCodec is opened via
	avcodec_open2(). Otherwise the behaviour of FFMPEG's video decoding
	function avcodec_decode_video2() is undefined.
1322 1323 Essential properties applied from fInputFormat.u.encoded_video.output: 1324 - display.line_width copied to fContext->width 1325 - display.line_count copied to fContext->height 1326 - pixel_width_aspect and pixel_height_aspect converted to 1327 fContext->sample_aspect_ratio 1328 - field_rate converted to fContext->time_base and 1329 fContext->ticks_per_frame 1330 1331 Other essential properties being applied: 1332 - fExtraData to fContext->extradata 1333 - fExtraDataSize to fContext->extradata_size 1334 */ 1335 void 1336 AVCodecDecoder::_ApplyEssentialVideoContainerPropertiesToContext() 1337 { 1338 media_raw_video_format containerProperties 1339 = fInputFormat.u.encoded_video.output; 1340 1341 fContext->width = containerProperties.display.line_width; 1342 fContext->height = containerProperties.display.line_count; 1343 1344 if (containerProperties.pixel_width_aspect > 0 1345 && containerProperties.pixel_height_aspect > 0) { 1346 ConvertVideoAspectWidthAndHeightToAVCodecContext( 1347 containerProperties.pixel_width_aspect, 1348 containerProperties.pixel_height_aspect, *fContext); 1349 } 1350 1351 if (containerProperties.field_rate > 0.0) { 1352 ConvertVideoFrameRateToAVCodecContext(containerProperties.field_rate, 1353 *fContext); 1354 } 1355 1356 fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData); 1357 fContext->extradata_size = fExtraDataSize; 1358 } 1359 1360 1361 /*! \brief Loads the next chunk into fChunkBuffer and assigns it (including 1362 the start time) to fTempPacket but only if fTempPacket is empty. 1363 1364 \returns B_OK 1365 1. meaning: Next chunk is loaded. 1366 2. meaning: No need to load and assign anything. Proceed as usual. 1367 \returns B_LAST_BUFFER_ERROR No more chunks available. fChunkBuffer and 1368 fTempPacket are left untouched. 1369 \returns Other errors Caller should bail out because fChunkBuffer and 1370 fTempPacket are in unknown states. Normal operation cannot be 1371 guaranteed. 
1372 */ 1373 status_t 1374 AVCodecDecoder::_LoadNextChunkIfNeededAndAssignStartTime() 1375 { 1376 if (fTempPacket.size > 0) 1377 return B_OK; 1378 1379 const void* chunkBuffer = NULL; 1380 size_t chunkBufferSize = 0; 1381 // In the case that GetNextChunk() returns an error fChunkBufferSize 1382 // should be left untouched. 1383 media_header chunkMediaHeader; 1384 1385 status_t getNextChunkStatus = GetNextChunk(&chunkBuffer, &chunkBufferSize, 1386 &chunkMediaHeader); 1387 if (getNextChunkStatus != B_OK) 1388 return getNextChunkStatus; 1389 1390 status_t chunkBufferPaddingStatus 1391 = _CopyChunkToChunkBufferAndAddPadding(chunkBuffer, chunkBufferSize); 1392 if (chunkBufferPaddingStatus != B_OK) 1393 return chunkBufferPaddingStatus; 1394 1395 fTempPacket.data = fChunkBuffer; 1396 fTempPacket.size = fChunkBufferSize; 1397 fTempPacket.dts = chunkMediaHeader.start_time; 1398 // Let FFMPEG handle the correct relationship between start_time and 1399 // decoded a/v frame. By doing so we are simply copying the way how it 1400 // is implemented in ffplay.c for video frames (for audio frames it 1401 // works, too, but isn't used by ffplay.c). 1402 // \see http://git.videolan.org/?p=ffmpeg.git;a=blob;f=ffplay.c;h=09623db374e5289ed20b7cc28c262c4375a8b2e4;hb=9153b33a742c4e2a85ff6230aea0e75f5a8b26c2#l1502 1403 // 1404 // FIXME: Research how to establish a meaningful relationship between 1405 // start_time and decoded a/v frame when the received chunk buffer 1406 // contains partial a/v frames. Maybe some data formats do contain time 1407 // stamps (ake pts / dts fields) that can be evaluated by FFMPEG. But 1408 // as long as I don't have such video data to test it, it makes no 1409 // sense trying to implement it. 1410 // 1411 // FIXME: Implement tracking start_time of video frames originating in 1412 // data chunks that encode more than one video frame at a time. 
In that 1413 // case on would increment the start_time for each consecutive frame of 1414 // such a data chunk (like it is done for audio frame decoding). But as 1415 // long as I don't have such video data to test it, it makes no sense 1416 // to implement it. 1417 1418 #ifdef LOG_STREAM_TO_FILE 1419 BFile* logFile = fIsAudio ? &sAudioStreamLogFile : &sVideoStreamLogFile; 1420 if (sDumpedPackets < 100) { 1421 logFile->Write(chunkBuffer, fChunkBufferSize); 1422 printf("wrote %ld bytes\n", fChunkBufferSize); 1423 sDumpedPackets++; 1424 } else if (sDumpedPackets == 100) 1425 logFile->Unset(); 1426 #endif 1427 1428 return B_OK; 1429 } 1430 1431 1432 /*! \brief Copies a chunk into fChunkBuffer and adds a "safety net" of 1433 additional memory as required by FFMPEG for input buffers to video 1434 decoders. 1435 1436 This is needed so that some decoders can read safely a predefined number of 1437 bytes at a time for performance optimization purposes. 1438 1439 The additional memory has a size of FF_INPUT_BUFFER_PADDING_SIZE as defined 1440 in avcodec.h. 1441 1442 Ownership of fChunkBuffer memory is with the class so it needs to be freed 1443 at the right times (on destruction, on seeking). 1444 1445 Also update fChunkBufferSize to reflect the size of the contained data 1446 (leaving out the padding). 1447 1448 \param chunk The chunk to copy. 1449 \param chunkSize Size of the chunk in bytes 1450 1451 \returns B_OK Padding was successful. You are responsible for releasing the 1452 allocated memory. fChunkBufferSize is set to chunkSize. 1453 \returns B_NO_MEMORY Padding failed. 1454 fChunkBuffer is set to NULL making it safe to call free() on it. 1455 fChunkBufferSize is set to 0 to reflect the size of fChunkBuffer. 
1456 */ 1457 status_t 1458 AVCodecDecoder::_CopyChunkToChunkBufferAndAddPadding(const void* chunk, 1459 size_t chunkSize) 1460 { 1461 fChunkBuffer = static_cast<uint8_t*>(realloc(fChunkBuffer, 1462 chunkSize + FF_INPUT_BUFFER_PADDING_SIZE)); 1463 if (fChunkBuffer == NULL) { 1464 fChunkBufferSize = 0; 1465 return B_NO_MEMORY; 1466 } 1467 1468 memcpy(fChunkBuffer, chunk, chunkSize); 1469 memset(fChunkBuffer + chunkSize, 0, FF_INPUT_BUFFER_PADDING_SIZE); 1470 // Establish safety net, by zero'ing the padding area. 1471 1472 fChunkBufferSize = chunkSize; 1473 1474 return B_OK; 1475 } 1476 1477 1478 /*! \brief Executes all steps needed for a freshly decoded video frame. 1479 1480 \see _UpdateMediaHeaderForVideoFrame() and 1481 \see _DeinterlaceAndColorConvertVideoFrame() for when you are allowed to 1482 call this method. 1483 1484 \returns B_OK when video frame was handled successfully 1485 \returnb B_NO_MEMORY when no memory is left for correct operation. 1486 */ 1487 status_t 1488 AVCodecDecoder::_HandleNewVideoFrameAndUpdateSystemState() 1489 { 1490 _UpdateMediaHeaderForVideoFrame(); 1491 status_t postProcessStatus = _DeinterlaceAndColorConvertVideoFrame(); 1492 if (postProcessStatus != B_OK) 1493 return postProcessStatus; 1494 1495 ConvertAVCodecContextToVideoFrameRate(*fContext, fOutputFrameRate); 1496 1497 #ifdef DEBUG 1498 dump_ffframe_video(fRawDecodedPicture, "ffpict"); 1499 #endif 1500 1501 fFrame++; 1502 1503 return B_OK; 1504 } 1505 1506 1507 /*! \brief Flushes one video frame - if any - still buffered by the decoder. 1508 1509 Some FFMPEG decoder are buffering video frames. To retrieve those buffered 1510 frames the decoder needs to be told so. 1511 1512 The intended use of this method is to call it, once there are no more data 1513 chunks for decoding left. Reframed in other words: Once GetNextChunk() 1514 returns with status B_LAST_BUFFER_ERROR it is time to start flushing. 
1515 1516 \returns B_OK Retrieved one video frame, handled it accordingly and updated 1517 the system state accordingly. 1518 There maybe more video frames left. So it is valid for the client of 1519 AVCodecDecoder to call it one more time. 1520 1521 \returns B_LAST_BUFFER_ERROR No video frame left. 1522 The client of the AVCodecDecoder should stop calling it now. 1523 1524 \returns B_NO_MEMORY No memory left for correct operation. 1525 */ 1526 status_t 1527 AVCodecDecoder::_FlushOneVideoFrameFromDecoderBuffer() 1528 { 1529 // Create empty fTempPacket to tell the video decoder it is time to flush 1530 fTempPacket.data = NULL; 1531 fTempPacket.size = 0; 1532 1533 int gotVideoFrame = 0; 1534 avcodec_decode_video2(fContext, fRawDecodedPicture, &gotVideoFrame, 1535 &fTempPacket); 1536 // We are only interested in complete frames now, so ignore the return 1537 // value. 1538 1539 bool gotNoVideoFrame = gotVideoFrame == 0; 1540 if (gotNoVideoFrame) { 1541 // video buffer is flushed successfully 1542 return B_LAST_BUFFER_ERROR; 1543 } 1544 1545 return _HandleNewVideoFrameAndUpdateSystemState(); 1546 } 1547 1548 1549 /*! \brief Updates relevant fields of the class member fHeader with the 1550 properties of the most recently decoded video frame. 1551 1552 It is assumed that this function is called only when the following asserts 1553 hold true: 1554 1. We actually got a new picture decoded by the video decoder. 1555 2. fHeader wasn't updated for the new picture yet. You MUST call this 1556 method only once per decoded video frame. 1557 3. This function MUST be called after 1558 _DeinterlaceAndColorConvertVideoFrame() as it relys on an updated 1559 fDecodedDataSizeInBytes. 1560 4. There will be at maximumn only one decoded video frame in our cache 1561 at any single point in time. Otherwise you couldn't tell to which 1562 cached decoded video frame the properties in fHeader relate to. 1563 5. 
AVCodecContext is still valid for this video frame (This is the case
			when this function is called after avcodec_decode_video2() and
			before the next call to avcodec_decode_video2().
*/
void
AVCodecDecoder::_UpdateMediaHeaderForVideoFrame()
{
	// Shorthand for the frame all properties are read from.
	AVFrame* picture = fRawDecodedPicture;

	fHeader.type = B_MEDIA_RAW_VIDEO;
	fHeader.file_pos = 0;
	fHeader.orig_size = 0;
	fHeader.start_time = picture->pkt_dts;
	fHeader.size_used = avpicture_get_size(
		colorspace_to_pixfmt(fOutputColorSpace), picture->width,
		picture->height);
	fHeader.u.raw_video.display_line_width = picture->width;
	fHeader.u.raw_video.display_line_count = picture->height;
	fHeader.u.raw_video.bytes_per_row
		= CalculateBytesPerRowWithColorSpaceAndVideoWidth(fOutputColorSpace,
			picture->width);
	fHeader.u.raw_video.field_gamma = 1.0;
	fHeader.u.raw_video.field_sequence = fFrame;
	fHeader.u.raw_video.field_number = 0;
	fHeader.u.raw_video.pulldown_number = 0;
	fHeader.u.raw_video.first_active_line = 1;
	fHeader.u.raw_video.line_count = picture->height;

	ConvertAVCodecContextToVideoAspectWidthAndHeight(*fContext,
		fHeader.u.raw_video.pixel_width_aspect,
		fHeader.u.raw_video.pixel_height_aspect);

	TRACE("[v] start_time=%02d:%02d.%02d field_sequence=%lu\n",
		int((fHeader.start_time / 60000000) % 60),
		int((fHeader.start_time / 1000000) % 60),
		int((fHeader.start_time / 10000) % 100),
		fHeader.u.raw_video.field_sequence);
}


/*!	\brief This function applies deinterlacing (only if needed) and color
	conversion to the video frame in fRawDecodedPicture.

	It is assumed that fRawDecodedPicture wasn't deinterlaced and color
	converted yet (otherwise this function behaves in unknown manners).

	This function MUST be called after _UpdateMediaHeaderForVideoFrame() as it
	relies on the fHeader.size_used and fHeader.u.raw_video.bytes_per_row
	fields for correct operation

	You should only call this function when you got a new picture decoded by
	the video decoder.

	When this function finishes the postprocessed video frame will be available
	in fPostProcessedDecodedPicture and fDecodedData (fDecodedDataSizeInBytes
	will be set accordingly).

	\returns B_OK video frame successfully deinterlaced and color converted.
	\returns B_NO_MEMORY Not enough memory available for correct operation.
*/
status_t
AVCodecDecoder::_DeinterlaceAndColorConvertVideoFrame()
{
	int displayWidth = fRawDecodedPicture->width;
	int displayHeight = fRawDecodedPicture->height;
	AVPicture deinterlacedPicture;
	bool useDeinterlacedPicture = false;

	if (fRawDecodedPicture->interlaced_frame) {
		// Build an AVPicture view onto the raw (interlaced) frame planes.
		AVPicture rawPicture;
		rawPicture.data[0] = fRawDecodedPicture->data[0];
		rawPicture.data[1] = fRawDecodedPicture->data[1];
		rawPicture.data[2] = fRawDecodedPicture->data[2];
		rawPicture.data[3] = fRawDecodedPicture->data[3];
		rawPicture.linesize[0] = fRawDecodedPicture->linesize[0];
		rawPicture.linesize[1] = fRawDecodedPicture->linesize[1];
		rawPicture.linesize[2] = fRawDecodedPicture->linesize[2];
		rawPicture.linesize[3] = fRawDecodedPicture->linesize[3];

		// NOTE(review): avpicture_alloc()'s return value is not checked
		// here — on failure the later deinterlace/free calls would operate
		// on an unallocated picture. Consider checking it.
		avpicture_alloc(&deinterlacedPicture, fContext->pix_fmt, displayWidth,
			displayHeight);

#if LIBAVCODEC_VERSION_INT < ((57 << 16) | (0 << 8))
		if (avpicture_deinterlace(&deinterlacedPicture, &rawPicture,
			fContext->pix_fmt, displayWidth, displayHeight) < 0) {
			TRACE("[v] avpicture_deinterlace() - error\n");
		} else
			useDeinterlacedPicture = true;
#else
		// avpicture_deinterlace is gone
		// TODO: implement alternate deinterlace using avfilter
		TRACE("[v] avpicture_deinterlace() - not implemented\n");
#endif
	}

	// Some decoders do not set pix_fmt until they have decoded 1 frame
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext == NULL) {
		fSwsContext = sws_getContext(displayWidth, displayHeight,
			fContext->pix_fmt, displayWidth, displayHeight,
			colorspace_to_pixfmt(fOutputColorSpace),
			SWS_FAST_BILINEAR, NULL, NULL, NULL);
	}
#else
	if (fFormatConversionFunc == NULL) {
		fFormatConversionFunc = resolve_colorspace(fOutputColorSpace,
			fContext->pix_fmt, displayWidth, displayHeight);
	}
#endif

	fDecodedDataSizeInBytes = fHeader.size_used;

	if (fDecodedData == NULL) {
		// 32-byte alignment keeps the conversion routines on fast paths.
		// NOTE(review): posix_memalign()'s return value is ignored; on
		// failure the pointer contents are unspecified. The NULL check
		// below presumably covers this — confirm fDecodedData stays NULL
		// on failure.
		const size_t kOptimalAlignmentForColorConversion = 32;
		posix_memalign(reinterpret_cast<void**>(&fDecodedData),
			kOptimalAlignmentForColorConversion, fDecodedDataSizeInBytes);
	}
	if (fDecodedData == NULL)
		return B_NO_MEMORY;

	fPostProcessedDecodedPicture->data[0] = fDecodedData;
	fPostProcessedDecodedPicture->linesize[0]
		= fHeader.u.raw_video.bytes_per_row;

#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext != NULL) {
#else
	if (fFormatConversionFunc != NULL) {
#endif
		if (useDeinterlacedPicture) {
			// Only data[] and linesize[] of this stack frame are
			// initialized; the conversion paths below read nothing else.
			AVFrame deinterlacedFrame;
			deinterlacedFrame.data[0] = deinterlacedPicture.data[0];
			deinterlacedFrame.data[1] = deinterlacedPicture.data[1];
			deinterlacedFrame.data[2] = deinterlacedPicture.data[2];
			deinterlacedFrame.data[3] = deinterlacedPicture.data[3];
			deinterlacedFrame.linesize[0]
				= deinterlacedPicture.linesize[0];
			deinterlacedFrame.linesize[1]
				= deinterlacedPicture.linesize[1];
			deinterlacedFrame.linesize[2]
				= deinterlacedPicture.linesize[2];
			deinterlacedFrame.linesize[3]
				= deinterlacedPicture.linesize[3];

#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
			sws_scale(fSwsContext, deinterlacedFrame.data,
				deinterlacedFrame.linesize, 0, displayHeight,
				fPostProcessedDecodedPicture->data,
				fPostProcessedDecodedPicture->linesize);
#else
			(*fFormatConversionFunc)(&deinterlacedFrame,
				fPostProcessedDecodedPicture, displayWidth, displayHeight);
#endif
		} else {
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
			sws_scale(fSwsContext, fRawDecodedPicture->data,
				fRawDecodedPicture->linesize, 0, displayHeight,
				fPostProcessedDecodedPicture->data,
				fPostProcessedDecodedPicture->linesize);
#else
			(*fFormatConversionFunc)(fRawDecodedPicture,
				fPostProcessedDecodedPicture, displayWidth, displayHeight);
#endif
		}
	}

	// deinterlacedPicture was allocated exactly when interlaced_frame was
	// set above, so this pairs with that avpicture_alloc().
	if (fRawDecodedPicture->interlaced_frame)
		avpicture_free(&deinterlacedPicture);

	return B_OK;
}