1 /* 2 * Copyright (C) 2001 Carlos Hasan 3 * Copyright (C) 2001 François Revol 4 * Copyright (C) 2001 Axel Dörfler 5 * Copyright (C) 2004 Marcus Overhagen 6 * Copyright (C) 2009 Stephan Amßus <superstippi@gmx.de> 7 * Copyright (C) 2014 Colin Günther <coling@gmx.de> 8 * 9 * All rights reserved. Distributed under the terms of the MIT License. 10 */ 11 12 //! libavcodec based decoder for Haiku 13 14 #include "AVCodecDecoder.h" 15 16 #include <new> 17 18 #include <assert.h> 19 #include <string.h> 20 21 #include <Bitmap.h> 22 #include <Debug.h> 23 24 #include "Utilities.h" 25 26 27 #undef TRACE 28 //#define TRACE_AV_CODEC 29 #ifdef TRACE_AV_CODEC 30 # define TRACE(x...) printf(x) 31 # define TRACE_AUDIO(x...) printf(x) 32 # define TRACE_VIDEO(x...) printf(x) 33 #else 34 # define TRACE(x...) 35 # define TRACE_AUDIO(x...) 36 # define TRACE_VIDEO(x...) 37 #endif 38 39 //#define LOG_STREAM_TO_FILE 40 #ifdef LOG_STREAM_TO_FILE 41 # include <File.h> 42 static BFile sAudioStreamLogFile( 43 "/boot/home/Desktop/AVCodecDebugAudioStream.raw", 44 B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY); 45 static BFile sVideoStreamLogFile( 46 "/boot/home/Desktop/AVCodecDebugVideoStream.raw", 47 B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY); 48 static int sDumpedPackets = 0; 49 #endif 50 51 #ifdef __x86_64 52 #define USE_SWS_FOR_COLOR_SPACE_CONVERSION 1 53 #else 54 #define USE_SWS_FOR_COLOR_SPACE_CONVERSION 0 55 // NOTE: David's color space conversion is much faster than the FFmpeg 56 // version. Perhaps the SWS code can be used for unsupported conversions? 57 // Otherwise the alternative code could simply be removed from this file. 
58 #endif 59 60 61 struct wave_format_ex { 62 uint16 format_tag; 63 uint16 channels; 64 uint32 frames_per_sec; 65 uint32 avg_bytes_per_sec; 66 uint16 block_align; 67 uint16 bits_per_sample; 68 uint16 extra_size; 69 // extra_data[extra_size] 70 } _PACKED; 71 72 struct avformat_codec_context { 73 int sample_rate; 74 int channels; 75 }; 76 77 78 // profiling related globals 79 #define DO_PROFILING 0 80 81 static bigtime_t decodingTime = 0; 82 static bigtime_t conversionTime = 0; 83 static long profileCounter = 0; 84 85 86 AVCodecDecoder::AVCodecDecoder() 87 : 88 fHeader(), 89 fInputFormat(), 90 fFrame(0), 91 fIsAudio(false), 92 fCodec(NULL), 93 fContext(avcodec_alloc_context3(NULL)), 94 fDecodedData(NULL), 95 fDecodedDataSizeInBytes(0), 96 fPostProcessedDecodedPicture(avcodec_alloc_frame()), 97 fRawDecodedPicture(avcodec_alloc_frame()), 98 fRawDecodedAudio(avcodec_alloc_frame()), 99 100 fCodecInitDone(false), 101 102 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 103 fSwsContext(NULL), 104 #else 105 fFormatConversionFunc(NULL), 106 #endif 107 108 fExtraData(NULL), 109 fExtraDataSize(0), 110 fBlockAlign(0), 111 112 fOutputColorSpace(B_NO_COLOR_SPACE), 113 fOutputFrameCount(0), 114 fOutputFrameRate(1.0), 115 fOutputFrameSize(0), 116 117 fChunkBuffer(NULL), 118 fChunkBufferSize(0), 119 fAudioDecodeError(false), 120 121 fDecodedDataBuffer(avcodec_alloc_frame()), 122 fDecodedDataBufferOffset(0), 123 fDecodedDataBufferSize(0) 124 { 125 TRACE("AVCodecDecoder::AVCodecDecoder()\n"); 126 127 system_info info; 128 get_system_info(&info); 129 130 fContext->err_recognition = AV_EF_CAREFUL; 131 fContext->error_concealment = 3; 132 fContext->thread_count = info.cpu_count; 133 } 134 135 136 AVCodecDecoder::~AVCodecDecoder() 137 { 138 TRACE("[%c] AVCodecDecoder::~AVCodecDecoder()\n", fIsAudio?('a'):('v')); 139 140 #ifdef DO_PROFILING 141 if (profileCounter > 0) { 142 printf("[%c] profile: d1 = %lld, d2 = %lld (%Ld)\n", 143 fIsAudio?('a'):('v'), decodingTime / profileCounter, 144 
conversionTime / profileCounter, fFrame); 145 } 146 #endif 147 148 if (fCodecInitDone) 149 avcodec_close(fContext); 150 151 free(fChunkBuffer); 152 free(fDecodedData); 153 154 av_free(fPostProcessedDecodedPicture); 155 av_free(fRawDecodedPicture); 156 av_free(fRawDecodedAudio->opaque); 157 av_free(fRawDecodedAudio); 158 av_free(fContext); 159 av_free(fDecodedDataBuffer); 160 161 #if USE_SWS_FOR_COLOR_SPACE_CONVERSION 162 if (fSwsContext != NULL) 163 sws_freeContext(fSwsContext); 164 #endif 165 166 delete[] fExtraData; 167 } 168 169 170 void 171 AVCodecDecoder::GetCodecInfo(media_codec_info* mci) 172 { 173 snprintf(mci->short_name, 32, "%s", fCodec->name); 174 snprintf(mci->pretty_name, 96, "%s", fCodec->long_name); 175 mci->id = 0; 176 mci->sub_id = fCodec->id; 177 } 178 179 180 status_t 181 AVCodecDecoder::Setup(media_format* ioEncodedFormat, const void* infoBuffer, 182 size_t infoSize) 183 { 184 if (ioEncodedFormat->type != B_MEDIA_ENCODED_AUDIO 185 && ioEncodedFormat->type != B_MEDIA_ENCODED_VIDEO) 186 return B_ERROR; 187 188 fIsAudio = (ioEncodedFormat->type == B_MEDIA_ENCODED_AUDIO); 189 TRACE("[%c] AVCodecDecoder::Setup()\n", fIsAudio?('a'):('v')); 190 191 #ifdef TRACE_AV_CODEC 192 char buffer[1024]; 193 string_for_format(*ioEncodedFormat, buffer, sizeof(buffer)); 194 TRACE("[%c] input_format = %s\n", fIsAudio?('a'):('v'), buffer); 195 TRACE("[%c] infoSize = %ld\n", fIsAudio?('a'):('v'), infoSize); 196 TRACE("[%c] user_data_type = %08lx\n", fIsAudio?('a'):('v'), 197 ioEncodedFormat->user_data_type); 198 TRACE("[%c] meta_data_size = %ld\n", fIsAudio?('a'):('v'), 199 ioEncodedFormat->MetaDataSize()); 200 #endif 201 202 media_format_description description; 203 if (BMediaFormats().GetCodeFor(*ioEncodedFormat, 204 B_MISC_FORMAT_FAMILY, &description) == B_OK) { 205 if (description.u.misc.file_format != 'ffmp') 206 return B_NOT_SUPPORTED; 207 fCodec = avcodec_find_decoder(static_cast<CodecID>( 208 description.u.misc.codec)); 209 if (fCodec == NULL) { 210 TRACE(" 
unable to find the correct FFmpeg " 211 "decoder (id = %lu)\n", description.u.misc.codec); 212 return B_ERROR; 213 } 214 TRACE(" found decoder %s\n", fCodec->name); 215 216 const void* extraData = infoBuffer; 217 fExtraDataSize = infoSize; 218 if (description.family == B_WAV_FORMAT_FAMILY 219 && infoSize >= sizeof(wave_format_ex)) { 220 TRACE(" trying to use wave_format_ex\n"); 221 // Special case extra data in B_WAV_FORMAT_FAMILY 222 const wave_format_ex* waveFormatData 223 = (const wave_format_ex*)infoBuffer; 224 225 size_t waveFormatSize = infoSize; 226 if (waveFormatData != NULL && waveFormatSize > 0) { 227 fBlockAlign = waveFormatData->block_align; 228 TRACE(" found block align: %d\n", fBlockAlign); 229 fExtraDataSize = waveFormatData->extra_size; 230 // skip the wave_format_ex from the extra data. 231 extraData = waveFormatData + 1; 232 } 233 } else { 234 if (fIsAudio) { 235 fBlockAlign 236 = ioEncodedFormat->u.encoded_audio.output 237 .buffer_size; 238 TRACE(" using buffer_size as block align: %d\n", 239 fBlockAlign); 240 } 241 } 242 if (extraData != NULL && fExtraDataSize > 0) { 243 TRACE("AVCodecDecoder: extra data size %ld\n", infoSize); 244 delete[] fExtraData; 245 fExtraData = new(std::nothrow) char[fExtraDataSize]; 246 if (fExtraData != NULL) 247 memcpy(fExtraData, infoBuffer, fExtraDataSize); 248 else 249 fExtraDataSize = 0; 250 } 251 252 fInputFormat = *ioEncodedFormat; 253 return B_OK; 254 } else { 255 TRACE("AVCodecDecoder: BMediaFormats().GetCodeFor() failed.\n"); 256 } 257 258 printf("AVCodecDecoder::Setup failed!\n"); 259 return B_ERROR; 260 } 261 262 263 status_t 264 AVCodecDecoder::SeekedTo(int64 frame, bigtime_t time) 265 { 266 status_t ret = B_OK; 267 // Reset the FFmpeg codec to flush buffers, so we keep the sync 268 if (fCodecInitDone) { 269 avcodec_flush_buffers(fContext); 270 _ResetTempPacket(); 271 } 272 273 // Flush internal buffers as well. 
	free(fChunkBuffer);
	fChunkBuffer = NULL;
	fChunkBufferSize = 0;
	fDecodedDataBufferOffset = 0;
	fDecodedDataBufferSize = 0;
	fDecodedDataSizeInBytes = 0;

	fFrame = frame;

	return ret;
}


/*! \brief Negotiates the decoded output format, dispatching to the audio or
	video specific negotiation depending on the format given to Setup().
*/
status_t
AVCodecDecoder::NegotiateOutputFormat(media_format* inOutFormat)
{
	TRACE("AVCodecDecoder::NegotiateOutputFormat() [%c] \n",
		fIsAudio?('a'):('v'));

#ifdef TRACE_AV_CODEC
	char buffer[1024];
	string_for_format(*inOutFormat, buffer, sizeof(buffer));
	TRACE(" [%c] requested format = %s\n", fIsAudio?('a'):('v'), buffer);
#endif

	if (fIsAudio)
		return _NegotiateAudioOutputFormat(inOutFormat);
	else
		return _NegotiateVideoOutputFormat(inOutFormat);
}


/*! \brief Decodes the next audio frames or video frame into outBuffer.

	Requires a successful NegotiateOutputFormat() (which opens the codec);
	returns B_NO_INIT otherwise.
*/
status_t
AVCodecDecoder::Decode(void* outBuffer, int64* outFrameCount,
	media_header* mediaHeader, media_decode_info* info)
{
	if (!fCodecInitDone)
		return B_NO_INIT;

	status_t ret;
	if (fIsAudio)
		ret = _DecodeAudio(outBuffer, outFrameCount, mediaHeader, info);
	else
		ret = _DecodeVideo(outBuffer, outFrameCount, mediaHeader, info);

	return ret;
}


// #pragma mark -


// Reinitializes fTempPacket to an empty packet (no data pending decode).
void
AVCodecDecoder::_ResetTempPacket()
{
	av_init_packet(&fTempPacket);
	fTempPacket.size = 0;
	fTempPacket.data = NULL;
}


status_t
AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
{
	TRACE("AVCodecDecoder::_NegotiateAudioOutputFormat()\n");

	_ApplyEssentialAudioContainerPropertiesToContext();
		// This makes audio formats play that encode the audio properties in
		// the audio container (e.g. WMA) and not in the audio frames
		// themself (e.g. MP3).
		// Note: Doing this step unconditionally is OK, because the first call
		// to _DecodeNextAudioFrameChunk() will update the essential audio
		// format properties accordingly regardless of the settings here.
347 348 // close any previous instance 349 if (fCodecInitDone) { 350 fCodecInitDone = false; 351 avcodec_close(fContext); 352 } 353 354 if (avcodec_open2(fContext, fCodec, NULL) >= 0) 355 fCodecInitDone = true; 356 else { 357 TRACE("avcodec_open() failed to init codec!\n"); 358 return B_ERROR; 359 } 360 361 free(fChunkBuffer); 362 fChunkBuffer = NULL; 363 fChunkBufferSize = 0; 364 fAudioDecodeError = false; 365 fDecodedDataBufferOffset = 0; 366 fDecodedDataBufferSize = 0; 367 368 _ResetTempPacket(); 369 370 status_t statusOfDecodingFirstFrameChunk = _DecodeNextAudioFrameChunk(); 371 if (statusOfDecodingFirstFrameChunk != B_OK) { 372 TRACE("[a] decoding first audio frame chunk failed\n"); 373 return B_ERROR; 374 } 375 376 media_multi_audio_format outputAudioFormat; 377 outputAudioFormat = media_raw_audio_format::wildcard; 378 outputAudioFormat.byte_order = B_MEDIA_HOST_ENDIAN; 379 outputAudioFormat.frame_rate = fContext->sample_rate; 380 outputAudioFormat.channel_count = fContext->channels; 381 ConvertAVSampleFormatToRawAudioFormat(fContext->sample_fmt, 382 outputAudioFormat.format); 383 // Check that format is not still a wild card! 
384 if (outputAudioFormat.format == 0) { 385 TRACE(" format still a wild-card, assuming B_AUDIO_SHORT.\n"); 386 outputAudioFormat.format = media_raw_audio_format::B_AUDIO_SHORT; 387 } 388 outputAudioFormat.buffer_size = inOutFormat->u.raw_audio.buffer_size; 389 // Check that buffer_size has a sane value 390 size_t sampleSize = outputAudioFormat.format 391 & media_raw_audio_format::B_AUDIO_SIZE_MASK; 392 if (outputAudioFormat.buffer_size == 0) { 393 outputAudioFormat.buffer_size = 512 * sampleSize 394 * outputAudioFormat.channel_count; 395 } 396 397 inOutFormat->type = B_MEDIA_RAW_AUDIO; 398 inOutFormat->u.raw_audio = outputAudioFormat; 399 inOutFormat->require_flags = 0; 400 inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS; 401 402 // Initialize variables needed to manage decoding as much audio frames as 403 // needed to fill the buffer_size. 404 fOutputFrameSize = sampleSize * outputAudioFormat.channel_count; 405 fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize; 406 fOutputFrameRate = outputAudioFormat.frame_rate; 407 fRawDecodedAudio->opaque 408 = av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context)); 409 if (fRawDecodedAudio->opaque == NULL) 410 return B_NO_MEMORY; 411 412 TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, init = %d, " 413 "output frame size: %d, count: %ld, rate: %.2f\n", 414 fContext->bit_rate, fContext->sample_rate, fContext->channels, 415 result, fOutputFrameSize, fOutputFrameCount, fOutputFrameRate); 416 417 return B_OK; 418 } 419 420 421 status_t 422 AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat) 423 { 424 TRACE("AVCodecDecoder::_NegotiateVideoOutputFormat()\n"); 425 426 TRACE(" requested video format 0x%x\n", 427 inOutFormat->u.raw_video.display.format); 428 429 _ApplyEssentialVideoContainerPropertiesToContext(); 430 // This makes video formats play that encode the video properties in 431 // the video container (e.g. 
		// WMV) and not in the video frames
		// themself (e.g. MPEG2).
		// Note: Doing this step unconditionally is OK, because the first call
		// to _DecodeNextVideoFrame() will update the essential video format
		// properties accordingly regardless of the settings here.

	bool codecCanHandleIncompleteFrames
		= (fCodec->capabilities & CODEC_CAP_TRUNCATED) != 0;
	if (codecCanHandleIncompleteFrames) {
		// Expect and handle video frames to be splitted across consecutive
		// data chunks.
		fContext->flags |= CODEC_FLAG_TRUNCATED;
	}

	// close any previous instance
	if (fCodecInitDone) {
		fCodecInitDone = false;
		avcodec_close(fContext);
	}

	if (avcodec_open2(fContext, fCodec, NULL) >= 0)
		fCodecInitDone = true;
	else {
		TRACE("avcodec_open() failed to init codec!\n");
		return B_ERROR;
	}

	// Make MediaPlayer happy (if not in rgb32 screen depth and no overlay,
	// it will only ask for YCbCr, which DrawBitmap doesn't handle, so the
	// default colordepth is RGB32).
	if (inOutFormat->u.raw_video.display.format == B_YCbCr422)
		fOutputColorSpace = B_YCbCr422;
	else
		fOutputColorSpace = B_RGB32;

	// Drop any color conversion state from a previous negotiation; it will
	// be rebuilt once the decoder knows the real pixel format.
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext != NULL)
		sws_freeContext(fSwsContext);
	fSwsContext = NULL;
#else
	fFormatConversionFunc = 0;
#endif

	free(fChunkBuffer);
	fChunkBuffer = NULL;
	fChunkBufferSize = 0;

	_ResetTempPacket();

	status_t statusOfDecodingFirstFrame = _DecodeNextVideoFrame();
	if (statusOfDecodingFirstFrame != B_OK) {
		TRACE("[v] decoding first video frame failed\n");
		return B_ERROR;
	}

	// Note: fSwsContext / fFormatConversionFunc should have been initialized
	// by first call to _DecodeNextVideoFrame() above.
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext == NULL) {
		TRACE("No SWS Scale context or decoder has not set the pixel format "
			"yet!\n");
	}
#else
	if (fFormatConversionFunc == NULL) {
		TRACE("no pixel format conversion function found or decoder has "
			"not set the pixel format yet!\n");
	}
#endif

	inOutFormat->type = B_MEDIA_RAW_VIDEO;
	inOutFormat->require_flags = 0;
	inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS;
	inOutFormat->u.raw_video = fInputFormat.u.encoded_video.output;
	inOutFormat->u.raw_video.interlace = 1;
		// Progressive (non-interlaced) video frames are delivered
	inOutFormat->u.raw_video.first_active
		= fHeader.u.raw_video.first_active_line;
	inOutFormat->u.raw_video.last_active = fHeader.u.raw_video.line_count;
	inOutFormat->u.raw_video.pixel_width_aspect
		= fHeader.u.raw_video.pixel_width_aspect;
	inOutFormat->u.raw_video.pixel_height_aspect
		= fHeader.u.raw_video.pixel_height_aspect;
#if 0
	// This was added by Colin Günther in order to handle streams with a
	// variable frame rate. fOutputFrameRate is computed from the stream
	// time_base, but it actually assumes a timebase equal to the FPS. As far
	// as I can see, a stream with a variable frame rate would have a higher
	// resolution time_base and increment the pts (presentation time) of each
	// frame by a value bigger than one.
	//
	// Fixed rate stream:
	// time_base = 1/50s, frame PTS = 1, 2, 3... (for 50Hz)
	//
	// Variable rate stream:
	// time_base = 1/300s, frame PTS = 6, 12, 18, ... (for 50Hz)
	// time_base = 1/300s, frame PTS = 5, 10, 15, ... (for 60Hz)
	//
	// The fOutputFrameRate currently does not take this into account and
	// ignores the PTS. This results in playing the above sample at 300Hz
	// instead of 50 or 60.
	//
	// However, comparing the PTS for two consecutive implies we have already
	// decoded 2 frames, which may not be the case when this method is first
	// called.
	inOutFormat->u.raw_video.field_rate = fOutputFrameRate;
		// Was calculated by first call to _DecodeNextVideoFrame()
#endif
	inOutFormat->u.raw_video.display.format = fOutputColorSpace;
	inOutFormat->u.raw_video.display.line_width
		= fHeader.u.raw_video.display_line_width;
	inOutFormat->u.raw_video.display.line_count
		= fHeader.u.raw_video.display_line_count;
	inOutFormat->u.raw_video.display.bytes_per_row
		= fHeader.u.raw_video.bytes_per_row;

#ifdef TRACE_AV_CODEC
	char buffer[1024];
	string_for_format(*inOutFormat, buffer, sizeof(buffer));
	TRACE("[v] outFormat = %s\n", buffer);
	TRACE(" returned video format 0x%x\n",
		inOutFormat->u.raw_video.display.format);
#endif

	return B_OK;
}


/*! \brief Fills the outBuffer with one or more already decoded audio frames.

	Besides the main duty described above, this method also fills out the other
	output parameters as documented below.

	\param outBuffer Pointer to the output buffer to copy the decoded audio
		frames to.
	\param outFrameCount Pointer to the output variable to assign the number of
		copied audio frames (usually several audio frames at once).
	\param mediaHeader Pointer to the output media header that contains the
		properties of the decoded audio frame being the first in the outBuffer.
	\param info Specifies additional decoding parameters. (Note: unused).

	\returns B_OK Decoding audio frames succeeded.
	\returns B_LAST_BUFFER_ERROR There are no more audio frames available.
	\returns Other error codes
*/
status_t
AVCodecDecoder::_DecodeAudio(void* outBuffer, int64* outFrameCount,
	media_header* mediaHeader, media_decode_info* info)
{
	TRACE_AUDIO("AVCodecDecoder::_DecodeAudio(audio start_time %.6fs)\n",
		mediaHeader->start_time / 1000000.0);

	// Only decode more frames if the previously decoded data was consumed.
	status_t audioDecodingStatus
		= fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextAudioFrame();

	if (audioDecodingStatus != B_OK)
		return audioDecodingStatus;

	*outFrameCount = fDecodedDataSizeInBytes / fOutputFrameSize;
	*mediaHeader = fHeader;
	memcpy(outBuffer, fDecodedData, fDecodedDataSizeInBytes);

	// Mark the decoded data as consumed.
	fDecodedDataSizeInBytes = 0;

	return B_OK;
}


/*! \brief Fills the outBuffer with an already decoded video frame.

	Besides the main duty described above, this method also fills out the other
	output parameters as documented below.

	\param outBuffer Pointer to the output buffer to copy the decoded video
		frame to.
	\param outFrameCount Pointer to the output variable to assign the number of
		copied video frames (usually one video frame).
	\param mediaHeader Pointer to the output media header that contains the
		decoded video frame properties.
	\param info Specifies additional decoding parameters. (Note: unused).

	\returns B_OK Decoding a video frame succeeded.
	\returns B_LAST_BUFFER_ERROR There are no more video frames available.
	\returns Other error codes
*/
status_t
AVCodecDecoder::_DecodeVideo(void* outBuffer, int64* outFrameCount,
	media_header* mediaHeader, media_decode_info* info)
{
	// Only decode another frame if the previous one was consumed.
	status_t videoDecodingStatus
		= fDecodedDataSizeInBytes > 0 ? B_OK : _DecodeNextVideoFrame();

	if (videoDecodingStatus != B_OK)
		return videoDecodingStatus;

	*outFrameCount = 1;
	*mediaHeader = fHeader;
	memcpy(outBuffer, fDecodedData, mediaHeader->size_used);

	// Mark the decoded frame as consumed.
	fDecodedDataSizeInBytes = 0;

	return B_OK;
}


/*! \brief Decodes next audio frame.

	We decode at least one audio frame into fDecodedData. To achieve this goal,
	we might need to request several chunks of encoded data resulting in a
	variable execution time of this function.

	The length of the decoded audio frame(s) is stored in
	fDecodedDataSizeInBytes. If this variable is greater than zero you can
	assert that all audio frames in fDecodedData are valid.

	It is assumed that the number of expected audio frames is stored in
	fOutputFrameCount. So _DecodeNextAudioFrame() must be called only after
	fOutputFrameCount has been set.

	Note: fOutputFrameCount contains the maximum number of frames a caller
	of BMediaDecoder::Decode() expects to receive. There is a direct
	relationship between fOutputFrameCount and the buffer size a caller of
	BMediaDecoder::Decode() will provide so we make sure to respect this limit
	for fDecodedDataSizeInBytes.

	On return with status code B_OK the following conditions hold true:
		1. fDecodedData contains as much audio frames as the caller of
		   BMediaDecoder::Decode() expects.
		2. fDecodedData contains lesser audio frames as the caller of
		   BMediaDecoder::Decode() expects only when one of the following
		   conditions hold true:
		       i  No more audio frames left. Consecutive calls to
		          _DecodeNextAudioFrame() will then result in the return of
		          status code B_LAST_BUFFER_ERROR.
		       ii TODO: A change in the size of the audio frames.
		3. fHeader is populated with the audio frame properties of the first
		   audio frame in fDecodedData. Especially the start_time field of
		   fHeader relates to that first audio frame.
	   Start times of
	   consecutive audio frames in fDecodedData have to be calculated
	   manually (using the frame rate and the frame duration) if the
	   caller needs them.

	TODO: Handle change of channel_count. Such a change results in a change of
	the audio frame size and thus has different buffer requirements.
	The most sane approach for implementing this is to return the audio frames
	that were still decoded with the previous channel_count and inform the
	client of BMediaDecoder::Decode() about the change so that it can adapt to
	it. Furthermore we need to adapt our fDecodedData to the new buffer size
	requirements accordingly.

	\returns B_OK when we successfully decoded enough audio frames
	\returns B_LAST_BUFFER_ERROR when there are no more audio frames available.
	\returns Other Errors
*/
status_t
AVCodecDecoder::_DecodeNextAudioFrame()
{
	assert(fTempPacket.size >= 0);
	assert(fDecodedDataSizeInBytes == 0);
		// _DecodeNextAudioFrame needs to be called on empty fDecodedData only!
		// If this assert holds wrong we have a bug somewhere.

	status_t resetStatus = _ResetRawDecodedAudio();
	if (resetStatus != B_OK)
		return resetStatus;

	// Accumulate decoded frames until the caller's expected frame count is
	// reached: first drain frames already decoded into fDecodedDataBuffer,
	// then decode new chunks as needed.
	while (fRawDecodedAudio->nb_samples < fOutputFrameCount) {
		_CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow();

		bool decodedDataBufferHasData = fDecodedDataBufferSize > 0;
		if (decodedDataBufferHasData) {
			_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes();
			continue;
		}

		status_t decodeAudioChunkStatus = _DecodeNextAudioFrameChunk();
		if (decodeAudioChunkStatus != B_OK)
			return decodeAudioChunkStatus;
	}

	fFrame += fRawDecodedAudio->nb_samples;
	fDecodedDataSizeInBytes = fRawDecodedAudio->linesize[0];

	_UpdateMediaHeaderForAudioFrame();

#ifdef DEBUG
	dump_ffframe_audio(fRawDecodedAudio, "ffaudi");
#endif

	TRACE_AUDIO(" frame count: %lld current: %lld\n",
		fRawDecodedAudio->nb_samples, fFrame);

	return B_OK;
}


/*! \brief Applies all essential audio input properties to fContext that were
	passed to AVCodecDecoder when Setup() was called.

	Note: This function must be called before the AVCodec is opened via
	avcodec_open2(). Otherwise the behaviour of FFMPEG's audio decoding
	function avcodec_decode_audio4() is undefined.
	Essential properties applied from fInputFormat.u.encoded_audio:
		- bit_rate copied to fContext->bit_rate
		- frame_size copied to fContext->frame_size
		- output.format converted to fContext->sample_fmt
		- output.frame_rate copied to fContext->sample_rate
		- output.channel_count copied to fContext->channels

	Other essential properties being applied:
		- fBlockAlign to fContext->block_align
		- fExtraData to fContext->extradata
		- fExtraDataSize to fContext->extradata_size

	TODO: Either the following documentation section should be removed or this
	TODO when it is clear whether fInputFormat.MetaData() and
	fInputFormat.MetaDataSize() have to be applied to fContext. See the related
	TODO in the method implementation.
	Only applied when fInputFormat.MetaDataSize() is greater than zero:
		- fInputFormat.MetaData() to fContext->extradata
		- fInputFormat.MetaDataSize() to fContext->extradata_size
*/
void
AVCodecDecoder::_ApplyEssentialAudioContainerPropertiesToContext()
{
	media_encoded_audio_format containerProperties
		= fInputFormat.u.encoded_audio;

	fContext->bit_rate
		= static_cast<int>(containerProperties.bit_rate);
	fContext->frame_size
		= static_cast<int>(containerProperties.frame_size);
	ConvertRawAudioFormatToAVSampleFormat(
		containerProperties.output.format, fContext->sample_fmt);
	fContext->sample_rate
		= static_cast<int>(containerProperties.output.frame_rate);
	fContext->channels
		= static_cast<int>(containerProperties.output.channel_count);
	// Check that channel count is not still a wild card!
	if (fContext->channels == 0) {
		TRACE(" channel_count still a wild-card, assuming stereo.\n");
		fContext->channels = 2;
	}

	fContext->block_align = fBlockAlign;
	fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData);
	fContext->extradata_size = fExtraDataSize;

	// TODO: This probably needs to go away, there is some misconception
	// about extra data / info buffer and meta data. See
	// Reader::GetStreamInfo(). The AVFormatReader puts extradata and
	// extradata_size into media_format::MetaData(), but used to ignore
	// the infoBuffer passed to GetStreamInfo(). I think this may be why
	// the code below was added.
	if (fInputFormat.MetaDataSize() > 0) {
		fContext->extradata = static_cast<uint8_t*>(
			const_cast<void*>(fInputFormat.MetaData()));
		fContext->extradata_size = fInputFormat.MetaDataSize();
	}

	TRACE(" bit_rate %d, sample_rate %d, channels %d, block_align %d, "
		"extradata_size %d\n", fContext->bit_rate, fContext->sample_rate,
		fContext->channels, fContext->block_align, fContext->extradata_size);
}


/*! \brief Resets important fields in fRawDecodedAudio to their default values.

	(Note: the original comment said "fRawDecodedVideo", but the function
	below operates on fRawDecodedAudio.)

	Note: Also initializes fDecodedData if not done already.

	\returns B_OK Resetting successfully completed.
	\returns B_NO_MEMORY No memory left for correct operation.
*/
status_t
AVCodecDecoder::_ResetRawDecodedAudio()
{
	// Lazily allocate fDecodedData, sized for the maximum number of frames
	// the caller of BMediaDecoder::Decode() may request.
	if (fDecodedData == NULL) {
		size_t maximumSizeOfDecodedData = fOutputFrameCount * fOutputFrameSize;
		fDecodedData
			= static_cast<uint8_t*>(malloc(maximumSizeOfDecodedData));
	}
	if (fDecodedData == NULL)
		return B_NO_MEMORY;

	fRawDecodedAudio->data[0] = fDecodedData;
	fRawDecodedAudio->linesize[0] = 0;
	fRawDecodedAudio->format = AV_SAMPLE_FMT_NONE;
	fRawDecodedAudio->pkt_dts = AV_NOPTS_VALUE;
	fRawDecodedAudio->nb_samples = 0;
	memset(fRawDecodedAudio->opaque, 0, sizeof(avformat_codec_context));

	return B_OK;
}


/*! \brief Checks fDecodedDataBufferSize and fTempPacket for invalid values,
	reports them and assigns valid values.

	Note: This method is intended to be called before any code is executed that
	deals with moving, loading or decoding any audio frames.
*/
void
AVCodecDecoder::_CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow()
{
	// Negative values indicate a bookkeeping bug elsewhere; clamp to zero so
	// the decode loop cannot read out of bounds.
	if (fDecodedDataBufferSize < 0) {
		fprintf(stderr, "Decoding read past the end of the decoded data "
			"buffer! %ld\n", fDecodedDataBufferSize);
		fDecodedDataBufferSize = 0;
	}
	if (fTempPacket.size < 0) {
		fprintf(stderr, "Decoding read past the end of the temp packet! %d\n",
			fTempPacket.size);
		fTempPacket.size = 0;
	}
}


/*! \brief Moves audio frames from fDecodedDataBuffer to fRawDecodedAudio (and
	thus to fDecodedData) and updates the start times of fRawDecodedAudio,
	fDecodedDataBuffer and fTempPacket accordingly.
	When moving audio frames to fRawDecodedAudio this method also makes sure
	that the following important fields of fRawDecodedAudio are populated and
	updated with correct values:
		- fRawDecodedAudio->data[0]: Points to first free byte of fDecodedData
		- fRawDecodedAudio->linesize[0]: Total size of frames in fDecodedData
		- fRawDecodedAudio->format: Format of first audio frame
		- fRawDecodedAudio->pkt_dts: Start time of first audio frame
		- fRawDecodedAudio->nb_samples: Number of audio frames
		- fRawDecodedAudio->opaque: Contains the following fields for the first
		  audio frame:
		      - channels: Channel count of first audio frame
		      - sample_rate: Frame rate of first audio frame

	This function assumes to be called only when the following assumptions
	hold true:
		1. There are decoded audio frames available in fDecodedDataBuffer
		   meaning that fDecodedDataBufferSize is greater than zero.
		2. There is space left in fRawDecodedAudio to move some audio frames
		   in. This means that fRawDecodedAudio has lesser audio frames than
		   the maximum allowed (specified by fOutputFrameCount).
		3. The audio frame rate is known so that we can calculate the time
		   range (covered by the moved audio frames) to update the start times
		   accordingly.
		4. The field fRawDecodedAudio->opaque points to a memory block
		   representing a structure of type avformat_codec_context.

	After this function returns the caller can safely make the following
	assumptions:
		1. The number of decoded audio frames in fDecodedDataBuffer is
		   decreased though it may still be greater then zero.
		2. The number of frames in fRawDecodedAudio has increased and all
		   important fields are updated (see listing above).
		3. Start times of fDecodedDataBuffer and fTempPacket were increased
		   with the time range covered by the moved audio frames.

	Note: This function raises an exception (by calling the debugger), when
	fDecodedDataBufferSize is not a multiple of fOutputFrameSize.
*/
void
AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
{
	assert(fDecodedDataBufferSize > 0);
	assert(fRawDecodedAudio->nb_samples < fOutputFrameCount);
	assert(fOutputFrameRate > 0);

	// Move at most as many frames as fit into the caller's buffer, and at
	// most as many as the decoded data buffer still holds.
	int32 frames = min_c(fOutputFrameCount - fRawDecodedAudio->nb_samples,
		fDecodedDataBufferSize / fOutputFrameSize);
	if (frames == 0)
		debugger("fDecodedDataBufferSize not multiple of frame size!");

	size_t remainingSize = frames * fOutputFrameSize;
	memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0]
		+ fDecodedDataBufferOffset, remainingSize);

	// Only the very first copy into fDecodedData establishes the frame
	// properties (format, start time, channels, sample rate) reported for
	// the whole batch.
	bool firstAudioFramesCopiedToRawDecodedAudio
		= fRawDecodedAudio->data[0] != fDecodedData;
	if (!firstAudioFramesCopiedToRawDecodedAudio) {
		fRawDecodedAudio->format = fDecodedDataBuffer->format;
		fRawDecodedAudio->pkt_dts = fDecodedDataBuffer->pkt_dts;

		avformat_codec_context* codecContext
			= static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque);
		codecContext->channels = fContext->channels;
		codecContext->sample_rate = fContext->sample_rate;
	}

	fRawDecodedAudio->data[0] += remainingSize;
	fRawDecodedAudio->linesize[0] += remainingSize;
	fRawDecodedAudio->nb_samples += frames;

	fDecodedDataBufferOffset += remainingSize;
	fDecodedDataBufferSize -= remainingSize;

	// Update start times accordingly
	bigtime_t framesTimeInterval = static_cast<bigtime_t>(
		(1000000LL * frames) / fOutputFrameRate);
	fDecodedDataBuffer->pkt_dts += framesTimeInterval;
		// Start time of buffer is updated in case that it contains
		// more audio frames to move.
	fTempPacket.dts += framesTimeInterval;
		// Start time of fTempPacket is updated in case the fTempPacket
		// contains more audio frames to decode.
}


/*!
	\brief Decodes next chunk of audio frames.

	This method handles all the details of loading the input buffer
	(fChunkBuffer) at the right time and of calling FFMPEG often enough until
	some audio frames have been decoded.

	FFMPEG decides how many audio frames belong to a chunk. Because of that
	it is very likely that _DecodeNextAudioFrameChunk has to be called several
	times to decode enough audio frames to please the caller of
	BMediaDecoder::Decode().

	This function assumes to be called only when the following assumptions
	hold true:
		1. fDecodedDataBufferSize equals zero.

	After this function returns successfully the caller can safely make the
	following assumptions:
		1. fDecodedDataBufferSize is greater than zero.
		2. fDecodedDataBufferOffset is set to zero.
		3. fDecodedDataBuffer contains audio frames.

	\returns B_OK on successfully decoding one audio frame chunk.
	\returns B_LAST_BUFFER_ERROR No more audio frame chunks available. From
		this point on further calls will return this same error.
	\returns B_ERROR Decoding failed
*/
status_t
AVCodecDecoder::_DecodeNextAudioFrameChunk()
{
	assert(fDecodedDataBufferSize == 0);

	// Keep going until at least one audio frame landed in fDecodedDataBuffer.
	while (fDecodedDataBufferSize == 0) {
		status_t loadingChunkStatus
			= _LoadNextChunkIfNeededAndAssignStartTime();
		if (loadingChunkStatus != B_OK)
			return loadingChunkStatus;

		status_t decodingStatus
			= _DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer();
		if (decodingStatus != B_OK) {
			// Assume the audio decoded until now is broken so replace it with
			// some silence.
			memset(fDecodedData, 0, fRawDecodedAudio->linesize[0]);

			if (!fAudioDecodeError) {
				// Report failure if not done already
				int32 chunkBufferOffset = fTempPacket.data - fChunkBuffer;
				printf("########### audio decode error, "
					"fTempPacket.size %d, fChunkBuffer data offset %ld\n",
					fTempPacket.size, chunkBufferOffset);
				fAudioDecodeError = true;
			}

			// Assume that next audio chunk can be decoded so keep decoding.
			continue;
		}

		fAudioDecodeError = false;
	}

	return B_OK;
}


/*! \brief Tries to decode at least one audio frame and store it in the
	fDecodedDataBuffer.

	This function assumes to be called only when the following assumptions
	hold true:
		1. fDecodedDataBufferSize equals zero.
		2. fTempPacket.size is greater than zero.

	After this function returns successfully the caller can safely make the
	following assumptions:
		1. fDecodedDataBufferSize is greater than zero in the common case.
		   Also see "Note" below.
		2. fTempPacket was updated to exclude the data chunk that was consumed
		   by avcodec_decode_audio4().
		3. fDecodedDataBufferOffset is set to zero.

	When this function failed to decode at least one audio frame due to a
	decoding error the caller can safely make the following assumptions:
		1. fDecodedDataBufferSize equals zero.
		2. fTempPacket.size equals zero.

	Note: It is possible that there wasn't any audio frame decoded into
	fDecodedDataBuffer after calling this function. This is normal and can
	happen when there was either a decoding error or there is some decoding
	delay in FFMPEGs audio decoder. Another call to this method is totally
	safe and is even expected as long as the calling assumptions hold true.

	\returns B_OK Decoding successful. fDecodedDataBuffer contains decoded
		audio frames only when fDecodedDataBufferSize is greater than zero.
		fDecodedDataBuffer is empty, when avcodec_decode_audio4() didn't return
		audio frames due to delayed decoding or incomplete audio frames.
	\returns B_ERROR Decoding failed thus fDecodedDataBuffer contains no audio
		frames.
*/
status_t
AVCodecDecoder::_DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer()
{
	assert(fDecodedDataBufferSize == 0);
	assert(fTempPacket.size > 0);

	avcodec_get_frame_defaults(fDecodedDataBuffer);
	fDecodedDataBufferOffset = 0;
	int gotAudioFrame = 0;

	int encodedDataSizeInBytes = avcodec_decode_audio4(fContext,
		fDecodedDataBuffer, &gotAudioFrame, &fTempPacket);
	if (encodedDataSizeInBytes <= 0) {
		// Error or failure to produce decompressed output.
		// Skip the temp packet data entirely.
		fTempPacket.size = 0;
		return B_ERROR;
	}

	// Advance the packet past the bytes avcodec_decode_audio4() consumed so
	// the next call continues with the remaining encoded data.
	fTempPacket.data += encodedDataSizeInBytes;
	fTempPacket.size -= encodedDataSizeInBytes;

	bool gotNoAudioFrame = gotAudioFrame == 0;
	if (gotNoAudioFrame)
		return B_OK;

	fDecodedDataBufferSize = av_samples_get_buffer_size(NULL,
		fContext->channels, fDecodedDataBuffer->nb_samples,
		fContext->sample_fmt, 1);
	if (fDecodedDataBufferSize < 0)
		fDecodedDataBufferSize = 0;

	return B_OK;
}


/*! \brief Updates relevant fields of the class member fHeader with the
	properties of the most recently decoded audio frame.

	The following fields of fHeader are updated:
		- fHeader.type
		- fHeader.file_pos
		- fHeader.orig_size
		- fHeader.start_time
		- fHeader.size_used
		- fHeader.u.raw_audio.frame_rate
		- fHeader.u.raw_audio.channel_count

	It is assumed that this function is called only when the following asserts
	hold true:
		1. We actually got a new audio frame decoded by the audio decoder.
		2. fHeader wasn't updated for the new audio frame yet.
You MUST call 1087 this method only once per decoded audio frame. 1088 3. fRawDecodedAudio's fields relate to the first audio frame contained 1089 in fDecodedData. Especially the following fields are of importance: 1090 - fRawDecodedAudio->pkt_dts: Start time of first audio frame 1091 - fRawDecodedAudio->opaque: Contains the following fields for 1092 the first audio frame: 1093 - channels: Channel count of first audio frame 1094 - sample_rate: Frame rate of first audio frame 1095 */ 1096 void 1097 AVCodecDecoder::_UpdateMediaHeaderForAudioFrame() 1098 { 1099 fHeader.type = B_MEDIA_RAW_AUDIO; 1100 fHeader.file_pos = 0; 1101 fHeader.orig_size = 0; 1102 fHeader.start_time = fRawDecodedAudio->pkt_dts; 1103 fHeader.size_used = fRawDecodedAudio->linesize[0]; 1104 1105 avformat_codec_context* codecContext 1106 = static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque); 1107 fHeader.u.raw_audio.channel_count = codecContext->channels; 1108 fHeader.u.raw_audio.frame_rate = codecContext->sample_rate; 1109 } 1110 1111 1112 /*! \brief Decodes next video frame. 1113 1114 We decode exactly one video frame into fDecodedData. To achieve this goal, 1115 we might need to request several chunks of encoded data resulting in a 1116 variable execution time of this function. 1117 1118 The length of the decoded video frame is stored in 1119 fDecodedDataSizeInBytes. If this variable is greater than zero, you can 1120 assert that there is a valid video frame available in fDecodedData. 1121 1122 The decoded video frame in fDecodedData has color space conversion and 1123 deinterlacing already applied. 1124 1125 To every decoded video frame there is a media_header populated in 1126 fHeader, containing the corresponding video frame properties. 1127 1128 Normally every decoded video frame has a start_time field populated in the 1129 associated fHeader, that determines the presentation time of the frame. 
1130 This relationship will only hold true, when each data chunk that is 1131 provided via GetNextChunk() contains data for exactly one encoded video 1132 frame (one complete frame) - not more and not less. 1133 1134 We can decode data chunks that contain partial video frame data, too. In 1135 that case, you cannot trust the value of the start_time field in fHeader. 1136 We simply have no logic in place to establish a meaningful relationship 1137 between an incomplete frame and the start time it should be presented. 1138 Though this might change in the future. 1139 1140 We can decode data chunks that contain more than one video frame, too. In 1141 that case, you cannot trust the value of the start_time field in fHeader. 1142 We simply have no logic in place to track the start_time across multiple 1143 video frames. So a meaningful relationship between the 2nd, 3rd, ... frame 1144 and the start time it should be presented isn't established at the moment. 1145 Though this might change in the future. 1146 1147 More over the fOutputFrameRate variable is updated for every decoded video 1148 frame. 1149 1150 On first call the member variables fSwsContext / fFormatConversionFunc are 1151 initialized. 1152 1153 \returns B_OK when we successfully decoded one video frame 1154 \returns B_LAST_BUFFER_ERROR when there are no more video frames available. 1155 \returns B_NO_MEMORY when we have no memory left for correct operation. 1156 \returns Other Errors 1157 */ 1158 status_t 1159 AVCodecDecoder::_DecodeNextVideoFrame() 1160 { 1161 #if 0 1162 // Well, I heard this was not supposed to happen, but it does 1163 // (for example with http://thud.us/videos/misc/xvid-samples/flyby-divx.avi 1164 // see #11409). Since that video otherwise plays fine when removing the 1165 // assert, I'm assuming we are being overcautious here and commenting it 1166 // out. 
1167 assert(fTempPacket.size >= 0); 1168 #endif 1169 1170 while (true) { 1171 status_t loadingChunkStatus 1172 = _LoadNextChunkIfNeededAndAssignStartTime(); 1173 if (loadingChunkStatus == B_LAST_BUFFER_ERROR) 1174 return _FlushOneVideoFrameFromDecoderBuffer(); 1175 if (loadingChunkStatus != B_OK) { 1176 TRACE("AVCodecDecoder::_DecodeNextVideoFrame(): error from " 1177 "GetNextChunk(): %s\n", strerror(loadingChunkStatus)); 1178 return loadingChunkStatus; 1179 } 1180 1181 #if DO_PROFILING 1182 bigtime_t startTime = system_time(); 1183 #endif 1184 1185 // NOTE: In the FFMPEG 0.10.2 code example decoding_encoding.c, the 1186 // length returned by avcodec_decode_video2() is used to update the 1187 // packet buffer size (here it is fTempPacket.size). This way the 1188 // packet buffer is allowed to contain incomplete frames so we are 1189 // required to buffer the packets between different calls to 1190 // _DecodeNextVideoFrame(). 1191 int gotVideoFrame = 0; 1192 int encodedDataSizeInBytes = avcodec_decode_video2(fContext, 1193 fRawDecodedPicture, &gotVideoFrame, &fTempPacket); 1194 if (encodedDataSizeInBytes < 0) { 1195 TRACE("[v] AVCodecDecoder: ignoring error in decoding frame %lld:" 1196 " %d\n", fFrame, len); 1197 // NOTE: An error from avcodec_decode_video2() is ignored by the 1198 // FFMPEG 0.10.2 example decoding_encoding.c. 
Only the packet 1199 // buffers are flushed accordingly 1200 fTempPacket.data = NULL; 1201 fTempPacket.size = 0; 1202 continue; 1203 } 1204 1205 fTempPacket.size -= encodedDataSizeInBytes; 1206 fTempPacket.data += encodedDataSizeInBytes; 1207 1208 bool gotNoVideoFrame = gotVideoFrame == 0; 1209 if (gotNoVideoFrame) { 1210 TRACE("frame %lld - no picture yet, encodedDataSizeInBytes: %d, " 1211 "chunk size: %ld\n", fFrame, encodedDataSizeInBytes, 1212 fChunkBufferSize); 1213 continue; 1214 } 1215 1216 #if DO_PROFILING 1217 bigtime_t formatConversionStart = system_time(); 1218 #endif 1219 1220 status_t handleStatus = _HandleNewVideoFrameAndUpdateSystemState(); 1221 if (handleStatus != B_OK) 1222 return handleStatus; 1223 1224 #if DO_PROFILING 1225 bigtime_t doneTime = system_time(); 1226 decodingTime += formatConversionStart - startTime; 1227 conversionTime += doneTime - formatConversionStart; 1228 profileCounter++; 1229 if (!(fFrame % 5)) { 1230 printf("[v] profile: d1 = %lld, d2 = %lld (%lld) required %Ld\n", 1231 decodingTime / profileCounter, conversionTime / profileCounter, 1232 fFrame, bigtime_t(1000000LL / fOutputFrameRate)); 1233 decodingTime = 0; 1234 conversionTime = 0; 1235 profileCounter = 0; 1236 } 1237 #endif 1238 return B_OK; 1239 } 1240 } 1241 1242 1243 /*! \brief Applies all essential video input properties to fContext that were 1244 passed to AVCodecDecoder when Setup() was called. 1245 1246 Note: This function must be called before the AVCodec is opened via 1247 avcodec_open2(). Otherwise the behaviour of FFMPEG's video decoding 1248 function avcodec_decode_video2() is undefined. 

	Essential properties applied from fInputFormat.u.encoded_video.output:
		- display.line_width copied to fContext->width
		- display.line_count copied to fContext->height
		- pixel_width_aspect and pixel_height_aspect converted to
		  fContext->sample_aspect_ratio
		- field_rate converted to fContext->time_base and
		  fContext->ticks_per_frame

	Other essential properties being applied:
		- fExtraData to fContext->extradata
		- fExtraDataSize to fContext->extradata_size
*/
void
AVCodecDecoder::_ApplyEssentialVideoContainerPropertiesToContext()
{
	media_raw_video_format containerProperties
		= fInputFormat.u.encoded_video.output;

	fContext->width = containerProperties.display.line_width;
	fContext->height = containerProperties.display.line_count;

	// Only forward the pixel aspect ratio when the container actually
	// provided one; zero components mean "unknown" here.
	if (containerProperties.pixel_width_aspect > 0
		&& containerProperties.pixel_height_aspect > 0) {
		ConvertVideoAspectWidthAndHeightToAVCodecContext(
			containerProperties.pixel_width_aspect,
			containerProperties.pixel_height_aspect, *fContext);
	}

	if (containerProperties.field_rate > 0.0) {
		ConvertVideoFrameRateToAVCodecContext(containerProperties.field_rate,
			*fContext);
	}

	fContext->extradata = reinterpret_cast<uint8_t*>(fExtraData);
	fContext->extradata_size = fExtraDataSize;
}


/*! \brief Loads the next chunk into fChunkBuffer and assigns it (including
	the start time) to fTempPacket but only if fTempPacket is empty.

	\returns B_OK
		1. meaning: Next chunk is loaded.
		2. meaning: No need to load and assign anything. Proceed as usual.
	\returns B_LAST_BUFFER_ERROR No more chunks available. fChunkBuffer and
		fTempPacket are left untouched.
	\returns Other errors Caller should bail out because fChunkBuffer and
		fTempPacket are in unknown states. Normal operation cannot be
		guaranteed.
*/
status_t
AVCodecDecoder::_LoadNextChunkIfNeededAndAssignStartTime()
{
	// The previous chunk wasn't fully consumed yet, so there is nothing to
	// load (meaning 2 of the B_OK return value).
	if (fTempPacket.size > 0)
		return B_OK;

	const void* chunkBuffer = NULL;
	size_t chunkBufferSize = 0;
		// In the case that GetNextChunk() returns an error fChunkBufferSize
		// should be left untouched.
	media_header chunkMediaHeader;

	status_t getNextChunkStatus = GetNextChunk(&chunkBuffer, &chunkBufferSize,
		&chunkMediaHeader);
	if (getNextChunkStatus != B_OK)
		return getNextChunkStatus;

	status_t chunkBufferPaddingStatus
		= _CopyChunkToChunkBufferAndAddPadding(chunkBuffer, chunkBufferSize);
	if (chunkBufferPaddingStatus != B_OK)
		return chunkBufferPaddingStatus;

	fTempPacket.data = fChunkBuffer;
	fTempPacket.size = fChunkBufferSize;
	fTempPacket.dts = chunkMediaHeader.start_time;
		// Let FFMPEG handle the correct relationship between start_time and
		// decoded a/v frame. By doing so we are simply copying the way how it
		// is implemented in ffplay.c for video frames (for audio frames it
		// works, too, but isn't used by ffplay.c).
		// \see http://git.videolan.org/?p=ffmpeg.git;a=blob;f=ffplay.c;h=09623db374e5289ed20b7cc28c262c4375a8b2e4;hb=9153b33a742c4e2a85ff6230aea0e75f5a8b26c2#l1502
		//
		// FIXME: Research how to establish a meaningful relationship between
		// start_time and decoded a/v frame when the received chunk buffer
		// contains partial a/v frames. Maybe some data formats do contain time
		// stamps (ake pts / dts fields) that can be evaluated by FFMPEG. But
		// as long as I don't have such video data to test it, it makes no
		// sense trying to implement it.
		//
		// FIXME: Implement tracking start_time of video frames originating in
		// data chunks that encode more than one video frame at a time. In that
		// case on would increment the start_time for each consecutive frame of
		// such a data chunk (like it is done for audio frame decoding). But as
		// long as I don't have such video data to test it, it makes no sense
		// to implement it.

#ifdef LOG_STREAM_TO_FILE
	BFile* logFile = fIsAudio ? &sAudioStreamLogFile : &sVideoStreamLogFile;
	if (sDumpedPackets < 100) {
		logFile->Write(chunkBuffer, fChunkBufferSize);
		printf("wrote %ld bytes\n", fChunkBufferSize);
		sDumpedPackets++;
	} else if (sDumpedPackets == 100)
		logFile->Unset();
#endif

	return B_OK;
}


/*! \brief Copies a chunk into fChunkBuffer and adds a "safety net" of
	additional memory as required by FFMPEG for input buffers to video
	decoders.

	This is needed so that some decoders can read safely a predefined number of
	bytes at a time for performance optimization purposes.

	The additional memory has a size of FF_INPUT_BUFFER_PADDING_SIZE as defined
	in avcodec.h.

	Ownership of fChunkBuffer memory is with the class so it needs to be freed
	at the right times (on destruction, on seeking).

	Also update fChunkBufferSize to reflect the size of the contained data
	(leaving out the padding).

	\param chunk The chunk to copy.
	\param chunkSize Size of the chunk in bytes

	\returns B_OK Padding was successful. You are responsible for releasing the
		allocated memory. fChunkBufferSize is set to chunkSize.
	\returns B_NO_MEMORY Padding failed.
		fChunkBuffer is set to NULL making it safe to call free() on it.
		fChunkBufferSize is set to 0 to reflect the size of fChunkBuffer.
1383 */ 1384 status_t 1385 AVCodecDecoder::_CopyChunkToChunkBufferAndAddPadding(const void* chunk, 1386 size_t chunkSize) 1387 { 1388 fChunkBuffer = static_cast<uint8_t*>(realloc(fChunkBuffer, 1389 chunkSize + FF_INPUT_BUFFER_PADDING_SIZE)); 1390 if (fChunkBuffer == NULL) { 1391 fChunkBufferSize = 0; 1392 return B_NO_MEMORY; 1393 } 1394 1395 memcpy(fChunkBuffer, chunk, chunkSize); 1396 memset(fChunkBuffer + chunkSize, 0, FF_INPUT_BUFFER_PADDING_SIZE); 1397 // Establish safety net, by zero'ing the padding area. 1398 1399 fChunkBufferSize = chunkSize; 1400 1401 return B_OK; 1402 } 1403 1404 1405 /*! \brief Executes all steps needed for a freshly decoded video frame. 1406 1407 \see _UpdateMediaHeaderForVideoFrame() and 1408 \see _DeinterlaceAndColorConvertVideoFrame() for when you are allowed to 1409 call this method. 1410 1411 \returns B_OK when video frame was handled successfully 1412 \returnb B_NO_MEMORY when no memory is left for correct operation. 1413 */ 1414 status_t 1415 AVCodecDecoder::_HandleNewVideoFrameAndUpdateSystemState() 1416 { 1417 _UpdateMediaHeaderForVideoFrame(); 1418 status_t postProcessStatus = _DeinterlaceAndColorConvertVideoFrame(); 1419 if (postProcessStatus != B_OK) 1420 return postProcessStatus; 1421 1422 ConvertAVCodecContextToVideoFrameRate(*fContext, fOutputFrameRate); 1423 1424 #ifdef DEBUG 1425 dump_ffframe_video(fRawDecodedPicture, "ffpict"); 1426 #endif 1427 1428 fFrame++; 1429 1430 return B_OK; 1431 } 1432 1433 1434 /*! \brief Flushes one video frame - if any - still buffered by the decoder. 1435 1436 Some FFMPEG decoder are buffering video frames. To retrieve those buffered 1437 frames the decoder needs to be told so. 1438 1439 The intended use of this method is to call it, once there are no more data 1440 chunks for decoding left. Reframed in other words: Once GetNextChunk() 1441 returns with status B_LAST_BUFFER_ERROR it is time to start flushing. 

	\returns B_OK Retrieved one video frame, handled it accordingly and updated
		the system state accordingly.
		There maybe more video frames left. So it is valid for the client of
		AVCodecDecoder to call it one more time.

	\returns B_LAST_BUFFER_ERROR No video frame left.
		The client of the AVCodecDecoder should stop calling it now.

	\returns B_NO_MEMORY No memory left for correct operation.
*/
status_t
AVCodecDecoder::_FlushOneVideoFrameFromDecoderBuffer()
{
	// Create empty fTempPacket to tell the video decoder it is time to flush
	fTempPacket.data = NULL;
	fTempPacket.size = 0;

	int gotVideoFrame = 0;
	avcodec_decode_video2(fContext, fRawDecodedPicture, &gotVideoFrame,
		&fTempPacket);
		// We are only interested in complete frames now, so ignore the return
		// value.

	bool gotNoVideoFrame = gotVideoFrame == 0;
	if (gotNoVideoFrame) {
		// video buffer is flushed successfully
		return B_LAST_BUFFER_ERROR;
	}

	return _HandleNewVideoFrameAndUpdateSystemState();
}


/*! \brief Updates relevant fields of the class member fHeader with the
	properties of the most recently decoded video frame.

	It is assumed that this function is called only when the following asserts
	hold true:
		1. We actually got a new picture decoded by the video decoder.
		2. fHeader wasn't updated for the new picture yet. You MUST call this
		   method only once per decoded video frame.
		3. This function MUST be called after
		   _DeinterlaceAndColorConvertVideoFrame() as it relies on an updated
		   fDecodedDataSizeInBytes.
		   NOTE(review): This item looks stale —
		   _HandleNewVideoFrameAndUpdateSystemState() actually calls this
		   method BEFORE _DeinterlaceAndColorConvertVideoFrame(), and
		   size_used is computed here via avpicture_get_size() rather than
		   read from fDecodedDataSizeInBytes. Confirm and update.
		4. There will be at maximum only one decoded video frame in our cache
		   at any single point in time. Otherwise you couldn't tell to which
		   cached decoded video frame the properties in fHeader relate to.
		5.
AVCodecContext is still valid for this video frame (This is the case 1491 when this function is called after avcodec_decode_video2() and 1492 before the next call to avcodec_decode_video2(). 1493 */ 1494 void 1495 AVCodecDecoder::_UpdateMediaHeaderForVideoFrame() 1496 { 1497 fHeader.type = B_MEDIA_RAW_VIDEO; 1498 fHeader.file_pos = 0; 1499 fHeader.orig_size = 0; 1500 fHeader.start_time = fRawDecodedPicture->pkt_dts; 1501 fHeader.size_used = avpicture_get_size( 1502 colorspace_to_pixfmt(fOutputColorSpace), fRawDecodedPicture->width, 1503 fRawDecodedPicture->height); 1504 fHeader.u.raw_video.display_line_width = fRawDecodedPicture->width; 1505 fHeader.u.raw_video.display_line_count = fRawDecodedPicture->height; 1506 fHeader.u.raw_video.bytes_per_row 1507 = CalculateBytesPerRowWithColorSpaceAndVideoWidth(fOutputColorSpace, 1508 fRawDecodedPicture->width); 1509 fHeader.u.raw_video.field_gamma = 1.0; 1510 fHeader.u.raw_video.field_sequence = fFrame; 1511 fHeader.u.raw_video.field_number = 0; 1512 fHeader.u.raw_video.pulldown_number = 0; 1513 fHeader.u.raw_video.first_active_line = 1; 1514 fHeader.u.raw_video.line_count = fRawDecodedPicture->height; 1515 1516 ConvertAVCodecContextToVideoAspectWidthAndHeight(*fContext, 1517 fHeader.u.raw_video.pixel_width_aspect, 1518 fHeader.u.raw_video.pixel_height_aspect); 1519 1520 TRACE("[v] start_time=%02d:%02d.%02d field_sequence=%lu\n", 1521 int((fHeader.start_time / 60000000) % 60), 1522 int((fHeader.start_time / 1000000) % 60), 1523 int((fHeader.start_time / 10000) % 100), 1524 fHeader.u.raw_video.field_sequence); 1525 } 1526 1527 1528 /*! \brief This function applies deinterlacing (only if needed) and color 1529 conversion to the video frame in fRawDecodedPicture. 1530 1531 It is assumed that fRawDecodedPicture wasn't deinterlaced and color 1532 converted yet (otherwise this function behaves in unknown manners). 

	This function MUST be called after _UpdateMediaHeaderForVideoFrame() as it
	relies on the fHeader.size_used and fHeader.u.raw_video.bytes_per_row
	fields for correct operation

	You should only call this function when you got a new picture decoded by
	the video decoder.

	When this function finishes the postprocessed video frame will be available
	in fPostProcessedDecodedPicture and fDecodedData (fDecodedDataSizeInBytes
	will be set accordingly).

	\returns B_OK video frame successfully deinterlaced and color converted.
	\returns B_NO_MEMORY Not enough memory available for correct operation.
*/
status_t
AVCodecDecoder::_DeinterlaceAndColorConvertVideoFrame()
{
	int displayWidth = fRawDecodedPicture->width;
	int displayHeight = fRawDecodedPicture->height;
	AVPicture deinterlacedPicture;
	bool useDeinterlacedPicture = false;

	if (fRawDecodedPicture->interlaced_frame) {
		// Deinterlace into a freshly allocated picture; fall back to the raw
		// picture if deinterlacing fails.
		AVPicture rawPicture;
		rawPicture.data[0] = fRawDecodedPicture->data[0];
		rawPicture.data[1] = fRawDecodedPicture->data[1];
		rawPicture.data[2] = fRawDecodedPicture->data[2];
		rawPicture.data[3] = fRawDecodedPicture->data[3];
		rawPicture.linesize[0] = fRawDecodedPicture->linesize[0];
		rawPicture.linesize[1] = fRawDecodedPicture->linesize[1];
		rawPicture.linesize[2] = fRawDecodedPicture->linesize[2];
		rawPicture.linesize[3] = fRawDecodedPicture->linesize[3];

		avpicture_alloc(&deinterlacedPicture, fContext->pix_fmt, displayWidth,
			displayHeight);

		if (avpicture_deinterlace(&deinterlacedPicture, &rawPicture,
				fContext->pix_fmt, displayWidth, displayHeight) < 0) {
			TRACE("[v] avpicture_deinterlace() - error\n");
		} else
			useDeinterlacedPicture = true;
	}

	// Some decoders do not set pix_fmt until they have decoded 1 frame
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext == NULL) {
		fSwsContext = sws_getContext(displayWidth, displayHeight,
			fContext->pix_fmt, displayWidth, displayHeight,
			colorspace_to_pixfmt(fOutputColorSpace),
			SWS_FAST_BILINEAR, NULL, NULL, NULL);
	}
#else
	if (fFormatConversionFunc == NULL) {
		fFormatConversionFunc = resolve_colorspace(fOutputColorSpace,
			fContext->pix_fmt, displayWidth, displayHeight);
	}
#endif

	fDecodedDataSizeInBytes = fHeader.size_used;

	// Lazily allocate the output buffer; aligned for the benefit of the
	// color conversion routines.
	if (fDecodedData == NULL) {
		const size_t kOptimalAlignmentForColorConversion = 32;
		posix_memalign(reinterpret_cast<void**>(&fDecodedData),
			kOptimalAlignmentForColorConversion, fDecodedDataSizeInBytes);
	}
	if (fDecodedData == NULL)
		return B_NO_MEMORY;

	fPostProcessedDecodedPicture->data[0] = fDecodedData;
	fPostProcessedDecodedPicture->linesize[0]
		= fHeader.u.raw_video.bytes_per_row;

#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext != NULL) {
#else
	if (fFormatConversionFunc != NULL) {
#endif
		if (useDeinterlacedPicture) {
			AVFrame deinterlacedFrame;
			deinterlacedFrame.data[0] = deinterlacedPicture.data[0];
			deinterlacedFrame.data[1] = deinterlacedPicture.data[1];
			deinterlacedFrame.data[2] = deinterlacedPicture.data[2];
			deinterlacedFrame.data[3] = deinterlacedPicture.data[3];
			deinterlacedFrame.linesize[0]
				= deinterlacedPicture.linesize[0];
			deinterlacedFrame.linesize[1]
				= deinterlacedPicture.linesize[1];
			deinterlacedFrame.linesize[2]
				= deinterlacedPicture.linesize[2];
			deinterlacedFrame.linesize[3]
				= deinterlacedPicture.linesize[3];

#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
			sws_scale(fSwsContext, deinterlacedFrame.data,
				deinterlacedFrame.linesize, 0, displayHeight,
				fPostProcessedDecodedPicture->data,
				fPostProcessedDecodedPicture->linesize);
#else
			(*fFormatConversionFunc)(&deinterlacedFrame,
				fPostProcessedDecodedPicture, displayWidth, displayHeight);
#endif
		} else {
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
			sws_scale(fSwsContext, fRawDecodedPicture->data,
				fRawDecodedPicture->linesize, 0, displayHeight,
				fPostProcessedDecodedPicture->data,
				fPostProcessedDecodedPicture->linesize);
#else
			(*fFormatConversionFunc)(fRawDecodedPicture,
				fPostProcessedDecodedPicture, displayWidth, displayHeight);
#endif
		}
	}

	// Release the temporary deinterlace buffer allocated above.
	if (fRawDecodedPicture->interlaced_frame)
		avpicture_free(&deinterlacedPicture);

	return B_OK;
}