1 /* 2 * Copyright 2009-2010, Stephan Amßus <superstippi@gmx.de> 3 * All rights reserved. Distributed under the terms of the MIT license. 4 */ 5 6 7 #include "AVCodecEncoder.h" 8 9 #include <new> 10 11 #include <stdio.h> 12 #include <string.h> 13 14 extern "C" { 15 #include "rational.h" 16 } 17 18 #include "EncoderTable.h" 19 #include "gfx_util.h" 20 21 22 #undef TRACE 23 //#define TRACE_AV_CODEC_ENCODER 24 #ifdef TRACE_AV_CODEC_ENCODER 25 # define TRACE printf 26 # define TRACE_IO(a...) 27 #else 28 # define TRACE(a...) 29 # define TRACE_IO(a...) 30 #endif 31 32 33 static const size_t kDefaultChunkBufferSize = 2 * 1024 * 1024; 34 35 36 AVCodecEncoder::AVCodecEncoder(uint32 codecID, int bitRateScale) 37 : 38 Encoder(), 39 fBitRateScale(bitRateScale), 40 fCodecID((enum CodecID)codecID), 41 fCodec(NULL), 42 fContext(avcodec_alloc_context()), 43 fCodecInitStatus(CODEC_INIT_NEEDED), 44 45 fFrame(avcodec_alloc_frame()), 46 fSwsContext(NULL), 47 48 fFramesWritten(0), 49 50 fChunkBuffer(new(std::nothrow) uint8[kDefaultChunkBufferSize]) 51 { 52 TRACE("AVCodecEncoder::AVCodecEncoder()\n"); 53 54 if (fCodecID > 0) { 55 fCodec = avcodec_find_encoder(fCodecID); 56 TRACE(" found AVCodec for %u: %p\n", fCodecID, fCodec); 57 } 58 59 memset(&fInputFormat, 0, sizeof(media_format)); 60 61 fAudioFifo = av_fifo_alloc(0); 62 63 fDstFrame.data[0] = NULL; 64 fDstFrame.data[1] = NULL; 65 fDstFrame.data[2] = NULL; 66 fDstFrame.data[3] = NULL; 67 68 fDstFrame.linesize[0] = 0; 69 fDstFrame.linesize[1] = 0; 70 fDstFrame.linesize[2] = 0; 71 fDstFrame.linesize[3] = 0; 72 73 // Initial parameters, so we know if the user changed them 74 fEncodeParameters.avg_field_size = 0; 75 fEncodeParameters.max_field_size = 0; 76 fEncodeParameters.quality = 1.0f; 77 } 78 79 80 AVCodecEncoder::~AVCodecEncoder() 81 { 82 TRACE("AVCodecEncoder::~AVCodecEncoder()\n"); 83 84 _CloseCodecIfNeeded(); 85 86 if (fSwsContext != NULL) 87 sws_freeContext(fSwsContext); 88 89 av_fifo_free(fAudioFifo); 90 91 avpicture_free(&fDstFrame); 92 // NOTE: Do not use avpicture_free() on fSrcFrame!! We fill the picture 93 // data on the fly with the media buffer data passed to Encode(). 94 95 if (fFrame != NULL) { 96 fFrame->data[0] = NULL; 97 fFrame->data[1] = NULL; 98 fFrame->data[2] = NULL; 99 fFrame->data[3] = NULL; 100 101 fFrame->linesize[0] = 0; 102 fFrame->linesize[1] = 0; 103 fFrame->linesize[2] = 0; 104 fFrame->linesize[3] = 0; 105 free(fFrame); 106 } 107 108 free(fContext); 109 110 delete[] fChunkBuffer; 111 } 112 113 114 status_t 115 AVCodecEncoder::AcceptedFormat(const media_format* proposedInputFormat, 116 media_format* _acceptedInputFormat) 117 { 118 TRACE("AVCodecEncoder::AcceptedFormat(%p, %p)\n", proposedInputFormat, 119 _acceptedInputFormat); 120 121 if (proposedInputFormat == NULL) 122 return B_BAD_VALUE; 123 124 if (_acceptedInputFormat != NULL) { 125 memcpy(_acceptedInputFormat, proposedInputFormat, 126 sizeof(media_format)); 127 } 128 129 return B_OK; 130 } 131 132 133 status_t 134 AVCodecEncoder::SetUp(const media_format* inputFormat) 135 { 136 TRACE("AVCodecEncoder::SetUp()\n"); 137 138 if (fContext == NULL) 139 return B_NO_INIT; 140 141 if (inputFormat == NULL) 142 return B_BAD_VALUE; 143 144 // Codec IDs for raw-formats may need to be figured out here. 145 if (fCodec == NULL && fCodecID == CODEC_ID_NONE) { 146 fCodecID = raw_audio_codec_id_for(*inputFormat); 147 if (fCodecID != CODEC_ID_NONE) 148 fCodec = avcodec_find_encoder(fCodecID); 149 } 150 if (fCodec == NULL) { 151 TRACE(" encoder not found!\n"); 152 return B_NO_INIT; 153 } 154 155 _CloseCodecIfNeeded(); 156 157 fInputFormat = *inputFormat; 158 fFramesWritten = 0; 159 160 return _Setup(); 161 } 162 163 164 status_t 165 AVCodecEncoder::GetEncodeParameters(encode_parameters* parameters) const 166 { 167 TRACE("AVCodecEncoder::GetEncodeParameters(%p)\n", parameters); 168 169 // TODO: Implement maintaining an automatically calculated bit_rate versus 170 // a user specified (via SetEncodeParameters()) bit_rate. At this point, the 171 // fContext->bit_rate may not yet have been specified (_Setup() was never 172 // called yet). So it cannot work like the code below, but in any case, it's 173 // showing how to convert between the values (albeit untested). 174 // int avgBytesPerSecond = fContext->bit_rate / 8; 175 // int maxBytesPerSecond = (fContext->bit_rate 176 // + fContext->bit_rate_tolerance) / 8; 177 // 178 // if (fInputFormat.type == B_MEDIA_RAW_AUDIO) { 179 // fEncodeParameters.avg_field_size = (int32)(avgBytesPerSecond 180 // / fInputFormat.u.raw_audio.frame_rate); 181 // fEncodeParameters.max_field_size = (int32)(maxBytesPerSecond 182 // / fInputFormat.u.raw_audio.frame_rate); 183 // } else if (fInputFormat.type == B_MEDIA_RAW_VIDEO) { 184 // fEncodeParameters.avg_field_size = (int32)(avgBytesPerSecond 185 // / fInputFormat.u.raw_video.field_rate); 186 // fEncodeParameters.max_field_size = (int32)(maxBytesPerSecond 187 // / fInputFormat.u.raw_video.field_rate); 188 // } 189 190 parameters->quality = fEncodeParameters.quality; 191 192 return B_OK; 193 } 194 195 196 status_t 197 AVCodecEncoder::SetEncodeParameters(encode_parameters* parameters) 198 { 199 TRACE("AVCodecEncoder::SetEncodeParameters(%p)\n", parameters); 200 201 if (fFramesWritten > 0) 202 return B_NOT_SUPPORTED; 203 204 fEncodeParameters.quality = parameters->quality; 205 TRACE(" quality: %.1f\n", parameters->quality); 206 207 // TODO: Auto-bit_rate versus user supplied. See above. 208 // int avgBytesPerSecond = 0; 209 // int maxBytesPerSecond = 0; 210 // 211 // if (fInputFormat.type == B_MEDIA_RAW_AUDIO) { 212 // avgBytesPerSecond = (int)(parameters->avg_field_size 213 // * fInputFormat.u.raw_audio.frame_rate); 214 // maxBytesPerSecond = (int)(parameters->max_field_size 215 // * fInputFormat.u.raw_audio.frame_rate); 216 // } else if (fInputFormat.type == B_MEDIA_RAW_VIDEO) { 217 // avgBytesPerSecond = (int)(parameters->avg_field_size 218 // * fInputFormat.u.raw_video.field_rate); 219 // maxBytesPerSecond = (int)(parameters->max_field_size 220 // * fInputFormat.u.raw_video.field_rate); 221 // } 222 // 223 // if (maxBytesPerSecond < avgBytesPerSecond) 224 // maxBytesPerSecond = avgBytesPerSecond; 225 // 226 // // Reset these, so we can tell the difference between uninitialized 227 // // and initialized... 228 // if (avgBytesPerSecond > 0) { 229 // fContext->bit_rate = avgBytesPerSecond * 8; 230 // fContext->bit_rate_tolerance = (maxBytesPerSecond 231 // - avgBytesPerSecond) * 8; 232 // fBitRateControlledByUser = true; 233 // } 234 235 return _Setup(); 236 } 237 238 239 status_t 240 AVCodecEncoder::Encode(const void* buffer, int64 frameCount, 241 media_encode_info* info) 242 { 243 TRACE("AVCodecEncoder::Encode(%p, %lld, %p)\n", buffer, frameCount, info); 244 245 if (!_OpenCodecIfNeeded()) 246 return B_NO_INIT; 247 248 if (fInputFormat.type == B_MEDIA_RAW_AUDIO) 249 return _EncodeAudio(buffer, frameCount, info); 250 else if (fInputFormat.type == B_MEDIA_RAW_VIDEO) 251 return _EncodeVideo(buffer, frameCount, info); 252 else 253 return B_NO_INIT; 254 } 255 256 257 // #pragma mark - 258 259 260 status_t 261 AVCodecEncoder::_Setup() 262 { 263 TRACE("AVCodecEncoder::_Setup\n"); 264 265 if (fInputFormat.type == B_MEDIA_RAW_VIDEO) { 266 TRACE(" B_MEDIA_RAW_VIDEO\n"); 267 // frame rate 268 fContext->time_base.den = (int)fInputFormat.u.raw_video.field_rate; 269 fContext->time_base.num = 1; 270 // video size 271 fContext->width = fInputFormat.u.raw_video.display.line_width; 272 fContext->height = fInputFormat.u.raw_video.display.line_count; 273 // fContext->gop_size = 12; 274 // TODO: Fix pixel format or setup conversion method... 275 fContext->pix_fmt = PIX_FMT_YUV420P; 276 277 // TODO: Setup rate control: 278 // fContext->rate_emu = 0; 279 // fContext->rc_eq = NULL; 280 // fContext->rc_max_rate = 0; 281 // fContext->rc_min_rate = 0; 282 // TODO: Try to calculate a good bit rate... 283 int rawBitRate = (int)(fContext->width * fContext->height * 2 284 * fInputFormat.u.raw_video.field_rate) * 8; 285 int wantedBitRate = (int)(rawBitRate / fBitRateScale 286 * fEncodeParameters.quality); 287 TRACE(" rawBitRate: %d, wantedBitRate: %d (%.1f)\n", rawBitRate, 288 wantedBitRate, fEncodeParameters.quality); 289 // TODO: Support letting the user overwrite this via 290 // SetEncodeParameters(). See comments there... 291 fContext->bit_rate = wantedBitRate; 292 293 // Pixel aspect ratio 294 fContext->sample_aspect_ratio.num 295 = fInputFormat.u.raw_video.pixel_width_aspect; 296 fContext->sample_aspect_ratio.den 297 = fInputFormat.u.raw_video.pixel_height_aspect; 298 if (fContext->sample_aspect_ratio.num == 0 299 || fContext->sample_aspect_ratio.den == 0) { 300 av_reduce(&fContext->sample_aspect_ratio.num, 301 &fContext->sample_aspect_ratio.den, fContext->width, 302 fContext->height, 255); 303 } 304 305 // TODO: This should already happen in AcceptFormat() 306 if (fInputFormat.u.raw_video.display.bytes_per_row == 0) { 307 fInputFormat.u.raw_video.display.bytes_per_row 308 = fContext->width * 4; 309 } 310 311 fFrame->pts = 0; 312 313 // Allocate space for colorspace converted AVPicture 314 // TODO: Check allocations... 315 avpicture_alloc(&fDstFrame, fContext->pix_fmt, fContext->width, 316 fContext->height); 317 318 // Make the frame point to the data in the converted AVPicture 319 fFrame->data[0] = fDstFrame.data[0]; 320 fFrame->data[1] = fDstFrame.data[1]; 321 fFrame->data[2] = fDstFrame.data[2]; 322 fFrame->data[3] = fDstFrame.data[3]; 323 324 fFrame->linesize[0] = fDstFrame.linesize[0]; 325 fFrame->linesize[1] = fDstFrame.linesize[1]; 326 fFrame->linesize[2] = fDstFrame.linesize[2]; 327 fFrame->linesize[3] = fDstFrame.linesize[3]; 328 329 fSwsContext = sws_getContext(fContext->width, fContext->height, 330 colorspace_to_pixfmt(fInputFormat.u.raw_video.display.format), 331 fContext->width, fContext->height, 332 fContext->pix_fmt, SWS_FAST_BILINEAR, NULL, NULL, NULL); 333 334 } else if (fInputFormat.type == B_MEDIA_RAW_AUDIO) { 335 TRACE(" B_MEDIA_RAW_AUDIO\n"); 336 // frame rate 337 fContext->sample_rate = (int)fInputFormat.u.raw_audio.frame_rate; 338 // NOTE: From the output_example.c, it looks like we are not supposed 339 // to set this. 340 fContext->time_base.den = (int)fInputFormat.u.raw_audio.frame_rate; 341 fContext->time_base.num = 1; 342 // channels 343 fContext->channels = fInputFormat.u.raw_audio.channel_count; 344 switch (fInputFormat.u.raw_audio.format) { 345 case media_raw_audio_format::B_AUDIO_FLOAT: 346 fContext->sample_fmt = SAMPLE_FMT_FLT; 347 break; 348 case media_raw_audio_format::B_AUDIO_DOUBLE: 349 fContext->sample_fmt = SAMPLE_FMT_DBL; 350 break; 351 case media_raw_audio_format::B_AUDIO_INT: 352 fContext->sample_fmt = SAMPLE_FMT_S32; 353 break; 354 case media_raw_audio_format::B_AUDIO_SHORT: 355 fContext->sample_fmt = SAMPLE_FMT_S16; 356 break; 357 case media_raw_audio_format::B_AUDIO_UCHAR: 358 fContext->sample_fmt = SAMPLE_FMT_U8; 359 break; 360 361 case media_raw_audio_format::B_AUDIO_CHAR: 362 default: 363 return B_MEDIA_BAD_FORMAT; 364 break; 365 } 366 if (fInputFormat.u.raw_audio.channel_mask == 0) { 367 // guess the channel mask... 368 switch (fInputFormat.u.raw_audio.channel_count) { 369 default: 370 case 2: 371 fContext->channel_layout = CH_LAYOUT_STEREO; 372 break; 373 case 1: 374 fContext->channel_layout = CH_LAYOUT_MONO; 375 break; 376 case 3: 377 fContext->channel_layout = CH_LAYOUT_SURROUND; 378 break; 379 case 4: 380 fContext->channel_layout = CH_LAYOUT_QUAD; 381 break; 382 case 5: 383 fContext->channel_layout = CH_LAYOUT_5POINT0; 384 break; 385 case 6: 386 fContext->channel_layout = CH_LAYOUT_5POINT1; 387 break; 388 case 8: 389 fContext->channel_layout = CH_LAYOUT_7POINT1; 390 break; 391 case 10: 392 fContext->channel_layout = CH_LAYOUT_7POINT1_WIDE; 393 break; 394 } 395 } else { 396 // The bits match 1:1 for media_multi_channels and FFmpeg defines. 397 fContext->channel_layout = fInputFormat.u.raw_audio.channel_mask; 398 } 399 } else { 400 TRACE(" UNSUPPORTED MEDIA TYPE!\n"); 401 return B_NOT_SUPPORTED; 402 } 403 404 // Add some known fixes from the FFmpeg API example: 405 if (fContext->codec_id == CODEC_ID_MPEG2VIDEO) { 406 // Just for testing, we also add B frames */ 407 fContext->max_b_frames = 2; 408 } else if (fContext->codec_id == CODEC_ID_MPEG1VIDEO){ 409 // Needed to avoid using macroblocks in which some coeffs overflow. 410 // This does not happen with normal video, it just happens here as 411 // the motion of the chroma plane does not match the luma plane. 412 fContext->mb_decision = 2; 413 } 414 415 // Unfortunately, we may fail later, when we try to open the codec 416 // for real... but we need to delay this because we still allow 417 // parameter/quality changes. 418 return B_OK; 419 } 420 421 422 bool 423 AVCodecEncoder::_OpenCodecIfNeeded() 424 { 425 if (fCodecInitStatus == CODEC_INIT_DONE) 426 return true; 427 428 if (fCodecInitStatus == CODEC_INIT_FAILED) 429 return false; 430 431 // Open the codec 432 int result = avcodec_open(fContext, fCodec); 433 if (result >= 0) 434 fCodecInitStatus = CODEC_INIT_DONE; 435 else 436 fCodecInitStatus = CODEC_INIT_FAILED; 437 438 TRACE(" avcodec_open(): %d\n", result); 439 440 return fCodecInitStatus == CODEC_INIT_DONE; 441 442 } 443 444 445 void 446 AVCodecEncoder::_CloseCodecIfNeeded() 447 { 448 if (fCodecInitStatus == CODEC_INIT_DONE) { 449 avcodec_close(fContext); 450 fCodecInitStatus = CODEC_INIT_NEEDED; 451 } 452 } 453 454 455 static const int64 kNoPTSValue = 0x8000000000000000LL; 456 // NOTE: For some reasons, I have trouble with the avcodec.h define: 457 // #define AV_NOPTS_VALUE INT64_C(0x8000000000000000) 458 // INT64_C is not defined here. 459 460 status_t 461 AVCodecEncoder::_EncodeAudio(const void* _buffer, int64 frameCount, 462 media_encode_info* info) 463 { 464 TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", _buffer, frameCount, 465 info); 466 467 if (fChunkBuffer == NULL) 468 return B_NO_MEMORY; 469 470 status_t ret = B_OK; 471 472 const uint8* buffer = reinterpret_cast<const uint8*>(_buffer); 473 474 size_t inputSampleSize = fInputFormat.u.raw_audio.format 475 & media_raw_audio_format::B_AUDIO_SIZE_MASK; 476 size_t inputFrameSize = inputSampleSize 477 * fInputFormat.u.raw_audio.channel_count; 478 479 size_t bufferSize = frameCount * inputFrameSize; 480 bufferSize = min_c(bufferSize, kDefaultChunkBufferSize); 481 482 if (fContext->frame_size > 1) { 483 // Encoded audio. Things work differently from raw audio. We need 484 // the fAudioFifo to pipe data. 485 if (av_fifo_realloc2(fAudioFifo, 486 av_fifo_size(fAudioFifo) + bufferSize) < 0) { 487 TRACE(" av_fifo_realloc2() failed\n"); 488 return B_NO_MEMORY; 489 } 490 av_fifo_generic_write(fAudioFifo, const_cast<uint8*>(buffer), 491 bufferSize, NULL); 492 493 int frameBytes = fContext->frame_size * inputFrameSize; 494 uint8* tempBuffer = new(std::nothrow) uint8[frameBytes]; 495 if (tempBuffer == NULL) 496 return B_NO_MEMORY; 497 498 // Encode as many chunks as can be read from the FIFO. 499 while (av_fifo_size(fAudioFifo) >= frameBytes) { 500 av_fifo_generic_read(fAudioFifo, tempBuffer, frameBytes, NULL); 501 502 ret = _EncodeAudio(tempBuffer, frameBytes, fContext->frame_size, 503 info); 504 if (ret != B_OK) 505 break; 506 } 507 508 delete[] tempBuffer; 509 } else { 510 // Raw audio. The number of bytes returned from avcodec_encode_audio() 511 // is always the same as the number of input bytes. 512 return _EncodeAudio(buffer, bufferSize, frameCount, 513 info); 514 } 515 516 return ret; 517 } 518 519 520 status_t 521 AVCodecEncoder::_EncodeAudio(const uint8* buffer, size_t bufferSize, 522 int64 frameCount, media_encode_info* info) 523 { 524 // Encode one audio chunk/frame. The bufferSize has already been adapted 525 // to the needed size for fContext->frame_size, or we are writing raw 526 // audio. 527 int usedBytes = avcodec_encode_audio(fContext, fChunkBuffer, 528 bufferSize, reinterpret_cast<const short*>(buffer)); 529 530 if (usedBytes < 0) { 531 TRACE(" avcodec_encode_video() failed: %d\n", usedBytes); 532 return B_ERROR; 533 } 534 if (usedBytes == 0) 535 return B_OK; 536 537 // Maybe we need to use this PTS to calculate start_time: 538 if (fContext->coded_frame->pts != kNoPTSValue) { 539 TRACE(" codec frame PTS: %lld (codec time_base: %d/%d)\n", 540 fContext->coded_frame->pts, fContext->time_base.num, 541 fContext->time_base.den); 542 } else { 543 TRACE(" codec frame PTS: N/A (codec time_base: %d/%d)\n", 544 fContext->time_base.num, fContext->time_base.den); 545 } 546 547 // Setup media_encode_info, most important is the time stamp. 548 info->start_time = (bigtime_t)(fFramesWritten * 1000000LL 549 / fInputFormat.u.raw_audio.frame_rate); 550 551 // Write the chunk 552 status_t ret = WriteChunk(fChunkBuffer, usedBytes, info); 553 if (ret != B_OK) { 554 TRACE(" error writing chunk: %s\n", strerror(ret)); 555 return ret; 556 } 557 558 fFramesWritten += frameCount; 559 560 return B_OK; 561 } 562 563 564 status_t 565 AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount, 566 media_encode_info* info) 567 { 568 TRACE_IO("AVCodecEncoder::_EncodeVideo(%p, %lld, %p)\n", buffer, frameCount, 569 info); 570 571 if (fChunkBuffer == NULL) 572 return B_NO_MEMORY; 573 574 status_t ret = B_OK; 575 576 while (frameCount > 0) { 577 size_t bpr = fInputFormat.u.raw_video.display.bytes_per_row; 578 size_t bufferSize = fInputFormat.u.raw_video.display.line_count * bpr; 579 580 // We should always get chunky bitmaps, so this code should be safe. 581 fSrcFrame.data[0] = (uint8_t*)buffer; 582 fSrcFrame.linesize[0] = bpr; 583 584 // Run the pixel format conversion 585 sws_scale(fSwsContext, fSrcFrame.data, fSrcFrame.linesize, 0, 586 fInputFormat.u.raw_video.display.line_count, fDstFrame.data, 587 fDstFrame.linesize); 588 589 // TODO: Look into this... avcodec.h says we need to set it. 590 fFrame->pts++; 591 592 // Encode one video chunk/frame. 593 int usedBytes = avcodec_encode_video(fContext, fChunkBuffer, 594 kDefaultChunkBufferSize, fFrame); 595 596 if (usedBytes < 0) { 597 TRACE(" avcodec_encode_video() failed: %d\n", usedBytes); 598 return B_ERROR; 599 } 600 601 // Maybe we need to use this PTS to calculate start_time: 602 if (fContext->coded_frame->pts != kNoPTSValue) { 603 TRACE(" codec frame PTS: %lld (codec time_base: %d/%d)\n", 604 fContext->coded_frame->pts, fContext->time_base.num, 605 fContext->time_base.den); 606 } else { 607 TRACE(" codec frame PTS: N/A (codec time_base: %d/%d)\n", 608 fContext->time_base.num, fContext->time_base.den); 609 } 610 611 // Setup media_encode_info, most important is the time stamp. 612 info->start_time = (bigtime_t)(fFramesWritten * 1000000LL 613 / fInputFormat.u.raw_video.field_rate); 614 615 // Write the chunk 616 ret = WriteChunk(fChunkBuffer, usedBytes, info); 617 if (ret != B_OK) { 618 TRACE(" error writing chunk: %s\n", strerror(ret)); 619 break; 620 } 621 622 // Skip to the next frame (but usually, there is only one to encode 623 // for video). 624 frameCount--; 625 fFramesWritten++; 626 buffer = (const void*)((const uint8*)buffer + bufferSize); 627 } 628 629 return ret; 630 } 631 632