1 /* 2 * Copyright 2009, Stephan Amßus <superstippi@gmx.de> 3 * All rights reserved. Distributed under the terms of the MIT license. 4 */ 5 6 7 #include "AVCodecEncoder.h" 8 9 #include <new> 10 11 #include <stdio.h> 12 #include <string.h> 13 14 extern "C" { 15 #include "rational.h" 16 } 17 18 #include "gfx_util.h" 19 20 21 #undef TRACE 22 //#define TRACE_AV_CODEC_ENCODER 23 #ifdef TRACE_AV_CODEC_ENCODER 24 # define TRACE printf 25 # define TRACE_IO(a...) 26 #else 27 # define TRACE(a...) 28 # define TRACE_IO(a...) 29 #endif 30 31 32 static const size_t kDefaultChunkBufferSize = 2 * 1024 * 1024; 33 34 35 AVCodecEncoder::AVCodecEncoder(uint32 codecID, int bitRateScale) 36 : 37 Encoder(), 38 fBitRateScale(bitRateScale), 39 fCodec(NULL), 40 fContext(avcodec_alloc_context()), 41 fCodecInitStatus(CODEC_INIT_NEEDED), 42 43 fFrame(avcodec_alloc_frame()), 44 fSwsContext(NULL), 45 46 fFramesWritten(0), 47 48 fChunkBuffer(new(std::nothrow) uint8[kDefaultChunkBufferSize]) 49 { 50 TRACE("AVCodecEncoder::AVCodecEncoder()\n"); 51 52 fCodec = avcodec_find_encoder((enum CodecID)codecID); 53 TRACE(" found AVCodec for %lu: %p\n", codecID, fCodec); 54 55 memset(&fInputFormat, 0, sizeof(media_format)); 56 57 fAudioFifo = av_fifo_alloc(0); 58 59 fDstFrame.data[0] = NULL; 60 fDstFrame.data[1] = NULL; 61 fDstFrame.data[2] = NULL; 62 fDstFrame.data[3] = NULL; 63 64 fDstFrame.linesize[0] = 0; 65 fDstFrame.linesize[1] = 0; 66 fDstFrame.linesize[2] = 0; 67 fDstFrame.linesize[3] = 0; 68 69 // Initial parameters, so we know if the user changed them 70 fEncodeParameters.avg_field_size = 0; 71 fEncodeParameters.max_field_size = 0; 72 fEncodeParameters.quality = 1.0f; 73 } 74 75 76 AVCodecEncoder::~AVCodecEncoder() 77 { 78 TRACE("AVCodecEncoder::~AVCodecEncoder()\n"); 79 80 _CloseCodecIfNeeded(); 81 82 if (fSwsContext != NULL) 83 sws_freeContext(fSwsContext); 84 85 av_fifo_free(fAudioFifo); 86 87 avpicture_free(&fDstFrame); 88 // NOTE: Do not use avpicture_free() on fSrcFrame!! We fill the picture 89 // data on the file with the media buffer data passed to Encode(). 90 91 if (fFrame != NULL) { 92 fFrame->data[0] = NULL; 93 fFrame->data[1] = NULL; 94 fFrame->data[2] = NULL; 95 fFrame->data[3] = NULL; 96 97 fFrame->linesize[0] = 0; 98 fFrame->linesize[1] = 0; 99 fFrame->linesize[2] = 0; 100 fFrame->linesize[3] = 0; 101 free(fFrame); 102 } 103 104 free(fContext); 105 106 delete[] fChunkBuffer; 107 } 108 109 110 status_t 111 AVCodecEncoder::AcceptedFormat(const media_format* proposedInputFormat, 112 media_format* _acceptedInputFormat) 113 { 114 TRACE("AVCodecEncoder::AcceptedFormat(%p, %p)\n", proposedInputFormat, 115 _acceptedInputFormat); 116 117 if (proposedInputFormat == NULL) 118 return B_BAD_VALUE; 119 120 if (_acceptedInputFormat != NULL) { 121 memcpy(_acceptedInputFormat, proposedInputFormat, 122 sizeof(media_format)); 123 } 124 125 return B_OK; 126 } 127 128 129 status_t 130 AVCodecEncoder::SetUp(const media_format* inputFormat) 131 { 132 TRACE("AVCodecEncoder::SetUp()\n"); 133 134 if (fContext == NULL || fCodec == NULL) 135 return B_NO_INIT; 136 137 if (inputFormat == NULL) 138 return B_BAD_VALUE; 139 140 _CloseCodecIfNeeded(); 141 142 fInputFormat = *inputFormat; 143 fFramesWritten = 0; 144 145 return _Setup(); 146 } 147 148 149 status_t 150 AVCodecEncoder::GetEncodeParameters(encode_parameters* parameters) const 151 { 152 TRACE("AVCodecEncoder::GetEncodeParameters(%p)\n", parameters); 153 154 // TODO: Implement maintaining an automatically calculated bit_rate versus 155 // a user specified (via SetEncodeParameters()) bit_rate. At this point, the 156 // fContext->bit_rate may not yet have been specified (_Setup() was never 157 // called yet). So it cannot work like the code below, but in any case, it's 158 // showing how to convert between the values (Albeit untested). 159 // int avgBytesPerSecond = fContext->bit_rate / 8; 160 // int maxBytesPerSecond = (fContext->bit_rate 161 // + fContext->bit_rate_tolerance) / 8; 162 // 163 // if (fInputFormat.type == B_MEDIA_RAW_AUDIO) { 164 // fEncodeParameters.avg_field_size = (int32)(avgBytesPerSecond 165 // / fInputFormat.u.raw_audio.frame_rate); 166 // fEncodeParameters.max_field_size = (int32)(maxBytesPerSecond 167 // / fInputFormat.u.raw_audio.frame_rate); 168 // } else if (fInputFormat.type == B_MEDIA_RAW_VIDEO) { 169 // fEncodeParameters.avg_field_size = (int32)(avgBytesPerSecond 170 // / fInputFormat.u.raw_video.field_rate); 171 // fEncodeParameters.max_field_size = (int32)(maxBytesPerSecond 172 // / fInputFormat.u.raw_video.field_rate); 173 // } 174 175 parameters->quality = fEncodeParameters.quality; 176 177 return B_OK; 178 } 179 180 181 status_t 182 AVCodecEncoder::SetEncodeParameters(encode_parameters* parameters) 183 { 184 TRACE("AVCodecEncoder::SetEncodeParameters(%p)\n", parameters); 185 186 if (fFramesWritten > 0) 187 return B_NOT_SUPPORTED; 188 189 fEncodeParameters.quality = parameters->quality; 190 TRACE(" quality: %.1f\n", parameters->quality); 191 192 // TODO: Auto-bit_rate versus user supplied. See above. 193 // int avgBytesPerSecond = 0; 194 // int maxBytesPerSecond = 0; 195 // 196 // if (fInputFormat.type == B_MEDIA_RAW_AUDIO) { 197 // avgBytesPerSecond = (int)(parameters->avg_field_size 198 // * fInputFormat.u.raw_audio.frame_rate); 199 // maxBytesPerSecond = (int)(parameters->max_field_size 200 // * fInputFormat.u.raw_audio.frame_rate); 201 // } else if (fInputFormat.type == B_MEDIA_RAW_VIDEO) { 202 // avgBytesPerSecond = (int)(parameters->avg_field_size 203 // * fInputFormat.u.raw_video.field_rate); 204 // maxBytesPerSecond = (int)(parameters->max_field_size 205 // * fInputFormat.u.raw_video.field_rate); 206 // } 207 // 208 // if (maxBytesPerSecond < avgBytesPerSecond) 209 // maxBytesPerSecond = avgBytesPerSecond; 210 // 211 // // Reset these, so we can tell the difference between uninitialized 212 // // and initialized... 213 // if (avgBytesPerSecond > 0) { 214 // fContext->bit_rate = avgBytesPerSecond * 8; 215 // fContext->bit_rate_tolerance = (maxBytesPerSecond 216 // - avgBytesPerSecond) * 8; 217 // fBitRateControlledByUser = true; 218 // } 219 220 return _Setup(); 221 } 222 223 224 status_t 225 AVCodecEncoder::Encode(const void* buffer, int64 frameCount, 226 media_encode_info* info) 227 { 228 TRACE("AVCodecEncoder::Encode(%p, %lld, %p)\n", buffer, frameCount, info); 229 230 if (!_OpenCodecIfNeeded()) 231 return B_NO_INIT; 232 233 if (fInputFormat.type == B_MEDIA_RAW_AUDIO) 234 return _EncodeAudio(buffer, frameCount, info); 235 else if (fInputFormat.type == B_MEDIA_RAW_VIDEO) 236 return _EncodeVideo(buffer, frameCount, info); 237 else 238 return B_NO_INIT; 239 } 240 241 242 // #pragma mark - 243 244 245 status_t 246 AVCodecEncoder::_Setup() 247 { 248 TRACE("AVCodecEncoder::_Setup\n"); 249 250 if (fInputFormat.type == B_MEDIA_RAW_VIDEO) { 251 TRACE(" B_MEDIA_RAW_VIDEO\n"); 252 // frame rate 253 fContext->time_base.den = (int)fInputFormat.u.raw_video.field_rate; 254 fContext->time_base.num = 1; 255 // video size 256 fContext->width = fInputFormat.u.raw_video.display.line_width; 257 fContext->height = fInputFormat.u.raw_video.display.line_count; 258 // fContext->gop_size = 12; 259 // TODO: Fix pixel format or setup conversion method... 260 fContext->pix_fmt = PIX_FMT_YUV420P; 261 262 // TODO: Setup rate control: 263 // fContext->rate_emu = 0; 264 // fContext->rc_eq = NULL; 265 // fContext->rc_max_rate = 0; 266 // fContext->rc_min_rate = 0; 267 // TODO: Try to calculate a good bit rate... 268 int rawBitRate = (int)(fContext->width * fContext->height * 2 269 * fInputFormat.u.raw_video.field_rate) * 8; 270 int wantedBitRate = (int)(rawBitRate / fBitRateScale 271 * fEncodeParameters.quality); 272 TRACE(" rawBitRate: %d, wantedBitRate: %d (%.1f)\n", rawBitRate, 273 wantedBitRate, fEncodeParameters.quality); 274 // TODO: Support letting the user overwrite this via 275 // SetEncodeParameters(). See comments there... 276 fContext->bit_rate = wantedBitRate; 277 278 // Pixel aspect ratio 279 fContext->sample_aspect_ratio.num 280 = fInputFormat.u.raw_video.pixel_width_aspect; 281 fContext->sample_aspect_ratio.den 282 = fInputFormat.u.raw_video.pixel_height_aspect; 283 if (fContext->sample_aspect_ratio.num == 0 284 || fContext->sample_aspect_ratio.den == 0) { 285 av_reduce(&fContext->sample_aspect_ratio.num, 286 &fContext->sample_aspect_ratio.den, fContext->width, 287 fContext->height, 255); 288 } 289 290 // TODO: This should already happen in AcceptFormat() 291 if (fInputFormat.u.raw_video.display.bytes_per_row == 0) { 292 fInputFormat.u.raw_video.display.bytes_per_row 293 = fContext->width * 4; 294 } 295 296 fFrame->pts = 0; 297 298 // Allocate space for colorspace converted AVPicture 299 // TODO: Check allocations... 300 avpicture_alloc(&fDstFrame, fContext->pix_fmt, fContext->width, 301 fContext->height); 302 303 // Make the frame point to the data in the converted AVPicture 304 fFrame->data[0] = fDstFrame.data[0]; 305 fFrame->data[1] = fDstFrame.data[1]; 306 fFrame->data[2] = fDstFrame.data[2]; 307 fFrame->data[3] = fDstFrame.data[3]; 308 309 fFrame->linesize[0] = fDstFrame.linesize[0]; 310 fFrame->linesize[1] = fDstFrame.linesize[1]; 311 fFrame->linesize[2] = fDstFrame.linesize[2]; 312 fFrame->linesize[3] = fDstFrame.linesize[3]; 313 314 fSwsContext = sws_getContext(fContext->width, fContext->height, 315 colorspace_to_pixfmt(fInputFormat.u.raw_video.display.format), 316 fContext->width, fContext->height, 317 fContext->pix_fmt, SWS_FAST_BILINEAR, NULL, NULL, NULL); 318 319 } else if (fInputFormat.type == B_MEDIA_RAW_AUDIO) { 320 TRACE(" B_MEDIA_RAW_AUDIO\n"); 321 // frame rate 322 fContext->sample_rate = (int)fInputFormat.u.raw_audio.frame_rate; 323 // NOTE: From the output_example.c, it looks like we are not supposed 324 // to set this. 325 fContext->time_base.den = (int)fInputFormat.u.raw_audio.frame_rate; 326 fContext->time_base.num = 1; 327 // channels 328 fContext->channels = fInputFormat.u.raw_audio.channel_count; 329 switch (fInputFormat.u.raw_audio.format) { 330 case media_raw_audio_format::B_AUDIO_FLOAT: 331 fContext->sample_fmt = SAMPLE_FMT_FLT; 332 break; 333 case media_raw_audio_format::B_AUDIO_DOUBLE: 334 fContext->sample_fmt = SAMPLE_FMT_DBL; 335 break; 336 case media_raw_audio_format::B_AUDIO_INT: 337 fContext->sample_fmt = SAMPLE_FMT_S32; 338 break; 339 case media_raw_audio_format::B_AUDIO_SHORT: 340 fContext->sample_fmt = SAMPLE_FMT_S16; 341 break; 342 case media_raw_audio_format::B_AUDIO_UCHAR: 343 fContext->sample_fmt = SAMPLE_FMT_U8; 344 break; 345 346 case media_raw_audio_format::B_AUDIO_CHAR: 347 default: 348 return B_MEDIA_BAD_FORMAT; 349 break; 350 } 351 if (fInputFormat.u.raw_audio.channel_mask == 0) { 352 // guess the channel mask... 353 switch (fInputFormat.u.raw_audio.channel_count) { 354 default: 355 case 2: 356 fContext->channel_layout = CH_LAYOUT_STEREO; 357 break; 358 case 1: 359 fContext->channel_layout = CH_LAYOUT_MONO; 360 break; 361 case 3: 362 fContext->channel_layout = CH_LAYOUT_SURROUND; 363 break; 364 case 4: 365 fContext->channel_layout = CH_LAYOUT_QUAD; 366 break; 367 case 5: 368 fContext->channel_layout = CH_LAYOUT_5POINT0; 369 break; 370 case 6: 371 fContext->channel_layout = CH_LAYOUT_5POINT1; 372 break; 373 case 8: 374 fContext->channel_layout = CH_LAYOUT_7POINT1; 375 break; 376 case 10: 377 fContext->channel_layout = CH_LAYOUT_7POINT1_WIDE; 378 break; 379 } 380 } else { 381 // The bits match 1:1 for media_multi_channels and FFmpeg defines. 382 fContext->channel_layout = fInputFormat.u.raw_audio.channel_mask; 383 } 384 } else { 385 TRACE(" UNSUPPORTED MEDIA TYPE!\n"); 386 return B_NOT_SUPPORTED; 387 } 388 389 // Add some known fixes from the FFmpeg API example: 390 if (fContext->codec_id == CODEC_ID_MPEG2VIDEO) { 391 // Just for testing, we also add B frames */ 392 fContext->max_b_frames = 2; 393 } else if (fContext->codec_id == CODEC_ID_MPEG1VIDEO){ 394 // Needed to avoid using macroblocks in which some coeffs overflow. 395 // This does not happen with normal video, it just happens here as 396 // the motion of the chroma plane does not match the luma plane. 397 fContext->mb_decision = 2; 398 } 399 400 // Unfortunately, we may fail later, when we try to open the codec 401 // for real... but we need to delay this because we still allow 402 // parameter/quality changes. 403 return B_OK; 404 } 405 406 407 bool 408 AVCodecEncoder::_OpenCodecIfNeeded() 409 { 410 if (fCodecInitStatus == CODEC_INIT_DONE) 411 return true; 412 413 if (fCodecInitStatus == CODEC_INIT_FAILED) 414 return false; 415 416 // Open the codec 417 int result = avcodec_open(fContext, fCodec); 418 if (result >= 0) 419 fCodecInitStatus = CODEC_INIT_DONE; 420 else 421 fCodecInitStatus = CODEC_INIT_FAILED; 422 423 TRACE(" avcodec_open(): %d\n", result); 424 425 return fCodecInitStatus == CODEC_INIT_DONE; 426 427 } 428 429 430 void 431 AVCodecEncoder::_CloseCodecIfNeeded() 432 { 433 if (fCodecInitStatus == CODEC_INIT_DONE) { 434 avcodec_close(fContext); 435 fCodecInitStatus = CODEC_INIT_NEEDED; 436 } 437 } 438 439 440 static const int64 kNoPTSValue = 0x8000000000000000LL; 441 // NOTE: For some reasons, I have trouble with the avcodec.h define: 442 // #define AV_NOPTS_VALUE INT64_C(0x8000000000000000) 443 // INT64_C is not defined here. 444 445 status_t 446 AVCodecEncoder::_EncodeAudio(const void* _buffer, int64 frameCount, 447 media_encode_info* info) 448 { 449 TRACE("AVCodecEncoder::_EncodeAudio(%p, %lld, %p)\n", _buffer, frameCount, 450 info); 451 452 if (fChunkBuffer == NULL) 453 return B_NO_MEMORY; 454 455 status_t ret = B_OK; 456 457 const uint8* buffer = reinterpret_cast<const uint8*>(_buffer); 458 459 size_t inputSampleSize = fInputFormat.u.raw_audio.format 460 & media_raw_audio_format::B_AUDIO_SIZE_MASK; 461 size_t inputFrameSize = inputSampleSize 462 * fInputFormat.u.raw_audio.channel_count; 463 464 size_t bufferSize = frameCount * inputFrameSize; 465 bufferSize = min_c(bufferSize, kDefaultChunkBufferSize); 466 467 if (fContext->frame_size > 1) { 468 // Encoded audio. Things work differently from raw audio. We need 469 // the fAudioFifo to pipe data. 470 if (av_fifo_realloc2(fAudioFifo, 471 av_fifo_size(fAudioFifo) + bufferSize) < 0) { 472 TRACE(" av_fifo_realloc2() failed\n"); 473 return B_NO_MEMORY; 474 } 475 av_fifo_generic_write(fAudioFifo, const_cast<uint8*>(buffer), 476 bufferSize, NULL); 477 478 int frameBytes = fContext->frame_size * inputFrameSize; 479 uint8* tempBuffer = new(std::nothrow) uint8[frameBytes]; 480 if (tempBuffer == NULL) 481 return B_NO_MEMORY; 482 483 // Encode as many chunks as can be read from the FIFO. 484 while (av_fifo_size(fAudioFifo) >= frameBytes) { 485 av_fifo_generic_read(fAudioFifo, tempBuffer, frameBytes, NULL); 486 487 ret = _EncodeAudio(tempBuffer, frameBytes, fContext->frame_size, 488 info); 489 if (ret != B_OK) 490 break; 491 } 492 } else { 493 // Raw audio. The number of bytes returned from avcodec_encode_audio() 494 // is always the same as the number of input bytes. 495 return _EncodeAudio(buffer, bufferSize, frameCount, 496 info); 497 } 498 499 return ret; 500 } 501 502 503 status_t 504 AVCodecEncoder::_EncodeAudio(const uint8* buffer, size_t bufferSize, 505 int64 frameCount, media_encode_info* info) 506 { 507 // Encode one audio chunk/frame. The bufferSize has already been adapted 508 // to the needed size for fContext->frame_size, or we are writing raw 509 // audio. 510 int usedBytes = avcodec_encode_audio(fContext, fChunkBuffer, 511 bufferSize, reinterpret_cast<const short*>(buffer)); 512 513 if (usedBytes < 0) { 514 TRACE(" avcodec_encode_video() failed: %d\n", usedBytes); 515 return B_ERROR; 516 } 517 518 // Maybe we need to use this PTS to calculate start_time: 519 if (fContext->coded_frame->pts != kNoPTSValue) { 520 TRACE(" codec frame PTS: %lld (codec time_base: %d/%d)\n", 521 fContext->coded_frame->pts, fContext->time_base.num, 522 fContext->time_base.den); 523 } else { 524 TRACE(" codec frame PTS: N/A (codec time_base: %d/%d)\n", 525 fContext->time_base.num, fContext->time_base.den); 526 } 527 528 // Setup media_encode_info, most important is the time stamp. 529 info->start_time = (bigtime_t)(fFramesWritten * 1000000LL 530 / fInputFormat.u.raw_audio.frame_rate); 531 532 // Write the chunk 533 status_t ret = WriteChunk(fChunkBuffer, usedBytes, info); 534 if (ret != B_OK) { 535 TRACE(" error writing chunk: %s\n", strerror(ret)); 536 return ret; 537 } 538 539 fFramesWritten += frameCount; 540 541 return B_OK; 542 } 543 544 545 status_t 546 AVCodecEncoder::_EncodeVideo(const void* buffer, int64 frameCount, 547 media_encode_info* info) 548 { 549 TRACE_IO("AVCodecEncoder::_EncodeVideo(%p, %lld, %p)\n", buffer, frameCount, 550 info); 551 552 if (fChunkBuffer == NULL) 553 return B_NO_MEMORY; 554 555 status_t ret = B_OK; 556 557 while (frameCount > 0) { 558 size_t bpr = fInputFormat.u.raw_video.display.bytes_per_row; 559 size_t bufferSize = fInputFormat.u.raw_video.display.line_count * bpr; 560 561 // We should always get chunky bitmaps, so this code should be safe. 562 fSrcFrame.data[0] = (uint8_t*)buffer; 563 fSrcFrame.linesize[0] = bpr; 564 565 // Run the pixel format conversion 566 sws_scale(fSwsContext, fSrcFrame.data, fSrcFrame.linesize, 0, 567 fInputFormat.u.raw_video.display.line_count, fDstFrame.data, 568 fDstFrame.linesize); 569 570 // TODO: Look into this... avcodec.h says we need to set it. 571 fFrame->pts++; 572 573 // Encode one video chunk/frame. 574 int usedBytes = avcodec_encode_video(fContext, fChunkBuffer, 575 kDefaultChunkBufferSize, fFrame); 576 577 if (usedBytes < 0) { 578 TRACE(" avcodec_encode_video() failed: %d\n", usedBytes); 579 return B_ERROR; 580 } 581 582 // Maybe we need to use this PTS to calculate start_time: 583 if (fContext->coded_frame->pts != kNoPTSValue) { 584 TRACE(" codec frame PTS: %lld (codec time_base: %d/%d)\n", 585 fContext->coded_frame->pts, fContext->time_base.num, 586 fContext->time_base.den); 587 } else { 588 TRACE(" codec frame PTS: N/A (codec time_base: %d/%d)\n", 589 fContext->time_base.num, fContext->time_base.den); 590 } 591 592 // Setup media_encode_info, most important is the time stamp. 593 info->start_time = (bigtime_t)(fFramesWritten * 1000000LL 594 / fInputFormat.u.raw_video.field_rate); 595 596 // Write the chunk 597 ret = WriteChunk(fChunkBuffer, usedBytes, info); 598 if (ret != B_OK) { 599 TRACE(" error writing chunk: %s\n", strerror(ret)); 600 break; 601 } 602 603 // Skip to the next frame (but usually, there is only one to encode 604 // for video). 605 frameCount--; 606 fFramesWritten++; 607 buffer = (const void*)((const uint8*)buffer + bufferSize); 608 } 609 610 return ret; 611 } 612 613