I'm trying to mux some video data to a WebM file using FFmpeg. I specify a time_base through an AVDictionary (detailed below), but it appears that my specified time_base values are ignored by the muxer. Instead, it always uses a time_base of 1/1000, and therefore an FPS of 1000. My initialization code is below:
HRESULT WINAPI InitializeVideoEncoding(Encoder* encoder,
    LPCSTR codec, LPCSTR outputContainer, LPCSTR* options, UINT optCount)
{
    // Fill the options
    Log("Loading options.");
    for (UINT i = 0; i < optCount; ++i)
    {
        int opt = i * 2;
        const char* key = options[opt];
        const char* value = options[opt + 1];

        Log("Adding option %s: %s", key, value);
        if (av_dict_set(&encoder->options, key, value, 0) < 0)
        {
            Log("Failed to add item to dictionary: %s %s", key, value);
        }
    }

    // Make sure the encoder options aren't null when they should have
    // been filled.
    if (!encoder->options && optCount > 0)
    {
        Log("Failed to initialize encoder options.");
        return E_FAIL;
    }

    // Grab the buffer size early and remove it from the dict so we don't
    // get complaints from FFmpeg
    {
        const char* frameBufKey = "frame_buf_size";
        encoder->ioBufSize = 131072;
        AVDictionaryEntry* e = av_dict_get(encoder->options,
                                           frameBufKey,
                                           NULL, 0);
        if (e)
        {
            // Set the value and remove from the list.
            encoder->ioBufSize = strtol(e->value, NULL, 10);
            av_dict_set(&encoder->options, frameBufKey, NULL, 0);
        }
    }

    // Create the output context
    avformat_alloc_output_context2(&encoder->outputFormatContext, NULL, outputContainer, NULL);
    if (!encoder->outputFormatContext)
    {
        Log("Couldn't create output format context.");
        return E_FAIL;
    }

    encoder->outputFormat = encoder->outputFormatContext->oformat;

    // Create the output stream
    encoder->outputStream = avformat_new_stream(encoder->outputFormatContext, NULL);
    if (!encoder->outputStream)
    {
        Log("Couldn't create output stream.");
        return E_FAIL;
    }
    encoder->outputStream->id = encoder->outputFormatContext->nb_streams - 1;

    // Find the codec
    encoder->codec = avcodec_find_encoder_by_name(codec);
    if (!encoder->codec)
    {
        Log("Couldn't find encoder.");
        return E_FAIL;
    }

    // Create the encoding context
    encoder->encodingContext = avcodec_alloc_context3(encoder->codec);
    if (!encoder->encodingContext)
    {
        Log("Couldn't create encoding context.");
        return E_FAIL;
    }

    // Set the basics
    encoder->encodingContext->width = encoder->width;
    encoder->encodingContext->height = encoder->height;

    // Open the codec
    int result = avcodec_open2(encoder->encodingContext, encoder->codec, &encoder->options);
    if (result < 0)
    {
        LogFFmpegError(result, "Couldn't open codec.");
        return E_FAIL;
    }

    if (av_dict_count(encoder->options) > 0)
    {
        // Dump the fields we didn't fill
        char* dictEntryBuf;
        av_dict_get_string(encoder->options, &dictEntryBuf, ':', ',');
        Log("The following provided options were unused:\n%s", dictEntryBuf);
        av_freep(&dictEntryBuf);
    }

    // Set some params afterwards
    encoder->outputStream->time_base = encoder->encodingContext->time_base;
    if (encoder->outputFormat->flags & AVFMT_GLOBALHEADER)
        encoder->encodingContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    // Copy necessary information to the stream
    result = avcodec_parameters_from_context(encoder->outputStream->codecpar,
                                             encoder->encodingContext);
    if (result < 0)
    {
        LogFFmpegError(result, "Couldn't copy stream parameters.");
        return E_FAIL;
    }

    av_dump_format(encoder->outputFormatContext, 0, NULL, 1);

    // Initialize IO callbacks
    encoder->ioBuf = (LPBYTE)av_malloc(encoder->ioBufSize);
    Log("Encoder IO buffer size: %d", encoder->ioBufSize);
    AVIOContext* ioContext = avio_alloc_context(encoder->ioBuf,
                                                (int)encoder->ioBufSize,
                                                1,
                                                encoder,
                                                NULL,
                                                WriteStreamCallback,
                                                NULL);
    encoder->outputFormatContext->pb = ioContext;

    result = avformat_write_header(encoder->outputFormatContext, NULL);
    if (result < 0)
    {
        LogFFmpegError(result, "Couldn't write header.");
        return E_FAIL;
    }

    return S_OK;
}
You'll notice the time_base is not manually specified. Instead, I'm using the built-in dictionary functionality so I can control these parameters without having to recompile the program. The values I'm passing in are below:
const char* params[] =
{
    "frame_buf_size", "131072",
    "b", "2000000",
    "time_base", "1:15",
    "pixel_format", "yuv420p",
    "speed", "6",
    "tile-columns", "4",
    "frame-parallel", "1",
    "threads", "8",
    "static-thresh", "0",
    "deadline", "realtime",
    "lag-in-frames", "0",
    "error-resilient", "1"
};
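For reference, I believe the time_base entry above ends up being equivalent to assigning the rational directly on the codec context before avcodec_open2, something like this sketch (av_make_q just builds the AVRational):

// Should be equivalent to passing "time_base" = "1:15" through the dictionary:
// ticks of 1/15 of a second, i.e. one tick per frame at 15 fps.
encoder->encodingContext->time_base = av_make_q(1, 15);
// Optional; newer FFmpeg versions also expose a framerate field on the context.
encoder->encodingContext->framerate = av_make_q(15, 1);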
I did some investigating, and the time_base of my output stream is 1/15 right up until avformat_write_header is called. It appears that something inside this function call is changing the time base.
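Concretely, the check I did amounts to logging the stream's time_base on either side of that call, roughly like this (same Encoder struct as above):

Log("Stream time_base before header: %d/%d",
    encoder->outputStream->time_base.num,
    encoder->outputStream->time_base.den);   // prints 1/15

result = avformat_write_header(encoder->outputFormatContext, NULL);

Log("Stream time_base after header: %d/%d",
    encoder->outputStream->time_base.num,
    encoder->outputStream->time_base.den);   // prints 1/1000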
Now, I read on the FFmpeg mailing lists that WebM requires a time_base of 1/1000, and I believe this is why the WebM implementation of avformat_write_header rewrites the stream's time_base. I may be mistaken, but the frame rate is tied to the time base, and a frame rate of 1000 fps is simply too large for the video player I'm using to test this (specifically, a Media Source Extensions player in a web browser).
I'm aware that the timestamp of a packet is important, so below is the code I'm using to give each packet a timestamp:
// somewhere I create a frame
encoder->yuvFrame->pts = encoder->nextPts++;

// somewhere I actually write to the muxer:
av_packet_rescale_ts(packet, encoder->encodingContext->time_base,
                     encoder->outputStream->time_base);
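For context, that rescale call sits inside a write loop along these lines (simplified; error handling and the WriteStreamCallback plumbing are omitted, and I'm showing the send/receive API here):

int ret = avcodec_send_frame(encoder->encodingContext, encoder->yuvFrame);
while (ret >= 0)
{
    ret = avcodec_receive_packet(encoder->encodingContext, packet);
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        break;

    // Convert from the encoder's time base (1/15) to whatever the muxer
    // decided on for the stream (1/1000 for WebM).
    av_packet_rescale_ts(packet, encoder->encodingContext->time_base,
                         encoder->outputStream->time_base);
    packet->stream_index = encoder->outputStream->index;

    av_interleaved_write_frame(encoder->outputFormatContext, packet);
    av_packet_unref(packet);
}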
In short, no matter what time_base I specify, it appears the muxer will overwrite it with 1/1000. Is this an issue with FFmpeg, or am I missing some initialization steps?