I'm having a hard time figuring out where to find information about this.
I'm building a simple recorder to learn about the video compression universe, and I'm running into some weird behavior.
First of all, I need to explain the scenario.
It's very simple: every time I call av_read_frame(input_context, input_packet), I save the packet's pts into a last_pts variable.
So...
What's bothering me is that in about 10% of my calls to av_read_frame I get a packet with input_packet.pts < last_pts, which results in an error message from the encoder when I feed it that frame. With that in mind, I decided to just drop those frames when it happens.
I don't think just dropping frames is a good idea, though: if the demuxer hands them to me, they must be needed somehow.
So... what should I do when last_pts > current_pts?
Below is the test code I'm using. It captures video from the webcam and saves it to an mp4 file with the libx264 encoder.
#include <QCoreApplication>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavutil/avutil.h>
#include <libavdevice/avdevice.h>
}

#include <QTime>
#include <QThread>
#include <QDebug>

#define SM_DEBUG

static const double max_fps = 30;
static const double min_loop_duration = 1000 / max_fps;
static const double max_duration = 5; // in seconds

// Sleep away whatever is left of the current loop iteration so the
// capture loop does not run faster than max_fps.
static void sleep_if_needed(const int &elapsed) {
    int sleep_duration = min_loop_duration - elapsed;
    if (sleep_duration > 0) {
        QThread::msleep(sleep_duration);
    }
}

#ifdef SM_DEBUG
static void log_packet(const AVPacket *pkt,
                       const AVRational &time_base,
                       int is_input = 0)
{
    qDebug() << ((is_input) ? QString(">>") : QString("<<")) << "Size:" << QString::number(pkt->size) <<
                "pts:" << QString::number(pkt->pts) <<
                "pts_time:" << QString::number(av_q2d(time_base) * pkt->pts) <<
                "dts:" << QString::number(pkt->dts) <<
                "dts_time:" << QString::number(av_q2d(time_base) * pkt->dts);
}
#endif

int main()
{
    int input_w, input_h, output_w = 640, output_h = 480;

    av_register_all();
    avdevice_register_all();
    avcodec_register_all();

#ifdef SM_DEBUG
    av_log_set_level(AV_LOG_DEBUG);
#else
    av_log_set_level(AV_LOG_ERROR);
#endif

    AVFormatContext *ic;
    AVFormatContext *oc;
    AVInputFormat *ifmt;
    AVDictionary *opts = 0;
    AVCodecContext *dec_ctx;
    AVCodecContext *enc_ctx;
    AVCodec *dec;
    AVCodec *enc;
    AVStream *ist;
    AVStream *ost;

    // input: webcam via v4l2
    ifmt = av_find_input_format("v4l2");
    av_dict_set(&opts, "tune", "zerolatency", AV_DICT_APPEND);

    ic = avformat_alloc_context();
    ic->flags |= AVFMT_FLAG_NONBLOCK;
    avformat_open_input(&ic, "/dev/video0", ifmt, &opts);
    avformat_find_stream_info(ic, NULL);
    av_dump_format(ic, 0, ic->filename, 0);

    AVFrame *frame;
    AVFrame *tmp_frame;

    ist = ic->streams[0];
    dec_ctx = ist->codec;
    input_w = dec_ctx->width;
    input_h = dec_ctx->height;
    dec_ctx->flags |= CODEC_FLAG_LOW_DELAY;

    dec = avcodec_find_decoder(dec_ctx->codec_id);
    av_format_set_video_codec(ic, dec);
    avcodec_open2(dec_ctx, dec, NULL);

    // output: mp4 muxer + libx264 encoder
    avformat_alloc_output_context2(&oc, NULL, "MP4", "/home/poste9/grava.mp4");
    enc = avcodec_find_encoder(AV_CODEC_ID_H264);
    ost = avformat_new_stream(oc, enc);
    enc_ctx = ost->codec;

    enc_ctx->codec_id = AV_CODEC_ID_H264;
    enc_ctx->width = output_w;
    enc_ctx->height = output_h;

    ost->time_base.num = ist->time_base.num;
    ost->time_base.den = ist->time_base.den;
    enc_ctx->time_base = ost->time_base;

    enc_ctx->gop_size = 250;
    enc_ctx->keyint_min = 25;
    enc_ctx->qmax = 51;
    enc_ctx->qmin = 30;
    enc_ctx->pix_fmt = AV_PIX_FMT_YUV422P;
    enc_ctx->max_b_frames = 6;
    enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
    enc_ctx->flags |= CODEC_FLAG_LOW_DELAY;

    avcodec_open2(enc_ctx, enc, NULL);

    avio_open2(&oc->pb, oc->filename, AVIO_FLAG_WRITE,
               &oc->interrupt_callback, NULL);
    av_dump_format(oc, 0, oc->filename, 1);
    avformat_write_header(oc, NULL);

    // scaler: input size/pixel format -> encoder size/pixel format
    struct SwsContext *sws_ctx;
    sws_ctx = sws_getContext(input_w, input_h,
                             dec_ctx->pix_fmt,
                             output_w, output_h, enc_ctx->pix_fmt,
                             SWS_BICUBIC, NULL, NULL, NULL);

    frame = av_frame_alloc();
    tmp_frame = av_frame_alloc();

    frame->format = enc_ctx->pix_fmt;
    frame->width = output_w;
    frame->height = output_h;
    frame->pts = AV_NOPTS_VALUE;
    av_frame_get_buffer(frame, 32);
    av_frame_make_writable(frame);

    int got_picture = 0;
    int got_packet = 0;
    double recording_duration = 0;

    QTime timer;
    AVPacket pkt_out;
    av_init_packet(&pkt_out);

    timer.start();
    bool started_recording = false;
    int64_t start_time = 0;
    int64_t last_pts = INT64_MIN;

    while (1) {
        timer.restart();

        AVPacket pkt_in;
        av_read_frame(ic, &pkt_in);
        if (pkt_in.size == 0) {
            sleep_if_needed(timer.elapsed());
            continue;
        }

        avcodec_decode_video2(dec_ctx, tmp_frame, &got_picture, &pkt_in);
#ifdef SM_DEBUG
        log_packet(&pkt_in, ist->time_base, 1);
#endif

        if (!started_recording) {
            start_time = pkt_in.dts;
            started_recording = true;
        }

        // drop packets whose pts went backwards (the behavior in question)
        if (pkt_in.pts < last_pts) {
            sleep_if_needed(timer.elapsed());
            continue;
        }
        last_pts = pkt_in.pts;

        frame->pts = (pkt_in.dts - start_time);

        if (!got_picture) {
            av_free_packet(&pkt_in);
            sleep_if_needed(timer.elapsed());
            continue;
        } else {
            sws_scale(sws_ctx, tmp_frame->data, tmp_frame->linesize,
                      0, input_h, frame->data, frame->linesize);
            av_free_packet(&pkt_in);
        }

        av_init_packet(&pkt_out);
        avcodec_encode_video2(enc_ctx, &pkt_out, frame, &got_packet);

        if (got_packet) {
            if (pkt_out.pts < pkt_out.dts) {
                pkt_out.dts = pkt_out.pts;
            }
            pkt_out.stream_index = 0;
            recording_duration = pkt_out.pts * av_q2d(ost->time_base);
#ifdef SM_DEBUG
            log_packet(&pkt_out, ost->time_base, 0);
#endif
            av_interleaved_write_frame(oc, &pkt_out);
            av_free_packet(&pkt_out);
        }

        if (recording_duration >= max_duration) {
            break;
        } else {
            sleep_if_needed(timer.elapsed());
        }
    }

    av_write_trailer(oc);

    av_dict_free(&opts);
    av_frame_free(&frame);
    av_frame_free(&tmp_frame);
    sws_freeContext(sws_ctx);
    avcodec_close(dec_ctx);
    avcodec_close(enc_ctx);
    avio_close(oc->pb);
    avformat_free_context(oc);
    avformat_close_input(&ic);

    return 0;
}
These frames are B-frames. B-frames are saved to the stream in decoding order, not presentation order. If you look at the DTS instead, it will probably look OK. It is the decoder's job to reorder frames back into presentation order after they are decoded.
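For example (illustrative numbers, not taken from your capture), a group of pictures with two B-frames between the reference frames is stored like this, because a B-frame can only be decoded after both frames it references:

    Presentation order:  I0  B1  B2  P3      pts: 0  1  2  3
    Stored/decode order: I0  P3  B1  B2      pts: 0  3  1  2  (jumps backwards)
                                             dts: 0  1  2  3  (monotonic)

This is why the pts of the packets you read can jump backwards while their dts keeps increasing.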
EDIT: To fix your code, use the PTS from the decoded frame, not from the packet.
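A minimal, untested sketch of that change inside your read loop, assuming the variables from your code; av_frame_get_best_effort_timestamp() returns the frame's pts and falls back to other timestamps when the pts is missing:

    avcodec_decode_video2(dec_ctx, tmp_frame, &got_picture, &pkt_in);
    if (got_picture) {
        // the decoder outputs frames in presentation order, so this
        // timestamp increases monotonically even when the packet pts did not
        int64_t frame_pts = av_frame_get_best_effort_timestamp(tmp_frame);
        if (!started_recording) {
            start_time = frame_pts;
            started_recording = true;
        }
        frame->pts = frame_pts - start_time;
        // ...scale and encode as before
    }
    av_free_packet(&pkt_in);

With this, nothing needs to be dropped, and the last_pts guard can be removed entirely.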