Decoding H264 frames from an RTP stream

Posted 2020-05-24 10:23

Question:

I am using the live555 and ffmpeg libraries to receive and decode an RTP H264 stream from a server. The video stream was encoded by ffmpeg, using the Baseline profile and

x264_param_default_preset(m_params, "veryfast", "zerolatency")
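
For reference, the Baseline profile on the encoder side is typically applied with x264_param_apply_profile after the preset call; a minimal sketch of that setup (standard x264 API, using a local variable instead of the question's m_params):

    x264_param_t params;
    x264_param_default_preset(&params, "veryfast", "zerolatency");
    // Constrain the stream to the Baseline profile (no B-frames, no CABAC),
    // which keeps decoding simple and latency low.
    x264_param_apply_profile(&params, "baseline");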

I read this topic and added the SPS and PPS data to every frame that I receive from the network:

void ClientSink::NewFrameHandler(unsigned frameSize, unsigned numTruncatedBytes,
    timeval presentationTime, unsigned durationInMicroseconds)
{
     ...
    EncodedFrame tmp;
    tmp.m_frame = std::vector<unsigned char>(m_tempBuffer.data(), m_tempBuffer.data() + frameSize);
    tmp.m_duration = durationInMicroseconds;
    tmp.m_pts = presentationTime;

    // Add SPS and PPS data to the frame; TODO: some devices may already include SPS and PPS data in the frame
    tmp.m_frame.insert(tmp.m_frame.begin(), m_spsPpsData.cbegin(), m_spsPpsData.cend());

    emit newEncodedFrame( SharedEncodedFrame(tmp) );
    m_frameCounter++;

    this->continuePlaying();
}
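
For completeness, m_spsPpsData can be built from the stream's sprop-parameter-sets attribute. A minimal sketch, assuming a live555 MediaSubsession is available (parseSPropParameterSets and fmtp_spropparametersets are from the live555 API; the subsession variable name is illustrative):

    unsigned numRecords = 0;
    SPropRecord* records = parseSPropParameterSets(
        subsession->fmtp_spropparametersets(), numRecords);
    const unsigned char startCode[4] = { 0x00, 0x00, 0x00, 0x01 };
    for (unsigned i = 0; i < numRecords; ++i)
    {
        // Prefix each parameter set (SPS, then PPS) with an Annex-B start code,
        // so the decoder sees them as ordinary NAL units.
        m_spsPpsData.insert(m_spsPpsData.end(), startCode, startCode + 4);
        m_spsPpsData.insert(m_spsPpsData.end(), records[i].sPropBytes,
                            records[i].sPropBytes + records[i].sPropLength);
    }
    delete[] records;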

These frames are then received by the decoder:

bool H264Decoder::decodeFrame(SharedEncodedFrame orig_frame)
{
...
    while (m_packet.size > 0)
    {
        int got_picture;
        int len = avcodec_decode_video2(m_decoderContext, m_picture, &got_picture, &m_packet);
        if (len < 0)
        {
            emit criticalError(QString("Decoding error"));
            return false;
        }
        if (got_picture)
        {
            std::vector<unsigned char> result;
            this->storePicture(result);

            if ( m_picture->format == AVPixelFormat::AV_PIX_FMT_YUV420P )
            {
                //QImage img = QImage(result.data(), m_picture->width, m_picture->height, QImage::Format_RGB888);
                Frame_t result_rgb;
                if (!convert_yuv420p_to_rgb32(result, m_picture->width, m_picture->height, result_rgb))
                {
                    emit criticalError( QString("Failed to convert YUV420p image into rgb32; can't create QImage!"));
                    return false;
                }
                unsigned char* copy_img = new unsigned char[result_rgb.size()];
                // This copy is needed because QImage shares the buffer it is given;
                // it would crash if the QImage were used after result_rgb is destroyed.
                std::copy(result_rgb.cbegin(), result_rgb.cend(), copy_img);
                QImage img = QImage(copy_img, m_picture->width, m_picture->height, QImage::Format_RGB32,
                [](void* array)
                {
                    delete[] array;
                }, copy_img);
                img.save(QString("123.bmp"));
                emit newDecodedFrame(img);
            }
        }
        m_packet.size -= len;
        m_packet.data += len;
    }
    ...
}

avcodec_decode_video2 decodes the frames without any error message, but the decoded frames are invalid after converting them (from yuv420p into rgb32). An example of the image is available at this link.

Do you have any idea what I am doing wrong?

Answer 1:

I suspect the error is in the convert_yuv420p_to_rgb32() code. Try this:

static SwsContext *m_swsCtx = NULL;
QImage frame =  QImage ( m_picture->width, m_picture->height,
                         QImage::Format_RGB32 );
m_swsCtx = sws_getCachedContext ( m_swsCtx, m_picture->width,
                                  m_picture->height, AV_PIX_FMT_YUV420P,
                                  m_picture->width, m_picture->height,
                                  AV_PIX_FMT_RGB32, SWS_BICUBIC,
                                  NULL, NULL, NULL );
uint8_t *dstSlice[] = { frame.bits() };
int dstStride = frame.width() * 4;
sws_scale ( m_swsCtx, m_picture->data, m_picture->linesize,
            0, m_picture->height, dstSlice, &dstStride );

You will need to include/link libswscale if you have not done so already.
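
For example, with qmake and pkg-config (assuming the FFmpeg development packages are installed):

    # in the .pro file
    CONFIG += link_pkgconfig
    PKGCONFIG += libswscale libavcodec libavutil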

Note: you don't need SPS/PPS on every frame (prepending them on keyframes is good enough), but it doesn't hurt either.
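
A minimal sketch of keyframe-only insertion, assuming each received buffer starts with a single Annex-B NAL unit (the isIdrNal helper is illustrative, not part of the original code):

    // Returns true for an IDR slice (NAL unit type 5), i.e. a keyframe.
    static bool isIdrNal(const unsigned char* data, size_t size)
    {
        size_t offset = 0; // skip the 3- or 4-byte Annex-B start code
        if (size >= 4 && data[0] == 0 && data[1] == 0 && data[2] == 0 && data[3] == 1)
            offset = 4;
        else if (size >= 3 && data[0] == 0 && data[1] == 0 && data[2] == 1)
            offset = 3;
        if (offset == 0 || offset >= size)
            return false;
        return (data[offset] & 0x1F) == 5; // low 5 bits are the NAL unit type
    }

    // In NewFrameHandler: prepend SPS/PPS only before keyframes.
    if (isIdrNal(tmp.m_frame.data(), tmp.m_frame.size()))
        tmp.m_frame.insert(tmp.m_frame.begin(), m_spsPpsData.cbegin(), m_spsPpsData.cend());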



Answer 2:

OK. This is my simple example, which decodes an H264 stream received from the liveMedia library. It can decode frames that are truncated at arbitrary positions, not only at frame boundaries.

    class H264Decoder : public AbstractDecoder
    {
        Q_OBJECT
    public:
        H264Decoder( QObject* parent = nullptr );
        virtual ~H264Decoder();

    public slots:

        virtual bool decodeFrame(SharedEncodedFrame orig_frame) override;

    signals:
        // emitted when we have a new decoded frame
        void newDecodedFrame( QImage img );

    protected:
        void storePicture(std::vector<unsigned char>& res);

        AVCodec* m_decoder;
        AVCodecContext* m_decoderContext;
        int m_got_picture;
        AVFrame* m_picture;
        AVPacket m_packet;
    };

And this is the implementation:

#include "H264Decoder.hpp"
#include "ImgConverting.hpp"
#include <QPixmap>
extern "C"
{
#include <libswscale/swscale.h>
}

using namespace std;

H264Decoder::H264Decoder( QObject *parent)
    : AbstractDecoder(parent), m_decoder(nullptr), m_decoderContext(nullptr),
      m_got_picture(0), m_picture(nullptr)
{
    avcodec_register_all();
    av_init_packet(&m_packet);
    m_decoder = avcodec_find_decoder(CODEC_ID_H264);
    if (!m_decoder)
    {
        QString str = QString("Can't find H264 decoder!");
        emit criticalError(str);
    }
    m_decoderContext = avcodec_alloc_context3(m_decoder);

    if (m_decoder->capabilities & CODEC_CAP_TRUNCATED)
        m_decoderContext->flags |= CODEC_FLAG_TRUNCATED;


    //we can receive truncated frames
    m_decoderContext->flags2 |= CODEC_FLAG2_CHUNKS;
    m_decoderContext->thread_count = 4; // TODO: arbitrary value; tuning it may make decoding faster
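    // Note: thread_count = 0 would let libavcodec pick the thread count
    // automatically from the number of CPU cores.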

    AVDictionary* dictionary = nullptr;
    if (avcodec_open2(m_decoderContext, m_decoder, &dictionary) < 0)
    {
        QString str = QString("Failed to open decoder!");
        emit criticalError(str);
    }
    qDebug() << "H264 Decoder successfully opened";
    m_picture = avcodec_alloc_frame();
}

H264Decoder::~H264Decoder()
{
    qDebug() << "ACHTUNG!!! H264Decoder deleted!!!\r\n\r\n";
    if (m_decoderContext)
    {
        avcodec_close(m_decoderContext);
        // allocated by avcodec_alloc_context3, so free with av_free, not delete
        av_free(m_decoderContext);
    }
    if (m_picture)
        av_free(m_picture); // allocated by avcodec_alloc_frame
}


bool H264Decoder::decodeFrame(SharedEncodedFrame orig_frame)
{
    Frame_t enc_frame;
    orig_frame >> enc_frame;
    m_packet.size = enc_frame.size();
    m_packet.data = enc_frame.data();

    qDebug() << "H264Decoder: received encoded frame with framesize " << enc_frame.size();

    while(m_packet.size > 0)
    {
        int got_picture;
        int len = avcodec_decode_video2(m_decoderContext, m_picture, &got_picture, &m_packet);
        if (len < 0)
        {
            QString err("Decoding error");
            qDebug() << err;
            return false;
        }
        if (got_picture)
        {
            qDebug() << "H264Decoder: frame decoded!";
            std::vector<unsigned char> result;
            this->storePicture(result);

            if ( m_picture->format == PIX_FMT_YUV420P )
            {
                static SwsContext *m_swsCtx = NULL;
                QImage frame_img = QImage(m_picture->width, m_picture->height, QImage::Format_RGB888);
                m_swsCtx = sws_getCachedContext ( m_swsCtx, m_picture->width,
                    m_picture->height, PIX_FMT_YUV420P,
                    m_picture->width, m_picture->height,
                    PIX_FMT_RGB24, SWS_GAUSS,
                    NULL, NULL, NULL );
                uint8_t *dstSlice[] = { frame_img.bits() };
                int dstStride = frame_img.bytesPerLine(); // QImage scanlines are 32-bit aligned, so width() * 3 can be too small
                if (sws_scale ( m_swsCtx, m_picture->data, m_picture->linesize,
                    0, m_picture->height, dstSlice, &dstStride ) != m_picture->height )
                {
                    qDebug() << "PIZDETS!!!";
                    exit(-5);
                }
                qDebug() << "New decoded image!";
                emit newDecodedFrame(frame_img);
            }
            else if (m_picture->format == PIX_FMT_RGB32)
            {
                QImage img = QImage(result.data(), m_picture->width, m_picture->height, QImage::Format_RGB32).copy(); // deep copy, since 'result' goes out of scope
                qDebug() << "New decoded image!";
                emit newDecodedFrame(img);
            }
            else if (m_picture->format == PIX_FMT_RGB24)
            {
                QImage img = QImage(result.data(), m_picture->width, m_picture->height, QImage::Format_RGB888).copy(); // deep copy, since 'result' goes out of scope
                qDebug() << "New decoded image!";
                emit newDecodedFrame(img);
            }
            else
            {
                QString err = QString( "Unsupported pixel format! Can't create QImage!");
                qDebug() << err;
                emit criticalError( err );
                return false;
            }
        }
        m_packet.size -= len;
        m_packet.data += len;
    }

    return true;
}
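
Since CODEC_FLAG2_CHUNKS lets the decoder buffer data internally, it may still hold frames when the stream ends. A minimal sketch of draining it with an empty packet, using the same old avcodec_decode_video2 API (this flush() helper is hypothetical and not declared in the class above):

    void H264Decoder::flush()
    {
        AVPacket flushPacket;
        av_init_packet(&flushPacket);
        flushPacket.data = nullptr; // an empty packet asks the decoder
        flushPacket.size = 0;       // to emit its buffered frames
        int got_picture = 1;
        while (got_picture)
        {
            if (avcodec_decode_video2(m_decoderContext, m_picture,
                                      &got_picture, &flushPacket) < 0)
                break;
            // each iteration with got_picture != 0 yields one more frame in m_picture
        }
    }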

void H264Decoder::storePicture(std::vector<unsigned char>& res)
{
    for (size_t i = 0; i < AV_NUM_DATA_POINTERS; i++)
    {
        if (!m_picture->data[i])
            continue;
        // For YUV420P the chroma planes are only half the luma height.
        const int planeHeight = (i == 0) ? m_picture->height : m_picture->height / 2;
        std::copy(m_picture->data[i], m_picture->data[i] +
            m_picture->linesize[i] * planeHeight, std::back_inserter(res));
    }
}

I send newDecodedFrame to the GUI thread, where the QImage is drawn on a widget.
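
A minimal sketch of that cross-thread wiring, assuming the decoder lives in a worker thread (the videoWidget object and showFrame(QImage) slot are illustrative names, not from the original code):

    // QImage is implicitly shared and copyable, so a queued connection
    // hands the frame over to the GUI thread safely.
    connect(m_decoder, SIGNAL(newDecodedFrame(QImage)),
            videoWidget, SLOT(showFrame(QImage)),
            Qt::QueuedConnection);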

P.S.: This is too long to post as a comment on the other answer.