Get the width / height of the video from H.264 NAL

2019-01-23 08:33发布

问题:

I have gotten the SPS in NALU ( AVC Decoder Configuration Record ), and trying to parse the video width / height from it.

67 64 00 15 ac c8 60 20 09 6c 04 40 00 00 03 00 40 00 00 07 a3 c5 8b 67 80 

This is my code parse the SPS but gets the wrong values. pic_width_in_mbs_minus1 is 5, and pic_height_in_map_units_minus1 is 1. Actually the video is 512 X 288px

typedef struct _SequenceParameterSet
{
private:
    const unsigned char * m_pStart;
    unsigned short m_nLength;
    int m_nCurrentBit;

    unsigned int ReadBit()
    {
        ATLASSERT(m_nCurrentBit <= m_nLength * 8);
        int nIndex = m_nCurrentBit / 8;
        int nOffset = m_nCurrentBit % 8 + 1;

        m_nCurrentBit ++;
        return (m_pStart[nIndex] >> (8-nOffset)) & 0x01;
    }

    unsigned int ReadBits(int n)
    {
        int r = 0;
        int i;
        for (i = 0; i < n; i++)
        {
            r |= ( ReadBit() << ( n - i - 1 ) );
        }
        return r;
    }

    unsigned int ReadExponentialGolombCode()
    {
        int r = 0;
        int i = 0;

        while( (ReadBit() == 0) && (i < 32) )
        {
            i++;
        }
        r = ReadBits(i);
        r += (1 << i) - 1;
        return r;
    }


    unsigned int ReadSE() 
    {
        int r = ReadExponentialGolombCode();
        if (r & 0x01)
        {
            r = (r+1)/2;
        }
        else
        {
            r = -(r/2);
        }
        return r;
    }
public:

    void Parse(const unsigned char * pStart, unsigned short nLen)
    {
        m_pStart = pStart;
        m_nLength = nLen;
        m_nCurrentBit = 0;
        int profile_idc = ReadBits(8);
        int constraint_set0_flag = ReadBit();
        int constraint_set1_flag = ReadBit();
        int constraint_set2_flag = ReadBit();
        int constraint_set3_flag = ReadBit();
        int constraint_set4_flag = ReadBit();
        int constraint_set5_flag = ReadBit();
        int reserved_zero_2bits  = ReadBits(2);
        int level_idc = ReadBits(8);
        int seq_parameter_set_id = ReadExponentialGolombCode();

        if( profile_idc == 100 || profile_idc == 110 ||
            profile_idc == 122 || profile_idc == 144 )
        {
            int chroma_format_idc = ReadExponentialGolombCode();
            if( chroma_format_idc == 3 )
            {
                int residual_colour_transform_flag = ReadBit();
            }
            int bit_depth_luma_minus8 = ReadExponentialGolombCode();
            int bit_depth_chroma_minus8 = ReadExponentialGolombCode();
            int qpprime_y_zero_transform_bypass_flag = ReadBit();
            int seq_scaling_matrix_present_flag = ReadBit();
            if( seq_scaling_matrix_present_flag )
            {
                for( int i = 0; i < 8; i++ )
                {
                    int seq_scaling_list_present_flag = ReadBit();
                    if( seq_scaling_list_present_flag )
                    {
                        /*
                        if( i < 6 )
                        {
                            read_scaling_list( b, sps->ScalingList4x4[ i ], 16,
                                sps->UseDefaultScalingMatrix4x4Flag[ i ]);
                        }
                        else
                        {
                            read_scaling_list( b, sps->ScalingList8x8[ i - 6 ], 64,
                                sps->UseDefaultScalingMatrix8x8Flag[ i - 6 ] );
                        }
                        */
                    }
                }
            }
        }
        int log2_max_frame_num_minus4 = ReadExponentialGolombCode();
        int pic_order_cnt_type = ReadExponentialGolombCode();
        if( pic_order_cnt_type == 0 )
        {
            int log2_max_pic_order_cnt_lsb_minus4 = ReadExponentialGolombCode();
        }
        else if( pic_order_cnt_type == 1 )
        {
            int delta_pic_order_always_zero_flag = ReadBit();
            int offset_for_non_ref_pic = ReadSE();
            int offset_for_top_to_bottom_field = ReadSE();
            int num_ref_frames_in_pic_order_cnt_cycle = ReadExponentialGolombCode();
            for( int i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
            {
                ReadSE();
                //sps->offset_for_ref_frame[ i ] = ReadSE();
            }
        }
        int num_ref_frames = ReadExponentialGolombCode();
        int gaps_in_frame_num_value_allowed_flag = ReadBit();
        int pic_width_in_mbs_minus1 = ReadExponentialGolombCode();
        int pic_height_in_map_units_minus1 = ReadExponentialGolombCode();
        int frame_mbs_only_flag = ReadBit();
        if( !frame_mbs_only_flag )
        {
            int mb_adaptive_frame_field_flag = ReadBit();
        }
        int direct_8x8_inference_flag = ReadBit();
        int frame_cropping_flag = ReadBit();
        if( frame_cropping_flag )
        {
            int frame_crop_left_offset = ReadExponentialGolombCode();
            int frame_crop_right_offset = ReadExponentialGolombCode();
            int frame_crop_top_offset = ReadExponentialGolombCode();
            int frame_crop_bottom_offset = ReadExponentialGolombCode();
        }
        int vui_parameters_present_flag = ReadBit();

        pStart++;
    }
}SequenceParameterSet, *LPSequenceParameterSet;

This is my code parse the SPS but gets the wrong values. pic_width_in_mbs_minus1 is 5, and pic_height_in_map_units_minus1 is 1. Actually the video is 512 X 288px

Thanks

回答1:

#include <stdio.h>
#include <stdlib.h>    
#include <string.h>
#include <assert.h>

const unsigned char * m_pStart;
unsigned short m_nLength;
int m_nCurrentBit;

unsigned int ReadBit()
{
    assert(m_nCurrentBit <= m_nLength * 8);
    int nIndex = m_nCurrentBit / 8;
    int nOffset = m_nCurrentBit % 8 + 1;

    m_nCurrentBit ++;
    return (m_pStart[nIndex] >> (8-nOffset)) & 0x01;
}

unsigned int ReadBits(int n)
{
    int r = 0;
    int i;
    for (i = 0; i < n; i++)
    {
        r |= ( ReadBit() << ( n - i - 1 ) );
    }
    return r;
}

unsigned int ReadExponentialGolombCode()
{
    int r = 0;
    int i = 0;

    while( (ReadBit() == 0) && (i < 32) )
    {
        i++;
    }

    r = ReadBits(i);
    r += (1 << i) - 1;
    return r;
}

unsigned int ReadSE() 
{
    int r = ReadExponentialGolombCode();
    if (r & 0x01)
    {
        r = (r+1)/2;
    }
    else
    {
        r = -(r/2);
    }
    return r;
}

void Parse(const unsigned char * pStart, unsigned short nLen)
{
    m_pStart = pStart;
    m_nLength = nLen;
    m_nCurrentBit = 0;

    int frame_crop_left_offset=0;
    int frame_crop_right_offset=0;
    int frame_crop_top_offset=0;
    int frame_crop_bottom_offset=0;

    int profile_idc = ReadBits(8);          
    int constraint_set0_flag = ReadBit();   
    int constraint_set1_flag = ReadBit();   
    int constraint_set2_flag = ReadBit();   
    int constraint_set3_flag = ReadBit();   
    int constraint_set4_flag = ReadBit();   
    int constraint_set5_flag = ReadBit();   
    int reserved_zero_2bits  = ReadBits(2); 
    int level_idc = ReadBits(8);            
    int seq_parameter_set_id = ReadExponentialGolombCode();


    if( profile_idc == 100 || profile_idc == 110 ||
        profile_idc == 122 || profile_idc == 244 ||
        profile_idc == 44 || profile_idc == 83 ||
        profile_idc == 86 || profile_idc == 118 )
    {
        int chroma_format_idc = ReadExponentialGolombCode();

        if( chroma_format_idc == 3 )
        {
            int residual_colour_transform_flag = ReadBit();         
        }
        int bit_depth_luma_minus8 = ReadExponentialGolombCode();        
        int bit_depth_chroma_minus8 = ReadExponentialGolombCode();      
        int qpprime_y_zero_transform_bypass_flag = ReadBit();       
        int seq_scaling_matrix_present_flag = ReadBit();        

        if (seq_scaling_matrix_present_flag) 
        {
            int i=0;
            for ( i = 0; i < 8; i++) 
            {
                int seq_scaling_list_present_flag = ReadBit();
                if (seq_scaling_list_present_flag) 
                {
                    int sizeOfScalingList = (i < 6) ? 16 : 64;
                    int lastScale = 8;
                    int nextScale = 8;
                    int j=0;
                    for ( j = 0; j < sizeOfScalingList; j++) 
                    {
                        if (nextScale != 0) 
                        {
                            int delta_scale = ReadSE();
                            nextScale = (lastScale + delta_scale + 256) % 256;
                        }
                        lastScale = (nextScale == 0) ? lastScale : nextScale;
                    }
                }
            }
        }
    }

    int log2_max_frame_num_minus4 = ReadExponentialGolombCode();
    int pic_order_cnt_type = ReadExponentialGolombCode();
    if( pic_order_cnt_type == 0 )
    {
        int log2_max_pic_order_cnt_lsb_minus4 = ReadExponentialGolombCode();
    }
    else if( pic_order_cnt_type == 1 )
    {
        int delta_pic_order_always_zero_flag = ReadBit();
        int offset_for_non_ref_pic = ReadSE();
        int offset_for_top_to_bottom_field = ReadSE();
        int num_ref_frames_in_pic_order_cnt_cycle = ReadExponentialGolombCode();
        int i;
        for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
        {
            ReadSE();
            //sps->offset_for_ref_frame[ i ] = ReadSE();
        }
    }
    int max_num_ref_frames = ReadExponentialGolombCode();
    int gaps_in_frame_num_value_allowed_flag = ReadBit();
    int pic_width_in_mbs_minus1 = ReadExponentialGolombCode();
    int pic_height_in_map_units_minus1 = ReadExponentialGolombCode();
    int frame_mbs_only_flag = ReadBit();
    if( !frame_mbs_only_flag )
    {
        int mb_adaptive_frame_field_flag = ReadBit();
    }
    int direct_8x8_inference_flag = ReadBit();
    int frame_cropping_flag = ReadBit();
    if( frame_cropping_flag )
    {
        frame_crop_left_offset = ReadExponentialGolombCode();
        frame_crop_right_offset = ReadExponentialGolombCode();
        frame_crop_top_offset = ReadExponentialGolombCode();
        frame_crop_bottom_offset = ReadExponentialGolombCode();
    }
    int vui_parameters_present_flag = ReadBit();
    pStart++;

    int Width = ((pic_width_in_mbs_minus1 +1)*16) - frame_crop_bottom_offset*2 - frame_crop_top_offset*2;
    int Height = ((2 - frame_mbs_only_flag)* (pic_height_in_map_units_minus1 +1) * 16) - (frame_crop_right_offset * 2) - (frame_crop_left_offset * 2);

    printf("\n\nWxH = %dx%d\n\n",Width,Height); 

}

void FindJPGFileResolution(char *cpFileName, int *ipWidth, int *ipHeight) 
{   
    int i;

    FILE *fp = fopen(cpFileName,"rb");
    fseek(fp,0,SEEK_END);
    long len = ftell(fp);
    fseek(fp,0,SEEK_SET);

    unsigned char *ucpInBuffer = (unsigned char*) malloc (len+1);
    fread(ucpInBuffer,1,len,fp);
    fclose(fp);

    printf("\n\nBuffer size %ld\n", len);   
    for(i=0;i<len;i++)
    {
        //printf(" %x", ucpInBuffer[i]);    
        if( 
            (ucpInBuffer[i]==0x00) && (ucpInBuffer[i+1]==0x00) && 
            (ucpInBuffer[i+2]==0x00) && (ucpInBuffer[i+3]==0x01) 
          )
        {
            //if(ucpInBuffer[i+4] & 0x0F ==0x07)
            if(ucpInBuffer[i+4] == 0x67 || ucpInBuffer[i+4] == 0x27)
            {
                Parse(&ucpInBuffer[i+5], len);              
                break;
            }
        }       
    }

    free(ucpInBuffer);
    return;
}


int main()               
{
    int iHeight=0, iWidth=0;
    char *cpFileName = "/home/pankaj/pankil/SameSystem_H264_1920x320.264";
    FindJPGFileResolution(cpFileName, &iWidth, &iHeight);
    return 0;
}

Try this coding. This is working successfully, I have already tested with gcc compiler. Just set your file path compile and run it.



回答2:

The SPS is (excerpt):

profile_idc 100 
constraint_set0_flag 0 
constraint_set1_flag 0 
constraint_set2_flag 0 
constraint_set3_flag 0 
level_idc 21 
seq_parameter_set_id 0 
chroma_format_idc 1 
num_ref_frames 5 
gaps_in_frame_num_value_allowed_flag 0 
pic_width_in_mbs_minus1 31 
pic_height_in_map_units_minus1 17 
frame_mbs_only_flag 1 
direct_8x8_inference_flag 1 
frame_cropping_flag 0 
vui_parameters_present_flag 1 

It's 512x288 video, see details on how to decode data here: Fetching the dimensions of a H264Video stream



回答3:

I encountered an error with above sample code:

int Width = ((pic_width_in_mbs_minus1 +1)*16) - frame_crop_bottom_offset*2 - frame_crop_top_offset*2;
int Height = ((2 - frame_mbs_only_flag)* (pic_height_in_map_units_minus1 +1) * 16) - (frame_crop_right_offset * 2) - (frame_crop_left_offset * 2);

Should be:

int Width = ((pic_width_in_mbs_minus1 +1)*16) - frame_crop_right_offset *2 - frame_crop_left_offset *2;
int Height = ((2 - frame_mbs_only_flag)* (pic_height_in_map_units_minus1 +1) * 16) - (frame_crop_bottom_offset* 2) - (frame_crop_top_offset* 2);

Otherwise you will get wrong resolution when frame_cropping_flag is 1. Hope this help somebody else.



回答4:

I can confirm that the code of Kaidul with the correction of Alan Lu works flawlessly. Note that Parse(const unsigned char * pStart, unsigned short nLen) takes a h264 SPS nalu with an offset of 5 ( remove the '0 0 0 1' prefix and the nal_unit_type byte )