Determine compressed/uncompressed buffer size for zlib in C

2019-08-27 11:41发布

问题:

I'm using zlib v1.2.7 to read a text file, compress it, and write the result to an output file. My problem is this: in the following example, how can I determine the output buffer size (gz_length in the example) based on the input size? I'm using this library on an STM32 device. I also added the unzip method, and I have the same problem there: determining the length of the uncompressed data.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "zlib.h"

int unzip(unsigned char *dst, unsigned long *dst_length, unsigned char *src, unsigned long src_length)
{
    z_stream stream;
    memset(&stream, 0, sizeof(stream));

    stream.next_in = src;
    stream.avail_in = src_length;

    stream.next_out = dst;
    stream.avail_out = *dst_length;

    int rv = inflateInit2(&stream, 15 + 16);
    if (Z_OK == rv) {
        rv = inflate(&stream, Z_NO_FLUSH);
        if (Z_STREAM_END == rv) {
            inflateEnd(&stream);
            rv = Z_OK;
        }
    }

    if (Z_OK == rv) {
        *dst_length = stream.total_out;
    } else {
        *dst_length = 0;
    }

    return rv;
}


int zip(unsigned char *dst, unsigned long *dst_length, unsigned char *src, unsigned long src_length)
{
    z_stream        stream;
    memset(&stream, 0, sizeof(stream));

    stream.next_in = src;
    stream.avail_in = src_length;

    stream.next_out = Z_NULL;
    stream.avail_out = 0;

    /* add 16 to MAX_WBITS to specify gzip format - it gets taken off again in defaultInit2 */
    int rv = deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 16 + MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
    if (Z_OK == rv) {
        unsigned long dst_bound = deflateBound(&stream, stream.avail_in) + 12; /* 12 bytes for the gzip header */
        if (dst_bound > *dst_length) {
            rv = Z_MEM_ERROR;
        } else {
            stream.next_out   = dst;
            stream.avail_out = dst_bound;
        }
    }

    if (Z_OK == rv) {
        gz_header        header;
        memset(&header, 0, sizeof(header));
        rv = deflateSetHeader(&stream, &header);
    }

    if (Z_OK == rv) {
        rv = deflate(&stream, Z_FINISH);
        if (Z_STREAM_END == rv) {
            rv = deflateEnd(&stream);
        }
    }

    if (Z_OK == rv) {
        *dst_length = stream.total_out;
    } else {
        *dst_length = 0;
    }

    return rv;
}

int main()
{
    unsigned long read_size;
    unsigned char *buffer = NULL;
    unsigned char *gz_buffer = NULL;
    unsigned long input_size = 0;

    /* Open your_file in read-only mode */
    FILE *fp = fopen("/local/new.txt", "r");
    fseek(fp, 0, SEEK_END); /* Go to end of file */
    input_size = ftell(fp); /* How many bytes did we pass ? */
    printf("Filesize: %ld\n", input_size);
    /* Set position of stream to the beginning */
    rewind(fp);
    /* Allocate the buffer (no need to initialize it with calloc) */
    buffer = (unsigned char*) malloc((input_size + 1) * sizeof(*buffer)); /* input_size + 1 byte for the \0 */
    /* Read the file into the buffer */
    fread(buffer, input_size, 1, fp); /* Read 1 chunk of input_size bytes from fp into buffer */
    /* NULL-terminate the buffer */
    buffer[input_size] = '\0';
    /* Print it ! */
    printf("FileData: %s\n", buffer);
    fclose(fp);
    //PROBLEM HERE how to determine valid gz_length for output based on input_size
    unsigned long gz_length = input_size + 50;
    gz_buffer = (unsigned char*) malloc((gz_length) * sizeof(*gz_buffer));

    if (input_size > 0)
    {
        int rv = zip(gz_buffer, &gz_length, buffer, input_size);
        if (Z_OK == rv)
        {
            FILE *ofp = fopen("/local/out.gz", "w");
            if (ofp)
            {
                int bw = fwrite(gz_buffer, 1, gz_length, ofp);
                fclose(ofp);
            }
        }
        else
        {
            printf("%s:%d: %d%s", __FILE__, __LINE__, rv, newline);
        }
    }
    free(buffer);
    free(gz_buffer);
    return 0;
}

回答1:

If you want to compress in a single deflate() call (which is not necessary by the way), zlib provides the function deflateBound() for exactly this purpose.

There is no useful bound in the other direction. By not useful, I mean that the bound on the size of the decompressed output is over a thousand times the size of the compressed input.



标签: c zlib