Why am I not getting my original string after compression and decompression?

2019-04-15 04:59发布

问题:

I am currently trying to use the java.util.zip.* package to perform lossless compression/Decompression.

I have also used the Apache Commons Codec jar to Base64-encode and Base64-decode the strings that are passed as arguments.

Following is my code, with two methods: one for compression and one for decompression.

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.zip.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Demo class from the question: attempts a compress/decompress round trip of a
 * string using {@link Deflater}/{@link Inflater} with Base64 conversion on both
 * ends. As the printed output shows, the round trip does NOT reproduce the
 * original string.
 */
public class main {

    /**
     * Base64-decodes the argument, deflates the resulting bytes and returns the
     * Base64 encoding of the output buffer.
     *
     * @param stringToCompress the text to compress; note that it is passed to
     *                         {@code Base64.decodeBase64}, i.e. it is treated as
     *                         Base64 data rather than as plain text
     * @return Base64 encoding of the entire 100-byte output buffer
     */
    public String compress(String stringToCompress) throws UnsupportedEncodingException
    {       
        //System.out.println("String to Be Compressed :: " + stringToCompress);
        // The input is interpreted as Base64 here, not converted with getBytes().
        byte[] input = Base64.decodeBase64(stringToCompress);

        Deflater compressor = new Deflater();
        compressor.setInput(input);
        compressor.finish();

        // Fixed-size buffer; the count returned by deflate() is discarded, so the
        // whole array (including any unused tail) is encoded below.
        byte[] output = new byte[100];
        compressor.deflate(output);
        return Base64.encodeBase64String(output);
    }

    /**
     * Base64-decodes the argument, inflates it and returns the Base64 encoding
     * of the output buffer.
     *
     * @param stringToDecompress Base64 text as produced by {@link #compress(String)}
     * @return Base64 encoding of the entire 100-byte output buffer (not a plain
     *         text string built from the inflated bytes)
     * @throws DataFormatException declared because {@code Inflater.inflate} may throw it
     */
    public String decompressToString(String stringToDecompress) throws UnsupportedEncodingException, DataFormatException
    {   
        //System.out.println("String to be Decompressed :: " + stringToDecompress);
        byte[] input = Base64.decodeBase64(stringToDecompress);

        Inflater deCompressor = new Inflater();
        deCompressor.setInput(input,0,input.length);

        // Fixed-size buffer; the count returned by inflate() is discarded as well.
        byte[] output = new byte[100];
        deCompressor.inflate(output);
        deCompressor.end();

        // Re-encodes the inflated bytes as Base64 instead of building a String from them.
        return Base64.encodeBase64String(output);
    }


    /**
     * Runs one compress/decompress round trip on a sample string and prints the
     * original, the compressed form and the decompressed result.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        main m = new main();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        String compressedString  = m.compress(strToBeCompressed) ;
        String deCompressedString = m.decompressToString(compressedString);

        System.out.println("Original :: " + strToBeCompressed);
        System.out.println("Compressed :: " + compressedString);
        System.out.println("decompressed :: " + deCompressedString);
    }
}

Here is the output.

Original :: jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla
Compressed :: eJwBPQDC/44Y5LHYYH5I3bH4ZI4Y725ZGo55ZHX5r5ZLI33aL242ornYb2nY72o4L6IoGr4oKIGroLor2nX4Yo245JXcvx/9AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==
decompressed :: jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhvadjvajgvoigavigogauguivadfhijbjklQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==

As you can see from the output, the original and the decompressed strings do not match. I don't know why. Can anyone tell me the reason?

回答1:

I think that you should separate concerns and treat compression, uncompression, base 64 encode and base64 decode as separate concerns in separate methods. I am not able to deduce why you have involved Base64 - perhaps there is a good reason. Maybe you want the compressed string to be Base64 encoded?

Anyway, here is a version of your code that can compress and uncompress the string without any loss (but no Base64 involved):

package dk.tbsalling.stackoverflow;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.zip.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Demonstrates a lossless compress/decompress round trip of a string using
 * {@link Deflater} and {@link Inflater} (no Base64 involved).
 */
public class App
{
    /** Size of the scratch buffer used for each deflate/inflate pass. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Compresses the UTF-8 bytes of a string.
     *
     * @param stringToCompress the text to compress
     * @return the complete compressed data, whatever its length
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private byte[] compress(String stringToCompress) throws UnsupportedEncodingException
    {
        byte[] stringAsBytes = stringToCompress.getBytes("UTF-8");

        Deflater compressor = new Deflater();
        try {
            compressor.setInput(stringAsBytes);
            compressor.finish();

            // A single deflate() call fills at most one buffer, so keep draining
            // until the compressor reports that all output has been produced.
            java.io.ByteArrayOutputStream compressed = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            while (!compressor.finished()) {
                int count = compressor.deflate(buffer);
                compressed.write(buffer, 0, count);
            }
            return compressed.toByteArray();
        } finally {
            // Release the compressor's resources even if something above throws.
            compressor.end();
        }
    }

    /**
     * Decompresses data produced by {@link #compress(String)} back into a string.
     *
     * @param compressedData the complete compressed data
     * @return the decompressed bytes decoded as UTF-8
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws DataFormatException if the data is corrupt, truncated, or needs a
     *                             preset dictionary
     */
    private String decompressToString(byte[] compressedData) throws UnsupportedEncodingException, DataFormatException
    {
        Inflater deCompressor = new Inflater();
        try {
            deCompressor.setInput(compressedData, 0, compressedData.length);

            java.io.ByteArrayOutputStream decompressed = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            while (!deCompressor.finished()) {
                int count = deCompressor.inflate(buffer);
                // No progress and no way to make any: the stream ended early or
                // expects a dictionary. Fail instead of looping forever.
                if (count == 0 && (deCompressor.needsInput() || deCompressor.needsDictionary())) {
                    throw new DataFormatException("Compressed data is truncated or requires a preset dictionary");
                }
                decompressed.write(buffer, 0, count);
            }
            return decompressed.toString("UTF-8");
        } finally {
            deCompressor.end();
        }
    }

    /**
     * Runs one round trip on a sample string and prints the original, the
     * compressed data reference and the decompressed result.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        App m = new App();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        byte[] compressedData  = m.compress(strToBeCompressed);
        String deCompressedString = m.decompressToString(compressedData);

        System.out.println("Original     :: " + strToBeCompressed.length() + " " + strToBeCompressed);
        // toString() on an array prints its type and identity hash, not its contents.
        System.out.println("Compressed   :: " + compressedData.toString());
        System.out.println("decompressed :: " + deCompressedString.length() + " " + deCompressedString);
    }
}

This produces output:

Original     :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla
Compressed   :: [B@3ced0338
decompressed :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla

Process finished with exit code 0

UPDATE

Here is the code for producing Base64-encoded representation of the compressed string:

package dk.tbsalling.stackoverflow;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.zip.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Demonstrates a lossless compress/decompress round trip of a string using
 * {@link Deflater} and {@link Inflater}, with the compressed data represented
 * as a Base64 string.
 */
public class App
{
    /** Size of the scratch buffer used for each deflate/inflate pass. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Compresses the UTF-8 bytes of a string and Base64-encodes the result.
     *
     * @param stringToCompress the text to compress
     * @return Base64 encoding of the complete compressed data
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private String compress(String stringToCompress) throws UnsupportedEncodingException
    {
        byte[] stringAsBytes = stringToCompress.getBytes("UTF-8");

        Deflater compressor = new Deflater();
        try {
            compressor.setInput(stringAsBytes);
            compressor.finish();

            // A single deflate() call fills at most one buffer, so keep draining
            // until the compressor reports that all output has been produced.
            java.io.ByteArrayOutputStream compressed = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            while (!compressor.finished()) {
                int count = compressor.deflate(buffer);
                compressed.write(buffer, 0, count);
            }
            return Base64.encodeBase64String(compressed.toByteArray());
        } finally {
            // Release the compressor's resources even if something above throws.
            compressor.end();
        }
    }

    /**
     * Base64-decodes and decompresses a string produced by {@link #compress(String)}.
     *
     * @param base64String Base64 encoding of the complete compressed data
     * @return the decompressed bytes decoded as UTF-8
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws DataFormatException if the data is corrupt, truncated, or needs a
     *                             preset dictionary
     */
    private String decompressToString(String base64String) throws UnsupportedEncodingException, DataFormatException
    {
        byte[] compressedData = Base64.decodeBase64(base64String);

        Inflater deCompressor = new Inflater();
        try {
            deCompressor.setInput(compressedData, 0, compressedData.length);

            java.io.ByteArrayOutputStream decompressed = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            while (!deCompressor.finished()) {
                int count = deCompressor.inflate(buffer);
                // No progress and no way to make any: the stream ended early or
                // expects a dictionary. Fail instead of looping forever.
                if (count == 0 && (deCompressor.needsInput() || deCompressor.needsDictionary())) {
                    throw new DataFormatException("Compressed data is truncated or requires a preset dictionary");
                }
                decompressed.write(buffer, 0, count);
            }
            return decompressed.toString("UTF-8");
        } finally {
            deCompressor.end();
        }
    }

    /**
     * Runs one round trip on a sample string and prints the original, the
     * Base64-encoded compressed form and the decompressed result.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        App m = new App();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        String compressedData  = m.compress(strToBeCompressed);
        String deCompressedString = m.decompressToString(compressedData);

        System.out.println("Original     :: " + strToBeCompressed.length() + " " + strToBeCompressed);
        System.out.println("Compressed   :: " + compressedData);
        System.out.println("decompressed :: " + deCompressedString.length() + " " + deCompressedString);
    }
}

This produces output:

Original     :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla
Compressed   :: eJwNxMkNwDAIBMBW3BoW4lhI/LDY+pN5DAJ1NdwKei0KAe4uwdul9rDrwvRwQ3I0uETxB+dJX8L04zI+SVGLxEa1fNDSIlU=
decompressed :: 85 jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla

Process finished with exit code 0


回答2:

Buffer Problems

Your output buffer for both compress() and decompressToString() has a fixed size of 100. The output is smaller than 100 bytes however, so the end of the array will be unused (full of zeroes). When this is translated to Base64 the zeroes come out as A characters (the == is padding).

You need to only consider the part of the buffer that contains data and ignore the rest. The methods inflate() and deflate() return the number of bytes they filled. Unfortunately Apache's Base64 converter does not support ranges within an array, so you'll have to resize the buffer:

// Scratch buffer; the compressed data may occupy only part of it.
byte[] output = new byte[100];
// deflate() returns the number of bytes it actually wrote into the buffer.
int size = compressor.deflate(output);
// Trim the array to that length so the unused, zero-filled tail is not encoded.
output = Arrays.copyOf(output, size);

and similarly for decompressToString().

This solves the problem of the buffer not filling up completely but brings up an even bigger problem: the buffer could overflow. If the size of the compressed or decompressed string is greater than 100 bytes you will need to call inflate() and deflate() multiple times to get all the data.

Base 64 Problems

Currently, the input string to compress() is interpreted as a Base64 string. Similarly, the string returned from decompressToString() is data encoded as a Base64 string.

I think your intention is for the original string to be unrestricted. In compress(), instead of getting the byte array from the input string using Base64.decodeBase64(stringToCompress) simply use stringToCompress.getBytes(). The reverse should happen in decompressToString(): change Base64.encodeBase64String(output) to new String(output). You can use the String(byte[] bytes, int offset, int length) overload to specify a subrange of the output array instead of creating a copy.

Complete Code

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.zip.*;
import java.util.*;

import org.apache.commons.codec.binary.Base64;

/**
 * Compress/decompress round trip of a plain-text string using {@link Deflater}
 * and {@link Inflater}; only the compressed form is Base64-encoded.
 */
public class main {

    /** Size of the scratch buffer used for each deflate/inflate pass. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Compresses the UTF-8 bytes of a string and Base64-encodes the result.
     *
     * @param stringToCompress the plain text to compress (any characters allowed)
     * @return Base64 encoding of the complete compressed data
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    public String compress(String stringToCompress) throws UnsupportedEncodingException
    {
        // Explicit charset so the result does not depend on the platform default.
        byte[] input = stringToCompress.getBytes("UTF-8");

        Deflater compressor = new Deflater();
        try {
            compressor.setInput(input);
            compressor.finish();

            // The compressed data can exceed one buffer, so call deflate()
            // repeatedly until the compressor reports it is finished.
            java.io.ByteArrayOutputStream compressed = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            while (!compressor.finished()) {
                int size = compressor.deflate(buffer);
                compressed.write(buffer, 0, size);
            }
            return Base64.encodeBase64String(compressed.toByteArray());
        } finally {
            // Release the compressor's resources even if something above throws.
            compressor.end();
        }
    }

    /**
     * Base64-decodes and decompresses a string produced by {@link #compress(String)}.
     *
     * @param stringToDecompress Base64 encoding of the complete compressed data
     * @return the decompressed bytes decoded as UTF-8
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws DataFormatException if the data is corrupt, truncated, or needs a
     *                             preset dictionary
     */
    public String decompressToString(String stringToDecompress) throws UnsupportedEncodingException, DataFormatException
    {
        byte[] input = Base64.decodeBase64(stringToDecompress);

        Inflater deCompressor = new Inflater();
        try {
            deCompressor.setInput(input, 0, input.length);

            java.io.ByteArrayOutputStream decompressed = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            while (!deCompressor.finished()) {
                int size = deCompressor.inflate(buffer);
                // No progress and no way to make any: the stream ended early or
                // expects a dictionary. Fail instead of looping forever.
                if (size == 0 && (deCompressor.needsInput() || deCompressor.needsDictionary())) {
                    throw new DataFormatException("Compressed data is truncated or requires a preset dictionary");
                }
                decompressed.write(buffer, 0, size);
            }
            return decompressed.toString("UTF-8");
        } finally {
            deCompressor.end();
        }
    }


    /**
     * Runs one round trip on a sample string and prints the original, the
     * Base64-encoded compressed form and the decompressed result.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, DataFormatException {
        main m = new main();
        String strToBeCompressed  = "jhjksdhgfkjdsfhkjhjvblkajnlkdfmvlksjfdovbjaiudhv adjv ajgvoig avigogauguivadfhijbjkla";
        String compressedString  = m.compress(strToBeCompressed) ;
        String deCompressedString = m.decompressToString(compressedString);

        System.out.println("Original :: " + strToBeCompressed);
        System.out.println("Compressed :: " + compressedString);
        System.out.println("decompressed :: " + deCompressedString);
    }
}

Diff

@@ -3,2 +3,3 @@
 import java.util.zip.*;
+import java.util.*;

@@ -11,3 +12,3 @@
         //System.out.println("String to Be Compressed :: " + stringToCompress);
-        byte[] input = Base64.decodeBase64(stringToCompress);
+        byte[] input = stringToCompress.getBytes();

@@ -18,3 +19,4 @@
         byte[] output = new byte[100];
-        compressor.deflate(output);
+        int size = compressor.deflate(output);
+        output = Arrays.copyOf(output, size);
         return Base64.encodeBase64String(output);
@@ -31,6 +33,6 @@
         byte[] output = new byte[100];
-        deCompressor.inflate(output);
+        int size = deCompressor.inflate(output);
         deCompressor.end();

-        return Base64.encodeBase64String(output);
+        return new String(output, 0, size);
     }