开发者

How can I Zip and Unzip a string using GZIPOutputStream that is compatible with .Net?

开发者 https://www.devze.com 2023-03-21 04:51 出处:网络
I need an example for compressing a string using GZip in Android. I want to send a string like "hello" to the method and get the following zipped string:

I need an example for compressing a string using GZip in android. I want to send a string like "hello" to the method and get the following zipped string:

BQAAAB+LCAAAAAAABADtvQdgHEmWJSYvbcp7f0r1StfgdKEIgGATJNiQQBDswYjN5pLsHWlHIymrKoHKZVZlXWYWQMztnbz33nvvvffee++997o7nU4n99//P1xmZAFs9s5K2smeIYCqyB8/fnwfPyLmeVlW/w+GphA2BQAAAA==

Then I need to decompress it. Can anybody give me an example and complete the following methods?

// Placeholder from the question: the body is intentionally left unimplemented.
// The asker wants this to turn `input` into the zipped string shown above.
private String compressString(String input) {
    //...
}

// Placeholder from the question: the body is intentionally left unimplemented.
// The asker wants this to reverse compressString and return the original text.
private String decompressString(String input) {
    //...
}

Thanks,


update

According to scessor's answer, Now I have the following 4 methods. Android and .net compress and decompress methods. These methods are compatible with each other except in one case. I mean they are compatible in the first 3 states but incompatible in the 4th state:

  • state 1) Android.compress <-> Android.decompress: (OK)
  • state 2) Net.compress <-> Net.decompress: (OK)
  • state 3) Net.compress -> Android.decompress: (OK)
  • state 4) Android.compress -> .Net.decompress: (NOT OK)

can anybody solve it?

Android methods:

/**
 * Compresses a string into the length-prefixed GZIP format used by the .NET
 * methods on this page and returns it Base64-encoded.
 *
 * Layout of the bytes before Base64 encoding:
 *   bytes 0..3  little-endian 32-bit length of the UTF-8 encoded input
 *   bytes 4..   the GZIP stream
 *
 * @param str text to compress
 * @return Base64 text of the prefixed GZIP data
 * @throws IOException if encoding or compression fails
 */
public static String compress(String str) throws IOException {
    // Encode once and explicitly as UTF-8: the prefix must hold the number of
    // encoded BYTES (what the .NET side allocates), not str.length(), which
    // counts UTF-16 code units and differs for any non-ASCII text.
    byte[] data = str.getBytes("UTF-8");

    byte[] lengthPrefix = ByteBuffer
            .allocate(4)
            .order(java.nio.ByteOrder.LITTLE_ENDIAN)
            .putInt(data.length)
            .array();

    ByteArrayOutputStream os = new ByteArrayOutputStream(data.length);
    GZIPOutputStream gos = new GZIPOutputStream(os);
    try {
        gos.write(data);
    } finally {
        // close() finishes the stream and writes the GZIP trailer
        gos.close();
    }

    // Snapshot the compressed bytes once instead of copying them per use.
    byte[] gzipped = os.toByteArray();
    byte[] compressed = new byte[4 + gzipped.length];
    System.arraycopy(lengthPrefix, 0, compressed, 0, 4);
    System.arraycopy(gzipped, 0, compressed, 4, gzipped.length);
    return Base64.encode(compressed);
}

/**
 * Decodes Base64 text produced by the matching compress method, skips the
 * 4-byte length prefix and gunzips the remainder.
 *
 * @param zipText Base64 text of a 4-byte prefix followed by a GZIP stream
 * @return the decompressed text decoded as UTF-8, or "" when the decoded
 *         input is 4 bytes or shorter (i.e. carries no GZIP payload)
 * @throws IOException if the payload is not valid GZIP data
 */
public static String decompress(String zipText) throws IOException {
    byte[] compressed = Base64.decode(zipText);
    if (compressed.length <= 4) {
        return "";
    }

    // Skip the 4-byte prefix; GZIPInputStream needs to start at the 1f 8b magic.
    GZIPInputStream gzipInputStream = new GZIPInputStream(
            new ByteArrayInputStream(compressed, 4, compressed.length - 4));
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int read;
        while ((read = gzipInputStream.read(chunk)) != -1) {
            baos.write(chunk, 0, read);
        }
        // Decode only once, after all bytes are collected.
        return new String(baos.toByteArray(), "UTF-8");
    } finally {
        gzipInputStream.close();
    }
}

.Net methods:

/// <summary>
/// GZip-compresses <paramref name="text"/> (encoded as UTF-8), prepends the
/// 4-byte uncompressed length and returns the result as Base64.
/// </summary>
/// <param name="text">Text to compress.</param>
/// <returns>
/// Base64 of: 4 bytes from <c>BitConverter.GetBytes(length)</c>, followed by
/// the GZip stream.
/// </returns>
public static string compress(string text)
{
    byte[] buffer = Encoding.UTF8.GetBytes(text);

    byte[] compressed;
    using (MemoryStream ms = new MemoryStream())
    {
        // leaveOpen: true so the MemoryStream survives disposing the GZipStream,
        // which is what flushes the final block and trailer.
        using (GZipStream zip = new GZipStream(ms, CompressionMode.Compress, true))
        {
            zip.Write(buffer, 0, buffer.Length);
        }

        compressed = ms.ToArray();
    }

    byte[] gzBuffer = new byte[compressed.Length + 4];
    // NOTE(review): BitConverter uses the machine's byte order; the Java side
    // on this page reads/writes the prefix as little-endian.
    System.Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gzBuffer, 0, 4);
    System.Buffer.BlockCopy(compressed, 0, gzBuffer, 4, compressed.Length);
    return Convert.ToBase64String(gzBuffer);
}

/// <summary>
/// Reverses <c>compress</c>: decodes the Base64 text, reads the 4-byte length
/// prefix, and inflates the GZip stream that follows it.
/// </summary>
/// <param name="compressedText">Base64 of a 4-byte length prefix plus GZip data.</param>
/// <returns>The decompressed bytes decoded as UTF-8.</returns>
public static string decompress(string compressedText)
{
    byte[] gzBuffer = Convert.FromBase64String(compressedText);
    int msgLength = BitConverter.ToInt32(gzBuffer, 0);
    byte[] buffer = new byte[msgLength];

    using (MemoryStream ms = new MemoryStream(gzBuffer, 4, gzBuffer.Length - 4))
    using (GZipStream zip = new GZipStream(ms, CompressionMode.Decompress))
    {
        // Stream.Read may return fewer bytes than requested, so keep reading
        // until the buffer is full or the stream ends. A single Read call can
        // silently truncate the output and leave trailing NUL characters.
        int total = 0;
        while (total < buffer.Length)
        {
            int read = zip.Read(buffer, total, buffer.Length - total);
            if (read == 0)
            {
                break;
            }

            total += read;
        }

        // Decode only the bytes actually produced.
        return Encoding.UTF8.GetString(buffer, 0, total);
    }
}


The GZIP methods:

/**
 * GZIP-compresses the UTF-8 encoding of a string.
 *
 * @param string text to compress
 * @return the raw GZIP stream
 * @throws IOException if encoding or compression fails
 */
public static byte[] compress(String string) throws IOException {
    // Explicit charset: getBytes() without one uses the platform default, so
    // the output would depend on the device it runs on.
    byte[] raw = string.getBytes("UTF-8");
    ByteArrayOutputStream os = new ByteArrayOutputStream(raw.length);
    GZIPOutputStream gos = new GZIPOutputStream(os);
    try {
        gos.write(raw);
    } finally {
        // close() writes the GZIP trailer; must happen before toByteArray()
        gos.close();
    }
    return os.toByteArray();
}

/**
 * Gunzips a byte array and decodes the result as UTF-8 text.
 *
 * @param compressed a complete GZIP stream
 * @return the decompressed text
 * @throws IOException if the input is not valid GZIP data
 */
public static String decompress(byte[] compressed) throws IOException {
    final int BUFFER_SIZE = 32;
    GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed), BUFFER_SIZE);
    try {
        // Collect raw bytes first and decode once at the end. Decoding each
        // 32-byte chunk separately corrupts any multi-byte character that
        // happens to straddle a chunk boundary.
        java.io.ByteArrayOutputStream plain = new java.io.ByteArrayOutputStream();
        byte[] data = new byte[BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = gis.read(data)) != -1) {
            plain.write(data, 0, bytesRead);
        }
        return plain.toString("UTF-8");
    } finally {
        gis.close();
    }
}

And a test:

// Round-trip smoke test: log every compressed byte, then the decompressed text.
final String text = "hello";
try {
    final byte[] packed = compress(text);
    for (int i = 0; i < packed.length; i++) {
        Log.d("test", String.valueOf(packed[i]));
    }
    Log.d("test", decompress(packed));
} catch (IOException e) {
    e.printStackTrace();
}

=== Update ===

If you need .NET compatibility, my code has to be changed a little:

/**
 * GZIP-compresses a string into the length-prefixed layout read by the .NET
 * decompress method on this page.
 *
 * Layout: 4 bytes little-endian length of the UTF-8 encoded input, followed
 * by the GZIP stream.
 *
 * @param string text to compress
 * @return prefix plus GZIP data
 * @throws IOException if encoding or compression fails
 */
public static byte[] compress(String string) throws IOException {
    // The prefix must be the encoded BYTE count: the .NET side allocates its
    // output buffer from it. string.length() counts chars and is too small
    // for any non-ASCII input.
    byte[] utf8 = string.getBytes("UTF-8");
    byte[] blockcopy = ByteBuffer
        .allocate(4)
        .order(java.nio.ByteOrder.LITTLE_ENDIAN)
        .putInt(utf8.length)
        .array();

    ByteArrayOutputStream os = new ByteArrayOutputStream(utf8.length);
    GZIPOutputStream gos = new GZIPOutputStream(os);
    try {
        gos.write(utf8);
    } finally {
        gos.close();
    }

    // One snapshot of the compressed bytes instead of three toByteArray() copies.
    byte[] gzipped = os.toByteArray();
    byte[] compressed = new byte[4 + gzipped.length];
    System.arraycopy(blockcopy, 0, compressed, 0, 4);
    System.arraycopy(gzipped, 0, compressed, 4, gzipped.length);
    return compressed;
}

/**
 * Gunzips length-prefixed data: the first 4 bytes are skipped and the rest
 * is treated as a GZIP stream whose content is UTF-8 text.
 *
 * @param compressed 4-byte prefix followed by a GZIP stream
 * @return the decompressed text
 * @throws IOException if the payload is not valid GZIP data
 */
public static String decompress(byte[] compressed) throws IOException {
    final int BUFFER_SIZE = 32;
    ByteArrayInputStream is = new ByteArrayInputStream(compressed, 4, compressed.length - 4);
    GZIPInputStream gis = new GZIPInputStream(is, BUFFER_SIZE);
    try {
        // Accumulate bytes and decode once; decoding chunk by chunk breaks
        // multi-byte characters split across two 32-byte reads.
        java.io.ByteArrayOutputStream plain = new java.io.ByteArrayOutputStream();
        byte[] data = new byte[BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = gis.read(data)) != -1) {
            plain.write(data, 0, bytesRead);
        }
        return plain.toString("UTF-8");
    } finally {
        gis.close();
    }
}

You can use the same test script.


Whatever it was that compressed "Hello" to BQAAAB+LC... is a particularly poor implementation of a gzipper. It expanded "Hello" far, far more than necessary, using a dynamic block instead of a static block in the deflate format. After removing the four-byte prefix to the gzip stream (which always starts with hex 1f 8b), "Hello" was expanded to 123 bytes. In the world of compression, that is considered a crime.

The Compress method that you are complaining about is working correctly and properly. It is generating a static block and a total output of 25 bytes. The gzip format has a ten-byte header and eight-byte trailer overhead, leaving the five-byte input having been coded in seven bytes. That's more like it.

Streams that are not compressible will be expanded, but it shouldn't be by much. The deflate format used by gzip will add five bytes to every 16K to 64K for incompressible data.

To get actual compression, in general you need to give the compressor much more to work with than five bytes, so that it can find repeated strings and biased statistics in compressible data. I understand that you were just doing tests with a short string. But in an actual application, you would never use a general-purpose compressor with such short strings, since it would always be better to just send the string.


In your Decompress() method, the first 4 bytes of the Base64 decoded input are skipped before passing to GZipInputStream. These bytes are found to be 05 00 00 00 in this particular case. So in the Compress() method, these bytes have to be put back in just before the Base64 encode.

If I do this, Compress() returns the following:

BQAAAB+LCAAAAAAAAADLSM3JyQcAhqYQNgUAAAA=

I know that this is not exactly the same as your expectation, which is:

BQAAAB+LCAAAAAAABADtvQdgHEmWJSYvbcp7f0r1StfgdKEIgGATJNiQQBDswYjN5pLsHWlHIymrKoHKZVZlXWYWQMztnbz33nvvvffee++997o7nU4n99//P1xmZAFs9s5K2smeIYCqyB8/fnwfPyLmeVlW/w+GphA2BQAAAA==

But, if my result is plugged back into Decompress(), I think you'll still get "Hello". Try it. The difference may be due to the different compression level with which you got the original string.

So what are the mysterious prefixed bytes 05 00 00 00? According to this answer, they are a little-endian 32-bit length prefix that tells the program how long the decompressed byte buffer should be; in other words, the length of the uncompressed data rather than of the compressed string. That tallies in this case: "hello" is 5 bytes long, hence 05 00 00 00.

This is the modified code for compress():

/**
 * GZIP-compresses a string, prefixes it with the 4-byte little-endian length
 * of the UTF-8 encoded input, and returns the result Base64-encoded.
 *
 * @param text text to compress
 * @return Base64 text of prefix plus GZIP data
 * @throws IOException if encoding or compression fails
 */
public static String Compress(String text) throws IOException {
    byte[] data = text.getBytes("UTF-8");
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    // Length prefix computed from the input (previously hard-coded to 5,
    // which was only right for five-byte inputs such as "hello").
    int n = data.length;
    baos.write(new byte[]{(byte) n, (byte) (n >>> 8), (byte) (n >>> 16), (byte) (n >>> 24)}, 0, 4);

    GZIPOutputStream gzos = new GZIPOutputStream(baos);
    try {
        gzos.write(data);
    } finally {
        gzos.close();
    }

    return Base64.encode(baos.toByteArray());
}

Update:

The reason why the output strings in Android and your .NET code don't match is that the .NET GZip implementation does a faster compression (and thus larger output). This can be verified for sure by looking at the raw Base64 decoded byte values:

.NET:

1F8B 0800 0000 0000 0400 EDBD 0760 1C49
9625 262F 6DCA 7B7F 4AF5 4AD7 E074 A108
8060 1324 D890 4010 ECC1 88CD E692 EC1D
6947 2329 AB2A 81CA 6556 655D 6616 40CC
ED9D BCF7 DE7B EFBD F7DE 7BEF BDF7 BA3B
9D4E 27F7 DFFF 3F5C 6664 016C F6CE 4ADA
C99E 2180 AAC8 1F3F 7E7C 1F3F 22E6 7959
56FF 0F86 A610 3605 0000 00

My Android version:

1F8B 0800 0000 0000 0000 CB48 CDC9 C907
0086 A610 3605 0000 00

Now if we check the GZip File Format, we see that both the .NET and Android versions are mostly identical in the initial header and trailing CRC32 & Size fields. The only differences are in the below fields:

  • XFL = 04 (compressor used fastest algorithm) in the case of .NET, whereas it's 00 in Android
  • The actual compressed blocks

So it's clear from the XFL field that the .NET compression algorithm produces longer output.

In fact, when I created binary files from these raw data values and then uncompressed them using gunzip, both the .NET and Android versions gave exactly the same output: "hello".

So you don't have to bother about the differing results.


I tried your code in my project, and found an encoding bug in the compress method on Android:

// Quoted as-is from the question to illustrate the bug:
byte[] blockcopy = ByteBuffer
        .allocate(4)
        .order(java.nio.ByteOrder.LITTLE_ENDIAN)
        .putInt(str.length())   // writes the char count, not the encoded byte count
        .array();
ByteArrayOutputStream os = new ByteArrayOutputStream(str.length());
GZIPOutputStream gos = new GZIPOutputStream(os);
gos.write(str.getBytes());      // no charset given, so the platform default is used

In the above code, you should use an explicit encoding and write the length of the encoded bytes, not the length of the string:

// Encode once with an explicit charset so the prefix and the payload agree.
byte[] data = str.getBytes("UTF-8");

// 4-byte little-endian prefix holding the number of encoded bytes.
byte[] blockcopy = ByteBuffer
        .allocate(4)
        .order(java.nio.ByteOrder.LITTLE_ENDIAN)
        .putInt(data.length)
            .array();

// data.length is only the initial capacity of the output buffer.
ByteArrayOutputStream os = new ByteArrayOutputStream( data.length );    
GZIPOutputStream gos = new GZIPOutputStream(os);
gos.write( data );


I went crazy with this issue. In the end, in my case (.NET 4), it was not necessary to add these extra 4 bytes at the beginning for .NET compatibility.

It works simply like this:

Android Compress:

/**
 * GZIP-compresses the UTF-8 bytes of a string, with no length prefix.
 *
 * @param string text to compress
 * @return the raw GZIP stream
 * @throws IOException if encoding or compression fails
 */
public static byte[] compress(String string) throws IOException {
    // Pin the charset: the receiving side has to decode with the same one,
    // and getBytes() without an argument uses the platform default.
    byte[] utf8 = string.getBytes("UTF-8");

    ByteArrayOutputStream os = new ByteArrayOutputStream(utf8.length);
    GZIPOutputStream gos = new GZIPOutputStream(os);
    try {
        gos.write(utf8);
    } finally {
        gos.close();   // finishes the stream; required before reading `os`
    }
    return os.toByteArray();
}

.Net Decompress

/// <summary>
/// Inflates a GZip byte array and returns the decompressed bytes.
/// </summary>
/// <param name="gzip">A complete GZip stream.</param>
/// <returns>The decompressed data.</returns>
public static byte[] DecompressViD(byte[] gzip)
{
    const int ChunkSize = 4096;
    byte[] chunk = new byte[ChunkSize];

    using (GZipStream inflater = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
    using (MemoryStream output = new MemoryStream())
    {
        // Pump until Read reports end of stream (a non-positive count).
        int read = inflater.Read(chunk, 0, ChunkSize);
        while (read > 0)
        {
            output.Write(chunk, 0, read);
            read = inflater.Read(chunk, 0, ChunkSize);
        }

        return output.ToArray();
    }
}


OK, I hate chiming in when there are tons of existing answers, but unfortunately most of them are simply wrong for variety of reasons:

  • There are differences between GZIP algorithms within .NET Framework. If you are using .NET 4.5 most of the complaints you see in different answers simply don't apply to you (rather to those who use 2.0 or 3.5). If you go with "fixed" versions of code you'll actually mess-up compression/decompression.
  • Java's byte is signed (-128 to 127), whereas .NET's byte is unsigned (0 to 255). This may cause problems during transport depending on how exactly you are transporting that byte[].
  • I've used Base64 to transport byte[] which can introduce even more problems. There are variety of other reasons, but let's skip further whining and get to the code...

If you are using .NET Framework 4.5 here is C# class you need (Base64 as a bonus):

/// <summary>
/// GZip helpers for strings: raw byte[] variants, Base64 variants, and a
/// small self-test.
/// </summary>
public class CompressString
{
    /// <summary>
    /// Copies everything readable from <paramref name="src"/> into
    /// <paramref name="dest"/> in 4096-byte chunks.
    /// </summary>
    private static void CopyTo(Stream src, Stream dest)
    {
        byte[] chunk = new byte[4096];
        for (int read = src.Read(chunk, 0, chunk.Length);
             read != 0;
             read = src.Read(chunk, 0, chunk.Length))
        {
            dest.Write(chunk, 0, read);
        }
    }

    /// <summary>GZip-compresses the UTF-8 encoding of <paramref name="str"/>.</summary>
    /// <param name="str">Text to compress.</param>
    /// <returns>The GZip stream as a byte array.</returns>
    public static byte[] Zip(string str)
    {
        byte[] utf8 = Encoding.UTF8.GetBytes(str);

        using (MemoryStream input = new MemoryStream(utf8))
        using (MemoryStream output = new MemoryStream())
        {
            // The GZipStream must be disposed before reading `output`, so the
            // final block and trailer are flushed.
            using (GZipStream gzip = new GZipStream(output, CompressionMode.Compress))
            {
                CopyTo(input, gzip);
            }

            return output.ToArray();
        }
    }

    /// <summary>Inflates GZip data and decodes the result as UTF-8.</summary>
    /// <param name="bytes">A complete GZip stream.</param>
    /// <returns>The decompressed text.</returns>
    public static string Unzip(byte[] bytes)
    {
        using (MemoryStream input = new MemoryStream(bytes))
        using (MemoryStream output = new MemoryStream())
        {
            using (GZipStream gzip = new GZipStream(input, CompressionMode.Decompress))
            {
                CopyTo(gzip, output);
            }

            byte[] plain = output.ToArray();
            return Encoding.UTF8.GetString(plain);
        }
    }

    /// <summary>Compresses with <see cref="Zip"/> and Base64-encodes the result.</summary>
    public static string ZipBase64(string compress)
    {
        return Convert.ToBase64String(Zip(compress), Base64FormattingOptions.None);
    }

    /// <summary>Base64-decodes the input and decompresses it with <see cref="Unzip"/>.</summary>
    public static string UnzipBase64(string compressRequest)
    {
        return Unzip(Convert.FromBase64String(compressRequest));
    }

    /// <summary>
    /// Round-trips <paramref name="stringToTest"/> through Zip and Unzip,
    /// writing progress to the debug output.
    /// </summary>
    /// <returns>True when the round-tripped text equals the input.</returns>
    public static bool TestZip(String stringToTest)
    {
        byte[] packed = Zip(stringToTest);
        Debug.WriteLine("Compressed to " + packed.Length + " bytes");

        String restored = Unzip(packed);
        Debug.WriteLine("Decompressed to: " + restored);

        return stringToTest == restored;
    }
}

And here is Android/Java class you need:

/**
 * GZIP helpers for strings on Android/Java: raw byte[] variants, Base64
 * variants and a small self-test. Text is encoded and decoded as UTF-8 in
 * both directions.
 */
public class CompressString {
    /**
     * GZIP-compresses the UTF-8 bytes of a string.
     *
     * @param string text to compress
     * @return the GZIP stream, or null if an IOException occurred
     */
    public static byte[] compress(String string) {
        try {
            // Same charset as decompress(); getBytes() without one would use
            // the platform default and could disagree with the decoder.
            byte[] raw = string.getBytes("UTF-8");
            ByteArrayOutputStream os = new ByteArrayOutputStream(raw.length);
            GZIPOutputStream gos = new GZIPOutputStream(os);
            gos.write(raw);
            gos.close();   // writes the GZIP trailer
            byte[] compressed = os.toByteArray();
            os.close();
            return compressed;
        } catch (IOException ex) {
            return null;
        }
    }

    /**
     * Gunzips a byte array and decodes the result as UTF-8.
     *
     * @param compressed a complete GZIP stream
     * @return the decompressed text, or null if an IOException occurred
     */
    public static String decompress(byte[] compressed) {
        try {
            final int BUFFER_SIZE = 32;
            ByteArrayInputStream is = new ByteArrayInputStream(compressed);
            GZIPInputStream gis = new GZIPInputStream(is, BUFFER_SIZE);
            byte[] data = new byte[BUFFER_SIZE];
            int bytesRead;
            // Bytes are accumulated and decoded once at the end, so characters
            // split across two reads are handled correctly.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            while ((bytesRead = gis.read(data)) != -1) {
                baos.write(data, 0, bytesRead);
            }
            gis.close();
            is.close();
            return baos.toString("UTF-8");
        } catch (IOException ex) {
            return null;
        }
    }

    // Base64

    /** Compresses with {@link #compress(String)} and Base64-encodes without line wrapping. */
    public static String compressBase64(String strToCompress) {
        byte[] compressed = compress(strToCompress);
        String encoded = android.util.Base64.encodeToString(compressed, android.util.Base64.NO_WRAP);
        return encoded;
    }

    /** Base64-decodes the input and decompresses it with {@link #decompress(byte[])}. */
    public static String decompressBase64(String strEncoded) {
        byte[] decoded = android.util.Base64.decode(strEncoded, android.util.Base64.NO_WRAP);
        String decompressed = decompress(decoded);
        return decompressed;
    }


    // test

    /**
     * Round-trips a string through compress and decompress, logging progress.
     *
     * @return true when the round-tripped text equals the input
     */
    public static boolean testCompression(String stringToTest) {
        byte[] compressed = compress(stringToTest);
        if (compressed == null) {
            // compress() signals failure with null; report it instead of
            // failing with a NullPointerException on compressed.length.
            return false;
        }
        Log.d("compress-test", "Compressed to " + compressed.length + " bytes");
        String decompressed = decompress(compressed);
        Log.d("compress-test", "Decompressed to " + decompressed);

        return stringToTest.equals(decompressed);
    }
}

So, there you go - dependency free, 100% working compression Android/Java/C#/.NET classes. If you find string that's not working with .NET 4.5 (I've tried everything from "Hello world" to 1000 word short story) - let me know.


Android method decompress not ok

Android Compress -> OK:

/**
 * GZIP-compresses a string for consumption by the .NET decompress method.
 *
 * @param string text to compress; encoded as UTF-8 before compression
 * @return the raw GZIP stream
 * @throws IOException if encoding or compression fails
 */
public static byte[] compress(String string) throws IOException {
    // Explicit UTF-8 instead of the platform default charset, so the bytes
    // do not depend on the device.
    final byte[] payload = string.getBytes("UTF-8");
    final ByteArrayOutputStream os = new ByteArrayOutputStream(payload.length);
    final GZIPOutputStream gos = new GZIPOutputStream(os);
    try {
        gos.write(payload);
    } finally {
        gos.close();   // flushes the last block and the GZIP trailer
    }
    return os.toByteArray();
}

.Net Decompress -> OK:

/// <summary>
/// Decompresses a GZip byte array into its original bytes.
/// </summary>
/// <param name="gzip">A complete GZip stream.</param>
/// <returns>The decompressed data.</returns>
public static byte[] DecompressViD(byte[] gzip)
{
    const int size = 4096;
    byte[] scratch = new byte[size];

    using (GZipStream source = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
    using (MemoryStream result = new MemoryStream())
    {
        // Keep draining the GZip stream until a read yields no bytes.
        for (int got = source.Read(scratch, 0, size); got > 0; got = source.Read(scratch, 0, size))
        {
            result.Write(scratch, 0, got);
        }

        return result.ToArray();
    }
}

.Net Compress -> OK:

    /// <summary>
    /// GZip-compresses the UTF-8 encoding of <paramref name="text"/> and
    /// returns the GZip stream as Base64 (no length prefix).
    /// </summary>
    /// <param name="text">Text to compress.</param>
    /// <returns>Base64 of the GZip stream.</returns>
    public static string compress(string text)
    {
        byte[] buffer = Encoding.UTF8.GetBytes(text);

        using (MemoryStream ms = new MemoryStream())
        {
            // leaveOpen: true keeps `ms` usable after the GZipStream is
            // disposed; disposing is what flushes the final block and trailer.
            using (GZipStream zip = new GZipStream(ms, CompressionMode.Compress, true))
            {
                zip.Write(buffer, 0, buffer.Length);
            }

            return Convert.ToBase64String(ms.ToArray());
        }
    }

Android Decompress -> Not OK:

/**
 * Decodes Base64 text holding a plain GZIP stream (no length prefix) and
 * returns the decompressed content as UTF-8 text.
 *
 * @param zipText Base64 text of a GZIP stream
 * @return the decompressed text
 * @throws IOException if the decoded bytes are not valid GZIP data
 */
public static String decompress(String zipText) throws IOException {
    byte[] compressed = Base64.decode(zipText);

    // Wrap the bytes in exactly ONE GZIPInputStream. Nesting a second one
    // around the first makes it parse the already-decompressed text as GZIP,
    // which fails on the missing GZIP header.
    GZIPInputStream gzipInputStream = new GZIPInputStream(new ByteArrayInputStream(compressed));
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int read;
        while ((read = gzipInputStream.read(chunk)) != -1) {
            baos.write(chunk, 0, read);
        }
        return new String(baos.toByteArray(), "UTF-8");
    } finally {
        gzipInputStream.close();
    }
}


Here's a simple example to get you started.

/**
 * Demo: reads "spanish.xml", writes it GZIP-compressed to "spanish-new.xml",
 * then reads that file back and decompresses it into a StringBuilder.
 *
 * @throws IOException if any file cannot be read or written
 */
public static void main(String[] args) throws IOException
{
    byte[] buffer = new byte[4096];
    StringBuilder sb = new StringBuilder();

    //read file to compress

    String read = readFile( "spanish.xml", Charset.defaultCharset());

    if( read != null )
    {
        //compress file to output

        GZIPOutputStream gzos = new GZIPOutputStream(new FileOutputStream("spanish-new.xml"));
        try {
            gzos.write( read.getBytes());
        } finally {
            // closes the underlying file stream too
            gzos.close();
        }

        //uncompress and read back

        GZIPInputStream gzis = new GZIPInputStream(new FileInputStream("spanish-new.xml"));
        try {
            // Collect only the bytes each read() actually returned. Building
            // a String from the whole buffer on every pass would also append
            // stale bytes left over from earlier, longer reads.
            java.io.ByteArrayOutputStream plain = new java.io.ByteArrayOutputStream();
            int bytes;
            while ((bytes = gzis.read(buffer)) != -1) {
                plain.write(buffer, 0, bytes);
            }
            // Decode once, with the same default charset used by getBytes() above.
            sb.append( new String( plain.toByteArray() ) );
        } finally {
            gzis.close();
        }
    }
}

/**
 * Reads a whole file into memory and decodes it with the given charset.
 *
 * @param path     file system path of the file to read
 * @param encoding charset used to decode the file's bytes
 * @return the file content as a string
 * @throws IOException if the file cannot be read
 */
static String readFile(String path, Charset encoding) throws IOException {
    return new String(Files.readAllBytes(Paths.get(path)), encoding);
}


This might be late but could be useful for someone, I recently had a requirement to compress a string in C# Xamarin and decompress it in Android. Basically a Xamarin android app sends another Native Android app an intent with a compressed extra string. And the Android app has to decompress it before using it.

These are the methods that worked for me.

ANDROID DECOMPRESS

 /**
  * Decodes Base64 text produced by the Xamarin CompressString method, skips
  * the 4-byte length prefix and gunzips the rest into UTF-8 text.
  *
  * @param zipText Base64 text of a 4-byte prefix followed by a GZIP stream
  * @return the decompressed text
  * @throws IOException if the payload is not valid GZIP data
  */
 public static String decompress(String zipText) throws IOException {
     byte[] gzipBuff = Base64.decode(zipText, Base64.DEFAULT);

     // The first 4 bytes hold the uncompressed length; GZIP data starts after them.
     ByteArrayInputStream memstream = new ByteArrayInputStream(gzipBuff, 4, gzipBuff.length - 4);
     GZIPInputStream gzin = new GZIPInputStream(memstream);
     try {
         final int buffSize = 8192;
         byte[] tempBuffer = new byte[buffSize];
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
         int size;
         while ((size = gzin.read(tempBuffer, 0, buffSize)) != -1) {
             baos.write(tempBuffer, 0, size);
         }
         return new String(baos.toByteArray(), StandardCharsets.UTF_8);
     } finally {
         // previously never closed
         gzin.close();
     }
 }

XAMARIN COMPRESS

/// <summary>
/// GZip-compresses the UTF-8 encoding of <paramref name="text"/>, prepends
/// the 4-byte uncompressed length, and returns the result as Base64.
/// </summary>
/// <param name="text">Text to compress.</param>
/// <returns>Base64 of the 4-byte length followed by the GZip stream.</returns>
public static string CompressString(string text)
{
    byte[] utf8 = Encoding.UTF8.GetBytes(text);

    MemoryStream sink = new MemoryStream();
    // leaveOpen: true, so `sink` can still be read after the GZipStream is disposed.
    using (GZipStream deflater = new GZipStream(sink, CompressionMode.Compress, true))
    {
        deflater.Write(utf8, 0, utf8.Length);
    }

    byte[] gz = sink.ToArray();

    // Frame: [4-byte length of the uncompressed bytes][gzip bytes]
    byte[] framed = new byte[4 + gz.Length];
    BitConverter.GetBytes(utf8.Length).CopyTo(framed, 0);
    gz.CopyTo(framed, 4);

    return Convert.ToBase64String(framed);
}


I do it so in Vb.net:

  ''' <summary>
  ''' GZip-compresses the UTF-8 encoding of <paramref name="Text"/>, prepends
  ''' the 4-byte uncompressed length and returns the result as Base64.
  ''' </summary>
  ''' <param name="Text">Text to compress.</param>
  ''' <returns>The Base64 text, or "" if an exception was logged.</returns>
  Public Function zipString(ByVal Text As String) As String
    Dim res As String = ""
    Try

        Dim buffer As Byte() = System.Text.Encoding.UTF8.GetBytes(Text)
        Dim compressed As Byte()
        Using ms As New MemoryStream()
            ' leaveOpen := True keeps ms readable after the GZipStream is
            ' disposed; disposing flushes the final block and trailer.
            Using zipStream As New System.IO.Compression.GZipStream(ms, System.IO.Compression.CompressionMode.Compress, True)
                zipStream.Write(buffer, 0, buffer.Length)
            End Using
            compressed = ms.ToArray()
        End Using

        ' Frame: [4-byte uncompressed length][gzip bytes]
        Dim gzBuffer As Byte() = New Byte(compressed.Length + 3) {}
        System.Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gzBuffer, 0, 4)
        System.Buffer.BlockCopy(compressed, 0, gzBuffer, 4, compressed.Length)
        res = Convert.ToBase64String(gzBuffer)
    Catch ex As Exception
        Log("mdl.zipString: " & ex.Message)
    End Try
    Return res
End Function

''' <summary>
''' Reverses zipString: decodes the Base64 text, reads the 4-byte length
''' prefix and inflates the GZip stream that follows it.
''' </summary>
''' <param name="compressedText">Base64 of a 4-byte length plus GZip data.</param>
''' <returns>The decompressed UTF-8 text, or "" if an exception was logged.</returns>
Public Function unzipString(ByVal compressedText As String) As String
    Dim res As String = ""
    Try
        Dim gzBuffer As Byte() = Convert.FromBase64String(compressedText)
        Dim msgLength As Integer = BitConverter.ToInt32(gzBuffer, 0)
        Dim buffer As Byte() = New Byte(msgLength - 1) {}
        Using ms As New MemoryStream(gzBuffer, 4, gzBuffer.Length - 4)
            Using zipStream As New System.IO.Compression.GZipStream(ms, System.IO.Compression.CompressionMode.Decompress)
                ' Read may return fewer bytes than requested, so loop until the
                ' buffer is full or the stream ends.
                Dim total As Integer = 0
                While total < buffer.Length
                    Dim count As Integer = zipStream.Read(buffer, total, buffer.Length - total)
                    If count = 0 Then
                        Exit While
                    End If
                    total += count
                End While
                ' Decode only the bytes actually read.
                res = System.Text.Encoding.UTF8.GetString(buffer, 0, total)
            End Using
        End Using
    Catch ex As Exception
        Log("mdl.unzipString: " & ex.Message)
    End Try
    Return res
End Function
0

精彩评论

暂无评论...
验证码 换一张
取 消