phobos/std/zlib.d

654 lines
16 KiB
D

// Written in the D programming language.
/**
* Compress/decompress data using the $(WEB www._zlib.net, zlib library).
*
* References:
* $(WEB en.wikipedia.org/wiki/Zlib, Wikipedia)
*
* Macros:
* WIKI = Phobos/StdZlib
*
* Copyright: Copyright Digital Mars 2000 - 2011.
* License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
* Authors: $(WEB digitalmars.com, Walter Bright)
* Source: $(PHOBOSSRC std/_zlib.d)
*/
/* Copyright Digital Mars 2000 - 2011.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
module std.zlib;
//debug=zlib; // uncomment to turn on debugging printf's
private import etc.c.zlib, std.conv;
// Values for 'mode'
// Flush-mode constants handed to the zlib deflate()/inflate() calls made in
// this module. Compress.flush() accepts Z_SYNC_FLUSH, Z_FULL_FLUSH or Z_FINISH.
enum
{
// Passed by the compress()/uncompress() paths in this module when feeding data.
Z_NO_FLUSH = 0,
// Sync to the next byte boundary; more data is to be compressed later on.
Z_SYNC_FLUSH = 2,
// Like Z_SYNC_FLUSH, and the decompressor needs to be restartable at this point.
Z_FULL_FLUSH = 3,
// Used when finished compressing the data.
Z_FINISH = 4,
}
/*************************************
 * Exception type thrown by this module when a zlib call reports failure.
 * The exception message is a short text chosen from the zlib status code.
 */
class ZlibException : Exception
{
    /// Map a zlib status code to the message text carried by the exception.
    private static string describe(int errnum)
    {
        switch (errnum)
        {
            case Z_STREAM_END:    return "stream end";
            case Z_NEED_DICT:     return "need dict";
            case Z_ERRNO:         return "errno";
            case Z_STREAM_ERROR:  return "stream error";
            case Z_DATA_ERROR:    return "data error";
            case Z_MEM_ERROR:     return "mem error";
            case Z_BUF_ERROR:     return "buf error";
            case Z_VERSION_ERROR: return "version error";
            default:              return "unknown error";
        }
    }

    /// Construct from the status code errnum returned by a zlib function.
    this(int errnum)
    {
        super(describe(errnum));
    }
}
/**************************************************
 * Computes the Adler32 checksum of the bytes in buf[].
 *
 * Params:
 *      adler = starting value; feed in the result of an earlier call to
 *              build a cumulative checksum
 *      buf   = the data to checksum
 * Returns: the updated checksum; adler itself when buf is empty.
 */
uint adler32(uint adler, const(void)[] buf)
{
    // The C routine takes a uint length, so hand the data over in pieces
    // of at most this many bytes.
    enum size_t maxPiece = 0xFFFF0000;

    auto remaining = cast(ubyte[]) buf;
    while (remaining.length != 0)
    {
        immutable take = remaining.length < maxPiece ? remaining.length : maxPiece;
        adler = etc.c.zlib.adler32(adler, remaining.ptr, cast(uint) take);
        remaining = remaining[take .. $];
    }
    return adler;
}
unittest
{
    // Known-answer check for adler32 on the bytes 1..10 with a zero start value.
    static ubyte[] sample = [1,2,3,4,5,6,7,8,9,10];

    debug(zlib) printf("D.zlib.adler32.unittest\n");
    uint checksum = adler32(0u, cast(void[]) sample);
    debug(zlib) printf("adler = %x\n", checksum);

    assert(checksum == 0xdc0037);
}
/*********************************
 * Computes the CRC32 checksum of the bytes in buf[].
 *
 * Params:
 *      crc = starting value; feed in the result of an earlier call to
 *            build a cumulative checksum
 *      buf = the data to checksum
 * Returns: the updated checksum; crc itself when buf is empty.
 */
uint crc32(uint crc, const(void)[] buf)
{
    // The C routine takes a uint length, so hand the data over in pieces
    // of at most this many bytes.
    enum size_t maxPiece = 0xFFFF0000;

    auto remaining = cast(ubyte[]) buf;
    while (remaining.length != 0)
    {
        immutable take = remaining.length < maxPiece ? remaining.length : maxPiece;
        crc = etc.c.zlib.crc32(crc, remaining.ptr, cast(uint) take);
        remaining = remaining[take .. $];
    }
    return crc;
}
unittest
{
    // Known-answer check for crc32 on the bytes 1..10 with a zero start value.
    static ubyte[] sample = [1,2,3,4,5,6,7,8,9,10];

    debug(zlib) printf("D.zlib.crc32.unittest\n");
    uint checksum = crc32(0u, cast(void[]) sample);
    debug(zlib) printf("crc = %x\n", checksum);

    assert(checksum == 0x2520577b);
}
/*********************************************
 * Compresses the data in srcbuf[] in one call, using compression _level level.
 *
 * Params:
 *      srcbuf = the data to compress
 *      level  = compression level; the in contract requires -1 <= level <= 9
 * Returns: a newly allocated buffer holding the compressed data.
 * Throws: ZlibException if the underlying compress2 call reports an error.
 */
const(void)[] compress(const(void)[] srcbuf, int level)
in
{
    assert(level >= -1 && level <= 9);
}
body
{
    // Output allowance: the input size, plus one byte per started KiB, plus 12.
    immutable perKiB = (srcbuf.length + 1023) / 1024;
    auto produced = srcbuf.length + perKiB + 12;
    auto outbuf = new ubyte[produced];

    // compress2 overwrites 'produced' with the number of bytes actually written.
    auto status = etc.c.zlib.compress2(outbuf.ptr, &produced, cast(ubyte *)srcbuf.ptr, srcbuf.length, level);
    if (!status)
    {
        outbuf.length = produced;
        return outbuf;
    }

    delete outbuf;
    throw new ZlibException(status);
}
/*********************************************
 * ditto
 */
const(void)[] compress(const(void)[] buf)
{
    // No level given: forward with zlib's default-compression constant.
    immutable level = Z_DEFAULT_COMPRESSION;
    return compress(buf, level);
}
/*********************************************
 * Decompresses the data in srcbuf[] in one call.
 * Params:
 *  srcbuf  = buffer containing the compressed data.
 *  destlen = size hint for the uncompressed data. When 0, a first guess of
 *            srcbuf.length * 2 + 1 is used; the output buffer is doubled
 *            whenever inflate asks for more room.
 *  winbits = window-bits value handed to inflateInit2
 *            (the base two logarithm of the maximum window size).
 * Returns: the decompressed data.
 * Throws: ZlibException if inflateInit2, inflate or inflateEnd reports an error.
 */
void[] uncompress(void[] srcbuf, size_t destlen = 0u, int winbits = 15)
{
    size_t capacity = destlen ? destlen : srcbuf.length * 2 + 1;

    etc.c.zlib.z_stream stream;
    stream.next_in = cast(typeof(stream.next_in)) srcbuf;
    stream.avail_in = to!uint(srcbuf.length);

    int status = etc.c.zlib.inflateInit2(&stream, winbits);
    if (status)
        throw new ZlibException(status);

    ubyte[] result;
    size_t filled = 0u;     // bytes of 'result' already offered to inflate

    for (;;)
    {
        // Expose only the newly added tail of the buffer as output space.
        result.length = capacity;
        stream.next_out = cast(typeof(stream.next_out)) &result[filled];
        stream.avail_out = to!uint(capacity - filled);
        filled = capacity;

        status = etc.c.zlib.inflate(&stream, Z_NO_FLUSH);

        if (status == Z_OK)
        {
            // Not finished yet: double the buffer and go around again.
            capacity = result.length * 2;
            continue;
        }

        if (status == Z_STREAM_END)
        {
            result.length = stream.total_out;
            status = etc.c.zlib.inflateEnd(&stream);
            if (status != Z_OK)
                throw new ZlibException(status);
            return result;
        }

        // Any other status is an error: release the stream, then report it.
        etc.c.zlib.inflateEnd(&stream);
        throw new ZlibException(status);
    }
    assert(0);
}
unittest
{
    // Round trip a short piece of text through compress() and uncompress().
    ubyte[] original = cast(ubyte[])
"the quick brown fox jumps over the lazy dog\r
the quick brown fox jumps over the lazy dog\r
";

    ubyte[] packed = cast(ubyte[]) compress(cast(void[]) original);
    ubyte[] restored = cast(ubyte[]) uncompress(cast(void[]) packed);

    assert(restored == original);
}
unittest
{
    // Round trip a large, uniform buffer. The compressed form must be small
    // enough that uncompress()'s first size guess (2 * length + 1) falls
    // short of the original size.
    auto original = new ubyte[1000000];
    original[] = 0x80;

    auto packed = cast(ubyte[]) compress(cast(void[]) original);
    assert(packed.length*2 + 1 < original.length);

    auto restored = cast(ubyte[]) uncompress(cast(void[]) packed);
    assert(restored == original);
}
/+
void arrayPrint(ubyte[] array)
{
//printf("array %p,%d\n", cast(void*)array, array.length);
for (size_t i = 0; i < array.length; i++)
{
printf("%02x ", array[i]);
if (((i + 1) & 15) == 0)
printf("\n");
}
printf("\n\n");
}
+/
/// The header format that the compressed stream is wrapped in.
enum HeaderFormat
{
    /// a standard zlib header
    deflate,

    /// a gzip file format header
    gzip,

    /// used when decompressing: try to detect the stream format automatically by looking at the data
    determineFromData
}
/*********************************************
 * Used when the data to be compressed is not all in one buffer.
 *
 * Feed the input piece by piece to compress() and finish with flush().
 * The buffers returned by those calls, concatenated in call order, make up
 * the compressed stream.
 */
class Compress
{
  private:
    z_stream zs;
    int level = Z_DEFAULT_COMPRESSION;
    int inited;             // nonzero while zs holds an initialized deflate state
    immutable bool gzip;    // true when a gzip header was requested

    // Release the deflate state, if there is one, then throw a ZlibException
    // for err. Never returns normally.
    void error(int err)
    {
        if (inited)
        {
            deflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Construct.
     * Params:
     *  level  = compression level; the in contract requires 1 <= level <= 9
     *  header = HeaderFormat.gzip makes a gzip compatible stream; any other
     *           value produces the standard zlib header
     */
    this(int level, HeaderFormat header = HeaderFormat.deflate)
    in
    {
        assert(1 <= level && level <= 9);
    }
    body
    {
        this.level = level;
        this.gzip = header == HeaderFormat.gzip;
    }

    /// ditto
    this(HeaderFormat header = HeaderFormat.deflate)
    {
        this.gzip = header == HeaderFormat.gzip;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            deflateEnd(&zs);
        }
    }

    /**
     * Compress the data in buf and return the compressed data.
     * The buffers
     * returned from successive calls to this should be concatenated together.
     *
     * All of buf is handed to deflate before this returns, so the object keeps
     * no unread input inside buf and the caller may reuse or overwrite buf
     * afterwards. Output that deflate is still holding back is delivered by a
     * later compress() or flush() call.
     *
     * Returns: the compressed bytes produced by this call (possibly empty),
     *          or null when buf is empty.
     * Throws: ZlibException if deflateInit2 or deflate reports an error.
     */
    const(void)[] compress(const(void)[] buf)
    {
        if (buf.length == 0)
            return null;

        int err;
        if (!inited)
        {
            // 15 window bits; adding 16 requests a gzip wrapper.
            err = deflateInit2(&zs, level, Z_DEFLATED, 15 + (gzip ? 16 : 0), 8, Z_DEFAULT_STRATEGY);
            if (err)
                error(err);
            inited = 1;
        }

        auto destbuf = new ubyte[buf.length];
        size_t filled = 0;      // bytes of destbuf already written by deflate

        zs.next_in = cast(typeof(zs.next_in)) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        while (true)
        {
            zs.next_out = destbuf.ptr + filled;
            zs.avail_out = to!uint(destbuf.length - filled);

            err = deflate(&zs, Z_NO_FLUSH);
            if (err != Z_STREAM_END && err != Z_OK)
            {
                delete destbuf;
                error(err);
            }
            filled = destbuf.length - zs.avail_out;

            // Stop only once every input byte has been taken; leaving input
            // behind would mean holding a pointer into the caller's buffer
            // across calls.
            if (zs.avail_in == 0)
                break;

            // Input remains because the output filled up: make more room.
            if (zs.avail_out == 0)
                destbuf.length = destbuf.length + buf.length;
        }

        destbuf.length = filled;
        return destbuf;
    }

    /***
     * Compress and return any remaining data.
     * The returned data should be appended to that returned by compress().
     * Params:
     *  mode = one of the following:
     *          $(DL
                    $(DT Z_SYNC_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on.)
                    $(DT Z_FULL_FLUSH )
                    $(DD Syncs up flushing to the next byte boundary.
                        Used when more data is to be compressed later on,
                        and the decompressor needs to be restartable at this
                        point.)
                    $(DT Z_FINISH)
                    $(DD (default) Used when finished compressing the data. )
                )
     * Returns: the flushed bytes, or null if nothing was ever compressed.
     * Throws: ZlibException if deflate or deflateEnd reports an error.
     */
    void[] flush(int mode = Z_FINISH)
    in
    {
        assert(mode == Z_FINISH || mode == Z_SYNC_FLUSH || mode == Z_FULL_FLUSH);
    }
    body
    {
        ubyte[] destbuf;
        ubyte[512] tmpbuf = void;   // scratch output, appended to destbuf as it fills
        int err;

        if (!inited)
            return null;

        zs.next_out = tmpbuf.ptr;
        zs.avail_out = tmpbuf.length;

        while ((err = deflate(&zs, mode)) != Z_STREAM_END)
        {
            if (err == Z_OK)
            {
                // A partial flush is complete once deflate leaves output room.
                if (zs.avail_out != 0 && mode != Z_FINISH)
                    break;
                else if (zs.avail_out == 0)
                {
                    // Scratch buffer full: bank it and keep draining.
                    destbuf ~= tmpbuf;
                    zs.next_out = tmpbuf.ptr;
                    zs.avail_out = tmpbuf.length;
                    continue;
                }
                // Z_FINISH returned Z_OK with room to spare and no stream end.
                err = Z_BUF_ERROR;
            }
            delete destbuf;
            error(err);
        }
        destbuf ~= tmpbuf[0 .. (tmpbuf.length - zs.avail_out)];

        if (mode == Z_FINISH)
        {
            err = deflateEnd(&zs);
            inited = 0;
            if (err)
                error(err);
        }
        return destbuf;
    }
}
/******
 * Used when the data to be decompressed is not all in one buffer.
 *
 * Feed the compressed input piece by piece to uncompress() and finish with
 * flush(). The buffers returned by those calls, concatenated in call order,
 * make up the decompressed data.
 */
class UnCompress
{
  private:
    z_stream zs;
    int inited;             // nonzero while zs holds an initialized inflate state
    int done;               // set once flush() has been called (or on destruction)
    size_t destbufsize;     // output allocation step; 0 until chosen
    HeaderFormat format;

    // Release the inflate state, if there is one, then throw a ZlibException
    // for err. Never returns normally.
    void error(int err)
    {
        if (inited)
        {
            inflateEnd(&zs);
            inited = 0;
        }
        throw new ZlibException(err);
    }

  public:

    /**
     * Construct. destbufsize is the size used for the output buffer
     * allocations; format is left at its default-initialized value.
     */
    this(uint destbufsize)
    {
        this.destbufsize = destbufsize;
    }

    /** ditto */
    this(HeaderFormat format = HeaderFormat.determineFromData)
    {
        this.format = format;
    }

    ~this()
    {
        if (inited)
        {
            inited = 0;
            inflateEnd(&zs);
        }
        done = 1;
    }

    /**
     * Decompress the data in buf and return the decompressed data.
     * The buffers returned from successive calls to this should be concatenated
     * together.
     *
     * All of buf is handed to inflate before this returns (or the end of the
     * compressed stream is reached), so the object keeps no unread input
     * inside buf and the caller may reuse or overwrite buf afterwards. Output
     * that inflate is still holding back is delivered by a later uncompress()
     * or flush() call.
     *
     * Returns: the decompressed bytes produced by this call (possibly empty),
     *          or null when buf is empty.
     * Throws: ZlibException if inflateInit2 or inflate reports an error.
     */
    const(void)[] uncompress(const(void)[] buf)
    in
    {
        assert(!done);
    }
    body
    {
        if (buf.length == 0)
            return null;

        int err;
        if (!inited)
        {
            // 15 window bits; +16 selects gzip decoding and +32 selects
            // automatic header detection, following the requested format.
            int windowBits = 15;
            if (format == HeaderFormat.gzip)
                windowBits += 16;
            else if (format == HeaderFormat.determineFromData)
                windowBits += 32;

            err = inflateInit2(&zs, windowBits);
            if (err)
                error(err);
            inited = 1;
        }

        if (!destbufsize)
            destbufsize = to!uint(buf.length) * 2;

        auto destbuf = new ubyte[destbufsize];
        size_t filled = 0;      // bytes of destbuf already written by inflate

        zs.next_in = cast(ubyte*) buf.ptr;
        zs.avail_in = to!uint(buf.length);

        while (true)
        {
            zs.next_out = destbuf.ptr + filled;
            zs.avail_out = to!uint(destbuf.length - filled);

            err = inflate(&zs, Z_NO_FLUSH);
            if (err != Z_STREAM_END && err != Z_OK)
            {
                delete destbuf;
                error(err);
            }
            filled = destbuf.length - zs.avail_out;

            if (err == Z_STREAM_END)
            {
                // Anything after the end of the stream is not consumed; drop
                // the reference to it rather than carry it across calls.
                zs.avail_in = 0;
                break;
            }

            // Stop only once every input byte has been taken; leaving input
            // behind would mean holding a pointer into the caller's buffer
            // across calls.
            if (zs.avail_in == 0)
                break;

            // Input remains because the output filled up: make more room.
            if (zs.avail_out == 0)
                destbuf.length = destbuf.length + destbufsize;
        }

        destbuf.length = filled;
        return destbuf;
    }

    /**
     * Decompress and return any remaining data.
     * The returned data should be appended to that returned by uncompress().
     * The UnCompress object cannot be used further.
     *
     * Returns: the remaining decompressed bytes, or null if uncompress() never
     *          initialized the stream.
     * Throws: ZlibException if the end of the compressed stream is not
     *         reached, or if inflate or inflateEnd reports an error.
     */
    void[] flush()
    in
    {
        assert(!done);
    }
    out
    {
        assert(done);
    }
    body
    {
        done = 1;
        if (!inited)
            return null;

        ubyte[] extra;      // completely filled buffers from earlier rounds
        ubyte[] destbuf;
        int err;
        bool filledUp;

        do
        {
            destbuf = new ubyte[zs.avail_in * 2 + 100];
            zs.next_out = destbuf.ptr;
            zs.avail_out = to!uint(destbuf.length);

            err = inflate(&zs, Z_NO_FLUSH);

            // Z_OK with no output room left means more may be pending:
            // bank this buffer and go around again with a fresh one.
            filledUp = (err == Z_OK && zs.avail_out == 0);
            if (filledUp)
                extra ~= destbuf;
        } while (filledUp);

        if (err != Z_STREAM_END)
        {
            delete destbuf;
            // Z_OK here means inflate stopped short of the end of the stream.
            if (err == Z_OK)
                err = Z_BUF_ERROR;
            error(err);
        }

        // Keep only the part of the last buffer that inflate wrote.
        destbuf = destbuf.ptr[0 .. zs.next_out - destbuf.ptr];

        err = inflateEnd(&zs);
        inited = 0;
        if (err)
            error(err);

        if (extra.length)
            destbuf = extra ~ destbuf;
        return destbuf;
    }
}
/* ========================== unittest ========================= */
private import std.stdio;
private import std.random;
unittest // by Dave
{
    debug(zlib) writeln("std.zlib.unittest");

    // Compress src, decompress the result, and check the round trip
    // reproduces src exactly. Always returns true; a mismatch trips an assert.
    bool CompressThenUncompress (ubyte[] src)
    {
        auto packed = cast(ubyte[]) std.zlib.compress(cast(void[]) src);
        double ratio = (packed.length / cast(double) src.length);
        debug(zlib) writef("src.length: %1$d, dst: %2$d, Ratio = %3$f", src.length, packed.length, ratio);

        auto unpacked = cast(ubyte[]) std.zlib.uncompress(cast(void[]) packed);
        assert(src.length == unpacked.length);
        assert(src == unpacked);
        return true;
    }

    // smallish buffers
    foreach (idx; 0 .. 25)
    {
        char[] text = new char[uniform(0, 100)];

        // Alternate between more & less compressible
        foreach (ref char ch; text)
            ch = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 2)));

        if (!CompressThenUncompress(cast(ubyte[]) text))
            return;
        debug(zlib) writeln("; Success.");
    }

    // larger buffers
    foreach (idx; 0 .. 25)
    {
        char[] text = new char[uniform(0, 1000/*0000*/)];

        // Alternate between more & less compressible
        foreach (ref char ch; text)
            ch = cast(char) (' ' + (uniform(0, idx % 2 ? 91 : 10)));

        if (!CompressThenUncompress(cast(ubyte[]) text))
            return;
        debug(zlib) writefln("; Success.");
    }

    debug(zlib) writefln("PASSED std.zlib.unittest");
}
unittest // by Artem Rebrov
{
    // Round trip a short string through the streaming Compress / UnCompress classes.
    auto packer = new Compress;
    auto unpacker = new UnCompress;

    const(void)[] input = "tesatdffadf";

    const(void)[] packed = packer.compress(input);
    packed ~= packer.flush();

    const(void)[] output = unpacker.uncompress(packed);

    assert( output[] == input[] );
}