File compression with zlib without saving to disk and send via socket - c++

I have a server that receives files from clients, but transfers over external networks are too slow. After reading a bit, I found that file compression could be a solution. I decided to use zlib for compression; I searched through the examples in the documentation, and below are two that I would like to use in my project. My question is: how can I use the following examples to compress a file without saving it to disk and send it via a socket (client side), and then receive and decompress the file (server side)?
The client application run on Windows, and server application run on Linux.
Client Side:
int def(FILE *source, FILE *dest, int level)
{
int ret, flush;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
/* allocate deflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
ret = deflateInit(&strm, level);
if (ret != Z_OK)
return ret;
/* compress until end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
strm.next_in = in;
/* run deflate() on input until output buffer not full, finish
compression if all of source has been read in */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = deflate(&strm, flush); /* no bad return value */
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
assert(strm.avail_in == 0); /* all input will be used */
/* done when last data in file processed */
} while (flush != Z_FINISH);
assert(ret == Z_STREAM_END); /* stream will be complete */
/* clean up and return */
(void)deflateEnd(&strm);
return Z_OK;
}
Server Side:
int inf(FILE *source, FILE *dest)
{
int ret;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
/* allocate inflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit(&strm);
if (ret != Z_OK)
return ret;
/* decompress until deflate stream ends or end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
if (strm.avail_in == 0)
break;
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
/* done when inflate() says it's done */
} while (ret != Z_STREAM_END);
/* clean up and return */
(void)inflateEnd(&strm);
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}

One way is to use a Boost Iostream compressor (they support zlib, gzip, bzip2 out of the box) and an ip::tcp::iostream socket from Boost Asio. Something like:
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/zlib.hpp>
#include <boost/asio/ip/tcp.hpp>
// Sketch: wrap one TCP iostream in a zlib-decompressing reader and a
// zlib-compressing writer, send a line, then read lines back.
int main() {
boost::asio::ip::tcp::iostream connection;
// Reader chain: bytes read from `connection` pass through zlib_decompressor.
boost::iostreams::filtering_stream<boost::iostreams::input> connection_reader;
connection_reader.push(boost::iostreams::zlib_decompressor());
connection_reader.push(connection);
// Writer chain: bytes written pass through zlib_compressor into `connection`.
boost::iostreams::filtering_stream<boost::iostreams::output> connection_writer;
connection_writer.push(boost::iostreams::zlib_compressor());
connection_writer.push(connection);
// The socket is connected only after both filter chains have been built.
auto const url = "127.0.0.1";
connection.connect(url, "http");
// Send.
// NOTE(review): nothing here flushes or closes connection_writer before the
// read loop below; presumably the compressor buffers output, so confirm the
// peer actually receives this line before a reply is awaited.
connection_writer << "hello there\n";
// Receive.
for(std::string line; getline(connection_reader, line);) {
// Process line.
}
}

Related

zlib decompression return -3 (z_data_error)

The zlib uncompress() return -3 (z_data_error) when I decompress data.
From doc: returns Z_DATA_ERROR if the input data was corrupted or incomplete,
uncompress((Bytef*)uncompressbuffer, &uncompressbuffersize, (const Bytef*)compressbuffer, &compressbuffersize)
In another application, where I use deflate/inflate I get the same error.
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = inputLength;
strm.next_in = (unsigned char*) inputBuffer;
ret = inflateInit(&strm);
if (ret != Z_OK)
{
delete[] uncompressedData;
return ERROR;
}
/******************************************************/
strm.avail_out = unusedData;
strm.next_out = (uncompressedData + MIN_CHUNK) - unusedData;
/* run inflate() on input until output buffer not full */
do {
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret)
{
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
} while (strm.avail_out != 0 && ret == Z_OK);
However, this error happens only with the x64 version of my software; the x86 version works properly. The unzipped data is intact. The buffer sizes of the compressed and uncompressed data are correct.
Zlib is correctly compiled to x64.
What else could be causing this problem? Any hint?
Sample code with "uncompress":
#include <iostream>
#include <fstream>
#include <cstdio>
#include <vector>
#include <zlib.h>
#include <assert.h>
#include <cstdlib>
#define CHUNK 16384
const int BUFFERSIZE = 4096;
using namespace std;
// Reads up to BUFFERSIZE bytes from fin (a single fread), deflates them and
// writes the result to fout. The scratch output buffer `tmp` holds CHUNK bytes.
void compress(FILE* fin, FILE* fout) {
char buffer[BUFFERSIZE];
int byte_read = fread(buffer, sizeof(char), BUFFERSIZE, fin);
z_stream strm;
int ret;
unsigned have;
unsigned char* tmp = new unsigned char[CHUNK];
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.next_in = (unsigned char*)buffer;
strm.avail_in = byte_read;
strm.next_out = tmp;
strm.avail_out = CHUNK;
// NOTE(review): the return value of deflateInit is overwritten below without being checked.
ret = deflateInit(&strm, Z_DEFAULT_COMPRESSION);
//first loop: compress input data stream and write on RTDB file
do
{
ret = deflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
// NOTE(review): avail_out was set to CHUNK above, but `have` is computed
// from BUFFERSIZE, and fwrite ignores `have` and always writes BUFFERSIZE bytes.
have = BUFFERSIZE - strm.avail_out;
fwrite(tmp, sizeof(char), BUFFERSIZE, fout);
} while (strm.avail_out == 0);
//assert(strm.avail_in == 0);
//second loop: all input data consumed. Flush everything...
do
{
// Only BUFFERSIZE bytes of the CHUNK-sized tmp buffer are offered as output here.
strm.next_out = tmp;
strm.avail_out = BUFFERSIZE;
ret = deflate(&strm, Z_FINISH);
assert(ret != Z_STREAM_ERROR);
// NOTE(review): `have` is computed but the fwrite below again writes BUFFERSIZE bytes.
have = BUFFERSIZE - strm.avail_out;
fwrite(tmp, sizeof(char), BUFFERSIZE, fout);
} while (ret != Z_STREAM_END);
(void)deflateEnd(&strm);
// NOTE(review): tmp was allocated with new[] but is released with scalar delete.
delete tmp;
}
// Reads up to BUFFERSIZE bytes from fin, passes them to zlib's one-shot
// uncompress(), writes BUFFERSIZE bytes of the output buffer to fout and
// prints the uncompress() status.
void decompress(FILE* fin, FILE* fout) {
int status;
char buffer[BUFFERSIZE];
int byte_read = fread(buffer, sizeof(char), BUFFERSIZE, fin);
// NOTE(review): compressedBuffer is allocated but never filled or passed to
// uncompress (the stack `buffer` is used instead); neither malloc is freed.
void* compressedBuffer;
void* uncompressedBuffer;
uLongf compressedBufferSize = BUFFERSIZE;
uLongf uncompressedBufferSize = BUFFERSIZE;
compressedBuffer = malloc(compressedBufferSize);
uncompressedBuffer = malloc(uncompressedBufferSize);
// The source length passed is the buffer capacity, not byte_read.
status = uncompress((Bytef*)uncompressedBuffer, &uncompressedBufferSize, (const Bytef*)buffer, compressedBufferSize);
// NOTE(review): writes BUFFERSIZE bytes regardless of uncompressedBufferSize or status.
fwrite(uncompressedBuffer, sizeof(char), BUFFERSIZE, fout);
cout << "Status " << status << endl;
}
// Round-trip driver: compresses a hard-coded input file into output.txt, then
// decompresses output.txt into dec.txt. The argument handling is commented out.
int main(int argc, char *argv[]) {
//if (argc == 2)
//{
// if (strcmp(argv[1], "/?") == 0 || strcmp(argv[1], "--help") == 0)
// {
// cout << "Please give me 1 argument" << endl;
// //getchar();
// return -1;
// }
//}
//else
//{
// cout << "Please give me 1 argument" << endl;
// //getchar();
// return -1;
//}
//char *inputdata = argv[1];
//const char *inputdata = "C:\\Users\\Francesco\\source\\repos\\zlibtest\\P0000P0000_no_com-alt.rtdb";
const char *inputdata = "C:\\Users\\Francesco\\source\\repos\\zlibtest\\AAA.txt";
//const char *inputdata = "C:\\Users\\Francesco\\source\\repos\\zlibtest\\P0000P0000_no_com-alt.rtdb";
cout << inputdata << endl;
FILE *fin, *fout, *fdec;
// NOTE(review): none of the fopen_s calls below is checked for failure, and
// every file (including the compressed output.txt) is opened without the "b" flag.
fopen_s(&fin, inputdata, "r+");
fopen_s(&fout, "output.txt", "w+");
compress(fin, fout);
fclose(fin);
fclose(fout);
// Reopen the compressed file for reading and create the decompressed output.
fopen_s(&fout, "output.txt", "r");
fopen_s(&fdec, "dec.txt", "w");
decompress(fout, fdec);
fclose(fout);
fclose(fdec);
}
Your first problem is that as you are using windows you must open the compressed file in binary mode otherwise it will be corrupted:
fopen_s(&fout, "output.txt", "w+b");
fopen_s(&fout, "output.txt", "rb");
if the file you are compressing isn't text or if it is text and you want to perfectly preserve it you should also open the input and dec files in binary mode.
Next in compress you confuse BUFFERSIZE and CHUNK, have = BUFFERSIZE - strm.avail_out; should be have = CHUNK - strm.avail_out;, you then need to pass have to fwrite: fwrite(tmp, sizeof(char), have, fout);.
In decompress you need to pass uncompressedBufferSize to fwrite instead of BUFFERSIZE.
Fully working code (with some additional changes to fix memory leaks):
#include <iostream>
#include <fstream>
#include <cstdio>
#include <vector>
#include <zlib.h>
#include <assert.h>
#include <cstdlib>
#include <array>
const size_t CHUNK_SIZE = 16384;
const size_t BUFFER_SIZE = 4096;
void compress(FILE* fin, FILE* fout) {
std::array<Bytef, BUFFER_SIZE> buffer;
int byte_read = fread(buffer.data(), sizeof(char), buffer.size(), fin);
z_stream strm;
int ret;
unsigned have;
std::vector<Bytef> tmp(CHUNK_SIZE);
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.next_in = buffer.data();
strm.avail_in = byte_read;
strm.next_out = tmp.data();
strm.avail_out = tmp.size();
ret = deflateInit(&strm, Z_DEFAULT_COMPRESSION);
//first loop: compress input data stream and write on RTDB file
do
{
ret = deflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
have = tmp.size() - strm.avail_out;
fwrite(tmp.data(), sizeof(char), have, fout);
} while (strm.avail_out == 0);
//assert(strm.avail_in == 0);
//second loop: all input data consumed. Flush everything...
do
{
strm.next_out = tmp.data();
strm.avail_out = tmp.size();
ret = deflate(&strm, Z_FINISH);
assert(ret != Z_STREAM_ERROR);
have = tmp.size() - strm.avail_out;
fwrite(tmp.data(), sizeof(char), have, fout);
} while (ret != Z_STREAM_END);
(void)deflateEnd(&strm);
}
void decompress(FILE* fin, FILE* fout) {
int status;
std::array<Bytef, BUFFER_SIZE> compressedBuffer;
std::array<Bytef, BUFFER_SIZE> uncompressedBuffer;
int byte_read = fread(compressedBuffer.data(), sizeof(char), compressedBuffer.size(), fin);
uLongf compressedBufferSize = compressedBuffer.size();
uLongf uncompressedBufferSize = uncompressedBuffer.size();
status = uncompress(uncompressedBuffer.data(), &uncompressedBufferSize, compressedBuffer.data(), compressedBufferSize);
fwrite(uncompressedBuffer.data(), sizeof(char), uncompressedBufferSize, fout);
std::cout << "Status " << status << "\n";
}
// Round-trip driver: compresses a hard-coded input file into output.txt, then
// decompresses output.txt into dec.txt. Returns -1 if any file cannot be
// opened; files that were already open are closed before returning.
int main(int argc, char* argv[]) {
    const char* inputdata = "C:\\Users\\alan\\source\\repos\\ConanScratch\\main.cpp";
    std::cout << inputdata << "\n";

    FILE* fin = nullptr;
    FILE* fout = nullptr;
    FILE* fdec = nullptr;

    // Compression pass: input file -> output.txt (binary mode on both sides).
    fopen_s(&fin, inputdata, "r+b");
    if (!fin)
    {
        std::cout << "unable to open input\n";
        return -1;
    }
    fopen_s(&fout, "output.txt", "w+b");
    if (!fout)
    {
        std::cout << "unable to open output\n";
        fclose(fin);
        return -1;
    }
    compress(fin, fout);
    fclose(fin);
    fclose(fout);

    // Decompression pass: output.txt -> dec.txt.
    fout = nullptr;
    fopen_s(&fout, "output.txt", "r+b");
    if (!fout)
    {
        std::cout << "unable to open output\n";
        return -1;
    }
    fopen_s(&fdec, "dec.txt", "wb");
    if (!fdec)
    {
        std::cout << "unable to open dec\n";
        fclose(fout);
        return -1;
    }
    decompress(fout, fdec);
    fclose(fout);
    fclose(fdec);
}
Your code can be greatly simplified with boost::iostreams:
#include <iostream>
#include <fstream>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/zlib.hpp>
int main(int argc, char* argv[])
{
{
std::ifstream input("C:\\Users\\alan\\source\\repos\\ConanScratch\\main.cpp", std::ios_base::binary);
boost::iostreams::filtering_ostream output;
output.push(boost::iostreams::zlib_compressor{});
output.push(boost::iostreams::file_sink("output.txt", std::ios_base::out | std::ios_base::binary));
output << input.rdbuf();
}
{
boost::iostreams::filtering_istream input;
input.push(boost::iostreams::zlib_decompressor{});
input.push(boost::iostreams::file_source("output.txt", std::ios_base::in | std::ios_base::binary));
std::ofstream output("dec.txt", std::ios_base::binary);
output << input.rdbuf();
}
}

How do I use Zlib with concatenated .gz files in winAPI?

I am downloading Common Crawl files from AWS. Apparently, they are large concatenated .gz files, which the gzip standard supports. I am using zlib to inflate (decompress) them, but I only get the decompressed contents of the file up to the first concatenation. I have tried adding inflateReset(), but then I get error -5, which indicates a buffer or file problem. I suspect I am close.
here's the code without inflateReset. It works fine on non-concatenated files.
#include "zlib.h"
#define CHUNK 16384
...
file = L"CC-MAIN-20181209185547-20181209211547-00040.warc.wet.gz";
fileDecompress(&file);
// Inflates the gzip file named by *lpParameter (a wstring*) from the fixed
// directory C:\AI\corpora\ into "<name>.wet" in the same directory, reporting
// progress and errors through sendToReportWindow(). Always returns 0.
// The outer loop stops at the first Z_STREAM_END, so only the first gzip
// member of a concatenated file is decompressed.
DWORD WINAPI fileDecompress(LPVOID lpParameter)
{
wstring dir = L"C:\\AI\\corpora\\";
wstring* lpFileName = static_cast<wstring*>(lpParameter);
sendToReportWindow(L"File to decompress is \"%s\" in \"%s\"\n", lpFileName->c_str(), dir.c_str());
wstring sourcePath = dir + lpFileName->c_str();
sendToReportWindow(L"input file with path:%s\n", sourcePath.c_str());
wstring destPath = dir + lpFileName->c_str() + L".wet";
sendToReportWindow(L"output file with path:%s\n", destPath.c_str());
HANDLE InputFile = INVALID_HANDLE_VALUE;
HANDLE OutputFile = INVALID_HANDLE_VALUE;
BOOL Success;
DWORD InputFileSize;
ULONGLONG StartTime, EndTime;
LARGE_INTEGER FileSize;
// Open input file for reading, existing file only.
InputFile = CreateFile(
sourcePath.c_str(), // Input file name, compressed file
GENERIC_READ, // Open for reading
FILE_SHARE_READ, // Share for read
NULL, // Default security
OPEN_EXISTING, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (InputFile == INVALID_HANDLE_VALUE)
{
sendToReportWindow(L"Cannot open input \t%s\n", sourcePath.c_str());
return 0;
}
// Create (or overwrite) the output file for writing.
OutputFile = CreateFile(
destPath.c_str(), // Output file name, decompressed file
GENERIC_WRITE, // Open for writing
0, // No sharing
NULL, // Default security
CREATE_ALWAYS, // Always create, overwriting any existing file
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (OutputFile == INVALID_HANDLE_VALUE)
{
// NOTE(review): InputFile is still open on this return path.
sendToReportWindow(L"Cannot open output \t%s\n", destPath.c_str());
return 0;
}
// Get compressed file size.
Success = GetFileSizeEx(InputFile, &FileSize);
if ((!Success) || (FileSize.QuadPart > 0xFFFFFFFF))
{
// NOTE(review): OutputFile is not closed on this return path.
sendToReportWindow(L"Cannot get input file size or file is larger than 4GB.\n");
CloseHandle(InputFile);
return 0;
}
InputFileSize = FileSize.LowPart;
sendToReportWindow(L"input file size: %u bytes\n", InputFileSize);
int ret;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
strm.zalloc = Z_NULL; // allocate inflate state
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
// 16 + MAX_WBITS selects gzip decoding in zlib.
ret = inflateInit2(&strm, 16 + MAX_WBITS);
if (ret != Z_OK)
{
return 0;
}
// NOTE(review): every early return inside the loops below releases the
// inflate state but leaves InputFile and OutputFile open.
do { /* decompress until deflate stream ends or end of file */
DWORD read;
BOOL res = ReadFile(InputFile, in, CHUNK, &read, NULL);
// `read` is copied before `res` is checked; it is only meaningful if ReadFile succeeded.
strm.avail_in = read;
if (!res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"read error on input file\n");
return 0;
}
if (strm.avail_in == 0)
{
break;
}
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT: // 2
sendToReportWindow(L"z_need_dict:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
//ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR: // -3
sendToReportWindow(L"z_data_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_MEM_ERROR: // -4
(void)inflateEnd(&strm);
sendToReportWindow(L"z_mem_error:%d\n", ret);
sendToReportWindow(L"ret:%d\n", ret);
DisplayErrorBox((LPWSTR)L"inflate");
return 0;
case Z_BUF_ERROR: // -5
sendToReportWindow(L"z_buf_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
}
// Write whatever inflate() produced in this pass.
have = CHUNK - strm.avail_out;
DWORD written;
// This `res` shadows the ReadFile result declared in the outer loop.
BOOL res = WriteFile(OutputFile, out, have, &written, NULL);
if (written != have || !res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"file write error:%d\n", res);
return 0;
}
} while (strm.avail_out == 0); // avail_out == 0 means output buffer is full
} while (ret != Z_STREAM_END); /* done when inflate() says it's done */ // Z_STREAM_END is 1
(void)inflateEnd(&strm);
CloseHandle(InputFile); CloseHandle(OutputFile);
return 0;
}
Here's the version with the inflateReset() added. this version causes inflate to generate error -5 (bad buffer or truncated file).
...
int ret;
z_stream strm{};
array<uint8_t, CHUNK> scratch = {}; //scratch buffer for decompressing the data.
strm.zalloc = Z_NULL; // allocate inflate state
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, 16 + MAX_WBITS);
if (ret != Z_OK)
{
return 0;
}
do { /* decompress until deflate stream ends or end of file */
DWORD read;
BOOL res = ReadFile(InputFile, in, CHUNK, &read, NULL);
strm.avail_in = read;
if (!res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"read error on input file\n");
return 0;
}
if (strm.avail_in == 0)
{
sendToReportWindow(L"strm.avail_in:%d\n", strm.avail_in); // strm.avail_in = 0
break;
}
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = scratch.size();
strm.next_out = scratch.data();
ret = inflate(&strm, Z_NO_FLUSH);
//if (ret != Z_OK) break; // 0
switch (ret) {
case Z_NEED_DICT: // 2
sendToReportWindow(L"z_need_dict:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
//ret = Z_DATA_ERROR; /* and fall through */
case Z_STREAM_ERROR: // -2
sendToReportWindow(L"Z_STREAM_ERROR:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_DATA_ERROR: // -3
sendToReportWindow(L"z_data_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_MEM_ERROR: // -4
(void)inflateEnd(&strm);
sendToReportWindow(L"z_mem_error:%d\n", ret);
sendToReportWindow(L"ret:%d\n", ret);
DisplayErrorBox((LPWSTR)L"inflate");
return 0;
case Z_BUF_ERROR: // -5
sendToReportWindow(L"z_buf_error:%d\n", ret);
(void)inflateEnd(&strm);
//return 0;
break;
}
auto bytes_decoded = scratch.size() - strm.avail_out;
DWORD written;
BOOL res = WriteFile(OutputFile, &scratch, bytes_decoded, &written, NULL);
if (ret == Z_STREAM_END) break;
} while (true); // avail_out == 0 means output buffer is full
ret == Z_STREAM_END;
auto reset_result = inflateReset(&strm); // work with concatenation
sendToReportWindow(L"resetting inflate: %d\n", reset_result);
assert(reset_result == Z_OK);
} while (strm.avail_in > 0);
...
Thank you!
update: I think readFile should read in CHUNK instead of 1. changed for both examples. This now gives me error -3: "Z_DATA_ERROR". checking to see if this change is now actually hitting readfile too many times.
typical file I want to deflate: [https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2018-51/segments/1544376823009.19/wet/CC-MAIN-20181209185547-20181209211547-00041.warc.wet.gz]
update 2:
Thank you Mark Adler! using the example you provided, I was able to fix the logic in my code. this satisfies the winAPI requirement. I also added file ext handling, moved things to heap and added a timer. The timer revealed that more memory helped reduce deflate time by 30%.
// Inflates a gzip file (single- or multi-member, i.e. concatenated) found in
// C:\AI\corpora\ and writes the result next to it with the last extension
// removed. Progress, errors and the elapsed time are reported through
// sendToReportWindow().
//
// lpParameter: pointer to a wstring holding the file name. The string is
//              modified: its last ".ext" suffix, if any, is truncated away.
// Returns:     always 0 (the signature matches a Win32 thread routine).
//
// All handles, buffers and the inflate state are released on every path.
DWORD WINAPI fileDecompress(LPVOID lpParameter)
{
    // zlib does not work with .zip files
    sendToReportWindow(L"inside fileDecompress()\n");

    wstring dir = L"C:\\AI\\corpora\\";
    wstring* lpFileName = static_cast<wstring*>(lpParameter);
    sendToReportWindow(L"File to decompress is \"%s\" in \"%s\"\n", lpFileName->c_str(), dir.c_str());
    wstring sourcePath = dir + lpFileName->c_str();
    sendToReportWindow(L"input file with path:%s\n", sourcePath.c_str());

    // Drop the .gz extension. Guard against names without a dot: resize(npos)
    // would throw std::length_error.
    wstring::size_type lastdot = lpFileName->find_last_of(L".");
    if (lastdot != wstring::npos)
        lpFileName->resize(lastdot);
    wstring destPath = dir + lpFileName->c_str();
    sendToReportWindow(L"output file with path:%s\n", destPath.c_str());

    HANDLE InputFile = CreateFile(
        sourcePath.c_str(),     // Input file name, compressed file
        GENERIC_READ,           // Open for reading
        FILE_SHARE_READ,        // Share for read
        NULL,                   // Default security
        OPEN_EXISTING,          // Existing file only
        FILE_ATTRIBUTE_NORMAL,  // Normal file
        NULL);                  // No template
    if (InputFile == INVALID_HANDLE_VALUE)
    {
        sendToReportWindow(L"Cannot open input \t%s\n", sourcePath.c_str());
        return 0;
    }

    HANDLE OutputFile = CreateFile(
        destPath.c_str(),       // Output file name, decompressed file
        GENERIC_WRITE,          // Open for writing
        0,                      // No sharing
        NULL,                   // Default security
        CREATE_ALWAYS,          // Always create, overwriting any existing file
        FILE_ATTRIBUTE_NORMAL,  // Normal file
        NULL);                  // No template
    if (OutputFile == INVALID_HANDLE_VALUE)
    {
        sendToReportWindow(L"Cannot open output \t%s\n", destPath.c_str());
        CloseHandle(InputFile);
        return 0;
    }

    LARGE_INTEGER FileSize;
    BOOL Success = GetFileSizeEx(InputFile, &FileSize); // Get compressed file size.
    if ((!Success) || (FileSize.QuadPart > 0xFFFFFFFF))
    {
        sendToReportWindow(L"Cannot get input file size or file is larger than 4GB.\n");
        CloseHandle(InputFile);
        CloseHandle(OutputFile);
        return 0;
    }
    DWORD InputFileSize = FileSize.LowPart;
    sendToReportWindow(L"input file size: %u bytes\n", InputFileSize);

    ULONGLONG StartTime = GetTickCount64();
#define CHUNK 524288 // buffer size. doesn't use much ram and speeds up inflate
    z_stream strm = {};
    int ret = inflateInit2(&strm, 16 + MAX_WBITS); // 16 + MAX_WBITS selects gzip decoding
    if (ret != Z_OK)
    {
        sendToReportWindow(L"inflateInit2 failed:%d\n", ret);
        CloseHandle(InputFile);
        CloseHandle(OutputFile);
        return 0;
    }

    unsigned char* in = new unsigned char[CHUNK];
    unsigned char* out = new unsigned char[CHUNK];
    bool failed = false;

    while (!failed) // Decompress from input to output.
    {
        if (strm.avail_in == 0) // Refill only when the previous input is fully consumed.
        {
            DWORD read = 0;
            if (!ReadFile(InputFile, in, CHUNK, &read, NULL))
            {
                sendToReportWindow(L"read error on input file\n");
                failed = true;
                break;
            }
            if (read == 0)
                break; // end of the input file
            strm.avail_in = read;
            strm.next_in = in;
        }
        do // Decompress all of what's in the input buffer.
        {
            strm.avail_out = CHUNK;
            strm.next_out = out;
            ret = inflate(&strm, Z_NO_FLUSH); // Decompress as much as possible into out.

            // Write whatever inflate() left in the output buffer.
            DWORD got = CHUNK - strm.avail_out;
            DWORD written = 0;
            if (!WriteFile(OutputFile, out, got, &written, NULL) || written != got)
            {
                sendToReportWindow(L"file write error\n");
                failed = true;
                break;
            }

            if (ret == Z_STREAM_END)
            {
                // End of a gzip member: reset for the next one (concatenated files).
                // The call must stay outside assert() so it still runs when
                // NDEBUG removes assertions.
                const int resetResult = inflateReset(&strm);
                assert(resetResult == Z_OK);
                if (resetResult != Z_OK)
                {
                    failed = true;
                    break;
                }
            }
            else if (ret != Z_OK) // Stop on a data (or memory) error.
            {
                sendToReportWindow(L"inflate error:%d\n", ret);
                failed = true;
                break;
            }
        } while (strm.avail_in > 0); // Continue until the input buffer is consumed.
    }

    ULONGLONG EndTime = GetTickCount64();
    double InflateTime = (EndTime - StartTime) / 1000.0; // seconds spent inflating

    // Single cleanup path shared by success and failure.
    delete[] in;
    delete[] out;
    (void)inflateEnd(&strm);
    CloseHandle(InputFile);
    CloseHandle(OutputFile);

    if (!failed)
        sendToReportWindow(L"Inflate Time: %.2f seconds. Done with fileDecompress function.\n", InflateTime);
    return 0;
}
Does your compiler not at least warn you about the naked conditional ret == Z_STREAM_END;? You want an if there and some braces around the inflateReset() related statements.
There's still a problem in that you are leaving the outer loop if strm.avail_in is zero. That will happen every time, except when reaching the end of member. It can even happen then if you just so happen to exhaust the input buffer to decompress that member. Just make the outer loop a while (true).
Even after fixing all that, you would then discard the remaining available input when you do the read at the top of the outer loop. Only do that read if strm.avail_in is zero.
A simpler approach would be to do the reset in the inner loop. Like this (example in C):
// Decompress a gzip file input, potentially with multiple gzip members. Write
// the decompressed data to output. Return Z_STREAM_END on success. Return Z_OK
// if the gzip stream was correct up to where it ended prematurely. Return
// Z_DATA_ERROR if the gzip stream is invalid, Z_ERRNO if reading input or
// writing output fails, or the inflateInit2() error code if inflate could not
// be initialized. The inflate state is released on every return path.
int inflate_gzip(FILE *input, FILE *output) {
    // Initialize inflate for gzip input.
    z_stream strm = {};
    int ret = inflateInit2(&strm, 16 + MAX_WBITS);
    if (ret != Z_OK)
        return ret;

    // Decompress from input to output.
    unsigned char in[CHUNK];
    for (;;) {
        // Keep reading until the end of the input file or an error.
        if (strm.avail_in == 0) {
            strm.avail_in = fread(in, 1, CHUNK, input);
            if (strm.avail_in == 0) {
                // Distinguish a read failure from a normal end of file.
                if (ferror(input)) {
                    (void)inflateEnd(&strm);
                    return Z_ERRNO;
                }
                break;
            }
            strm.next_in = in;
        }

        // Decompress all of what's in the input buffer.
        do {
            // Decompress as much as possible to the CHUNK output buffer.
            unsigned char out[CHUNK];
            strm.avail_out = CHUNK;
            strm.next_out = out;
            ret = inflate(&strm, Z_NO_FLUSH);

            // Write to the output file whatever inflate() left in the output
            // buffer. Return with an error if the write does not complete.
            size_t got = CHUNK - strm.avail_out;
            size_t put = fwrite(out, 1, got, output);
            if (put != got) {
                (void)inflateEnd(&strm);
                return Z_ERRNO;
            }

            // Check for the end of a gzip member, in which case reset inflate
            // for the next gzip member. The call is made outside assert() so
            // that it still happens when NDEBUG compiles assertions out.
            if (ret == Z_STREAM_END) {
                int reset = inflateReset(&strm);
                assert(reset == Z_OK);
                (void)reset;
            }

            // Return on a data error.
            else if (ret != Z_OK) {
                assert(ret == Z_DATA_ERROR);
                (void)inflateEnd(&strm);
                return ret;
            }

            // Continue until everything in the input buffer is consumed.
        } while (strm.avail_in > 0);
    }

    // Successfully decompressed all of the input file. Clean up and return.
    // As above, the cleanup call itself must not live inside assert().
    int end = inflateEnd(&strm);
    assert(end == Z_OK);
    (void)end;
    return ret;
}

C++ memory leaks when read binary file

I wrote a program that automatically sends compressed files to a server.
The files are sent from a weak VPS that has 1.5 GB of RAM at its disposal.
When it loads binary files, small memory leaks occur, and after about 30,000 .pdf files it refuses to allocate any more memory.
As a workaround, I save a log and restart the program, which frees the RAM, but I would like to know why this happens. Note that if I do not use 'ios::binary', there are no such leaks.
My code :
std::ifstream ifs(url,ios::binary);
std::string content((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
content = PFHelper::stream_compression(content);
content = PFHelper::ASE_encodeCppStandard(content,KS,PFHelper::AES_CBC_128);
PFHelper::stream_compression
std::string PFHelper::stream_compression(std::string stream)
{
z_stream zs; // z_stream is zlib's control structure
memset(&zs, 0, sizeof(zs));
if (deflateInit(&zs, Z_BEST_COMPRESSION) != Z_OK)
{
throw new PFException(L"EXCEPTION_DURING_ZLIB_COMPRESSION");
}
zs.next_in = (Bytef*)stream.data();
zs.avail_in = stream.size(); // set the z_stream's input
int ret;
char outbuffer[32768];
std::string outstring;
// retrieve the compressed bytes blockwise
do {
zs.next_out = reinterpret_cast<Bytef*>(outbuffer);
zs.avail_out = sizeof(outbuffer);
ret = deflate(&zs, Z_FINISH);
if (outstring.size() < zs.total_out) {
// append the block to the output string
outstring.append(outbuffer,
zs.total_out - outstring.size());
}
} while (ret == Z_OK);
deflateEnd(&zs);
if (ret != Z_STREAM_END) { throw new PFException(L"EXCEPTION_DURING_ZLIB_COMPRESSION"); }
return outstring;
}
PFHelper::ASE_encodeCppStandard
std::string PFHelper::ASE_encodeCppStandard(std::string in, wchar_t* KS ,wchar_t* typ)
{
string ctext = "";
std::string KS_string = PFHelper::ConvertFromUtf8ToString(KS);
if (typ == PFHelper::AES_CBC_128)
ctext = encrypt(KS_string,in);
if (typ == PFHelper::AES_CBC_256)
ctext = encryptEX(KS_string, in);
return ctext;
}
// Encrypts `ptext` with AES-128-CBC using OpenSSL's EVP API and returns the
// ciphertext (up to one block, 16 bytes, longer than the plaintext).
//
// KS:    key material; its first 16 bytes are used as the key AND as the IV.
// ptext: plaintext bytes.
// Throws std::runtime_error if KS is shorter than 16 bytes or any EVP call
// fails. The cipher context is freed on every path; previously it was never
// freed, leaking one context per call.
//
// NOTE(review): reusing the key as the IV is kept for compatibility with
// existing ciphertexts, but it weakens CBC; consider a random per-message IV.
static string encrypt(string KS, const string ptext)
{
    // EVP_EncryptInit_ex reads 16 key bytes and 16 IV bytes from KS.
    const size_t kKeyBytes = 16;
    if (KS.size() < kKeyBytes)
        throw runtime_error("encrypt: key must be at least 16 bytes");

    EVP_CIPHER_CTX* ctx = EVP_CIPHER_CTX_new();
    if (ctx == NULL)
        throw runtime_error("EVP_CIPHER_CTX_new failed");

    int rc = EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, (byte*)&KS[0], (byte*)&KS[0]);
    if (rc != 1)
    {
        EVP_CIPHER_CTX_free(ctx);
        throw runtime_error("EVP_EncryptInit_ex failed");
    }

    // Cipher text will be upto 16 bytes larger than plain text
    std::string ctext;
    ctext.resize(ptext.size() + 16);

    int out_len1 = (int)ctext.size();
    rc = EVP_EncryptUpdate(ctx, (byte*)&ctext[0], &out_len1, (const byte*)&ptext[0], (int)ptext.size());
    if (rc != 1)
    {
        EVP_CIPHER_CTX_free(ctx);
        throw runtime_error("EVP_EncryptUpdate failed");
    }

    // The final call writes the padded last block right after the bytes
    // produced by EVP_EncryptUpdate.
    int out_len2 = (int)ctext.size() - out_len1;
    rc = EVP_EncryptFinal_ex(ctx, (byte*)&ctext[0] + out_len1, &out_len2);
    EVP_CIPHER_CTX_free(ctx); // the context is not needed past this point
    if (rc != 1)
        throw runtime_error("EVP_EncryptFinal_ex failed");

    ctext.resize(out_len1 + out_len2);
    return ctext;
}

zlib different decompresssion size

I am trying to use zlib for decompression. I looked at a tutorial on the zlib site, and the two inflate implementations below produce output of different sizes.
int CZLib::Inflate() {
int ret;
unsigned int have;
z_stream zstream;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
zstream.zalloc = Z_NULL;
zstream.zfree = Z_NULL;
zstream.opaque = Z_NULL;
zstream.avail_in = 0;
zstream.next_in = Z_NULL;
ret = inflateInit(&zstream);
if (ret != Z_OK)
return ret;
do {
zstream.avail_in = fread(in, 1, CHUNK, fin);
if (ferror(fin)) {
(void)inflateEnd(&zstream);
return Z_ERRNO;
}
if (zstream.avail_in == 0) break;
zstream.next_in = in;
do {
zstream.avail_out = CHUNK;
zstream.next_out = out;
ret = inflate(&zstream, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR;
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&zstream);
return ret;
}
have = CHUNK - zstream.avail_out;
if (fwrite(out, 1, have, fout) != have || ferror(fout)) {
(void)inflateEnd(&zstream);
return Z_ERRNO;
}
} while (zstream.avail_out == 0);
} while (ret != Z_STREAM_END);
(void)inflateEnd(&zstream);
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}
and other
// In-memory variant: inflates the zlib stream held in `src`, collects the
// output in a vector and writes it to "in.out" through CFile. Returns the last
// inflate() result.
int CZLib::Inflate(const std::string& src) {
std::vector<char> output;
z_stream zstream;
zstream.zalloc = Z_NULL;
zstream.zfree = Z_NULL;
zstream.opaque = Z_NULL;
zstream.avail_in = 0;
zstream.next_in = Z_NULL;
int ret = inflateInit(&zstream);
if (ret != Z_OK)
return ret;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
// nByte: size of the slice to copy on this pass; off: read offset into src;
// remaining: bytes of src not yet copied.
int have = 0, nByte = CHUNK, off = 0, remaining = src.size();
if (src.size() < CHUNK) nByte = src.size();
do {
// Copy the next slice of src into the fixed-size input buffer.
memcpy(in, &src[off], nByte);
off += nByte;
remaining -= nByte;
// avail_in is only assigned when the slice is non-empty; on an empty slice
// it keeps whatever value the previous inflate() call left in it.
if (nByte > 0) zstream.avail_in = nByte;
// Size of the slice for the NEXT pass.
if (remaining > CHUNK) { nByte = CHUNK; }
else { nByte = remaining; }
if (zstream.avail_in == 0) break;
zstream.next_in = in;
do {
zstream.avail_out = CHUNK;
zstream.next_out = out;
// NOTE(review): the inflate() result is not checked for errors here; the
// output produced is appended unconditionally.
ret = inflate(&zstream, Z_NO_FLUSH);
have = CHUNK - zstream.avail_out;
output.insert(output.end(), out, out + have);
} while (zstream.avail_out == 0);
} while (ret != Z_STREAM_END);
// NOTE(review): inflateEnd() is never called on zstream in this function, and
// &output[0] is taken even if output is empty.
CFile* file = new CFile("in.out", "wb");
file->Write<char>(&output[0], output.size());
delete file;
return ret;
}
Both use the same data. One of them reads a file on disk and the other uses memory (the buffer method). The CHUNK size is 16384. The first code produces 524288 (0x80000) bytes and the other produces 524800 (0x80200) bytes. The difference is 512 bytes. Why is this happening?
In the first code example you have this line
zstream.avail_in = fread(in, 1, CHUNK, fin);
and then you have
if (zstream.avail_in == 0) break;
to stop the loop.
In the second code example, you have the same line to stop the loop but you also have this line:
if (nByte > 0) zstream.avail_in = nByte;
^^^^^^^^^
So you only assign to zstream.avail_in when nByte > 0
....
....
if (zstream.avail_in == 0) break;
^^^^^^^^^^^^^^^^
Consequently this will not be true when nByte is zero and the
code will not exit
Try this instead:
zstream.avail_in = nByte; // Unconditional assignment
....
if (zstream.avail_in <= 0) break; // Less or equal to zero

C++ ZLib GZipStream Decompression NULL terminated

There are a lot of questions out there revolving around zlib and GZipStreams but none that I've found answer this question. I'm using a C# GZipStream to send compressed data to a client. It reads the compressed data in entirely then tries to decompress it. However, each time inflate() is called in the loop it only gets the NULL terminated string. When sending a binary this is a pretty huge problem.
Before I show you code, I just wanted to say that if I write the received compressed bytes to a .gz file and use gzFile/gzopen/gzread/gzclose everything works perfectly. That means all the data is coming in properly. I want to read in the compressed data, decompress it in memory, and have the contents in a variable.
I think the issue is that inflate() is writing to a char* which is NULL terminated. I just don't know how to get it to be a string. I do fully anticipate this being a major oversight and a simple fix. Thanks for any help!
Here's the decompression code:
// Inflates the gzip data in `message` and appends the result to `dMsg`, then
// cuts dMsg at the first "\n<EOF>" marker if one is present. Returns false if
// inflate() reports a negative status, true otherwise.
bool DecompressString(const std::string& message, std::string& dMsg)
{
int bufferSize = 512;
// One byte more than the string's size is offered to zlib as input.
int messageSize = message.size() + 1;
//decompress string
z_stream zs;
memset(&zs, 0, sizeof(zs));
zs.zalloc = Z_NULL;
zs.zfree = Z_NULL;
zs.opaque = Z_NULL;
zs.next_in = (Bytef*)message.data();
zs.avail_in = messageSize;
int ret = Z_OK;
unsigned char* outbuffer = new unsigned char[bufferSize];
// 16+MAX_WBITS selects gzip decoding in zlib.
if (inflateInit2(&zs, 16+MAX_WBITS) == Z_OK)
{
do {
zs.next_out = outbuffer;
zs.avail_out = bufferSize;
ret = inflate(&zs, Z_NO_FLUSH);
// NOTE(review): this early return skips inflateEnd() and delete[] outbuffer.
if (ret < 0) return false;
std::stringstream tmpString;
// Streaming an unsigned char* inserts it as a zero-terminated C string, so
// the copy stops at the first 0 byte in outbuffer.
tmpString << outbuffer;
if (dMsg.size() < zs.total_out) {
dMsg.append(tmpString.str().substr(0, zs.total_out - dMsg.size()));
}
} while (ret == Z_OK);
}
inflateEnd(&zs);
delete[] outbuffer;
//"\n<EOF>" is appended by sender to signify the end of file. This removes it
if (dMsg.find("\n<EOF>") != -1)
dMsg = dMsg.substr(0, dMsg.find("\n<EOF>"));
return true;
}
Working code from solution:
bool DecompressString(const std::string& message, std::string& dMsg)
{
int bufferSize = 512;
int messageSize = message.size() + 1;
//decompress string
z_stream zs;
memset(&zs, 0, sizeof(zs));
zs.zalloc = Z_NULL;
zs.zfree = Z_NULL;
zs.opaque = Z_NULL;
zs.next_in = (Bytef*)message.data();
zs.avail_in = messageSize;
int ret = Z_OK;
unsigned char* outbuffer = new unsigned char[bufferSize];
if (inflateInit2(&zs, 16+MAX_WBITS) == Z_OK)
{
// get the decompressed bytes blockwise using repeated calls to inflate
do {
zs.next_out = outbuffer;
zs.avail_out = bufferSize;
ret = inflate(&zs, Z_NO_FLUSH);
if (ret < 0) return false;
//Here's the difference
if (dMsg.size() < zs.total_out)
dMsg.append(reinterpret_cast<char*>(outbuffer), bufferSize);
//End
} while (ret == Z_OK);
}
inflateEnd(&zs);
delete[] outbuffer;
if (dMsg.find("\n<EOF>") != -1)
dMsg = dMsg.substr(0, dMsg.find("\n<EOF>"));
return true;
}
string is not a problem in itself, it can handle binary data.
It is this line that assumes a zero-terminated c-string:
tmpString << outbuffer;
Replace it with
tmpString.append(outbuffer, bufferSize);