C++ memory leaks when read binary file - c++

I wrote the program automatically sends the compressed files to the server.
Files are sent with a weak VPS that the disposal has 1.5 GB of RAM.
When the load binary files gets micro memory leaks, but at 30 000 .pdf refuses to continue allocating memory.
I did a little deception because it saves log and run the program again, cleaning RAM in this way, however, would like to know why. Mark that if I do not do 'ios: binary', such leaks are not.
My code :
std::ifstream ifs(url,ios::binary);
std::string content((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
content = PFHelper::stream_compression(content);
content = PFHelper::ASE_encodeCppStandard(content,KS,PFHelper::AES_CBC_128);
PFHelper::stream_compression
std::string PFHelper::stream_compression(std::string stream)
{
z_stream zs; // z_stream is zlib's control structure
memset(&zs, 0, sizeof(zs));
if (deflateInit(&zs, Z_BEST_COMPRESSION) != Z_OK)
{
throw new PFException(L"EXCEPTION_DURING_ZLIB_COMPRESSION");
}
zs.next_in = (Bytef*)stream.data();
zs.avail_in = stream.size(); // set the z_stream's input
int ret;
char outbuffer[32768];
std::string outstring;
// retrieve the compressed bytes blockwise
do {
zs.next_out = reinterpret_cast<Bytef*>(outbuffer);
zs.avail_out = sizeof(outbuffer);
ret = deflate(&zs, Z_FINISH);
if (outstring.size() < zs.total_out) {
// append the block to the output string
outstring.append(outbuffer,
zs.total_out - outstring.size());
}
} while (ret == Z_OK);
deflateEnd(&zs);
if (ret != Z_STREAM_END) { throw new PFException(L"EXCEPTION_DURING_ZLIB_COMPRESSION"); }
return outstring;
}
PFHelper::ASE_encodeCppStandard
std::string PFHelper::ASE_encodeCppStandard(std::string in, wchar_t* KS ,wchar_t* typ)
{
string ctext = "";
std::string KS_string = PFHelper::ConvertFromUtf8ToString(KS);
if (typ == PFHelper::AES_CBC_128)
ctext = encrypt(KS_string,in);
if (typ == PFHelper::AES_CBC_256)
ctext = encryptEX(KS_string, in);
return ctext;
}
static string encrypt(string KS, const string ptext)
{
EVP_CIPHER_CTX* ctx;
ctx = EVP_CIPHER_CTX_new();
int rc = EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, (byte*)&KS[0], (byte*)&KS[0]);
if (rc != 1)
throw runtime_error("EVP_EncryptInit_ex failed");
// Cipher text will be upto 16 bytes larger than plain text
std::string ctext;
ctext.resize(ptext.size()+16);
int out_len1 = (int)ctext.size();
rc = EVP_EncryptUpdate(ctx, (byte*)&ctext[0], &out_len1, (const byte*)&ptext[0], (int)ptext.size());
if (rc != 1)
throw runtime_error("EVP_EncryptUpdate failed");
int out_len2 = (int)ctext.size() - out_len1;
rc = EVP_EncryptFinal_ex(ctx, (byte*)&ctext[0] + out_len1, &out_len2);
if (rc != 1)
throw runtime_error("EVP_EncryptFinal_ex failed");
ctext.resize(out_len1 + out_len2);
return ctext;
}

Related

How to cut videos/audios using FFmpeg C APIs

I'm trying to write a function to cut videos/audios FFmpeg C APIs in C++. I started with the remuxing.c example from FFmpeg GitHub repository, and tried to apply the same changes mentioned in this question, but I'm getting blank screen in the beginning of the output equal to the duration that I want to cut. This is the function I came with (Differences between the function and the remuxing example noted with // <- HERE):
int cut_video(const char *in_filename, const char *out_filename, double from_seconds, double end_seconds) {
const AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket *pkt = NULL;
int ret, i;
int stream_index = 0;
int *stream_mapping = NULL;
int stream_mapping_size = 0;
pkt = av_packet_alloc();
if (!pkt) {
fprintf(stderr, "Could not allocate AVPacket\n");
return 1;
}
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}
av_dump_format(ifmt_ctx, 0, in_filename, 0);
ret = av_seek_frame(ifmt_ctx, -1, from_seconds * AV_TIME_BASE, AVSEEK_FLAG_ANY); // <- HERE
if (ret < 0) { // <- HERE
fprintf(stderr, "Error seek\n"); // <- HERE
goto end; // <- HERE
} // <- HERE
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx) {
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
stream_mapping_size = ifmt_ctx->nb_streams;
stream_mapping = (int *)av_calloc(stream_mapping_size, sizeof(*stream_mapping));
if (!stream_mapping) {
ret = AVERROR(ENOMEM);
goto end;
}
ofmt = ofmt_ctx->oformat;
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
AVStream *out_stream;
AVStream *in_stream = ifmt_ctx->streams[i];
AVCodecParameters *in_codecpar = in_stream->codecpar;
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
stream_mapping[i] = -1;
continue;
}
stream_mapping[i] = stream_index++;
out_stream = avformat_new_stream(ofmt_ctx, NULL);
if (!out_stream) {
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
if (ret < 0) {
fprintf(stderr, "Failed to copy codec parameters\n");
goto end;
}
out_stream->codecpar->codec_tag = 0;
}
av_dump_format(ofmt_ctx, 0, out_filename, 1);
if (!(ofmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}
while (1) {
AVStream *in_stream, *out_stream;
ret = av_read_frame(ifmt_ctx, pkt);
if (ret < 0) break;
in_stream = ifmt_ctx->streams[pkt->stream_index];
if (pkt->stream_index >= stream_mapping_size || stream_mapping[pkt->stream_index] < 0 ||
av_q2d(in_stream->time_base) * pkt->pts > end_seconds) { // <- HERE
av_packet_unref(pkt);
continue;
}
pkt->stream_index = stream_mapping[pkt->stream_index];
out_stream = ofmt_ctx->streams[pkt->stream_index];
log_packet(ifmt_ctx, pkt, "in");
/* copy packet */
av_packet_rescale_ts(pkt, in_stream->time_base, out_stream->time_base);
pkt->pos = -1;
log_packet(ofmt_ctx, pkt, "out");
ret = av_interleaved_write_frame(ofmt_ctx, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
* its contents and resets pkt), so that no unreferencing is necessary.
* This would be different if one used av_write_frame(). */
if (ret < 0) {
fprintf(stderr, "Error muxing packet\n");
break;
}
}
av_write_trailer(ofmt_ctx);
end:
av_packet_free(&pkt);
avformat_close_input(&ifmt_ctx);
/* close output */
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE)) avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
av_freep(&stream_mapping);
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
return 1;
}
return 0;
}
And here is how I call it:
cut_video("/Users/aliosm/Desktop/1.mp4", "/Users/aliosm/Desktop/2.mp4", 10, 40);
I searched a lot on Google and I didn't find anything useful related to this specific use-case, do you have any idea?
Finally, I was able to do that by the help from #ffmpeg channel community on Libera.Chat IRC. The final code:
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/timestamp.h>
}
/**
* #brief Print the information of the passed packet.
*
* #fn logPacket
* #param avFormatContext AVFormatContext of the given packet.
* #param avPacket AVPacket to log.
* #param tag String to tag the log output.
*/
void logPacket(const AVFormatContext *avFormatContext, const AVPacket *avPacket, const QString tag) {
AVRational *timeBase = &avFormatContext->streams[avPacket->stream_index]->time_base;
qDebug() << QString("%1: pts:%2 pts_time:%3 dts:%4 dts_time:%5 duration:%6 duration_time:%7 stream_index:%8")
.arg(tag)
.arg(av_ts2str(avPacket->pts))
.arg(av_ts2timestr(avPacket->pts, timeBase))
.arg(av_ts2str(avPacket->dts))
.arg(av_ts2timestr(avPacket->dts, timeBase))
.arg(av_ts2str(avPacket->duration))
.arg(av_ts2timestr(avPacket->duration, timeBase))
.arg(avPacket->stream_index);
}
/**
* #brief Cut a file in the given input file path based on the start and end seconds, and output the cutted file to the
* given output file path.
*
* #fn cutFile
* #param inputFilePath Input file path to be cutted.
* #param startSeconds Cutting start time in seconds.
* #param endSeconds Cutting end time in seconds.
* #param outputFilePath Output file path to write the new cutted file.
*
* #details This function will take an input file path and cut it based on the given start and end seconds. The cutted
* file will then be outputted to the given output file path.
*
* #return True if the cutting operation finished successfully, false otherwise.
*/
bool cutFile(const QString& inputFilePath, const long long& startSeconds, const long long& endSeconds,
const QString& outputFilePath) {
int operationResult;
AVPacket* avPacket = NULL;
AVFormatContext* avInputFormatContext = NULL;
AVFormatContext* avOutputFormatContext = NULL;
avPacket = av_packet_alloc();
if (!avPacket) {
qCritical("Failed to allocate AVPacket.");
return false;
}
try {
operationResult = avformat_open_input(&avInputFormatContext, inputFilePath.toStdString().c_str(), 0, 0);
if (operationResult < 0) {
throw std::runtime_error(QString("Failed to open the input file '%1'.").arg(inputFilePath).toStdString().c_str());
}
operationResult = avformat_find_stream_info(avInputFormatContext, 0);
if (operationResult < 0) {
throw std::runtime_error(QString("Failed to retrieve the input stream information.").toStdString().c_str());
}
avformat_alloc_output_context2(&avOutputFormatContext, NULL, NULL, outputFilePath.toStdString().c_str());
if (!avOutputFormatContext) {
operationResult = AVERROR_UNKNOWN;
throw std::runtime_error(QString("Failed to create the output context.").toStdString().c_str());
}
int streamIndex = 0;
int streamMapping[avInputFormatContext->nb_streams];
int streamRescaledStartSeconds[avInputFormatContext->nb_streams];
int streamRescaledEndSeconds[avInputFormatContext->nb_streams];
// Copy streams from the input file to the output file.
for (int i = 0; i < avInputFormatContext->nb_streams; i++) {
AVStream* outStream;
AVStream* inStream = avInputFormatContext->streams[i];
streamRescaledStartSeconds[i] = av_rescale_q(startSeconds * AV_TIME_BASE, AV_TIME_BASE_Q, inStream->time_base);
streamRescaledEndSeconds[i] = av_rescale_q(endSeconds * AV_TIME_BASE, AV_TIME_BASE_Q, inStream->time_base);
if (inStream->codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
inStream->codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&
inStream->codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
streamMapping[i] = -1;
continue;
}
streamMapping[i] = streamIndex++;
outStream = avformat_new_stream(avOutputFormatContext, NULL);
if (!outStream) {
operationResult = AVERROR_UNKNOWN;
throw std::runtime_error(QString("Failed to allocate the output stream.").toStdString().c_str());
}
operationResult = avcodec_parameters_copy(outStream->codecpar, inStream->codecpar);
if (operationResult < 0) {
throw std::runtime_error(
QString("Failed to copy codec parameters from input stream to output stream.").toStdString().c_str());
}
outStream->codecpar->codec_tag = 0;
}
if (!(avOutputFormatContext->oformat->flags & AVFMT_NOFILE)) {
operationResult = avio_open(&avOutputFormatContext->pb, outputFilePath.toStdString().c_str(), AVIO_FLAG_WRITE);
if (operationResult < 0) {
throw std::runtime_error(
QString("Failed to open the output file '%1'.").arg(outputFilePath).toStdString().c_str());
}
}
operationResult = avformat_write_header(avOutputFormatContext, NULL);
if (operationResult < 0) {
throw std::runtime_error(QString("Error occurred when opening output file.").toStdString().c_str());
}
operationResult = avformat_seek_file(avInputFormatContext, -1, INT64_MIN, startSeconds * AV_TIME_BASE,
startSeconds * AV_TIME_BASE, 0);
if (operationResult < 0) {
throw std::runtime_error(
QString("Failed to seek the input file to the targeted start position.").toStdString().c_str());
}
while (true) {
operationResult = av_read_frame(avInputFormatContext, avPacket);
if (operationResult < 0) break;
// Skip packets from unknown streams and packets after the end cut position.
if (avPacket->stream_index >= avInputFormatContext->nb_streams || streamMapping[avPacket->stream_index] < 0 ||
avPacket->pts > streamRescaledEndSeconds[avPacket->stream_index]) {
av_packet_unref(avPacket);
continue;
}
avPacket->stream_index = streamMapping[avPacket->stream_index];
logPacket(avInputFormatContext, avPacket, "in");
// Shift the packet to its new position by subtracting the rescaled start seconds.
avPacket->pts -= streamRescaledStartSeconds[avPacket->stream_index];
avPacket->dts -= streamRescaledStartSeconds[avPacket->stream_index];
av_packet_rescale_ts(avPacket, avInputFormatContext->streams[avPacket->stream_index]->time_base,
avOutputFormatContext->streams[avPacket->stream_index]->time_base);
avPacket->pos = -1;
logPacket(avOutputFormatContext, avPacket, "out");
operationResult = av_interleaved_write_frame(avOutputFormatContext, avPacket);
if (operationResult < 0) {
throw std::runtime_error(QString("Failed to mux the packet.").toStdString().c_str());
}
}
av_write_trailer(avOutputFormatContext);
} catch (std::runtime_error e) {
qCritical("%s", e.what());
}
av_packet_free(&avPacket);
avformat_close_input(&avInputFormatContext);
if (avOutputFormatContext && !(avOutputFormatContext->oformat->flags & AVFMT_NOFILE))
avio_closep(&avOutputFormatContext->pb);
avformat_free_context(avOutputFormatContext);
if (operationResult < 0 && operationResult != AVERROR_EOF) {
qCritical("%s", QString("Error occurred: %1.").arg(av_err2str(operationResult)).toStdString().c_str());
return false;
}
return true;
}
The code is written in C++, and it is using some Qt related classes, you can remove them and use the code on plain C++ projects.
I tried my best to make it readable, I hope it is good and helpful.
Update 1: I updated the code to fix a bug in it.
Update 2: I updated the code to do some refactoring.

How do I use Zlib with concatenated .gz files in winAPI?

I am downloading common crawl files from AWS. Apparently, they are large concatenated .gz files, which is supported by the gzip standard. I am using zlib to deflate but I only get the decompressed contents of the file up to the first concatenation. I have tried adding inflateReset() but then I get error -5, which indicates a buffer or file problem. I suspect I am close.
here's the code without inflateReset. It works fine on non-concatenated files.
#include "zlib.h"
#define CHUNK 16384
...
file = L"CC-MAIN-20181209185547-20181209211547-00040.warc.wet.gz";
fileDecompress(&file);
DWORD WINAPI fileDecompress(LPVOID lpParameter)
{
wstring dir = L"C:\\AI\\corpora\\";
wstring* lpFileName = static_cast<wstring*>(lpParameter);
sendToReportWindow(L"File to decompress is \"%s\" in \"%s\"\n", lpFileName->c_str(), dir.c_str());
wstring sourcePath = dir + lpFileName->c_str();
sendToReportWindow(L"input file with path:%s\n", sourcePath.c_str());
wstring destPath = dir + lpFileName->c_str() + L".wet";
sendToReportWindow(L"output file with path:%s\n", destPath.c_str());
HANDLE InputFile = INVALID_HANDLE_VALUE;
HANDLE OutputFile = INVALID_HANDLE_VALUE;
BOOL Success;
DWORD InputFileSize;
ULONGLONG StartTime, EndTime;
LARGE_INTEGER FileSize;
// Open input file for reading, existing file only.
InputFile = CreateFile(
sourcePath.c_str(), // Input file name, compressed file
GENERIC_READ, // Open for reading
FILE_SHARE_READ, // Share for read
NULL, // Default security
OPEN_EXISTING, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (InputFile == INVALID_HANDLE_VALUE)
{
sendToReportWindow(L"Cannot open input \t%s\n", sourcePath.c_str());
return 0;
}
OutputFile = CreateFile(
destPath.c_str(), // Input file name, compressed file
GENERIC_WRITE, // Open for reading
0, // Share for read
NULL, // Default security
CREATE_ALWAYS, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (OutputFile == INVALID_HANDLE_VALUE)
{
sendToReportWindow(L"Cannot open output \t%s\n", destPath.c_str());
return 0;
}
// Get compressed file size.
Success = GetFileSizeEx(InputFile, &FileSize);
if ((!Success) || (FileSize.QuadPart > 0xFFFFFFFF))
{
sendToReportWindow(L"Cannot get input file size or file is larger than 4GB.\n");
CloseHandle(InputFile);
return 0;
}
InputFileSize = FileSize.LowPart;
sendToReportWindow(L"input file size: %u bytes\n", InputFileSize);
int ret;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
strm.zalloc = Z_NULL; // allocate inflate state
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, 16 + MAX_WBITS);
if (ret != Z_OK)
{
return 0;
}
do { /* decompress until deflate stream ends or end of file */
DWORD read;
BOOL res = ReadFile(InputFile, in, CHUNK, &read, NULL);
strm.avail_in = read;
if (!res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"read error on input file\n");
return 0;
}
if (strm.avail_in == 0)
{
break;
}
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT: // 2
sendToReportWindow(L"z_need_dict:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
//ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR: // -3
sendToReportWindow(L"z_data_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_MEM_ERROR: // -4
(void)inflateEnd(&strm);
sendToReportWindow(L"z_mem_error:%d\n", ret);
sendToReportWindow(L"ret:%d\n", ret);
DisplayErrorBox((LPWSTR)L"inflate");
return 0;
case Z_BUF_ERROR: // -5
sendToReportWindow(L"z_buf_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
}
have = CHUNK - strm.avail_out;
DWORD written;
BOOL res = WriteFile(OutputFile, out, have, &written, NULL);
if (written != have || !res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"file write error:%d\n", res);
return 0;
}
} while (strm.avail_out == 0); // avail_out == 0 means output buffer is full
} while (ret != Z_STREAM_END); /* done when inflate() says it's done */ // Z_STREAM_END is 1
(void)inflateEnd(&strm);
CloseHandle(InputFile); CloseHandle(OutputFile);
return 0;
}
Here's the version with the inflateReset() added. this version causes inflate to generate error -5 (bad buffer or truncated file).
...
int ret;
z_stream strm{};
array<uint8_t, CHUNK> scratch = {}; //scratch buffer for decompressing the data.
strm.zalloc = Z_NULL; // allocate inflate state
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, 16 + MAX_WBITS);
if (ret != Z_OK)
{
return 0;
}
do { /* decompress until deflate stream ends or end of file */
DWORD read;
BOOL res = ReadFile(InputFile, in, CHUNK, &read, NULL);
strm.avail_in = read;
if (!res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"read error on input file\n");
return 0;
}
if (strm.avail_in == 0)
{
sendToReportWindow(L"strm.avail_in:%d\n", strm.avail_in); // strm.avail_in = 0
break;
}
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = scratch.size();
strm.next_out = scratch.data();
ret = inflate(&strm, Z_NO_FLUSH);
//if (ret != Z_OK) break; // 0
switch (ret) {
case Z_NEED_DICT: // 2
sendToReportWindow(L"z_need_dict:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
//ret = Z_DATA_ERROR; /* and fall through */
case Z_STREAM_ERROR: // -2
sendToReportWindow(L"Z_STREAM_ERROR:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_DATA_ERROR: // -3
sendToReportWindow(L"z_data_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_MEM_ERROR: // -4
(void)inflateEnd(&strm);
sendToReportWindow(L"z_mem_error:%d\n", ret);
sendToReportWindow(L"ret:%d\n", ret);
DisplayErrorBox((LPWSTR)L"inflate");
return 0;
case Z_BUF_ERROR: // -5
sendToReportWindow(L"z_buf_error:%d\n", ret);
(void)inflateEnd(&strm);
//return 0;
break;
}
auto bytes_decoded = scratch.size() - strm.avail_out;
DWORD written;
BOOL res = WriteFile(OutputFile, &scratch, bytes_decoded, &written, NULL);
if (ret == Z_STREAM_END) break;
} while (true); // avail_out == 0 means output buffer is full
ret == Z_STREAM_END;
auto reset_result = inflateReset(&strm); // work with concatenation
sendToReportWindow(L"resetting inflate: %d\n", reset_result);
assert(reset_result == Z_OK);
} while (strm.avail_in > 0);
...
Thank you!
update: I think readFile should read in CHUNK instead of 1. changed for both examples. This now gives me error -3: "Z_DATA_ERROR". checking to see if this change is now actually hitting readfile too many times.
typical file I want to deflate: [https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2018-51/segments/1544376823009.19/wet/CC-MAIN-20181209185547-20181209211547-00041.warc.wet.gz]
update 2:
Thank you Mark Adler! using the example you provided, I was able to fix the logic in my code. this satisfies the winAPI requirement. I also added file ext handling, moved things to heap and added a timer. The timer revealed that more memory helped reduce deflate time by 30%.
DWORD WINAPI fileDecompress(LPVOID lpParameter)
{
// zlib does not work with .zip files
sendToReportWindow(L"inside fileDecompress()\n");
// deflate .gz (gzip) files. single or multiple member (concatenated)
wstring dir = L"C:\\AI\\corpora\\";
wstring* lpFileName = static_cast<wstring*>(lpParameter);
sendToReportWindow(L"File to decompress is \"%s\" in \"%s\"\n", lpFileName->c_str(), dir.c_str());
wstring sourcePath = dir + lpFileName->c_str();
sendToReportWindow(L"input file with path:%s\n", sourcePath.c_str());
wstring::size_type lastdot = lpFileName->find_last_of(L"."); // remove .gz extension: get length to last dot and truncate
lpFileName->resize(lastdot);
wstring destPath = dir + lpFileName->c_str();
sendToReportWindow(L"output file with path:%s\n", destPath.c_str());
HANDLE InputFile = INVALID_HANDLE_VALUE;
HANDLE OutputFile = INVALID_HANDLE_VALUE;
BOOL Success;
DWORD InputFileSize;
ULONGLONG StartTime, EndTime;
LARGE_INTEGER FileSize;
double InflateTime;
InputFile = CreateFile(
sourcePath.c_str(), // Input file name, compressed file
GENERIC_READ, // Open for reading
FILE_SHARE_READ, // Share for read
NULL, // Default security
OPEN_EXISTING, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (InputFile == INVALID_HANDLE_VALUE){sendToReportWindow(L"Cannot open input \t%s\n", sourcePath.c_str()); return 0; }
OutputFile = CreateFile(
destPath.c_str(), // Input file name, compressed file
GENERIC_WRITE, // Open for reading
0, // Share for read
NULL, // Default security
CREATE_ALWAYS, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (OutputFile == INVALID_HANDLE_VALUE){sendToReportWindow(L"Cannot open output \t%s\n", destPath.c_str()); return 0; }
Success = GetFileSizeEx(InputFile, &FileSize); // Get compressed file size.
if ((!Success) || (FileSize.QuadPart > 0xFFFFFFFF))
{
sendToReportWindow(L"Cannot get input file size or file is larger than 4GB.\n");
CloseHandle(InputFile);
return 0;
}
InputFileSize = FileSize.LowPart;
sendToReportWindow(L"input file size: %u bytes\n", InputFileSize);
StartTime = GetTickCount64();
#define CHUNK 524288 // buffer size. doesn't use much ram and speeds up inflate
z_stream strm = {}; // Initialize zlib for file compression/decompression
int ret = inflateInit2(&strm, 16 + MAX_WBITS);
assert(ret == Z_OK);
unsigned char *in = new unsigned char[CHUNK]; unsigned char* out = new unsigned char[CHUNK];
for (;;) { // Decompress from input to output.
if (strm.avail_in == 0) { // Keep reading until the end of the input file or an error
DWORD read;
(void)ReadFile(InputFile, in, CHUNK, &read, NULL);
strm.avail_in = read;
if (strm.avail_in == 0)
break;
strm.next_in = in;
}
do { // Decompress all of what's in the CHUNK in buffer.
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH); // Decompress as much as possible to the CHUNK out buffer.
size_t got = CHUNK - strm.avail_out;
DWORD written;
(void)WriteFile(OutputFile, out, got, &written, NULL); // Write to the outputFile whatever inflate() left in out buffer
if (written != got) {sendToReportWindow(L"file write error\n"); delete[] in; delete[] out; return 0;}
if (ret == Z_STREAM_END) // Check for the end of a gzip member, in which case,
assert(inflateReset(&strm) == Z_OK); // reset inflate for the next gzip member. (concatenated files)
else if (ret != Z_OK) { // Return on a data error.
assert(ret == Z_DATA_ERROR);
(void)inflateEnd(&strm);
delete[] in; delete[] out;
return 0;
}
} while (strm.avail_in > 0); // Continue until everything in the input buffer is consumed.
} // for() loop to get next input buffer CHUNK from input file
EndTime = GetTickCount64();
InflateTime = (EndTime - StartTime) / 1000.0; // Get how long it took to inflate file
delete[] in; delete[] out;
(void)inflateEnd(&strm);
CloseHandle(InputFile); CloseHandle(OutputFile);
sendToReportWindow(L"Inflate Time: %.2f seconds. Done with fileDecompress function.\n", InflateTime);
return 0;
}
Does your compiler not at least warn you about the naked conditional ret == Z_STREAM_END;? You want an if there and some braces around the inflateReset() related statements.
There's still a problem in that you are leaving the outer loop if strm.avail_in is zero. That will happen every time, except when reaching the end of member. It can even happen then if you just so happen to exhaust the input buffer to decompress that member. Just make the outer loop a while (true).
Even after fixing all that, you would then discard the remaining available input when you do the read at the top of the outer loop. Only do that read if strm.avail_in is zero.
A simpler approach would be to do the reset in the inner loop. Like this (example in C):
// Decompress a gzip file input, potentially with multiple gzip members. Write
// the decompressed data to output. Return Z_STREAM_END on success. Return Z_OK
// if the gzip stream was correct up to where it ended prematurely. Return
// Z_DATA error if the gzip stream is invalid.
int inflate_gzip(FILE *input, FILE *output) {
// Initialize inflate for gzip input.
z_stream strm = {};
int ret = inflateInit2(&strm, 16 + MAX_WBITS);
assert(ret == Z_OK);
// Decompress from input to output.
unsigned char in[CHUNK];
for (;;) {
// Keep reading until the end of the input file or an error.
if (strm.avail_in == 0) {
strm.avail_in = fread(in, 1, CHUNK, input);
if (strm.avail_in == 0)
break;
strm.next_in = in;
}
// Decompress all of what's in the input buffer.
do {
// Decompress as much as possible to the CHUNK output buffer.
unsigned char out[CHUNK];
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
// Write to the output file whatever inflate() left in the output
// buffer. Return with an error if the write does not complete.
size_t got = CHUNK - strm.avail_out;
size_t put = fwrite(out, 1, got, output);
if (put != got)
return Z_ERRNO;
// Check for the end of a gzip member, in which case reset inflate
// for the next gzip member.
if (ret == Z_STREAM_END)
assert(inflateReset(&strm) == Z_OK);
// Return on a data error.
else if (ret != Z_OK) {
assert(ret == Z_DATA_ERROR);
(void)inflateEnd(&strm);
return ret;
}
// Continue until everything in the input buffer is consumed.
} while (strm.avail_in > 0);
}
// Successfully decompressed all of the input file. Clean up and return.
assert(inflateEnd(&strm) == Z_OK);
return ret;
}

RSA_Verify fails returns Bad signature error

I can't get the message verified after signing it.
I'm writing a project to simulate signing and verifying data from files with OpenSSL.
the signing process seems to work great, but on the verification process RSA_Verify always return 0. which is not verified.
I can't get it. what am I missing here?
#include <string>
#include <fstream>
#include <openssl/rsa.h>
#include <openssl/pem.h>
#include <openssl/applink.c>
#include <openssl/err.h>
#define PRIKEY_FILENAME "private"
#define PUBKEY_FILENAME "public"
using namespace std;
typedef struct _INFO
{
unsigned char *sig;
unsigned int nLen;
}INFO, *pINFO;
bool ReadFileContent(string fileName, char** out, size_t &nLen)
{
if (NULL == out)
return false;
ifstream file(fileName, ios::in | ios::binary | ios::ate);
if (file.is_open())
{
nLen = (int)file.tellg();
*out = new char[nLen];
file.seekg(0, ios::beg);
file.read(*out, nLen);
file.close();
}
else
{
cout << "Unable to open file \"" << fileName << " \"\n";
return false;
}
return true;
}
bool WriteFileContent(const char* data, int nLen, string fileName)
{
if (NULL == data)
return false;
ofstream file(fileName, ios::out | ios::binary | ios::ate);
if (file.is_open())
{
file.write(data, nLen);
file.close();
}
else
{
cout << "Unable to open file \"" << fileName << " \"\n";
return false;
}
return true;
}
bool GenerateKeyPairs()
{
int ret = 0;
RSA *r = NULL;
BIGNUM *bne = NULL;
BIO *bp_public = NULL, *bp_private = NULL;
int bits = 2048;
unsigned long e = RSA_F4;
// 1. generate rsa key
bne = BN_new();
ret = BN_set_word(bne, e);
if (ret != 1)
{
goto free_all;
}
r = RSA_new();
ret = RSA_generate_key_ex(r, bits, bne, NULL);
if (ret != 1)
{
goto free_all;
}
// 2. save public key
bp_public = BIO_new_file(PUBKEY_FILENAME, "w+");
ret = PEM_write_bio_RSAPublicKey(bp_public, r);
if (ret != 1)
{
goto free_all;
}
// 3. save private key
bp_private = BIO_new_file(PRIKEY_FILENAME, "w+");
ret = PEM_write_bio_RSAPrivateKey(bp_private, r, NULL, NULL, 0, NULL, NULL);
// 4. free
free_all:
BIO_free_all(bp_public);
BIO_free_all(bp_private);
RSA_free(r);
BN_free(bne);
return (ret == 1);
}
bool DoSign(string priKeyFile, pINFO pInfo, string fileName)
{
int ret;
unsigned char* data = NULL;
unsigned char* encodedData = NULL;
size_t nFileSize = 0;
unsigned int nEncodedDataLen = 0;
RSA* priKey = NULL;
FILE* fp = NULL;
if (!ReadFileContent(fileName, (char**)&data, nFileSize))
{
return false;
}
if (data == NULL || nFileSize <= 0)
return false;
//SHA512(data, nFileSize, pInfo->sig);
fp = fopen(PRIKEY_FILENAME, "r");
priKey = PEM_read_RSAPrivateKey(fp, &priKey, NULL, NULL);
pInfo->sig = (unsigned char*)malloc(RSA_size(priKey));
/* Sign */
ret = RSA_sign(NID_sha512, data, nFileSize, pInfo->sig, &pInfo->nLen, priKey);
WriteFileContent((char*)pInfo->sig, pInfo->nLen, fileName + ".sign");
return true;
}
bool DoVerify(string pubKeyFile, pINFO pInfo, string fileName)
{
int ret = 0;
unsigned char* data = NULL;
unsigned char* encodedData = NULL;
size_t nFileSize = 0;
FILE* fp = NULL;
RSA* pubkey = NULL;
unsigned int nEncodedDataLen = 0;
if (!ReadFileContent(fileName, (char**)&data, nFileSize))
{
return false;
}
if (data == NULL || nFileSize <= 0)
return false;
fp = fopen(PUBKEY_FILENAME, "r");
pubkey = PEM_read_RSAPublicKey(fp, NULL, NULL, NULL);
ret = RSA_verify(NID_sha512, data, nFileSize, pInfo->sig, pInfo->nLen, pubkey);
char buf[512];
ERR_error_string(ERR_get_error(), buf);
//Error here says bad signature
return true;
}
int main()
{
INFO info = { 0 };
GenerateKeyPairs();
DoSign(PRIKEY_FILENAME, &info, "Hello.txt");
DoVerify(PUBKEY_FILENAME, &info, "Hello.txt.sign");
return 0;
}
It should return 1 in RSA_Sign.
DoVerify(PUBKEY_FILENAME, &info, "Hello.txt.sign");
This should be:
DoVerify(PUBKEY_FILENAME, &info, "Hello.txt");
The signature itself is being passed through the &info parameter. The last parameter is supposed to be the actual thing signed, not the signature.

C++ ZLib GZipStream Decompression NULL terminated

There are a lot of questions out there revolving around zlib and GZipStreams but none that I've found answer this question. I'm using a C# GZipStream to send compressed data to a client. It reads the compressed data in entirely then tries to decompress it. However, each time inflate() is called in the loop it only gets the NULL terminated string. When sending a binary this is a pretty huge problem.
Before I show you code, I just wanted to say that if I write the received compressed bytes to a .gz file and use gzFile/gzopen/gzread/gzclose everything works perfectly. That means all the data is coming in properly. I want to read in the compressed data, decompress it in memory, and have the contents in a variable.
I think the issue is that inflate() is writing to a char* which is NULL terminated. I just don't know how to get it to be a string. I do fully anticipate this being a major oversight and a simple fix. Thanks for any help!
Here's the decompression code:
bool DecompressString(const std::string& message, std::string& dMsg)
{
int bufferSize = 512;
int messageSize = message.size() + 1;
//decompress string
z_stream zs;
memset(&zs, 0, sizeof(zs));
zs.zalloc = Z_NULL;
zs.zfree = Z_NULL;
zs.opaque = Z_NULL;
zs.next_in = (Bytef*)message.data();
zs.avail_in = messageSize;
int ret = Z_OK;
unsigned char* outbuffer = new unsigned char[bufferSize];
if (inflateInit2(&zs, 16+MAX_WBITS) == Z_OK)
{
do {
zs.next_out = outbuffer;
zs.avail_out = bufferSize;
ret = inflate(&zs, Z_NO_FLUSH);
if (ret < 0) return false;
std::stringstream tmpString;
tmpString << outbuffer;
if (dMsg.size() < zs.total_out) {
dMsg.append(tmpString.str().substr(0, zs.total_out - dMsg.size()));
}
} while (ret == Z_OK);
}
inflateEnd(&zs);
delete[] outbuffer;
//"\n<EOF>" is appended by sender to signify the end of file. This removes it
if (dMsg.find("\n<EOF>") != -1)
dMsg = dMsg.substr(0, dMsg.find("\n<EOF>"));
return true;
}
Working code from solution:
bool DecompressString(const std::string& message, std::string& dMsg)
{
int bufferSize = 512;
int messageSize = message.size() + 1;
//decompress string
z_stream zs;
memset(&zs, 0, sizeof(zs));
zs.zalloc = Z_NULL;
zs.zfree = Z_NULL;
zs.opaque = Z_NULL;
zs.next_in = (Bytef*)message.data();
zs.avail_in = messageSize;
int ret = Z_OK;
unsigned char* outbuffer = new unsigned char[bufferSize];
if (inflateInit2(&zs, 16+MAX_WBITS) == Z_OK)
{
// get the decompressed bytes blockwise using repeated calls to inflate
do {
zs.next_out = outbuffer;
zs.avail_out = bufferSize;
ret = inflate(&zs, Z_NO_FLUSH);
if (ret < 0) return false;
//Here's the difference
if (dMsg.size() < zs.total_out)
dMsg.append(reinterpret_cast<char*>(outbuffer), bufferSize);
//End
} while (ret == Z_OK);
}
inflateEnd(&zs);
delete[] outbuffer;
if (dMsg.find("\n<EOF>") != -1)
dMsg = dMsg.substr(0, dMsg.find("\n<EOF>"));
return true;
}
string is not a problem in itself, it can handle binary data.
It is this line that assumes a zero-terminated c-string:
tmpString << outbuffer;
Replace it with
tmpString.append(outbuffer, bufferSize);

Send binary file over TCP/IP connection

I will rephrase the whole question here so that it is answerable.
I am able to copy binary file perfectly in the same machine not using sockets but just making a simple copy function. Trying to implement this code for copying onto a TCP/IP connection but can't get it to work.
FILE *filehandle = fopen("imagefile.jpg", "rb");
FILE *dest =fopen("imagecopy.jpg", "wb"); // copied image file
fseek(filehandle, 0, SEEK_END);
unsigned long filesize = ftell(filehandle);
char *buffer = (char*)malloc(sizeof(char)*filesize);
rewind(filehandle);
int bytesread = fread(buffer, sizeof(char), filesize, filehandle);
for( int i=0; i<filesize; i++ )
{
fputc(buffer[i], filehandle); // copies all the contents to dest
}
The code above works perfectly for copying an image file in the computer but when implemented to copy on server, it is difficult to go about it.
I am trying to send an image file from a server to a client both which have been made manually in C. The length of the file to be sent by the server is only known to the server when it's sending the file so the buffer is dynamically generated in the server, something like this:
SERVER
fseek(filehandle, 0, SEEK_END);
long filesize = ftell(filehandle); // file could be 11000bytes
char *buffer = (char*)malloc(sizeof(char)*filesize); // char buffer with 11000 bytes to store the data from the file.
// then I call the send() function
rewind(filehandle); // go back to beginning
send(clientsocket, buffer, filesize, 0); // this is being sent perfectly, no errors because in the actual code, I am checking for errors
CLIENT
// here is where I don't understand how to dynamically allocate the 11000 bytes to store the data in a client buffer
// the filesize is not necessarily going to be 11000 so need to dynamically allocate
// I did the following:
#define BUFSIZE 10
FILE *filehandle = fopen("imagefile.jpg", "wb"); // image file created by client
char *buffer = (char*)malloc(sizeof(char)*BUFSIZE);
int bytesread = recv(buffer, 1, strlen(buffer), 0);
if( bytesread > 0 )
{
printf("Bytes read: %d\n", bytesread); // bytes read is 5
printf("Buffer: %s\n", buffer); // but buffer shows all the binary text like it normally would
// when I try to store buffer in a file, it doesn't put full buffer because only 5 characters are written
for( int i=0; i<bytesread; i++ )
{
fputc(buffer[i], filehandle); // this doesn't create full image
}
}
How can I dynamically allocate the 11000 bytes sent by the server?
You need to loop both the sending and receiving. Neither send() nor recv() are guaranteed to send/read as many bytes as you requested.
You also should send the file size before the file data so the receiver knows how many bytes to expect and when to stop reading.
Try something more like this:
SERVER
bool senddata(SOCKET sock, void *buf, int buflen)
{
unsigned char *pbuf = (unsigned char *) buf;
while (buflen > 0)
{
int num = send(sock, pbuf, buflen, 0);
if (num == SOCKET_ERROR)
{
if (WSAGetLastError() == WSAEWOULDBLOCK)
{
// optional: use select() to check for timeout to fail the send
continue;
}
return false;
}
pbuf += num;
buflen -= num;
}
return true;
}
bool sendlong(SOCKET sock, long value)
{
value = htonl(value);
return senddata(sock, &value, sizeof(value));
}
bool sendfile(SOCKET sock, FILE *f)
{
fseek(f, 0, SEEK_END);
long filesize = ftell(f);
rewind(f);
if (filesize == EOF)
return false;
if (!sendlong(sock, filesize))
return false;
if (filesize > 0)
{
char buffer[1024];
do
{
size_t num = min(filesize, sizeof(buffer));
num = fread(buffer, 1, num, f);
if (num < 1)
return false;
if (!senddata(sock, buffer, num, 0))
return false;
filesize -= num;
}
while (filesize > 0);
}
return true;
}
FILE *filehandle = fopen("imagefile.jpg", "rb");
if (filehandle != NULL)
{
sendfile(clientsocket, filehandle);
fclose(filehandle);
}
CLIENT
bool readdata(SOCKET sock, void *buf, int buflen)
{
unsigned char *pbuf = (unsigned char *) buf;
while (buflen > 0)
{
int num = recv(sock, pbuf, buflen, 0);
if (num == SOCKET_ERROR)
{
if (WSAGetLastError() == WSAEWOULDBLOCK)
{
// optional: use select() to check for timeout to fail the read
continue;
}
return false;
}
else if (num == 0)
return false;
pbuf += num;
buflen -= num;
}
return true;
}
bool readlong(SOCKET sock, long *value)
{
if (!readdata(sock, value, sizeof(value)))
return false;
*value = ntohl(*value);
return true;
}
bool readfile(SOCKET sock, FILE *f)
{
long filesize;
if (!readlong(sock, &filesize))
return false;
if (filesize > 0)
{
char buffer[1024];
do
{
int num = min(filesize, sizeof(buffer));
if (!readdata(sock, buffer, num))
return false;
int offset = 0;
do
{
size_t written = fwrite(&buffer[offset], 1, num-offset, f);
if (written < 1)
return false;
offset += written;
}
while (offset < num);
filesize -= num;
}
while (filesize > 0);
}
return true;
}
FILE *filehandle = fopen("imagefile.jpg", "wb");
if (filehandle != NULL)
{
bool ok = readfile(clientsocket, filehandle);
fclose(filehandle);
if (ok)
{
// use file as needed...
}
else
remove("imagefile.jpg");
}
We could avoid the header that contains the image size, but we just read to the end of the sent data. About the buffer size, we could use a fixed number such as 10 * 1024, when we received some data from the server, we just save it into a file according to the actual received data length.
// please open a file ...
FILE * fp;
// ...
const int LENGTH = 10 * 1024;
int len = 0;
char * buffer = (char *)malloc(LENGTH);
while ((len = recv(socket, buffer, LENGTH, 0)) > 0) {
fwrite(buffer, 1, len, fp);
}
free(buffer);
// close the file
#T.C: I guess we cannot allocate a buffer according to the size sent from the server in case the image is too large to save inside the client's memory. Not mention the server is fake, and intended to make any attack.