I am downloading Common Crawl files from AWS. Apparently they are large concatenated .gz files (multiple gzip members back to back), which the gzip format supports. I am using zlib to inflate them, but I only get the decompressed contents up to the end of the first member. I have tried adding inflateReset(), but then I get error -5 (Z_BUF_ERROR), which means inflate could not make progress with the buffers it was given. I suspect I am close.
Here's the code without inflateReset(). It works fine on non-concatenated files.
#include "zlib.h"
#define CHUNK 16384
...
file = L"CC-MAIN-20181209185547-20181209211547-00040.warc.wet.gz";
fileDecompress(&file);
DWORD WINAPI fileDecompress(LPVOID lpParameter)
{
wstring dir = L"C:\\AI\\corpora\\";
wstring* lpFileName = static_cast<wstring*>(lpParameter);
sendToReportWindow(L"File to decompress is \"%s\" in \"%s\"\n", lpFileName->c_str(), dir.c_str());
wstring sourcePath = dir + lpFileName->c_str();
sendToReportWindow(L"input file with path:%s\n", sourcePath.c_str());
wstring destPath = dir + lpFileName->c_str() + L".wet";
sendToReportWindow(L"output file with path:%s\n", destPath.c_str());
HANDLE InputFile = INVALID_HANDLE_VALUE;
HANDLE OutputFile = INVALID_HANDLE_VALUE;
BOOL Success;
DWORD InputFileSize;
ULONGLONG StartTime, EndTime;
LARGE_INTEGER FileSize;
// Open input file for reading, existing file only.
InputFile = CreateFile(
sourcePath.c_str(), // Input file name, compressed file
GENERIC_READ, // Open for reading
FILE_SHARE_READ, // Share for read
NULL, // Default security
OPEN_EXISTING, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (InputFile == INVALID_HANDLE_VALUE)
{
sendToReportWindow(L"Cannot open input \t%s\n", sourcePath.c_str());
return 0;
}
OutputFile = CreateFile(
destPath.c_str(), // Output file name, decompressed file
GENERIC_WRITE, // Open for writing
0, // Do not share
NULL, // Default security
CREATE_ALWAYS, // Always create, overwrite if it exists
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (OutputFile == INVALID_HANDLE_VALUE)
{
sendToReportWindow(L"Cannot open output \t%s\n", destPath.c_str());
return 0;
}
// Get compressed file size.
Success = GetFileSizeEx(InputFile, &FileSize);
if ((!Success) || (FileSize.QuadPart > 0xFFFFFFFF))
{
sendToReportWindow(L"Cannot get input file size or file is larger than 4GB.\n");
CloseHandle(InputFile);
return 0;
}
InputFileSize = FileSize.LowPart;
sendToReportWindow(L"input file size: %u bytes\n", InputFileSize);
int ret;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
strm.zalloc = Z_NULL; // allocate inflate state
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, 16 + MAX_WBITS);
if (ret != Z_OK)
{
return 0;
}
do { /* decompress until deflate stream ends or end of file */
DWORD read;
BOOL res = ReadFile(InputFile, in, CHUNK, &read, NULL);
strm.avail_in = read;
if (!res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"read error on input file\n");
return 0;
}
if (strm.avail_in == 0)
{
break;
}
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT: // 2
sendToReportWindow(L"z_need_dict:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
//ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR: // -3
sendToReportWindow(L"z_data_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_MEM_ERROR: // -4
(void)inflateEnd(&strm);
sendToReportWindow(L"z_mem_error:%d\n", ret);
sendToReportWindow(L"ret:%d\n", ret);
DisplayErrorBox((LPWSTR)L"inflate");
return 0;
case Z_BUF_ERROR: // -5
sendToReportWindow(L"z_buf_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
}
have = CHUNK - strm.avail_out;
DWORD written;
BOOL res = WriteFile(OutputFile, out, have, &written, NULL);
if (written != have || !res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"file write error:%d\n", res);
return 0;
}
} while (strm.avail_out == 0); // avail_out == 0 means output buffer is full
} while (ret != Z_STREAM_END); /* done when inflate() says it's done */ // Z_STREAM_END is 1
(void)inflateEnd(&strm);
CloseHandle(InputFile); CloseHandle(OutputFile);
return 0;
}
Here's the version with inflateReset() added. This version causes inflate() to return error -5 (Z_BUF_ERROR).
...
int ret;
z_stream strm{};
array<uint8_t, CHUNK> scratch = {}; //scratch buffer for decompressing the data.
strm.zalloc = Z_NULL; // allocate inflate state
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, 16 + MAX_WBITS);
if (ret != Z_OK)
{
return 0;
}
do { /* decompress until deflate stream ends or end of file */
DWORD read;
BOOL res = ReadFile(InputFile, in, CHUNK, &read, NULL);
strm.avail_in = read;
if (!res) {
(void)inflateEnd(&strm);
sendToReportWindow(L"read error on input file\n");
return 0;
}
if (strm.avail_in == 0)
{
sendToReportWindow(L"strm.avail_in:%d\n", strm.avail_in); // strm.avail_in = 0
break;
}
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = scratch.size();
strm.next_out = scratch.data();
ret = inflate(&strm, Z_NO_FLUSH);
//if (ret != Z_OK) break; // 0
switch (ret) {
case Z_NEED_DICT: // 2
sendToReportWindow(L"z_need_dict:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
//ret = Z_DATA_ERROR; /* and fall through */
case Z_STREAM_ERROR: // -2
sendToReportWindow(L"Z_STREAM_ERROR:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_DATA_ERROR: // -3
sendToReportWindow(L"z_data_error:%d\n", ret);
(void)inflateEnd(&strm);
return 0;
case Z_MEM_ERROR: // -4
(void)inflateEnd(&strm);
sendToReportWindow(L"z_mem_error:%d\n", ret);
sendToReportWindow(L"ret:%d\n", ret);
DisplayErrorBox((LPWSTR)L"inflate");
return 0;
case Z_BUF_ERROR: // -5
sendToReportWindow(L"z_buf_error:%d\n", ret);
(void)inflateEnd(&strm);
//return 0;
break;
}
auto bytes_decoded = scratch.size() - strm.avail_out;
DWORD written;
BOOL res = WriteFile(OutputFile, &scratch, bytes_decoded, &written, NULL);
if (ret == Z_STREAM_END) break;
} while (true); // avail_out == 0 means output buffer is full
ret == Z_STREAM_END;
auto reset_result = inflateReset(&strm); // work with concatenation
sendToReportWindow(L"resetting inflate: %d\n", reset_result);
assert(reset_result == Z_OK);
} while (strm.avail_in > 0);
...
Thank you!
Update: I think ReadFile should read CHUNK bytes instead of 1. I changed that in both examples. This now gives me error -3 (Z_DATA_ERROR). I'm checking whether this change is now actually calling ReadFile too many times.
typical file I want to deflate: [https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2018-51/segments/1544376823009.19/wet/CC-MAIN-20181209185547-20181209211547-00041.warc.wet.gz]
Update 2:
Thank you Mark Adler! Using the example you provided, I was able to fix the logic in my code. This satisfies the WinAPI requirement. I also added file-extension handling, moved the buffers to the heap, and added a timer. The timer revealed that the larger buffers reduced decompression time by 30%.
DWORD WINAPI fileDecompress(LPVOID lpParameter)
{
// zlib does not work with .zip files
sendToReportWindow(L"inside fileDecompress()\n");
// inflate .gz (gzip) files, single or multiple member (concatenated)
wstring dir = L"C:\\AI\\corpora\\";
wstring* lpFileName = static_cast<wstring*>(lpParameter);
sendToReportWindow(L"File to decompress is \"%s\" in \"%s\"\n", lpFileName->c_str(), dir.c_str());
wstring sourcePath = dir + lpFileName->c_str();
sendToReportWindow(L"input file with path:%s\n", sourcePath.c_str());
wstring::size_type lastdot = lpFileName->find_last_of(L"."); // remove .gz extension: get length to last dot and truncate
lpFileName->resize(lastdot);
wstring destPath = dir + lpFileName->c_str();
sendToReportWindow(L"output file with path:%s\n", destPath.c_str());
HANDLE InputFile = INVALID_HANDLE_VALUE;
HANDLE OutputFile = INVALID_HANDLE_VALUE;
BOOL Success;
DWORD InputFileSize;
ULONGLONG StartTime, EndTime;
LARGE_INTEGER FileSize;
double InflateTime;
InputFile = CreateFile(
sourcePath.c_str(), // Input file name, compressed file
GENERIC_READ, // Open for reading
FILE_SHARE_READ, // Share for read
NULL, // Default security
OPEN_EXISTING, // Existing file only
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (InputFile == INVALID_HANDLE_VALUE){sendToReportWindow(L"Cannot open input \t%s\n", sourcePath.c_str()); return 0; }
OutputFile = CreateFile(
destPath.c_str(), // Output file name, decompressed file
GENERIC_WRITE, // Open for writing
0, // Do not share
NULL, // Default security
CREATE_ALWAYS, // Always create, overwrite if it exists
FILE_ATTRIBUTE_NORMAL, // Normal file
NULL); // No template
if (OutputFile == INVALID_HANDLE_VALUE){sendToReportWindow(L"Cannot open output \t%s\n", destPath.c_str()); return 0; }
Success = GetFileSizeEx(InputFile, &FileSize); // Get compressed file size.
if ((!Success) || (FileSize.QuadPart > 0xFFFFFFFF))
{
sendToReportWindow(L"Cannot get input file size or file is larger than 4GB.\n");
CloseHandle(InputFile);
return 0;
}
InputFileSize = FileSize.LowPart;
sendToReportWindow(L"input file size: %u bytes\n", InputFileSize);
StartTime = GetTickCount64();
#define CHUNK 524288 // buffer size. doesn't use much ram and speeds up inflate
z_stream strm = {}; // Initialize zlib for file compression/decompression
int ret = inflateInit2(&strm, 16 + MAX_WBITS);
assert(ret == Z_OK);
unsigned char *in = new unsigned char[CHUNK]; unsigned char* out = new unsigned char[CHUNK];
for (;;) { // Decompress from input to output.
if (strm.avail_in == 0) { // Keep reading until the end of the input file or an error
DWORD read;
(void)ReadFile(InputFile, in, CHUNK, &read, NULL);
strm.avail_in = read;
if (strm.avail_in == 0)
break;
strm.next_in = in;
}
do { // Decompress all of what's in the CHUNK in buffer.
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH); // Decompress as much as possible to the CHUNK out buffer.
size_t got = CHUNK - strm.avail_out;
DWORD written;
(void)WriteFile(OutputFile, out, got, &written, NULL); // Write to the outputFile whatever inflate() left in out buffer
if (written != got) {sendToReportWindow(L"file write error\n"); delete[] in; delete[] out; return 0;}
if (ret == Z_STREAM_END) // Check for the end of a gzip member, in which case,
assert(inflateReset(&strm) == Z_OK); // reset inflate for the next gzip member. (concatenated files)
else if (ret != Z_OK) { // Return on a data error.
assert(ret == Z_DATA_ERROR);
(void)inflateEnd(&strm);
delete[] in; delete[] out;
return 0;
}
} while (strm.avail_in > 0); // Continue until everything in the input buffer is consumed.
} // for() loop to get next input buffer CHUNK from input file
EndTime = GetTickCount64();
InflateTime = (EndTime - StartTime) / 1000.0; // Get how long it took to inflate file
delete[] in; delete[] out;
(void)inflateEnd(&strm);
CloseHandle(InputFile); CloseHandle(OutputFile);
sendToReportWindow(L"Inflate Time: %.2f seconds. Done with fileDecompress function.\n", InflateTime);
return 0;
}
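A side note on the version above: assert(inflateReset(&strm) == Z_OK) performs the reset inside the assert expression, so a release build compiled with NDEBUG removes the call entirely and concatenated members stop being handled. Storing the return value in a variable and asserting on that is safer.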
Does your compiler not at least warn you about the naked conditional ret == Z_STREAM_END;? You want an if there and some braces around the inflateReset() related statements.
There's still a problem in that you are leaving the outer loop if strm.avail_in is zero. That will happen every time, except when reaching the end of a member. It can even happen then, if you just so happen to exhaust the input buffer while decompressing that member. Just make the outer loop a while (true).
Even after fixing all that, you would then discard the remaining available input when you do the read at the top of the outer loop. Only do that read if strm.avail_in is zero.
A simpler approach would be to do the reset in the inner loop. Like this (example in C):
// Decompress a gzip file input, potentially with multiple gzip members. Write
// the decompressed data to output. Return Z_STREAM_END on success. Return Z_OK
// if the gzip stream was correct up to where it ended prematurely. Return
// Z_DATA error if the gzip stream is invalid.
int inflate_gzip(FILE *input, FILE *output) {
// Initialize inflate for gzip input.
z_stream strm = {};
int ret = inflateInit2(&strm, 16 + MAX_WBITS);
assert(ret == Z_OK);
// Decompress from input to output.
unsigned char in[CHUNK];
for (;;) {
// Keep reading until the end of the input file or an error.
if (strm.avail_in == 0) {
strm.avail_in = fread(in, 1, CHUNK, input);
if (strm.avail_in == 0)
break;
strm.next_in = in;
}
// Decompress all of what's in the input buffer.
do {
// Decompress as much as possible to the CHUNK output buffer.
unsigned char out[CHUNK];
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
// Write to the output file whatever inflate() left in the output
// buffer. Return with an error if the write does not complete.
size_t got = CHUNK - strm.avail_out;
size_t put = fwrite(out, 1, got, output);
if (put != got)
return Z_ERRNO;
// Check for the end of a gzip member, in which case reset inflate
// for the next gzip member.
if (ret == Z_STREAM_END)
assert(inflateReset(&strm) == Z_OK);
// Return on a data error.
else if (ret != Z_OK) {
assert(ret == Z_DATA_ERROR);
(void)inflateEnd(&strm);
return ret;
}
// Continue until everything in the input buffer is consumed.
} while (strm.avail_in > 0);
}
// Successfully decompressed all of the input file. Clean up and return.
assert(inflateEnd(&strm) == Z_OK);
return ret;
}
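For reference, a minimal driver for the routine above might look like the following. The file names are placeholders, CHUNK must be defined before inflate_gzip(), and error handling is kept to the bare minimum:
#include <assert.h>
#include <stdio.h>
#include "zlib.h"
#define CHUNK 16384

/* inflate_gzip() from above goes here */

int main(void) {
    FILE *input = fopen("input.warc.wet.gz", "rb");   /* placeholder name */
    FILE *output = fopen("output.warc.wet", "wb");    /* placeholder name */
    if (input == NULL || output == NULL)
        return 1;
    int ret = inflate_gzip(input, output);
    fclose(input);
    fclose(output);
    /* Z_STREAM_END means every gzip member decoded cleanly */
    return ret == Z_STREAM_END ? 0 : 1;
}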
Related
After experimenting with the examples in the FFmpeg documentation, I was finally able to create a short program that extracts every nth frame from a video. However, the output files it produces are huge, at over 15 MB per image. How can I change this to produce lower-quality images?
The result I am trying to get is done easily on the command line with:
ffmpeg -i [input video] -vf "select=not(mod(n\,10))" -fps_mode vfr img_%03d.jpg
For a video with about 500 frames, this creates 50 images that are only about 800 KB each; how would I be able to mimic this in my program?
My code consists of opening the input file, decoding the packets, then saving the frames:
#include <cstdio>
#include <cstdlib>
#include <iostream>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}
static AVFormatContext *fmt_ctx;
static AVCodecContext *dec_ctx;
static int video_stream_index = -1;
// OPEN THE INPUT FILE
static int open_input_file(const char *filename) {
// INIT VARS AND FFMPEG OBJECTS
int ret;
const AVCodec *dec;
// OPEN INPUT FILE
if((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
printf("ERROR: failed to open input file\n");
return ret;
}
// FIND STREAM INFO BASED ON INPUT FILE
if((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
printf("ERROR: failed to find stream information\n");
return ret;
}
// FIND THE BEST VIDEO STREAM FOR THE INPUT FILE
ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
if(ret < 0) {
printf("ERROR: failed to find a video stream in the input file\n");
return ret;
}
video_stream_index = ret;
// ALLOCATE THE DECODING CONTEXT FOR THE INPUT FILE
dec_ctx = avcodec_alloc_context3(dec);
if(!dec_ctx) {
printf("ERROR: failed to allocate decoding context\n");
// CAN NOT ALLOCATE MEMORY ERROR
return AVERROR(ENOMEM);
}
avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
// INIT THE VIDEO DECODER
if((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
printf("ERROR: failed to open video decoder\n");
return ret;
}
return 0;
}
// SAVE THE FILE
static void save(unsigned char *buf, int wrap, int x_size, int y_size, char *file_name) {
// INIT THE EMPTY FILE
FILE *file;
// OPEN AND WRITE THE IMAGE FILE
file = fopen(file_name, "wb");
fprintf(file, "P6\n%d %d\n%d\n", x_size, y_size, 255);
for(int i = 0; i < y_size; i++) {
fwrite(buf + i * wrap, 1, x_size * 3, file);
}
fclose(file);
}
// DECODE FRAME AND CONVERT IT TO AN RGB IMAGE
static void decode(AVCodecContext *cxt, AVFrame *frame, AVPacket *pkt,
const char *out_file_name, const char *file_ext, int mod=1) {
// INIT A BLANK CHAR TO HOLD THE FILE NAME AND AN EMPTY INT TO HOLD FUNCTION RETURN VALUES
char buf[1024];
int ret;
// SEND PACKET TO DECODER
ret = avcodec_send_packet(cxt, pkt);
if(ret < 0) {
printf("ERROR: error sending packet for decoding\n");
exit(1);
}
// CREATE A SCALAR CONTEXT FOR CONVERSION
SwsContext *sws_ctx = sws_getContext(dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt, dec_ctx->width,
dec_ctx->height, AV_PIX_FMT_RGB24, SWS_BICUBIC, NULL, NULL, NULL);
// CREATE A NEW RGB FRAME FOR CONVERSION
AVFrame* rgb_frame = av_frame_alloc();
rgb_frame->format = AV_PIX_FMT_RGB24;
rgb_frame->width = dec_ctx->width;
rgb_frame->height = dec_ctx->height;
// ALLOCATE A NEW BUFFER FOR THE RGB CONVERSION FRAME
av_frame_get_buffer(rgb_frame, 0);
// WHILE RETURN COMES BACK OKAY (FUNCTION RETURNS >= 0)...
while(ret >= 0) {
// GET FRAME BACK FROM DECODER
ret = avcodec_receive_frame(cxt, frame);
// IF "RESOURCE TEMP NOT AVAILABLE" OR "END OF FILE" ERROR...
if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
return;
} else if(ret < 0) {
printf("ERROR: error during decoding\n");
exit(1);
}
// IF FRAME NUMBER IS A MULTIPLE OF MOD...
if(cxt->frame_number % mod == 0){
// OUTPUT WHICH FRAME IS BEING SAVED
printf("saving frame %03d\n", cxt->frame_number);
// REMOVES TEMPORARY BUFFERED DATA
fflush(stdout);
// SCALE (CONVERT) THE OLD FRAME TO THE NEW RGB FRAME
sws_scale(sws_ctx, frame->data, frame->linesize, 0, frame->height,
rgb_frame->data, rgb_frame->linesize);
// SET "BUF" TO THE OUTPUT FILE PATH (SAVES TO "out_file_name_###.file_ext")
snprintf(buf, sizeof(buf), "%s_%03d.%s", out_file_name, cxt->frame_number, file_ext);
// SAVE THE FRAME
save(rgb_frame->data[0], rgb_frame->linesize[0], rgb_frame->width, rgb_frame->height, buf);
}
}
}
int main() {
// SIMULATE COMMAND LINE ARGUMENTS
char argv0[] = "test";
char argv1[] = "/User/Desktop/frames/test_video.mov";
char *argv[] = {argv0, argv1, nullptr};
// INIT VARS AND FFMPEG OBJECTS
int ret;
AVPacket *packet;
AVFrame *frame;
// ALLOCATE FRAME AND PACKET
frame = av_frame_alloc();
packet = av_packet_alloc();
if (!frame || !packet) {
fprintf(stderr, "Could not allocate frame or packet\n");
exit(1);
}
// IF FILE DOESN'T OPEN, GO TO THE END
if((ret = open_input_file(argv[1])) < 0) {
goto end;
}
// READ ALL THE PACKETS - simple
while(av_read_frame(fmt_ctx, packet) >= 0) {
// IF PACKET INDEX MATCHES VIDEO INDEX...
if (packet->stream_index == video_stream_index) {
// SEND PACKET TO THE DECODER and SAVE
std::string name = "/User/Desktop/frames/img";
std::string ext = "bmp";
decode(dec_ctx, frame, packet, name.c_str(), ext.c_str(), 5);
}
// UNREFERENCE THE PACKET
av_packet_unref(packet);
}
// END MARKER
end:
avcodec_free_context(&dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
av_packet_free(&packet);
// FINAL ERROR CATCH
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
exit(1);
}
exit(0);
}
I am not sure how to go about producing images that are much smaller in size like the ones produced on the command line. I have a feeling that this is possible somehow during the conversion to RGB or the saving of the file but I can't seem to figure out how.
Also, is there any way that I could go about this much more efficiently? On the command line, this finishes very quickly (no more than a second or two for a 9 sec. movie at ~60 fps).
The command-line version compresses each frame into a JPEG file, hence the size is very small. Your code, on the other hand, writes the raw RGB values directly into a file (regardless of the file extension). The size of each image is then height x width x 3 bytes, which is very large.
Solution: Adjust your save function to also compress the image.
Code example from Github - save_frame_as_jpeg.c:
int save_frame_as_jpeg(AVCodecContext *pCodecCtx, AVFrame *pFrame, int FrameNo)
{
AVCodec *jpegCodec = avcodec_find_encoder(AV_CODEC_ID_JPEG2000);
if (!jpegCodec) { return -1; }
AVCodecContext *jpegContext = avcodec_alloc_context3(jpegCodec);
if (!jpegContext) { return -1; }
jpegContext->pix_fmt = pCodecCtx->pix_fmt;
jpegContext->height = pFrame->height;
jpegContext->width = pFrame->width;
if (avcodec_open2(jpegContext, jpegCodec, NULL) < 0)
{ return -1; }
FILE *JPEGFile;
char JPEGFName[256];
AVPacket packet = {.data = NULL, .size = 0};
av_init_packet(&packet);
int gotFrame;
if (avcodec_encode_video2(jpegContext, &packet, pFrame, &gotFrame) < 0)
{ return -1; }
sprintf(JPEGFName, "dvr-%06d.jpg", FrameNo);
JPEGFile = fopen(JPEGFName, "wb");
fwrite(packet.data, 1, packet.size, JPEGFile);
fclose(JPEGFile);
av_free_packet(&packet);
avcodec_close(jpegContext);
return 0;
}
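A few notes on wiring that into the question's code (treat this as a sketch, not a drop-in fix): the snippet uses the older avcodec_encode_video2()/av_free_packet() API, and AV_CODEC_ID_JPEG2000 produces JPEG 2000 rather than the baseline JPEG the command line writes; AV_CODEC_ID_MJPEG is the usual choice for .jpg output, and the MJPEG encoder typically also wants jpegContext->time_base set before avcodec_open2() and may reject the decoder's pixel format (it expects a YUVJ format). With the function as given, the decode loop would call it in place of the sws_scale()/save() pair, roughly like this:
// inside the while (ret >= 0) loop, instead of converting to RGB and calling save():
if (cxt->frame_number % mod == 0) {
    printf("saving frame %03d\n", cxt->frame_number);
    if (save_frame_as_jpeg(cxt, frame, cxt->frame_number) < 0)
        fprintf(stderr, "ERROR: failed to encode frame %d\n", cxt->frame_number);
}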
I wrote a program that automatically sends compressed files to a server.
The files are sent from a weak VPS that has only 1.5 GB of RAM at its disposal.
When I load binary files I get small memory leaks, and by around 30,000 .pdf files the program can no longer allocate memory.
As a workaround the program saves a log and restarts itself, which frees the RAM, but I would like to know the cause. Note that if I do not open the files with ios::binary, the leaks do not occur.
My code :
std::ifstream ifs(url,ios::binary);
std::string content((std::istreambuf_iterator<char>(ifs)), (std::istreambuf_iterator<char>()));
content = PFHelper::stream_compression(content);
content = PFHelper::ASE_encodeCppStandard(content,KS,PFHelper::AES_CBC_128);
PFHelper::stream_compression
std::string PFHelper::stream_compression(std::string stream)
{
z_stream zs; // z_stream is zlib's control structure
memset(&zs, 0, sizeof(zs));
if (deflateInit(&zs, Z_BEST_COMPRESSION) != Z_OK)
{
throw new PFException(L"EXCEPTION_DURING_ZLIB_COMPRESSION");
}
zs.next_in = (Bytef*)stream.data();
zs.avail_in = stream.size(); // set the z_stream's input
int ret;
char outbuffer[32768];
std::string outstring;
// retrieve the compressed bytes blockwise
do {
zs.next_out = reinterpret_cast<Bytef*>(outbuffer);
zs.avail_out = sizeof(outbuffer);
ret = deflate(&zs, Z_FINISH);
if (outstring.size() < zs.total_out) {
// append the block to the output string
outstring.append(outbuffer,
zs.total_out - outstring.size());
}
} while (ret == Z_OK);
deflateEnd(&zs);
if (ret != Z_STREAM_END) { throw new PFException(L"EXCEPTION_DURING_ZLIB_COMPRESSION"); }
return outstring;
}
PFHelper::ASE_encodeCppStandard
std::string PFHelper::ASE_encodeCppStandard(std::string in, wchar_t* KS ,wchar_t* typ)
{
string ctext = "";
std::string KS_string = PFHelper::ConvertFromUtf8ToString(KS);
if (typ == PFHelper::AES_CBC_128)
ctext = encrypt(KS_string,in);
if (typ == PFHelper::AES_CBC_256)
ctext = encryptEX(KS_string, in);
return ctext;
}
static string encrypt(string KS, const string ptext)
{
EVP_CIPHER_CTX* ctx;
ctx = EVP_CIPHER_CTX_new();
int rc = EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, (byte*)&KS[0], (byte*)&KS[0]);
if (rc != 1)
throw runtime_error("EVP_EncryptInit_ex failed");
// Cipher text will be upto 16 bytes larger than plain text
std::string ctext;
ctext.resize(ptext.size()+16);
int out_len1 = (int)ctext.size();
rc = EVP_EncryptUpdate(ctx, (byte*)&ctext[0], &out_len1, (const byte*)&ptext[0], (int)ptext.size());
if (rc != 1)
throw runtime_error("EVP_EncryptUpdate failed");
int out_len2 = (int)ctext.size() - out_len1;
rc = EVP_EncryptFinal_ex(ctx, (byte*)&ctext[0] + out_len1, &out_len2);
if (rc != 1)
throw runtime_error("EVP_EncryptFinal_ex failed");
ctext.resize(out_len1 + out_len2);
return ctext;
}
I have a server that receives files from clients, but transfers over external networks are too slow. After reading a bit, I found a solution in file compression. I decided to use zlib, searched through the examples in the documentation, and below are the two I would like to use in my project. My question is: how do I use the following examples to compress a file without saving it to disk and send it over a socket (client side), and then receive and decompress the file (server side)?
The client application runs on Windows, and the server application runs on Linux.
Client Side:
int def(FILE *source, FILE *dest, int level)
{
int ret, flush;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
/* allocate deflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
ret = deflateInit(&strm, level);
if (ret != Z_OK)
return ret;
/* compress until end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
strm.next_in = in;
/* run deflate() on input until output buffer not full, finish
compression if all of source has been read in */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = deflate(&strm, flush); /* no bad return value */
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)deflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
assert(strm.avail_in == 0); /* all input will be used */
/* done when last data in file processed */
} while (flush != Z_FINISH);
assert(ret == Z_STREAM_END); /* stream will be complete */
/* clean up and return */
(void)deflateEnd(&strm);
return Z_OK;
}
Server Side:
int inf(FILE *source, FILE *dest)
{
int ret;
unsigned have;
z_stream strm;
unsigned char in[CHUNK];
unsigned char out[CHUNK];
/* allocate inflate state */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit(&strm);
if (ret != Z_OK)
return ret;
/* decompress until deflate stream ends or end of file */
do {
strm.avail_in = fread(in, 1, CHUNK, source);
if (ferror(source)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
if (strm.avail_in == 0)
break;
strm.next_in = in;
/* run inflate() on input until output buffer not full */
do {
strm.avail_out = CHUNK;
strm.next_out = out;
ret = inflate(&strm, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&strm);
return ret;
}
have = CHUNK - strm.avail_out;
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
(void)inflateEnd(&strm);
return Z_ERRNO;
}
} while (strm.avail_out == 0);
/* done when inflate() says it's done */
} while (ret != Z_STREAM_END);
/* clean up and return */
(void)inflateEnd(&strm);
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}
One way is to use a Boost Iostream compressor (they support zlib, gzip, bzip2 out of the box) and an ip::tcp::iostream socket from Boost Asio. Something like:
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/zlib.hpp>
#include <boost/asio/ip/tcp.hpp>
int main() {
boost::asio::ip::tcp::iostream connection;
boost::iostreams::filtering_stream<boost::iostreams::input> connection_reader;
connection_reader.push(boost::iostreams::zlib_decompressor());
connection_reader.push(connection);
boost::iostreams::filtering_stream<boost::iostreams::output> connection_writer;
connection_writer.push(boost::iostreams::zlib_compressor());
connection_writer.push(connection);
auto const url = "127.0.0.1";
connection.connect(url, "http");
// Send.
connection_writer << "hello there\n";
// Receive.
for(std::string line; getline(connection_reader, line);) {
// Process line.
}
}
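A practical note on this approach (based on my reading of Boost.Iostreams, so verify against your version): the zlib_compressor only emits its final compressed block when the filter chain is flushed or torn down, so the sender generally needs to pop or reset the filtering_stream (or let it go out of scope) before the receiver can decompress the tail of the data. For a long-lived connection it may be easier to frame the data into chunks and compress each chunk separately.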
I am trying to demux a video file into the video part (h264, mpeg4, h265, vp8, etc.), the audio part (mp3, aac, ac3, etc.) and the subtitle part (srt) using FFmpeg in C++.
The audio part came out fine and played in all the media players I have, as did the subtitle part. The video part, however, was written WITHOUT error and saved into a .h264 file, but when I use ffprobe to check it or ffplay to play it, it always gives the error "Invalid data found when processing input".
The code below
/* Separate a media file into audio, video and subtitle files (demuxing, complex) */
//TODO: mute error when subtitle is not present
#define __STDC_CONSTANT_MACROS
extern "C"
{
#include "libavformat/avformat.h"
}
int main()
{
//Input AVFormatContext and Output AVFormatContext
AVOutputFormat *ofmt_a = NULL, *ofmt_v = NULL, *ofmt_s = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx_a = NULL, *ofmt_ctx_v = NULL, *ofmt_ctx_s = NULL;
AVPacket pkt;
int ret, i;
int videoindex=-1, audioindex=-1, srtindex=-1;
int frame_index=0;
//Input file URL
const char *in_filename = "sample.mp4";
//Output file URL
const char *out_filename_v = "sample.h264";
const char *out_filename_a = "sample.mp3";
const char *out_filename_s = "sample.srt";
av_register_all();
//Input
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
printf( "Could not open input file.");
goto end;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
printf( "Failed to retrieve input stream information");
goto end;
}
//Output
avformat_alloc_output_context2(&ofmt_ctx_v, NULL, NULL, out_filename_v);
if (!ofmt_ctx_v) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_v = ofmt_ctx_v->oformat;
avformat_alloc_output_context2(&ofmt_ctx_a, NULL, NULL, out_filename_a);
if (!ofmt_ctx_a) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_a = ofmt_ctx_a->oformat;
avformat_alloc_output_context2(&ofmt_ctx_s, NULL, NULL, out_filename_s);
if (!ofmt_ctx_s) {
printf( "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt_s = ofmt_ctx_s->oformat;
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
//Create output AVStream according to input AVStream
AVFormatContext *ofmt_ctx;
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = NULL;
if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO){
videoindex=i;
out_stream=avformat_new_stream(ofmt_ctx_v, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_v;
}
else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO){
audioindex=i;
out_stream=avformat_new_stream(ofmt_ctx_a, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_a;
}
else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){
srtindex=i;
out_stream=avformat_new_stream(ofmt_ctx_s, in_stream->codec->codec);
ofmt_ctx=ofmt_ctx_s;
}
else{
break;
}
if (!out_stream) {
printf( "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
//Copy the settings of AVCodecContext
if (avcodec_copy_context(out_stream->codec, in_stream->codec) < 0) {
printf( "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
//Dump Format------------------
printf("\n==============Input Video=============\n");
av_dump_format(ifmt_ctx, 0, in_filename, 0);
printf("\n==============Output Video============\n");
av_dump_format(ofmt_ctx_v, 0, out_filename_v, 1);
printf("\n==============Output Audio============\n");
av_dump_format(ofmt_ctx_a, 0, out_filename_a, 1);
/*printf("\n==============Output Subtitle============\n");
av_dump_format(ofmt_ctx_s, 0, out_filename_s, 1);*/
printf("\n======================================\n");
//Open output file
if (!(ofmt_v->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_v->pb, out_filename_v, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_v);
goto end;
}
}
if (!(ofmt_a->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_a->pb, out_filename_a, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_a);
goto end;
}
}
if (!(ofmt_s->flags & AVFMT_NOFILE)) {
if (avio_open(&ofmt_ctx_s->pb, out_filename_s, AVIO_FLAG_WRITE) < 0) {
printf( "Could not open output file '%s'", out_filename_s);
goto end;
}
}
//Write file header
if (avformat_write_header(ofmt_ctx_v, NULL) < 0) {
printf( "Error occurred when opening video output file\n");
goto end;
}
system("pause");
if (avformat_write_header(ofmt_ctx_a, NULL) < 0) {
printf( "Error occurred when opening audio output file\n");
goto end;
}
if (avformat_write_header(ofmt_ctx_s, NULL) < 0) {
printf( "Error occurred when opening audio output file\n");
goto end;
}
AVBitStreamFilterContext* h264bsfc = av_bitstream_filter_init("h264_mp4toannexb");
while (1) {
AVFormatContext *ofmt_ctx;
AVStream *in_stream, *out_stream;
//Get an AVPacket
if (av_read_frame(ifmt_ctx, &pkt) < 0)
break;
in_stream = ifmt_ctx->streams[pkt.stream_index];
if(pkt.stream_index==videoindex){
out_stream = ofmt_ctx_v->streams[0];
ofmt_ctx=ofmt_ctx_v;
printf("Write Video Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
av_bitstream_filter_filter(h264bsfc, in_stream->codec, NULL, &pkt.data, &pkt.size, pkt.data, pkt.size, 0);
}else if(pkt.stream_index==audioindex){
out_stream = ofmt_ctx_a->streams[0];
ofmt_ctx=ofmt_ctx_a;
printf("Write Audio Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
}
else if(pkt.stream_index==srtindex){
out_stream = ofmt_ctx_s->streams[0];
ofmt_ctx=ofmt_ctx_s;
printf("Write Subtitle Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
}
else{
continue;
}
//Convert PTS/DTS
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
pkt.stream_index=0;
//Write
if (av_interleaved_write_frame(ofmt_ctx, &pkt) < 0) {
printf( "Error muxing packet\n");
break;
}
//printf("Write %8d frames to output file\n",frame_index);
av_free_packet(&pkt);
frame_index++;
}
av_bitstream_filter_close(h264bsfc);
//Write file trailer
av_write_trailer(ofmt_ctx_a);
av_write_trailer(ofmt_ctx_v);
av_write_trailer(ofmt_ctx_s);
end:
avformat_close_input(&ifmt_ctx);
/* close output */
if (ofmt_ctx_a && !(ofmt_a->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_a->pb);
if (ofmt_ctx_v && !(ofmt_v->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_v->pb);
if (ofmt_ctx_s && !(ofmt_s->flags & AVFMT_NOFILE))
avio_close(ofmt_ctx_s->pb);
avformat_free_context(ofmt_ctx_a);
avformat_free_context(ofmt_ctx_v);
avformat_free_context(ofmt_ctx_s);
system("pause");
if (ret < 0 && ret != AVERROR_EOF) {
printf( "Error occurred.\n");
return -1;
}
return 0;
}
EDIT 1
Screen shot of resultant h264 file
EDIT 2
I think the "error" has to do with FFMPEG's "Using AVStream.codec.time_base as a timebase hint to the muxer is deprecated. Set AVStream.time_base instead" error.
I revert to an older version of FFMPEG and with the same code, the resultant h264 file was ok!
You need to convert the H.264 bitstream from length-prefixed mode to start-code-prefixed mode. This is required by some streaming formats, typically the MPEG-2 transport stream format ("mpegts").
Take a look at https://www.ffmpeg.org/ffmpeg-bitstream-filters.html#h264_005fmp4toannexb
Look at lines from 402 to 424 and from 842 to 843.
https://www.ffmpeg.org/doxygen/0.7/crystalhd_8c-source.html
I used it to extract H.264 from MP4.
//Use this filter on your first h264 input AVPacket
AVFormatContext *ifmt_ctx = NULL;
//...
//... //init input
//...
AVPacket *firstPacket;
//...
//... //get packet from stream
//...
uint8_t *dummy_p;
int dummy_int;
AVBitStreamFilterContext *filter = av_bitstream_filter_init("h264_mp4toannexb");
if (!filter)
{
printf("Can't open filter\n");
exit(1);
}
ret = av_bitstream_filter_filter(filter, ifmt_ctx->streams[videoindex]->codec, NULL,
&dummy_p, &dummy_int,
firstPacket->data, firstPacket->size, 0);
if( ret < 0 )
{
printf("Can't filter\n");
exit(1);
}
// use dummy_p to write to file, as first packet
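Note that the av_bitstream_filter_* API shown above was deprecated and later removed. If you build against a current FFmpeg, the same h264_mp4toannexb filter is driven through AVBSFContext instead; a rough sketch (error checks trimmed), assuming ifmt_ctx, videoindex and pkt are set up as in the question:
const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
AVBSFContext *bsf_ctx = NULL;
if (!bsf || av_bsf_alloc(bsf, &bsf_ctx) < 0) { printf("Can't open filter\n"); exit(1); }
avcodec_parameters_copy(bsf_ctx->par_in, ifmt_ctx->streams[videoindex]->codecpar);
av_bsf_init(bsf_ctx);
// For each video packet: push it into the filter, then drain the filtered packets.
if (av_bsf_send_packet(bsf_ctx, &pkt) == 0) {
    while (av_bsf_receive_packet(bsf_ctx, &pkt) == 0) {
        // pkt now holds Annex B (start-code prefixed) data; write it to the output.
    }
}
// When done with the stream:
av_bsf_free(&bsf_ctx);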
There are a lot of questions out there revolving around zlib and GZipStream, but none that I've found answers this question. I'm using a C# GZipStream to send compressed data to a client. The client reads the compressed data in entirely, then tries to decompress it. However, each time inflate() is called in the loop, only the data up to the first NULL terminator is kept. When sending a binary file this is a pretty huge problem.
Before I show you code, I just wanted to say that if I write the received compressed bytes to a .gz file and use gzFile/gzopen/gzread/gzclose everything works perfectly. That means all the data is coming in properly. I want to read in the compressed data, decompress it in memory, and have the contents in a variable.
I think the issue is that inflate() is writing to a char* which is NULL terminated. I just don't know how to get it to be a string. I do fully anticipate this being a major oversight and a simple fix. Thanks for any help!
Here's the decompression code:
bool DecompressString(const std::string& message, std::string& dMsg)
{
int bufferSize = 512;
int messageSize = message.size() + 1;
//decompress string
z_stream zs;
memset(&zs, 0, sizeof(zs));
zs.zalloc = Z_NULL;
zs.zfree = Z_NULL;
zs.opaque = Z_NULL;
zs.next_in = (Bytef*)message.data();
zs.avail_in = messageSize;
int ret = Z_OK;
unsigned char* outbuffer = new unsigned char[bufferSize];
if (inflateInit2(&zs, 16+MAX_WBITS) == Z_OK)
{
do {
zs.next_out = outbuffer;
zs.avail_out = bufferSize;
ret = inflate(&zs, Z_NO_FLUSH);
if (ret < 0) return false;
std::stringstream tmpString;
tmpString << outbuffer;
if (dMsg.size() < zs.total_out) {
dMsg.append(tmpString.str().substr(0, zs.total_out - dMsg.size()));
}
} while (ret == Z_OK);
}
inflateEnd(&zs);
delete[] outbuffer;
//"\n<EOF>" is appended by sender to signify the end of file. This removes it
if (dMsg.find("\n<EOF>") != -1)
dMsg = dMsg.substr(0, dMsg.find("\n<EOF>"));
return true;
}
Working code from solution:
bool DecompressString(const std::string& message, std::string& dMsg)
{
int bufferSize = 512;
int messageSize = message.size() + 1;
//decompress string
z_stream zs;
memset(&zs, 0, sizeof(zs));
zs.zalloc = Z_NULL;
zs.zfree = Z_NULL;
zs.opaque = Z_NULL;
zs.next_in = (Bytef*)message.data();
zs.avail_in = messageSize;
int ret = Z_OK;
unsigned char* outbuffer = new unsigned char[bufferSize];
if (inflateInit2(&zs, 16+MAX_WBITS) == Z_OK)
{
// get the decompressed bytes blockwise using repeated calls to inflate
do {
zs.next_out = outbuffer;
zs.avail_out = bufferSize;
ret = inflate(&zs, Z_NO_FLUSH);
if (ret < 0) return false;
//Here's the difference
if (dMsg.size() < zs.total_out)
dMsg.append(reinterpret_cast<char*>(outbuffer), bufferSize);
//End
} while (ret == Z_OK);
}
inflateEnd(&zs);
delete[] outbuffer;
if (dMsg.find("\n<EOF>") != -1)
dMsg = dMsg.substr(0, dMsg.find("\n<EOF>"));
return true;
}
std::string is not a problem in itself; it can handle binary data.
It is this line that assumes a zero-terminated c-string:
tmpString << outbuffer;
Replace it with an explicit-length write, for example:
tmpString.write(reinterpret_cast<char*>(outbuffer), bufferSize);
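One further refinement worth noting: inflate() may fill the output buffer only partially, and the number of bytes it actually produced in a given call is bufferSize - zs.avail_out. Appending exactly that many bytes avoids copying stale buffer contents on the last block and makes the total_out bookkeeping unnecessary. A sketch of the inner loop with that change:
do {
    zs.next_out = outbuffer;
    zs.avail_out = bufferSize;
    ret = inflate(&zs, Z_NO_FLUSH);
    if (ret < 0) break; // data, memory or stream error
    // append only the bytes inflate actually wrote this pass
    size_t produced = bufferSize - zs.avail_out;
    dMsg.append(reinterpret_cast<char*>(outbuffer), produced);
} while (ret == Z_OK);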