I'm learning how to raw deflate (no header or trailer information) & inflate data in C++, so I decided to try the zlib and Crypto++ libraries.
I've found that, when deflating the same file, Crypto++ sometimes adds 4 extra bytes (depending on the method used).
For example, for a file containing the following sequence, whitespaces included: 1 2 3 4 5 6, deflating with zlib produces a file of size 14 bytes.
This holds true for Crypto++ deflate_method1, but for Crypto++ deflate_method2, the file size is 18 bytes.
Also, when trying to inflate a file that was deflated using Crypto++ deflate_method2 with Crypto++ inflate_method1, an exception is raised:
terminate called after throwing an instance of 'CryptoPP::Inflator::UnexpectedEndErr'
what(): Inflator: unexpected end of compressed block
Aborted (core dumped)
To compare, I did another test deflating/inflating with Python:
Deflating also yields a file of size 14 bytes.
I'm able to inflate all the deflated files correctly, regardless of the method used to deflate them.
At this point, I would like to understand two things:
Why is there a discrepancy in the size of the deflated files?
Why is Python able to inflate any of the files, while Crypto++ is being picky?
Info & code:
OS: Ubuntu 16.04 Xenial
Zlib version: 1.0.1 from Ubuntu repos.
Crypto++ version: 8.0.2 from GitHub release.
Python version: 3.5.2
zlib version: 1.2.8 / runtime version: 1.2.8
Input & output files as base64:
Input: MSAyIDMgNCA1IDYK
Deflated:
Python: M1QwUjBWMFEwVTDjAgA=
Zlib: M1QwUjBWMFEwVTDjAgA=
Crypto++ method1: M1QwUjBWMFEwVTDjAgA=
Crypto++ method2: MlQwUjBWMFEwVTDjAgAAAP//
Zlib:
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include "zlib.h"
constexpr uint32_t BUFFER_READ_SIZE = 128;
constexpr uint32_t BUFFER_WRITE_SIZE = 128;
bool mydeflate(std::vector<unsigned char> & input)
{
const std::string inputStream{ input.begin(), input.end() };
uint64_t inputSize = input.size();
// Create a string stream where output will be created.
std::stringstream outputStringStream(std::ios::in | std::ios::out | std::ios::binary);
// Initialize zlib structures.
    std::vector<char> readBuffer(BUFFER_READ_SIZE);
    std::vector<char> writeBuffer(BUFFER_WRITE_SIZE);
z_stream zipStream;
zipStream.avail_in = 0;
zipStream.avail_out = BUFFER_WRITE_SIZE;
zipStream.next_out = reinterpret_cast<Bytef *>(writeBuffer.data());
zipStream.total_in = 0;
zipStream.total_out = 0;
zipStream.data_type = Z_BINARY;
zipStream.zalloc = nullptr;
zipStream.zfree = nullptr;
zipStream.opaque = nullptr;
    // Window bits is passed as a negative value to signal a raw stream with no zlib header.
if (deflateInit2_(&zipStream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8, Z_DEFAULT_STRATEGY, ZLIB_VERSION, sizeof(zipStream)) != Z_OK)
{
return false;
}
// Deflate the input stream
uint32_t readSize = 0;
uint64_t dataPendingToCompress = inputSize;
uint64_t dataPendingToWrite = 0;
bool isEndOfInput = false;
while (dataPendingToCompress > 0)
{
if (dataPendingToCompress > BUFFER_READ_SIZE)
{
readSize = BUFFER_READ_SIZE;
}
else
{
readSize = dataPendingToCompress;
isEndOfInput = true;
}
// Copy the piece of input stream to the read buffer.
std::memcpy(readBuffer.data(), &inputStream[inputSize - dataPendingToCompress], readSize);
dataPendingToCompress -= readSize;
zipStream.next_in = reinterpret_cast<Bytef *>(readBuffer.data());
zipStream.avail_in = readSize;
// While there is input data to compress.
while (zipStream.avail_in > 0)
{
// Output buffer is full.
if (zipStream.avail_out == 0)
{
outputStringStream.write(reinterpret_cast<const char*>(writeBuffer.data()), dataPendingToWrite);
zipStream.total_in = 0;
zipStream.avail_out = BUFFER_WRITE_SIZE;
zipStream.next_out = reinterpret_cast<Bytef *>(writeBuffer.data());
dataPendingToWrite = 0;
}
uint64_t totalOutBefore = zipStream.total_out;
int zlibError = deflate(&zipStream, isEndOfInput ? Z_FINISH : Z_NO_FLUSH);
if ((zlibError != Z_OK) && (zlibError != Z_STREAM_END))
{
deflateEnd(&zipStream);
return false;
}
dataPendingToWrite += static_cast<uint64_t>(zipStream.total_out - totalOutBefore);
}
}
// Flush last compressed data.
while (dataPendingToWrite > 0)
{
if (dataPendingToWrite > BUFFER_WRITE_SIZE)
{
outputStringStream.write(reinterpret_cast<const char*>(writeBuffer.data()), BUFFER_WRITE_SIZE);
}
else
{
outputStringStream.write(reinterpret_cast<const char*>(writeBuffer.data()), dataPendingToWrite);
}
zipStream.total_in = 0;
zipStream.avail_out = BUFFER_WRITE_SIZE;
zipStream.next_out = reinterpret_cast<Bytef *>(writeBuffer.data());
uint64_t totalOutBefore = zipStream.total_out;
int zlibError = deflate(&zipStream, Z_FINISH);
if ((zlibError != Z_OK) && (zlibError != Z_STREAM_END))
{
deflateEnd(&zipStream);
return false;
}
dataPendingToWrite = static_cast<uint64_t>(zipStream.total_out - totalOutBefore);
}
deflateEnd(&zipStream);
const std::string & outputString = outputStringStream.str();
std::vector<unsigned char> deflated{outputString.begin(), outputString.end()};
std::cout << "Output String size: " << outputString.size() << std::endl;
input.swap(deflated);
return true;
}
int main(int argc, char * argv[])
{
std::ifstream input_file{"/tmp/test.txt"};
std::vector<unsigned char> data((std::istreambuf_iterator<char>(input_file)), std::istreambuf_iterator<char>());
std::cout << "Deflated: " << mydeflate(data) << '\n';
std::ofstream output_file{"/tmp/deflated.txt"};
output_file.write(reinterpret_cast<char *>(data.data()), data.size());
return 0;
}
Crypto++:
#include "cryptopp/files.h"
#include "cryptopp/zdeflate.h"
#include "cryptopp/zinflate.h"
void deflate_method1(const std::string & input_file_path, const std::string & output_file_path)
{
CryptoPP::Deflator deflator(new CryptoPP::FileSink(output_file_path.c_str(), true), CryptoPP::Deflator::DEFAULT_DEFLATE_LEVEL, CryptoPP::Deflator::MAX_LOG2_WINDOW_SIZE);
CryptoPP::FileSource fs(input_file_path.c_str(), true);
fs.TransferAllTo(deflator);
}
void inflate_method1(const std::string & input_file_path, const std::string & output_file_path)
{
CryptoPP::FileSource fs(input_file_path.c_str(), true);
CryptoPP::Inflator inflator(new CryptoPP::FileSink(output_file_path.c_str(), true));
fs.TransferAllTo(inflator);
}
void deflate_method2(const std::string& input_file_path, const std::string& output_file_path)
{
CryptoPP::Deflator deflator(new CryptoPP::FileSink(output_file_path.c_str(), true), CryptoPP::Deflator::DEFAULT_DEFLATE_LEVEL, 15);
std::ifstream file_in;
file_in.open(input_file_path, std::ios::binary);
std::string buffer;
size_t num_read = 0;
const size_t buffer_size(1024 * 1024);
buffer.resize(buffer_size);
file_in.read(const_cast<char*>(buffer.data()), buffer_size);
num_read = file_in.gcount();
while (num_read) {
deflator.ChannelPut(CryptoPP::DEFAULT_CHANNEL, reinterpret_cast<unsigned char*>(const_cast<char *>(buffer.data())), num_read);
file_in.read(const_cast<char*>(buffer.data()), buffer_size);
num_read = file_in.gcount();
}
file_in.close();
deflator.Flush(true);
}
void inflate_method2(const std::string& input_file_path, const std::string& output_file_path)
{
CryptoPP::Inflator inflator(new CryptoPP::FileSink(output_file_path.c_str(), true));
std::ifstream file_in;
file_in.open(input_file_path, std::ios::binary);
std::string buffer;
size_t num_read = 0;
const size_t buffer_size(1024 * 1024);
buffer.resize(buffer_size);
file_in.read(const_cast<char*>(buffer.data()), buffer_size);
num_read = file_in.gcount();
while (num_read) {
inflator.ChannelPut(CryptoPP::DEFAULT_CHANNEL, reinterpret_cast<unsigned char*>(const_cast<char *>(buffer.data())), num_read);
file_in.read(const_cast<char*>(buffer.data()), buffer_size);
num_read = file_in.gcount();
}
file_in.close();
inflator.Flush(true);
}
int main(int argc, char * argv[])
{
deflate_method1("/tmp/test.txt", "/tmp/deflated_method1.bin");
inflate_method1("/tmp/deflated_method1.bin", "/tmp/inflated_method1.txt");
deflate_method2("/tmp/test.txt", "/tmp/deflated_method2.bin");
inflate_method2("/tmp/deflated_method2.bin", "/tmp/inflated_method2.txt");
// This throws: Inflator: unexpected end of compressed block
inflate_method1("/tmp/deflated_method2.bin", "/tmp/inflated_with_method1_file_deflated_with_method2.txt");
return 0;
}
Python:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import zlib
def CHUNKSIZE():
return 128
def deflate(file_path, compression_level, method, wbits):
plain_data = None
deflated_data = bytearray()
deflator = zlib.compressobj(compression_level, method, wbits)
with open(file_path, 'rb') as input_file:
while True:
plain_data = input_file.read(CHUNKSIZE())
if not plain_data:
break
deflated_data += deflator.compress(plain_data)
deflated_data += deflator.flush()
return deflated_data
def inflate(file_path, wbits):
inflated_data = bytearray()
inflator = zlib.decompressobj(wbits)
with open(file_path, 'rb') as deflated_file:
buffer = deflated_file.read(CHUNKSIZE())
while buffer:
inflated_data += inflator.decompress(buffer)
buffer = deflated_file.read(CHUNKSIZE())
inflated_data += inflator.flush()
return inflated_data
def write_file(file_path, data):
with open(file_path, 'wb') as output_file:
output_file.write(data)
if __name__ == "__main__":
deflated_data = deflate("/tmp/test.txt", zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS)
write_file("/tmp/deflated_python.bin", deflated_data)
The first three are working correctly, generating a valid deflate compressed stream with a single, last deflate block.
You "Crypto++ method2" is generating two deflate blocks, where the second one is an empty stored block that is not marked as the last block. This is not a valid deflate stream since it does not terminate. You are not correctly finishing the compression.
Your deflator.Flush(true) is flushing the first block and emitting that empty stored block, without ending the deflate stream.
I'm not seeing much in the way of documentation, or really any at all, but looking at the source code, I would try deflator.EndBlock(true) instead.
Update:
Per the comment below, EndBlock is not public. Instead, MessageEnd is what is needed to terminate the deflate stream.
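For example, a corrected method2 might look like this (a sketch, untested; it reuses the paths and Crypto++ 8.x calls from the question, with MessageEnd in place of Flush):

void deflate_method2_fixed(const std::string & input_file_path, const std::string & output_file_path)
{
    CryptoPP::Deflator deflator(new CryptoPP::FileSink(output_file_path.c_str(), true), CryptoPP::Deflator::DEFAULT_DEFLATE_LEVEL, 15);
    std::ifstream file_in(input_file_path, std::ios::binary);
    std::string buffer(1024 * 1024, '\0');
    while (file_in.read(&buffer[0], buffer.size()) || file_in.gcount() > 0) {
        deflator.Put(reinterpret_cast<const CryptoPP::byte *>(buffer.data()), static_cast<size_t>(file_in.gcount()));
    }
    // MessageEnd() emits the final deflate block and terminates the stream,
    // unlike Flush(true), which emits an empty stored block that is not marked last.
    deflator.MessageEnd();
}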
Related
I'm reading a file from stdin and just need to run through it as fast as possible and do some processing on each of the delimited tokens.
#include <iostream>
#include <array>
#include <string_view>
namespace {
constexpr auto BUFFER_SIZE = 25*1024*1024; // 25MiB
std::array<char, BUFFER_SIZE> g_buf;
}
inline void process_line(std::string_view line, const char delim = '|') {
size_t delim_pos = 0, field_start = 0;
    while ((delim_pos = line.find_first_of(delim, field_start)) != std::string_view::npos) {
        // TODO: process field [field_start, delim_pos)
        field_start = delim_pos + 1;
    }
    // TODO: process the final field [field_start, line.size())
}
int main() {
std::ios_base::sync_with_stdio(false);
std::cin.rdbuf()->pubsetbuf(g_buf.data(), g_buf.size());
std::string line;
while(std::getline(std::cin, line)) {
process_line(line);
}
}
For additional context, I'm building out test implementations to compare a C++ solution with a Python solution. These test solutions need to read hundreds of TBs of compressed data (hopefully quickly) and do some transformations. A previous implementation against a similar type of file was in Python and was overly complex and slow. I'm looking to split out the decompression using a Linux utility like bzcat or zcat and pipe it into the above implementation. If a line matches a condition, I'm basically doing something like the following:
import sys
delim = '|'
for line in sys.stdin:
    tokens = line.split(delim)
    # TODO: process tokens
If the line matches, I'll either be storing and transforming it (which I want to avoid because of memory usage), or throwing it back out on stdout in a different format to be written to disk or re-compressed.
I've added another implementation below which also reads from stdin, but uses the Linux read() call with a predefined buffer, as suggested in the comments (although it can probably be simplified):
#include <unistd.h>
#include <cstdint>
#include <cstring>
#include <array>
namespace {
constexpr size_t BUFFER_SIZE = 1024*1024*1024; // 1GiB
std::array<char, BUFFER_SIZE> g_buf;
}
int main() {
void * ptr = reinterpret_cast<void *>(g_buf.data());
ssize_t bytes_read = 0;
size_t offset = 0, total_valid_bytes = 0;
while(offset = reinterpret_cast<const char *>(ptr) - g_buf.data(),
bytes_read = read(STDIN_FILENO, ptr, g_buf.size() - offset),
total_valid_bytes = offset + bytes_read,
bytes_read > 0) {
size_t line_start = 0, line_end = 0;
std::string_view read_buf(reinterpret_cast<const char *>(g_buf.data()), total_valid_bytes);
// process each line
while(line_end = read_buf.find_first_of('\n', line_start), line_end != std::string_view::npos) {
std::string_view line(read_buf.data() + line_start, line_end - line_start);
// TODO: process line
// std::cout << line << std::endl;
line_start = line_end + 1;
}
if (line_end == std::string_view::npos) {
size_t line_frag_size = total_valid_bytes - line_start;
std::memcpy(g_buf.data(), g_buf.data() + line_start, line_frag_size); // compact
ptr = g_buf.data() + line_frag_size;
}
}
}
I want to calculate Sha1 of any given file in C++ using OpenSSL library.
I have read many articles on the internet (including all I could find on Stack Overflow) about doing this, for almost 3 days.
Finally I got my program to work, but the generated hash of a given file is not what it should be.
My code is somewhat similar to the examples found here and here, but easier to read and to use further in the program I'm writing.
Also, I want to use C++ code, not C code as in the links above. Second, they use:
SHA256_Init(&context);
SHA256_Update(&context, (unsigned char*)input, length);
SHA256_Final(md, &context);
which aren't available anymore in the new/current OpenSSL version (3.0 or so, I think).
So, I think this question will help many other readers who meet the same problem(s) I do with the new OpenSSL version and can no longer use the old code samples.
This is my C++ code, written to read huge files in chunks without loading them into memory (I hope this will help future readers of this post, because it has many useful lines, but it is not fully working, as you will see):
#include <openssl/evp.h>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <string>
#include <vector>
bool hashFullFile(const std::string& FilePath, std::string &hashed, std::string &hash_type) {
bool success = false;
EVP_MD_CTX *context = EVP_MD_CTX_new();
//read file by chuncks:
const int BUFFER_SIZE = 1024;
std::vector<char> buffer (BUFFER_SIZE + 1, 0);
// check if the file to read from exists and if so read the file in chunks
std::ifstream fin(FilePath, std::ifstream::binary | std::ifstream::in);
if (hash_type == "SHA1") {
if (context != NULL) {
if (EVP_DigestInit_ex(context, EVP_sha1(), NULL)) {
while (fin.good()){
fin.read(buffer.data(), BUFFER_SIZE);
std::streamsize s = ((fin) ? BUFFER_SIZE : fin.gcount());
buffer[s] = 0;
//convert vector of chars to string:
std::string str(buffer.data());
if (!EVP_DigestUpdate(context, str.c_str(), str.length())) {
fprintf(stderr, "Error while digesting file.\n");
return false;
}
}
unsigned char hash[EVP_MAX_MD_SIZE];
unsigned int lengthOfHash = 0;
if (EVP_DigestFinal_ex(context, hash, &lengthOfHash)) {
std::stringstream ss;
for (unsigned int i = 0; i < lengthOfHash; ++i) {
ss << std::hex << std::setw(2) << std::setfill('0') << (int) hash[i];
}
hashed = ss.str();
success = true;
}else{
fprintf(stderr, "Error while finalizing digest.\n");
return false;
}
}else{
fprintf(stderr, "Error while initializing digest context.\n");
return false;
}
EVP_MD_CTX_free(context);
}else{
fprintf(stderr, "Error while creating digest context.\n");
return false;
}
}
fin.close();
return success;
}
And I am using it like this in the main function:
std::string myhash;
std::string myhash_type = "SHA1";
hashFullFile(R"(C:\Users\UserName\data.bin)", myhash, myhash_type);
cout<<myhash<<endl;
The problem is that for a given file it calculates, e.g., 169ed28c9796a8065f96c98d205f21ddac11b14e as the hash output, but the same file has the hash:
openssl dgst -sha1 data.bin
SHA1(data.bin)= 1927f720a858d0c3b53893695879ae2a7897eedb
generated by the OpenSSL command line and also by any site on the internet.
I can't figure out what I am doing wrong, since my code seems to be correct.
Please help.
Thank you very much in advance!
You're missing the finishing calculation on your EVP API attempt. The use of an intermediate string is unnecessary as well. Finally, the function should return the digest as a vector of bytes; let the caller do with that what they want.
Examples using both the EVP API and a BIO chain are shown below.
#include <iostream>
#include <fstream>
#include <algorithm>
#include <array>
#include <vector>
#include <memory>
#include <openssl/evp.h>
#include <openssl/sha.h>
namespace
{
struct Delete
{
void operator()(BIO * p) const
{
BIO_free(p);
}
void operator()(EVP_MD_CTX *p) const
{
EVP_MD_CTX_free(p);
}
};
using BIO_ptr = std::unique_ptr<BIO, Delete>;
using EVP_MD_CTX_ptr = std::unique_ptr<EVP_MD_CTX, Delete>;
}
std::vector<uint8_t> hashFileEVP(const std::string &fname, std::string const &mdname = "sha1")
{
// will hold the resulting digest
std::vector<uint8_t> md;
// set this to however big you want the chunk size to be
static constexpr size_t BUFFER_SIZE = 1024;
std::array<char, BUFFER_SIZE> buff;
// get the digest algorithm by name
const EVP_MD *mthd = EVP_get_digestbyname(mdname.c_str());
if (mthd)
{
std::ifstream inp(fname, std::ios::in | std::ios::binary);
if (inp.is_open())
{
EVP_MD_CTX_ptr ctx{EVP_MD_CTX_new()};
EVP_DigestInit_ex(ctx.get(), mthd, nullptr);
while (inp.read(buff.data(), BUFFER_SIZE).gcount() > 0)
EVP_DigestUpdate(ctx.get(), buff.data(), inp.gcount());
// size output vector
unsigned int mdlen = EVP_MD_size(mthd);
md.resize(mdlen);
            // generate final digest
EVP_DigestFinal_ex(ctx.get(), md.data(), &mdlen);
}
}
return md;
}
std::vector<uint8_t> hashFileBIO(std::string const &fname, std::string const &mdname = "sha1")
{
// the fixed-size read buffer
static constexpr size_t BUFFER_SIZE = 1024;
// will hold the resulting digest
std::vector<uint8_t> md;
// select this however you want.
const EVP_MD *mthd = EVP_get_digestbyname(mdname.c_str());
if (mthd)
{
// open the file and a message digest BIO
BIO_ptr bio_f(BIO_new_file(fname.c_str(), "rb"));
BIO_ptr bio_md(BIO_new(BIO_f_md()));
BIO_set_md(bio_md.get(), mthd);
// chain the bios together. note this bio is NOT
// held together with a smart pointer; all the
// bios in the chain are.
BIO *bio = BIO_push(bio_md.get(), bio_f.get());
// read through file one buffer at a time.
std::array<char, BUFFER_SIZE> buff;
while (BIO_read(bio, buff.data(), buff.size()) > 0)
; // intentionally empty
// size output buffer
unsigned int mdlen = EVP_MD_size(mthd);
md.resize(mdlen);
// read final digest from md bio.
BIO_gets(bio_md.get(), (char *)md.data(), mdlen);
}
return md;
}
// convert a vector of byte to std::string
std::string bin2hex(std::vector<uint8_t> const& bin)
{
std::string res;
size_t len = 0;
if (OPENSSL_buf2hexstr_ex(nullptr, 0, &len, bin.data(), bin.size(), 0) != 0)
{
res.resize(len);
OPENSSL_buf2hexstr_ex(&res[0], len, &len, bin.data(), bin.size(), 0);
}
return res;
}
int main()
{
OpenSSL_add_all_digests();
// i have this on my rig. use whatever you want
// or get the name from argv or some such.
static const char fname[] = "dictionary.txt";
auto md1 = hashFileEVP(fname);
auto md1str = bin2hex(md1);
std::cout << "hashed with EVP API\n";
std::cout << md1str << '\n';
auto md2 = hashFileBIO(fname);
    auto md2str = bin2hex(md2);
std::cout << "hashed with BIO chain\n";
std::cout << md2str << '\n';
}
Output
hashed with EVP API
0A97D663ADA2E039FD904846ABC5361291BD2D8E
hashed with BIO chain
0A97D663ADA2E039FD904846ABC5361291BD2D8E
Output from openssl command line
craig#rogue1 % openssl dgst -sha1 dictionary.txt
SHA1(dictionary.txt)= 0a97d663ada2e039fd904846abc5361291bd2d8e
Note the digests are the same in all three cases.
I have the following piece of code that is supposed to calculate the SHA256 of a file. I am reading the file chunk by chunk and using EVP_DigestUpdate for each chunk. When I test the code with a file that has the content
Test Message
Hello World
on Windows, it gives me a SHA256 value of 97b2bc0cd1c3849436c6532d9c8de85456e1ce926d1e872a1e9b76a33183655f, but the value is supposed to be 318b20b83a6730b928c46163a2a1cefee4466132731c95c39613acb547ccb715, which can be verified here too.
Here is the code:
#include <openssl/evp.h>
#include <cstring>
#include <iostream>
#include <string>
#include <fstream>
#include <cstdio>
const int MAX_BUFFER_SIZE = 1024;
std::string FileChecksum(std::string, std::string);
int main()
{
std::string checksum = FileChecksum("C:\\Users\\Dell\\Downloads\\somefile.txt","sha256");
std::cout << checksum << std::endl;
return 0;
}
std::string FileChecksum(std::string file_path, std::string algorithm)
{
EVP_MD_CTX *mdctx;
const EVP_MD *md;
unsigned char md_value[EVP_MAX_MD_SIZE];
int i;
unsigned int md_len;
OpenSSL_add_all_digests();
md = EVP_get_digestbyname(algorithm.c_str());
if(!md) {
printf("Unknown message digest %s\n",algorithm);
exit(1);
}
mdctx = EVP_MD_CTX_create();
std::ifstream readfile(file_path,std::ifstream::in|std::ifstream::binary);
if(!readfile.is_open())
{
std::cout << "COuldnot open file\n";
return 0;
}
readfile.seekg(0, std::ios::end);
long filelen = readfile.tellg();
std::cout << "LEN IS " << filelen << std::endl;
readfile.seekg(0, std::ios::beg);
if(filelen == -1)
{
std::cout << "Return Null \n";
        return "";
}
EVP_DigestInit_ex(mdctx, md, NULL);
long temp_fil = filelen;
while(!readfile.eof() && readfile.is_open() && temp_fil>0)
{
int bufferS = (temp_fil < MAX_BUFFER_SIZE) ? temp_fil : MAX_BUFFER_SIZE;
char *buffer = new char[bufferS+1];
buffer[bufferS] = 0;
readfile.read(buffer, bufferS);
std::cout << strlen(buffer) << std::endl;
EVP_DigestUpdate(mdctx, buffer, strlen(buffer));
temp_fil -= bufferS;
delete[] buffer;
}
EVP_DigestFinal_ex(mdctx, md_value, &md_len);
EVP_MD_CTX_destroy(mdctx);
printf("Digest is: ");
//char *checksum_msg = new char[md_len];
//int cx(0);
for(i = 0; i < md_len; i++)
{
//_snprintf(checksum_msg+cx,md_len-cx,"%02x",md_value[i]);
printf("%02x", md_value[i]);
}
//std::string res(checksum_msg);
//delete[] checksum_msg;
printf("\n");
/* Call this once before exit. */
EVP_cleanup();
return "";
}
I tried to write the hash generated by the program as a string using _snprintf, but it didn't work. How can I generate the correct hash and return the value as a string from the FileChecksum function? The platform is Windows.
EDIT: It seems the problem was because of a CRLF issue. As Windows saves the file using \r\n, the calculated checksum was different. How do I handle this?
MS-DOS used the CR-LF convention, so when saving the file on Windows, \r\n comes into effect for carriage return and newline, whereas on the online tool (given by you), only the \n character comes into effect.
Thus you have to compute the checksum of the string Test Message\r\nHello World\r\n, which is equivalent to creating and reading the file on Windows (as given above), which is the case here.
However, the checksum of the file itself will be the same wherever it is computed.
Note: your code works fine :)
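To check this yourself, you can hash both variants of the text in memory and compare against your program's output and the online tool. A sketch using the same EVP API (untested):

#include <openssl/evp.h>
#include <cstdio>
#include <string>

static void print_sha256(const std::string &data)
{
    unsigned char md[EVP_MAX_MD_SIZE];
    unsigned int md_len = 0;
    EVP_MD_CTX *ctx = EVP_MD_CTX_new();
    EVP_DigestInit_ex(ctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(ctx, data.data(), data.size());
    EVP_DigestFinal_ex(ctx, md, &md_len);
    EVP_MD_CTX_free(ctx);
    for (unsigned int i = 0; i < md_len; ++i)
        std::printf("%02x", md[i]);
    std::printf("\n");
}

int main()
{
    print_sha256("Test Message\r\nHello World\r\n"); // CRLF, as saved on Windows
    print_sha256("Test Message\nHello World\n");     // LF-only variant
}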
It seems the problem was associated with the length value I passed to EVP_DigestUpdate. I had passed the value from strlen, but replacing it with bufferS fixed the issue.
The code was modified as:
while(!readfile.eof() && readfile.is_open() && temp_fil>0)
{
int bufferS = (temp_fil < MAX_BUFFER_SIZE) ? temp_fil : MAX_BUFFER_SIZE;
char *buffer = new char[bufferS+1];
buffer[bufferS] = 0;
readfile.read(buffer, bufferS);
EVP_DigestUpdate(mdctx, buffer, bufferS);
temp_fil -= bufferS;
delete[] buffer;
}
and to send the checksum string, I modified the code as:
EVP_DigestFinal_ex(mdctx, md_value, &md_len);
EVP_MD_CTX_destroy(mdctx);
char str[128] = { 0 };
char *ptr = str;
std::string ret;
for(i = 0; i < md_len; i++)
{
//_snprintf(checksum_msg+cx,md_len-cx,"%02x",md_value[i]);
sprintf(ptr,"%02x", md_value[i]);
ptr += 2;
}
ret = str;
/* Call this once before exit. */
EVP_cleanup();
return ret;
As for the wrong checksum earlier, the problem was in how Windows stores line feeds. As suggested by Zangetsu, Windows was saving the text file with CRLF, while Linux and the site I mentioned earlier use LF, so the checksum values differed. For files other than text, e.g. DLLs, the code now computes the correct checksum as a string.
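If you want text files to produce the same checksum regardless of platform, one option (a sketch of my own, untested; it reuses MAX_BUFFER_SIZE from the code above) is to strip \r while feeding the digest:

#include <openssl/evp.h>
#include <algorithm>
#include <fstream>
#include <string>
#include <vector>

// Digest a text file with '\r' stripped, so Windows (CRLF) and Unix (LF)
// copies of the same text hash identically.
bool Sha256TextNormalized(const std::string &path, unsigned char *md, unsigned int *md_len)
{
    std::ifstream in(path, std::ifstream::binary);
    if (!in.is_open())
        return false;
    EVP_MD_CTX *ctx = EVP_MD_CTX_new();
    EVP_DigestInit_ex(ctx, EVP_sha256(), NULL);
    std::vector<char> buf(MAX_BUFFER_SIZE);
    while (in.read(buf.data(), buf.size()) || in.gcount() > 0) {
        auto end = std::remove(buf.begin(), buf.begin() + in.gcount(), '\r');
        EVP_DigestUpdate(ctx, buf.data(), static_cast<size_t>(end - buf.begin()));
    }
    int ok = EVP_DigestFinal_ex(ctx, md, md_len);
    EVP_MD_CTX_free(ctx);
    return ok == 1;
}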
There is a usage example at the zlib website: http://www.zlib.net/zlib_how.html
However in the example they are compressing a file. I would like to compress a binary data stored in a buffer in memory. I don't want to save the compressed buffer to disk either.
Basically here is my buffer:
fIplImageHeader->imageData = (char*)imageIn->getFrame();
How can I compress it with zlib?
I would appreciate some code example of how to do that.
zlib.h has all the functions you need: compress (or compress2) and uncompress. The documentation from the zlib source follows:
ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen));
/*
Compresses the source buffer into the destination buffer. sourceLen is
the byte length of the source buffer. Upon entry, destLen is the total size
of the destination buffer, which must be at least the value returned by
compressBound(sourceLen). Upon exit, destLen is the actual size of the
compressed buffer.
compress returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_BUF_ERROR if there was not enough room in the output
buffer.
*/
ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen));
/*
Decompresses the source buffer into the destination buffer. sourceLen is
the byte length of the source buffer. Upon entry, destLen is the total size
of the destination buffer, which must be large enough to hold the entire
uncompressed data. (The size of the uncompressed data must have been saved
previously by the compressor and transmitted to the decompressor by some
mechanism outside the scope of this compression library.) Upon exit, destLen
is the actual size of the uncompressed buffer.
uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_BUF_ERROR if there was not enough room in the output
buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In
the case where there is not enough room, uncompress() will fill the output
buffer with the uncompressed data up to that point.
*/
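For instance, a minimal in-memory round trip with these two calls might look like this (a sketch; note that the receiver must learn the uncompressed size by some outside means):

#include <cassert>
#include <cstring>
#include <vector>
#include <zlib.h>

int main()
{
    const char msg[] = "Hello, hello, hello, world!";
    const uLong source_len = sizeof(msg); // include the terminating NUL

    // compress into a buffer sized with compressBound()
    std::vector<Bytef> packed(compressBound(source_len));
    uLongf packed_len = packed.size();
    int res = compress(packed.data(), &packed_len, reinterpret_cast<const Bytef *>(msg), source_len);
    assert(res == Z_OK);

    // uncompress; the original size must travel out of band
    std::vector<Bytef> unpacked(source_len);
    uLongf unpacked_len = unpacked.size();
    res = uncompress(unpacked.data(), &unpacked_len, packed.data(), packed_len);
    assert(res == Z_OK && unpacked_len == source_len);
    assert(std::memcmp(unpacked.data(), msg, source_len) == 0);
}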
This is an example to pack a buffer with zlib and save the compressed contents in a vector.
void compress_memory(void *in_data, size_t in_data_size, std::vector<uint8_t> &out_data)
{
std::vector<uint8_t> buffer;
const size_t BUFSIZE = 128 * 1024;
uint8_t temp_buffer[BUFSIZE];
z_stream strm;
    strm.zalloc = 0;
    strm.zfree = 0;
    strm.opaque = 0;
strm.next_in = reinterpret_cast<uint8_t *>(in_data);
strm.avail_in = in_data_size;
strm.next_out = temp_buffer;
strm.avail_out = BUFSIZE;
deflateInit(&strm, Z_BEST_COMPRESSION);
while (strm.avail_in != 0)
{
int res = deflate(&strm, Z_NO_FLUSH);
assert(res == Z_OK);
if (strm.avail_out == 0)
{
buffer.insert(buffer.end(), temp_buffer, temp_buffer + BUFSIZE);
strm.next_out = temp_buffer;
strm.avail_out = BUFSIZE;
}
}
int deflate_res = Z_OK;
while (deflate_res == Z_OK)
{
if (strm.avail_out == 0)
{
buffer.insert(buffer.end(), temp_buffer, temp_buffer + BUFSIZE);
strm.next_out = temp_buffer;
strm.avail_out = BUFSIZE;
}
deflate_res = deflate(&strm, Z_FINISH);
}
assert(deflate_res == Z_STREAM_END);
buffer.insert(buffer.end(), temp_buffer, temp_buffer + BUFSIZE - strm.avail_out);
deflateEnd(&strm);
out_data.swap(buffer);
}
You can easily adapt the example by replacing fread() and fwrite() calls with direct pointers to your data. For zlib compression (referred to as deflate as you "take out all the air of your data") you allocate z_stream structure, call deflateInit() and then:
fill next_in with the next chunk of data you want to compress
set avail_in to the number of bytes available in next_in
set next_out to where the compressed data should be written which should usually be a pointer inside your buffer that advances as you go along
set avail_out to the number of bytes available in next_out
call deflate
repeat steps 3-5 until avail_out is non-zero (i.e. there's more room in the output buffer than zlib needs - no more data to write)
repeat steps 1-6 while you have data to compress
Eventually you call deflateEnd() and you're done.
You're basically feeding it chunks of input and output until you're out of input and it is out of output.
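The inflate side mirrors this; here's a sketch (assuming a complete zlib-wrapped stream like the one produced by the compress_memory example above):

#include <cassert>
#include <cstdint>
#include <vector>
#include <zlib.h>

std::vector<uint8_t> decompress_memory(const void *in_data, size_t in_data_size)
{
    const uInt BUFSIZE = 128 * 1024;
    std::vector<uint8_t> temp_buffer(BUFSIZE);
    std::vector<uint8_t> out_data;

    z_stream strm{}; // zero-initializes zalloc/zfree/opaque
    strm.next_in = static_cast<Bytef *>(const_cast<void *>(in_data));
    strm.avail_in = static_cast<uInt>(in_data_size);
    inflateInit(&strm);

    int res = Z_OK;
    do {
        strm.next_out = temp_buffer.data();
        strm.avail_out = BUFSIZE;
        res = inflate(&strm, Z_NO_FLUSH);
        assert(res == Z_OK || res == Z_STREAM_END);
        // append whatever this pass produced
        out_data.insert(out_data.end(), temp_buffer.data(), temp_buffer.data() + (BUFSIZE - strm.avail_out));
    } while (res != Z_STREAM_END);

    inflateEnd(&strm);
    return out_data;
}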
The classic way, made more convenient with C++ features
Here's a full example which demonstrates compression and decompression using C++ std::vector objects:
#include <cstdio>
#include <iosfwd>
#include <iostream>
#include <vector>
#include <zconf.h>
#include <zlib.h>
#include <iomanip>
#include <cassert>
void add_buffer_to_vector(std::vector<char> &vector, const char *buffer, uLongf length) {
for (int character_index = 0; character_index < length; character_index++) {
char current_character = buffer[character_index];
vector.push_back(current_character);
}
}
int compress_vector(std::vector<char> source, std::vector<char> &destination) {
unsigned long source_length = source.size();
uLongf destination_length = compressBound(source_length);
char *destination_data = (char *) malloc(destination_length);
if (destination_data == nullptr) {
return Z_MEM_ERROR;
}
Bytef *source_data = (Bytef *) source.data();
int return_value = compress2((Bytef *) destination_data, &destination_length, source_data, source_length,
Z_BEST_COMPRESSION);
add_buffer_to_vector(destination, destination_data, destination_length);
free(destination_data);
return return_value;
}
int decompress_vector(std::vector<char> source, std::vector<char> &destination) {
unsigned long source_length = source.size();
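    // NOTE: sizing the output with compressBound(source_length) only works when
    // the decompressed data happens to fit; in general the original size must be
    // known (or transmitted) by other means.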
uLongf destination_length = compressBound(source_length);
char *destination_data = (char *) malloc(destination_length);
if (destination_data == nullptr) {
return Z_MEM_ERROR;
}
Bytef *source_data = (Bytef *) source.data();
int return_value = uncompress((Bytef *) destination_data, &destination_length, source_data, source.size());
add_buffer_to_vector(destination, destination_data, destination_length);
free(destination_data);
return return_value;
}
void add_string_to_vector(std::vector<char> &uncompressed_data,
const char *my_string) {
int character_index = 0;
while (true) {
char current_character = my_string[character_index];
uncompressed_data.push_back(current_character);
if (current_character == '\00') {
break;
}
character_index++;
}
}
// https://stackoverflow.com/a/27173017/3764804
void print_bytes(std::ostream &stream, const unsigned char *data, size_t data_length, bool format = true) {
stream << std::setfill('0');
for (size_t data_index = 0; data_index < data_length; ++data_index) {
stream << std::hex << std::setw(2) << (int) data[data_index];
if (format) {
stream << (((data_index + 1) % 16 == 0) ? "\n" : " ");
}
}
stream << std::endl;
}
void test_compression() {
std::vector<char> uncompressed(0);
auto *my_string = (char *) "Hello, world!";
add_string_to_vector(uncompressed, my_string);
std::vector<char> compressed(0);
int compression_result = compress_vector(uncompressed, compressed);
    assert(compression_result == Z_OK);
std::vector<char> decompressed(0);
int decompression_result = decompress_vector(compressed, decompressed);
    assert(decompression_result == Z_OK);
printf("Uncompressed: %s\n", uncompressed.data());
printf("Compressed: ");
std::ostream &standard_output = std::cout;
print_bytes(standard_output, (const unsigned char *) compressed.data(), compressed.size(), false);
printf("Decompressed: %s\n", decompressed.data());
}
In your main.cpp simply call:
int main(int argc, char *argv[]) {
test_compression();
return EXIT_SUCCESS;
}
The output produced:
Uncompressed: Hello, world!
Compressed: 78daf348cdc9c9d75128cf2fca495164000024e8048a
Decompressed: Hello, world!
The Boost way
#include <iostream>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/zlib.hpp>
std::string compress(const std::string &data) {
boost::iostreams::filtering_streambuf<boost::iostreams::output> output_stream;
output_stream.push(boost::iostreams::zlib_compressor());
std::stringstream string_stream;
output_stream.push(string_stream);
boost::iostreams::copy(boost::iostreams::basic_array_source<char>(data.c_str(),
data.size()), output_stream);
return string_stream.str();
}
std::string decompress(const std::string &cipher_text) {
std::stringstream string_stream;
string_stream << cipher_text;
boost::iostreams::filtering_streambuf<boost::iostreams::input> input_stream;
input_stream.push(boost::iostreams::zlib_decompressor());
input_stream.push(string_stream);
std::stringstream unpacked_text;
boost::iostreams::copy(input_stream, unpacked_text);
return unpacked_text.str();
}
TEST_CASE("zlib") {
std::string plain_text = "Hello, world!";
const auto cipher_text = compress(plain_text);
const auto decompressed_plain_text = decompress(cipher_text);
REQUIRE(plain_text == decompressed_plain_text);
}
This is not a direct answer to your question about the zlib API, but you may be interested in the boost::iostreams library paired with zlib.
It allows you to use zlib-driven packing algorithms through the basic "stream" operation notation; your data can then be compressed simply by opening some memory stream and performing the << data operation on it.
With boost::iostreams, this automatically invokes the corresponding packing filter for all data that passes through the stream.
How can I generate SHA1 or SHA2 hashes using the OpenSSL library?
I searched Google and could not find any function or example code.
From the command line, it's simply:
printf "compute sha1" | openssl sha1
You can invoke the library like this:
#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>
int main()
{
unsigned char ibuf[] = "compute sha1";
unsigned char obuf[20];
    SHA1(ibuf, strlen((const char *)ibuf), obuf);
int i;
for (i = 0; i < 20; i++) {
printf("%02x ", obuf[i]);
}
printf("\n");
return 0;
}
OpenSSL has horrible documentation with no code examples, but here you are:
#include <openssl/sha.h>
bool simpleSHA256(void* input, unsigned long length, unsigned char* md)
{
SHA256_CTX context;
if(!SHA256_Init(&context))
return false;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return false;
if(!SHA256_Final(md, &context))
return false;
return true;
}
Usage:
unsigned char md[SHA256_DIGEST_LENGTH]; // 32 bytes
if(!simpleSHA256(<data buffer>, <data length>, md))
{
// handle error
}
Afterwards, md will contain the binary SHA-256 message digest. Similar code can be used for the other SHA family members, just replace "256" in the code.
If you have larger data, you of course should feed data chunks as they arrive (multiple SHA256_Update calls).
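Note that the SHA256_* functions above are deprecated as of OpenSSL 3.0; a one-shot equivalent using the EVP API would look roughly like this (a sketch):

#include <openssl/evp.h>

bool simpleSHA256evp(const void* input, size_t length, unsigned char* md)
{
    unsigned int md_len = 0;
    // EVP_Digest() performs init/update/final in one call
    return EVP_Digest(input, length, md, &md_len, EVP_sha256(), NULL) == 1;
}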
Adaptation of @AndiDog's version for big files:
#include <openssl/sha.h>
#include <fstream>
#include <iomanip>
#include <optional>
#include <sstream>
#include <string>
static const int K_READ_BUF_SIZE{ 1024 * 16 };
std::optional<std::string> CalcSha256(std::string filename)
{
// Initialize openssl
SHA256_CTX context;
if(!SHA256_Init(&context))
{
return std::nullopt;
}
// Read file and update calculated SHA
char buf[K_READ_BUF_SIZE];
std::ifstream file(filename, std::ifstream::binary);
while (file.good())
{
file.read(buf, sizeof(buf));
if(!SHA256_Update(&context, buf, file.gcount()))
{
return std::nullopt;
}
}
// Get Final SHA
unsigned char result[SHA256_DIGEST_LENGTH];
if(!SHA256_Final(result, &context))
{
return std::nullopt;
}
// Transform byte-array to string
std::stringstream shastr;
shastr << std::hex << std::setfill('0');
for (const auto &byte: result)
{
shastr << std::setw(2) << (int)byte;
}
return shastr.str();
}
The correct syntax at the command line should be
echo -n "compute sha1" | openssl sha1
otherwise you'll hash the trailing newline character as well.
Here is an OpenSSL example of calculating a SHA-1 digest using BIO:
#include <openssl/bio.h>
#include <openssl/evp.h>
std::string sha1(const std::string &input)
{
BIO * p_bio_md = nullptr;
BIO * p_bio_mem = nullptr;
try
{
// make chain: p_bio_md <-> p_bio_mem
p_bio_md = BIO_new(BIO_f_md());
if (!p_bio_md) throw std::bad_alloc();
BIO_set_md(p_bio_md, EVP_sha1());
p_bio_mem = BIO_new_mem_buf((void*)input.c_str(), input.length());
if (!p_bio_mem) throw std::bad_alloc();
BIO_push(p_bio_md, p_bio_mem);
// read through p_bio_md
// read sequence: buf <<-- p_bio_md <<-- p_bio_mem
std::vector<char> buf(input.size());
for (;;)
{
auto nread = BIO_read(p_bio_md, buf.data(), buf.size());
if (nread < 0) { throw std::runtime_error("BIO_read failed"); }
if (nread == 0) { break; } // eof
}
// get result
char md_buf[EVP_MAX_MD_SIZE];
auto md_len = BIO_gets(p_bio_md, md_buf, sizeof(md_buf));
if (md_len <= 0) { throw std::runtime_error("BIO_gets failed"); }
std::string result(md_buf, md_len);
// clean
BIO_free_all(p_bio_md);
return result;
}
catch (...)
{
if (p_bio_md) { BIO_free_all(p_bio_md); }
throw;
}
}
Though it's longer than just calling the SHA1 function from OpenSSL, it's more universal and can be reworked for use with file streams (thus processing data of any length).
A C version of @Nayfe's code, generating a SHA1 hash from a file:
#include <stdio.h>
#include <stdlib.h>
#include <openssl/sha.h>
static const int K_READ_BUF_SIZE = { 1024 * 16 };
unsigned char* calculateSHA1(char *filename)
{
if (!filename) {
return NULL;
}
FILE *fp = fopen(filename, "rb");
if (fp == NULL) {
return NULL;
}
unsigned char* sha1_digest = malloc(sizeof(char)*SHA_DIGEST_LENGTH);
SHA_CTX context;
if(!SHA1_Init(&context))
return NULL;
unsigned char buf[K_READ_BUF_SIZE];
while (!feof(fp))
{
size_t total_read = fread(buf, 1, sizeof(buf), fp);
if(!SHA1_Update(&context, buf, total_read))
{
return NULL;
}
}
fclose(fp);
if(!SHA1_Final(sha1_digest, &context))
return NULL;
return sha1_digest;
}
It can be used as follows:
unsigned char *sha1digest = calculateSHA1("/tmp/file1");
The sha1digest variable contains the raw SHA-1 hash.
You can print it on the screen using the following for-loop:
char *sha1hash = (char *)malloc(sizeof(char) * 41);
sha1hash[40] = '\0';
int i;
for (i = 0; i < SHA_DIGEST_LENGTH; i++)
{
sprintf(&sha1hash[i*2], "%02x", sha1digest[i]);
}
printf("SHA1 HASH: %s\n", sha1hash);