Fail to extract binary file from Tar archive using libarchive - c++

Update
The example below does work. I had misread the intention of the flag ARCHIVE_EXTRACT_TIME and was expecting to see the newly created file. Instead the file was being extracted correctly but with its original creation date. It was a long week! :-)
I have the following data stored in a Tar archive:
opt/
fw.bin
ad/
bin/
installer.sh
I have the following code, running on Ubuntu (g++ version 5.2.1), to extract the contents of the archive in memory using libarchive, largely taken from the examples:
int copy_data(struct archive *ar, struct archive *aw)
{
int r;
const void *buff;
size_t size;
#if ARCHIVE_VERSION_NUMBER >= 3000000
int64_t offset;
#else
off_t offset;
#endif
for (;;) {
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF)
return (ARCHIVE_OK);
if (r != ARCHIVE_OK)
return (r);
r = archive_write_data(aw, buff, size);
if (size != r)
{
std::cerr << "Failed to write " << (size - r) << " Bytes\n";
}
}
}
}
int main(int argc, char** argv)
{
std::ifstream input("archive.tar.gz", std::ios::binary | std::ios::ate );
auto const size = input.tellg();
char* buffer = new char[size];
input.seekg(0);
input.read(buffer, size);
input.close();
struct archive *a;
struct archive *ext;
struct archive_entry *entry;
int retCode;
int flags = ARCHIVE_EXTRACT_TIME;
a = archive_read_new();
archive_read_support_format_all(a);
ext = archive_write_disk_new();
archive_write_disk_set_options(ext, flags);
archive_write_disk_set_standard_lookup(ext);
if ((retCode = archive_read_open_memory(a, buffer, size)))
for (;;) {
retCode = archive_read_next_header(a, &entry);
if (retCode == ARCHIVE_EOF)
{
break;
}
if (retCode != ARCHIVE_OK)
{
return -1;
}
retCode = archive_write_header(ext, entry);
if (retCode != ARCHIVE_OK)
{
return -1;
}
else
{
if (archive_entry_size(entry) > 0)
{
copy_data(a, ext);
}
retCode = archive_write_finish_entry(ext);
if (retCode != ARCHIVE_OK)
{
return -1;
}
}
}
archive_read_close(a);
archive_read_free(a);
archive_write_close(ext);
archive_write_free(ext);
return 0;
}
It unpacks the directories correctly as above and writes the installer.sh file. However, the binary file (fw.bin) is missing.
Are there specific settings I am missing in order to write binary files to disk? Am I using the wrong API calls?

Related

Extracting .tar.gz using libarchive on UNC path?

I'm trying to use libarchive to extract .tar.gz file to current folder. Test machine if win7 64bit running under virtual box. It works fine if I do it in C:\ (.vdi disk), however it fails on E:\ (shared folder from host). Based on this
Can't create '\\?\e:\folder\file'
error message I assume the issue is UNC path. Is it possible to handle it with libarchive?
Here is my code:
#include <iostream>
#include <cstdio>
#include <stdexcept>
#include <archive.h>
#include <archive_entry.h>
#include "utils.h"
int copy_data(struct archive * ar, struct archive * aw) {
int r;
size_t size;
const void *buff;
int64_t offset;
for (;;) {
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF) {
return (ARCHIVE_OK);
} else if (r < ARCHIVE_OK) {
return (r);
}
r = archive_write_data_block(aw, buff, size, offset);
if (r < ARCHIVE_OK) {
std::cerr << archive_error_string(aw) << std::endl;
return (r);
}
}
}
void handle_errors(archive * a, int r, const char * msg) {
if (r < ARCHIVE_OK) {
std::cerr << archive_error_string(a) << std::endl;
}
if (r < ARCHIVE_WARN) {
throw std::runtime_error(msg);
}
};
void extract(std::string target_file_name) {
struct archive * a = nullptr;
struct archive * ext = nullptr;
struct archive_entry * entry = nullptr;
int flags, r;
/* Select which attributes we want to restore. */
flags = ARCHIVE_EXTRACT_TIME;
flags |= ARCHIVE_EXTRACT_UNLINK;
flags |= ARCHIVE_EXTRACT_SECURE_NODOTDOT;
a = archive_read_new();
if (!a) { throw std::runtime_error("Cannot archive_read_new"); }
On_Scope_Exit([&] { archive_read_free(a); });
archive_read_support_format_tar(a);
archive_read_support_filter_gzip(a);
ext = archive_write_disk_new();
if (!ext) { throw std::runtime_error("Cannot archive_write_disk_new"); }
On_Scope_Exit([&] { archive_write_close(ext); archive_write_free(ext); });
archive_write_disk_set_options(ext, flags);
archive_write_disk_set_standard_lookup(ext);
if ((r = archive_read_open_filename(a, target_file_name.c_str(), 10240))) {
throw std::runtime_error("Cannot archive_read_open_filename");
}
On_Scope_Exit([&] { archive_read_close(a); });
for (;;) {
r = archive_read_next_header(a, &entry);
if (r == ARCHIVE_EOF) {
break;
}
handle_errors(a, r, "Encountered error while reading header.");
r = archive_write_header(ext, entry);
if (r < ARCHIVE_OK) {
std::cerr << archive_error_string(ext) << std::endl;
}
else if (archive_entry_size(entry) > 0) {
r = copy_data(a, ext);
handle_errors(ext, r, "Encountered error while copy data.");
}
r = archive_write_finish_entry(ext);
handle_errors(ext, r, "Encountered error while finishing entry.");
}
}
int main(int, char**) {
try {
std::string tmp_file_name = "file.tar.gz";
extract(tmp_file_name);
return 0;
} catch (std::exception & e) {
std::cerr << e.what() << std::endl;
return 1;
}
}
I guess I could use https://stackoverflow.com/a/2324777/781743 and chdir to non-unc path first (if it exists), that should work? But is there a way to make libarchive support UNC paths directly?
This worked for me
[verbose initial value is 0 and do_extract is 1 and flag set is ARCHIVE_EXTRACT_TIME]
struct archive *a;
struct archive *ext;
struct archive_entry *entry;
int r;
a = archive_read_new();
ext = archive_write_disk_new();
archive_write_disk_set_options(ext, flags);
/*
* Note: archive_write_disk_set_standard_lookup() is useful
* here, but it requires library routines that can add 500k or
* more to a static executable.
*/
archive_read_support_format_tar(a);
archive_read_support_filter_gzip(a);
/*
* On my system, enabling other archive formats adds 20k-30k
* each. Enabling gzip decompression adds about 20k.
* Enabling bzip2 is more expensive because the libbz2 library
* isn't very well factored.
*/
if (filename != NULL && strcmp(filename, "-") == 0)
filename = NULL;
if ((r = archive_read_open_filename(a, filename, 10240)))
fail("archive_read_open_filename()",
archive_error_string(a), r);
for (;;) {
r = archive_read_next_header(a, &entry);
if (r == ARCHIVE_EOF)
break;
if (r != ARCHIVE_OK)
fail("archive_read_next_header()",
archive_error_string(a), 1);
if (verbose && do_extract)
msg("x ");
if (verbose || !do_extract)
msg(archive_entry_pathname(entry));
if (do_extract) {
r = archive_write_header(ext, entry);
if (r != ARCHIVE_OK)
warn("archive_write_header()",
archive_error_string(ext));
else {
copy_data(a, ext);
r = archive_write_finish_entry(ext);
if (r != ARCHIVE_OK)
fail("archive_write_finish_entry()",
archive_error_string(ext), 1);
}
}
if (verbose || !do_extract)
msg("\n");
}
archive_read_close(a);
archive_read_free(a);
archive_write_close(ext);
archive_write_free(ext);
exit(0);

Unzipping with libzip loses file metadata

I have a zip file created with another application (written in Java) that compresses files using deflate method into one zip file, I verified that information like "last modified" wasn't modified to the current date, and when unzipping with Ubuntu's default archive manager it stays intact.
However, using libzip to decompress loses that data. Is there any way to avoid that behavior, or another library that guarantees metadata persistence?
Decompression code:
void decompress_zip(const std::string& zip, const std::string& out_dir, std::function<void(const std::string&)> fileListener) {
std::string finput = zip;
std::string foutput = out_dir;
if(!boost::filesystem::create_directories(foutput) && !fileExists(foutput))
throw "Failed to create directory for unzipping";
foutput += "/tmp.zip";
if (rename(finput.c_str(), foutput.c_str()))
throw "Failed to move zip to new dir";
finput = foutput;
struct zip *za;
struct zip_file *zf;
struct zip_stat sb;
char buf[100];
int err;
int i, len;
int fd;
long long sum;
if ((za = zip_open(finput.c_str(), 0, &err)) == NULL) {
zip_error_to_str(buf, sizeof(buf), err, errno);
throw "can't open zip! (" + finput + ")";
}
for (i = 0; i < zip_get_num_entries(za, 0); i++) {
if (zip_stat_index(za, i, 0, &sb) == 0) {
len = strlen(sb.name);
if (sb.name[len - 1] == '/') {
safe_create_dir(sb.name);
} else {
zf = zip_fopen_index(za, i, 0);
if (!zf) {
throw "failed to open file in zip! Probably corrupted!!!";
}
std::string cFile = out_dir + "/" + std::string(sb.name);
fd = open(cFile.c_str(), O_RDWR | O_TRUNC | O_CREAT, 0644);
if (fd < 0) {
throw "failed to create output file!";
}
sum = 0;
while (sum != sb.size) {
len = zip_fread(zf, buf, 100);
if (len < 0) {
throw "failed to read file in zip!";
}
write(fd, buf, len);
sum += len;
}
close(fd);
zip_fclose(zf);
fileListener(cFile);
}
}
}
if (zip_close(za) == -1) {
throw "Failed to close zip archive! " + finput;
}
if ( std::remove(foutput.c_str()) )
throw "Failed to remove temporary zip file! " + foutput;
}
I think libzip only stores the data, not metadata. It's your responsibility to store metadata separately if you require it.
In other words, it's a feature of the archive manager application, not libzip itself.

Loading a .WAV file for OpenAL

I am trying to load .WAV files to be played with OpenAL. I am following an example I found on the internet, but it is acting strangely. Here is the code:
struct RIFF_Header {
char chunkID[4];
long chunkSize;//size not including chunkSize or chunkID
char format[4];
};
/*
* Struct to hold fmt subchunk data for WAVE files.
*/
struct WAVE_Format {
char subChunkID[4];
long subChunkSize;
short audioFormat;
short numChannels;
long sampleRate;
long byteRate;
short blockAlign;
short bitsPerSample;
};
/*
* Struct to hold the data of the wave file
*/
struct WAVE_Data {
char subChunkID[4]; //should contain the word data
long subChunk2Size; //Stores the size of the data block
};
bool loadWavFile(std::string filename, ALuint* buffer,
ALsizei* size, ALsizei* frequency,
ALenum* format) {
//Local Declarations
FILE* soundFile = NULL;
WAVE_Format wave_format;
RIFF_Header riff_header;
WAVE_Data wave_data;
unsigned char* data;
*size = wave_data.subChunk2Size;
*frequency = wave_format.sampleRate;
if (wave_format.numChannels == 1) {
if (wave_format.bitsPerSample == 8 )
*format = AL_FORMAT_MONO8;
else if (wave_format.bitsPerSample == 16)
*format = AL_FORMAT_MONO16;
} else if (wave_format.numChannels == 2) {
if (wave_format.bitsPerSample == 8 )
*format = AL_FORMAT_STEREO8;
else if (wave_format.bitsPerSample == 16)
*format = AL_FORMAT_STEREO16;
}
try {
soundFile = fopen(filename.c_str(), "rb");
if (!soundFile)
throw (filename);
// Read in the first chunk into the struct
fread(&riff_header, sizeof(RIFF_Header), 1, soundFile);
//check for RIFF and WAVE tag in memeory
if ((riff_header.chunkID[0] != 'R' ||
riff_header.chunkID[1] != 'I' ||
riff_header.chunkID[2] != 'F' ||
riff_header.chunkID[3] != 'F') ||
(riff_header.format[0] != 'W' ||
riff_header.format[1] != 'A' ||
riff_header.format[2] != 'V' ||
riff_header.format[3] != 'E'))
throw ("Invalid RIFF or WAVE Header");
//Read in the 2nd chunk for the wave info
fread(&wave_format, sizeof(WAVE_Format), 1, soundFile);
//check for fmt tag in memory
if (wave_format.subChunkID[0] != 'f' ||
wave_format.subChunkID[1] != 'm' ||
wave_format.subChunkID[2] != 't' ||
wave_format.subChunkID[3] != ' ')
throw ("Invalid Wave Format");
//check for extra parameters;
if (wave_format.subChunkSize > 16)
fseek(soundFile, sizeof(short), SEEK_CUR);
//Read in the the last byte of data before the sound file
fread(&wave_data, sizeof(WAVE_Data), 1, soundFile);
//check for data tag in memory
if (wave_data.subChunkID[0] != 'd' ||
wave_data.subChunkID[1] != 'a' ||
wave_data.subChunkID[2] != 't' ||
wave_data.subChunkID[3] != 'a')
throw ("Invalid data header");
//Allocate memory for data
data = new unsigned char[wave_data.subChunk2Size];
// Read in the sound data into the soundData variable
if (!fread(data, wave_data.subChunk2Size, 1, soundFile))
throw ("error loading WAVE data into struct!");
//Now we set the variables that we passed in with the
//data from the structs
*size = wave_data.subChunk2Size;
*frequency = wave_format.sampleRate;
//The format is worked out by looking at the number of
//channels and the bits per sample.
if (wave_format.numChannels == 1) {
if (wave_format.bitsPerSample == 8 )
*format = AL_FORMAT_MONO8;
else if (wave_format.bitsPerSample == 16)
*format = AL_FORMAT_MONO16;
} else if (wave_format.numChannels == 2) {
if (wave_format.bitsPerSample == 8 )
*format = AL_FORMAT_STEREO8;
else if (wave_format.bitsPerSample == 16)
*format = AL_FORMAT_STEREO16;
}
//create our openAL buffer and check for success
alGenBuffers(2, buffer);
if(alGetError() != AL_NO_ERROR) {
std::cerr << alGetError() << std::endl;
}
//now we put our data into the openAL buffer and
//check for success
alBufferData(*buffer, *format, (void*)data,
*size, *frequency);
if(alGetError() != AL_NO_ERROR) {
std::cerr << alGetError() << std::endl;
}
//clean up and return true if successful
fclose(soundFile);
delete data;
return true;
} catch(const char* error) {
//our catch statement for if we throw a string
std::cerr << error << " : trying to load "
<< filename << std::endl;
//clean up memory if wave loading fails
if (soundFile != NULL)
fclose(soundFile);
//return false to indicate the failure to load wave
delete data;
return false;
}
}
int main() {
ALuint buffer, source;
ALint state;
ALsizei size;
ALsizei frequency;
ALenum format;
ALCcontext *context;
ALCdevice *device;
device = alcOpenDevice(nullptr);
if (device == NULL)
{
cerr << "Error finding default Audio Output Device" << endl;
}
context = alcCreateContext(device,NULL);
alcMakeContextCurrent(context);
alGetError();
loadWavFile("test.wav", &buffer, &size, &frequency, &format);
alGenSources(1, &source);
alSourcei(source, AL_BUFFER, buffer);
// Play
alSourcePlay(source);
// Wait for the song to complete
do {
alGetSourcei(source, AL_SOURCE_STATE, &state);
} while (state == AL_PLAYING);
// Clean up sources and buffers
alDeleteSources(1, &source);
alDeleteBuffers(1, &buffer);
return 0;
}
I have several WAV files both around 50kb. They load and play just fine. However, when I try to load a whole song (yes, I verified the file was correctly formatted using VLC Media Player and MusicBee) it returns 'Invalid data header : trying to load test.wav', which is caused by this chunk right here:
if (wave_data.subChunkID[0] != 'd' ||
wave_data.subChunkID[1] != 'a' ||
wave_data.subChunkID[2] != 't' ||
wave_data.subChunkID[3] != 'a')
throw ("Invalid data header");
I suspect that it is something size-related that is throwing off that header, as it seems that things only under 1000kb work (haven't totally tested that, its hard to find perfectly sized sound files floating around on my computer and on the internet). That is only a guess though, I really haven't a clue what is going on. Help is appreciated!
I know this question is around for a long time but I found a tutorial and I tested it. It works. Try this:
//http://www.youtube.com/user/thecplusplusguy
//Playing 3D sound with OpenAL, and loading a wav file manually
#include <iostream>
#include <fstream>
#include <cstring>
#include <al.h>
#include <alc.h>
bool isBigEndian()
{
int a = 1;
return !((char*)&a)[0];
}
int convertToInt(char* buffer, int len)
{
int a = 0;
if (!isBigEndian())
for (int i = 0; i<len; i++)
((char*)&a)[i] = buffer[i];
else
for (int i = 0; i<len; i++)
((char*)&a)[3 - i] = buffer[i];
return a;
}
char* loadWAV(const char* fn, int& chan, int& samplerate, int& bps, int& size)
{
char buffer[4];
std::ifstream in(fn, std::ios::binary);
in.read(buffer, 4);
if (strncmp(buffer, "RIFF", 4) != 0)
{
std::cout << "this is not a valid WAVE file" << std::endl;
return NULL;
}
in.read(buffer, 4);
in.read(buffer, 4); //WAVE
in.read(buffer, 4); //fmt
in.read(buffer, 4); //16
in.read(buffer, 2); //1
in.read(buffer, 2);
chan = convertToInt(buffer, 2);
in.read(buffer, 4);
samplerate = convertToInt(buffer, 4);
in.read(buffer, 4);
in.read(buffer, 2);
in.read(buffer, 2);
bps = convertToInt(buffer, 2);
in.read(buffer, 4); //data
in.read(buffer, 4);
size = convertToInt(buffer, 4);
char* data = new char[size];
in.read(data, size);
return data;
}
int main(int argc, char** argv)
{
int channel, sampleRate, bps, size;
char* data = loadWAV("C:/Users/Gizego/Desktop/Youtube/Músicas/TheFatRat+-+Time+Lapse.wav", channel, sampleRate, bps, size);
ALCdevice* device = alcOpenDevice(NULL);
if (device == NULL)
{
std::cout << "cannot open sound card" << std::endl;
return 0;
}
ALCcontext* context = alcCreateContext(device, NULL);
if (context == NULL)
{
std::cout << "cannot open context" << std::endl;
return 0;
}
alcMakeContextCurrent(context);
unsigned int bufferid, format;
alGenBuffers(1, &bufferid);
if (channel == 1)
{
if (bps == 8)
{
format = AL_FORMAT_MONO8;
}
else {
format = AL_FORMAT_MONO16;
}
}
else {
if (bps == 8)
{
format = AL_FORMAT_STEREO8;
}
else {
format = AL_FORMAT_STEREO16;
}
}
alBufferData(bufferid, format, data, size, sampleRate);
unsigned int sourceid;
alGenSources(1, &sourceid);
alSourcei(sourceid, AL_BUFFER, bufferid);
alSourcePlay(sourceid);
while (true)
{
}
alDeleteSources(1, &sourceid);
alDeleteBuffers(1, &bufferid);
alcDestroyContext(context);
alcCloseDevice(device);
delete[] data;
return 0;
}
This question is a bit old, yet unanswered. I happen to have written a WAV file loader, and I've stumbled upon the exact same issue as you did.
As a matter of fact, the "data" part is not guaranteed to exist where you expect it to be. There are other blocks that may be specified, like in my case a "cue" one. This is kind of a really hidden information, and I've spent much time trying to find this: https://sites.google.com/site/musicgapi/technical-documents/wav-file-format#cue
In my case, I then simply check if there is a "cue" part and simply ignore its data. I do not (yet) check for other block types, since I do not have any testing material for it.
In C++ code, this does the job:
// std::ifstream file("...", std::ios_base::in | std::ios_base::binary);
// std::array<uint8_t, 4> bytes {};
file.read(reinterpret_cast<char*>(bytes.data()), 4); // Supposed to be "data"
// A "cue " field can be specified; if so, the given amount of bytes will be ignored
if (bytes[0] == 'c' && bytes[1] == 'u' && bytes[2] == 'e' && bytes[3] == ' ') {
file.read(reinterpret_cast<char*>(bytes.data()), 4); // Cue data size
const uint32_t cueDataSize = fromLittleEndian(bytes);
file.ignore(cueDataSize);
file.read(reinterpret_cast<char*>(bytes.data()), 4); // "data"
}
// A LIST segment may be specified; see the edit below
// "data" is now properly expected
if (bytes[0] != 'd' && bytes[1] != 'a' && bytes[2] != 't' && bytes[3] != 'a')
return false;
Edit: I now have a "LIST" tag too, which is to be expected. This can be ignored the same way as the cue:
if (bytes[0] == 'L' && bytes[1] == 'I' && bytes[2] == 'S' && bytes[3] == 'T') {
file.read(reinterpret_cast<char*>(bytes.data()), 4); // List data size
const uint32_t listDataSize = fromLittleEndian(bytes);
file.ignore(listDataSize);
file.read(reinterpret_cast<char*>(bytes.data()), 4);
}
Yeah, this is one of the simplest solutions but only for RIFF files. Instead:
while (true){};
better to insert:
// Wait for the song to complete
ALint state;
do {
alGetSourcei(sourceid, AL_SOURCE_STATE, &state);
} while (state == AL_PLAYING);

How can I find the size of all files located inside a folder?

Is there any API in c++ for getting the size of a specified folder?
If not, how can I get the total size of a folder including all subfolders and files?
How about letting OS do it for you:
long long int getFolderSize(string path)
{
// command to be executed
std::string cmd("du -sb ");
cmd.append(path);
cmd.append(" | cut -f1 2>&1");
// execute above command and get the output
FILE *stream = popen(cmd.c_str(), "r");
if (stream) {
const int max_size = 256;
char readbuf[max_size];
if (fgets(readbuf, max_size, stream) != NULL) {
return atoll(readbuf);
}
pclose(stream);
}
// return error val
return -1;
}
Actually I don't want to use any third party library. Just want to
implement in pure c++.
If you use MSVC++ you have <filesystem> "as standard C++".
But using boost or MSVC - both are "pure C++".
If you don’t want to use boost, and only the C++ std:: library this answer is somewhat close. As you can see here, there is a Filesystem Library Proposal (Revision 4). Here you can read:
The Boost version of the library has been in widespread use for ten
years. The Dinkumware version of the library, based on N1975
(equivalent to version 2 of the Boost library), ships with Microsoft
Visual C++ 2012.
To illustrate the use, I adapted the answer of #Nayana Adassuriya , with very minor modifications (OK, he forgot to initialize one variable, and I use unsigned long long, and most important was to use: path filePath(complete (dirIte->path(), folderPath)); to restore the complete path before the call to other functions). I have tested and it work well in windows 7.
#include <iostream>
#include <string>
#include <filesystem>
using namespace std;
using namespace std::tr2::sys;
void getFoldersize(string rootFolder,unsigned long long & f_size)
{
path folderPath(rootFolder);
if (exists(folderPath))
{
directory_iterator end_itr;
for (directory_iterator dirIte(rootFolder); dirIte != end_itr; ++dirIte )
{
path filePath(complete (dirIte->path(), folderPath));
try{
if (!is_directory(dirIte->status()) )
{
f_size = f_size + file_size(filePath);
}else
{
getFoldersize(filePath,f_size);
}
}catch(exception& e){ cout << e.what() << endl; }
}
}
}
int main()
{
unsigned long long f_size=0;
getFoldersize("C:\\Silvio",f_size);
cout << f_size << endl;
system("pause");
return 0;
}
You may use boost in this way. You can try to optimize it some deeper.
#include <iostream>
#include <string>
#include <boost/filesystem.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/algorithm/string.hpp>
using namespace std;
namespace bsfs = boost::filesystem;
void getFoldersize(string rootFolder,long & file_size){
boost::replace_all(rootFolder, "\\\\", "\\");
bsfs::path folderPath(rootFolder);
if (bsfs::exists(folderPath)){
bsfs::directory_iterator end_itr;
for (bsfs::directory_iterator dirIte(rootFolder); dirIte != end_itr; ++dirIte )
{
bsfs::path filePath(dirIte->path());
try{
if (!bsfs::is_directory(dirIte->status()) )
{
file_size = file_size + bsfs::file_size(filePath);
}else{
getFoldersize(filePath.string(),file_size);
}
}catch(exception& e){
cout << e.what() << endl;
}
}
}
}
int main(){
long file_size =0;
getFoldersize("C:\\logs",file_size);
cout << file_size << endl;
system("pause");
return 0;
}
Something like this would be better to avoid adding symbolic(soft) links:
std::uintmax_t directorySize(const std::filesystem::path& directory)
{
std::uintmax_t size{ 0 };
for (const auto& entry : std::filesystem::recursive_directory_iterator(directory))
{
if (entry.is_regular_file() && !entry.is_symlink())
{
size += entry.file_size();
}
}
return size;
}
Size of files in a folder
Please have a look at this link
#include <iostream>
#include <windows.h>
#include <string>
using namespace std;
__int64 TransverseDirectory(string path)
{
WIN32_FIND_DATA data;
__int64 size = 0;
string fname = path + "\\*.*";
HANDLE h = FindFirstFile(fname.c_str(),&data);
if(h != INVALID_HANDLE_VALUE)
{
do {
if( (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) )
{
// make sure we skip "." and "..". Have to use strcmp here because
// some file names can start with a dot, so just testing for the
// first dot is not suffient.
if( strcmp(data.cFileName,".") != 0 &&strcmp(data.cFileName,"..") != 0)
{
// We found a sub-directory, so get the files in it too
fname = path + "\\" + data.cFileName;
// recurrsion here!
size += TransverseDirectory(fname);
}
}
else
{
LARGE_INTEGER sz;
// All we want here is the file size. Since file sizes can be larger
// than 2 gig, the size is reported as two DWORD objects. Below we
// combine them to make one 64-bit integer.
sz.LowPart = data.nFileSizeLow;
sz.HighPart = data.nFileSizeHigh;
size += sz.QuadPart;
}
}while( FindNextFile(h,&data) != 0);
FindClose(h);
}
return size;
}
int main(int argc, char* argv[])
{
__int64 size = 0;
string path;
size = TransverseDirectory("c:\\dvlp");
cout << "\n\nDirectory Size = " << size << "\n";
cin.ignore();
return 0;
}
For more detail PLease CLick Here
The file system functions are integral part of each operative system, written mostly in C and assembler, not C++, each C++ library implementation for this are in one way or another a wrapper of this functions. Taking on count the effort and if you will not use your implementation in different OS, maybe is a good idea to use this functions directly and save some overhead and time.
Best regards.
I have my types definition file with:
typedef std::wstring String;
typedef std::vector<String> StringVector;
typedef unsigned long long uint64_t;
and code is:
uint64_t CalculateDirSize(const String &path, StringVector *errVect = NULL, uint64_t size = 0)
{
WIN32_FIND_DATA data;
HANDLE sh = NULL;
sh = FindFirstFile((path + L"\\*").c_str(), &data);
if (sh == INVALID_HANDLE_VALUE )
{
//if we want, store all happened error
if (errVect != NULL)
errVect ->push_back(path);
return size;
}
do
{
// skip current and parent
if (!IsBrowsePath(data.cFileName))
{
// if found object is ...
if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY)
// directory, then search it recursievly
size = CalculateDirSize(path + L"\\" + data.cFileName, NULL, size);
else
// otherwise get object size and add it to directory size
size += (uint64_t) (data.nFileSizeHigh * (MAXDWORD ) + data.nFileSizeLow);
}
} while (FindNextFile(sh, &data)); // do
FindClose(sh);
return size;
}
bool IsBrowsePath(const String& path)
{
return (path == _T(".") || path == _T(".."));
}
This uses UNICODE and returns failed dirs if you want that.
To call use:
StringVector vect;
CalculateDirSize(L"C:\\boost_1_52_0", &vect);
CalculateDirSize(L"C:\\boost_1_52_0");
But never pass size
//use FAT32
#undef UNICODE // to flag window deactive unicode
#include<Windows.h> //to use windows api
#include<iostream>
#include<iomanip>
#include<cstring>
#include<string>
#include<algorithm>
using namespace std;
#pragma pack(1) //tell compiler do'nt do prag
struct BPB
{
BYTE JMP[3];
BYTE OEM[8];
WORD NumberOfBytesPerSector;
BYTE NumberOfSectorsPerCluster;
WORD NumberOfReservedSectors;
BYTE NumberOfFATs;
WORD NumberOfRootEntries16;
WORD LowNumbferOfSectors;
BYTE MediaDescriptor;
WORD NumberOfSectorsPerFAT16;
WORD NumberOfSectorsPerTrack;
WORD NumberOfHeads;
DWORD NumberOfHiddenSectors;
DWORD HighNumberOfSectors;
DWORD NumberOfSectorsPerFAT32;
WORD Flags;
WORD FATVersionNumber;
DWORD RootDirectoryClusterNumber;
WORD FSInfoSector;
WORD BackupSector;
BYTE Reserver[12];
BYTE BiosDrive;
BYTE WindowsNTFlag;
BYTE Signature;
DWORD VolumeSerial;
BYTE VolumeLabel[11];
BYTE SystemID[8];
BYTE CODE[420];
WORD BPBSignature;
};
//-----------------------------------------------------------
struct DirectoryEntry
{
BYTE Name[11];
BYTE Attributes;
BYTE Reserved;
BYTE CreationTimeTenth;
WORD CreationTime;
WORD CreationDate;
WORD LastAccessTime;
WORD HiClusterNumber;
WORD WriteTime;
WORD WriteDate;
WORD LowClusterNumber;
DWORD FileSize; //acual size of file
};
//---------------------------------------------------
void dirFunction(string s){
string path = "\\\\.\\" + s + ":";
HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ|GENERIC_WRITE,
FILE_SHARE_WRITE, 0, OPEN_EXISTING, 0, 0);//open partition
BPB bootSector;//var from bootSector structure
DWORD readBytes = 0;
if (hFile == INVALID_HANDLE_VALUE)
{
cout << "Error " << GetLastError()<<endl;
return;
}
ReadFile(hFile, (BYTE*)&bootSector, sizeof(bootSector), &readBytes, 0);//read partition and load bootSector information inside our structure
LONG t = 0;
ULONG distance = bootSector.NumberOfReservedSectors +
bootSector.NumberOfFATs*bootSector.NumberOfSectorsPerFAT32;//distance from begine until Root Directory or content of partetion
distance *= bootSector.NumberOfBytesPerSector;//convert distance number to bytes value
SetFilePointer(hFile, distance, &t, FILE_BEGIN);//set pointer to root directory begine or begine of data
int clusterSize = bootSector.NumberOfBytesPerSector*bootSector.NumberOfSectorsPerCluster; //cluster size
int NumberOfEntries = clusterSize / sizeof(DirectoryEntry); //number of record inside cluster
DirectoryEntry* root = new DirectoryEntry[NumberOfEntries];//descripe the partetion
ReadFile(hFile, (BYTE*)root, clusterSize, &readBytes, 0);
DWORD clusterNumber;
for (int i = 0; i < NumberOfEntries; i++)
{
if (root[i].Name[0] == 0)//there no entery after this
break;
if (root[i].Name[0] == 0xE5)
continue;
if ((root[i].Attributes & 0xF) == 0xF)
continue;
for (int j = 0; j < 8; j++)
cout << root[i].Name[j];
if((root[i].Attributes & 0x10) != 0x10){
cout<<".";
for (int j = 8; j < 11; j++)
cout << root[i].Name[j];
}
if ((root[i].Attributes & 0x10) == 0x10){
cout << "\t<Folder>" ;
}else{
cout<<"\t<File>" ;
}
clusterNumber = root[i].HiClusterNumber << 16;
clusterNumber |= root[i].LowClusterNumber;
cout <<"\t"<<root[i].FileSize<<"bytes" << "\t" << clusterNumber<<"cluster" << endl;
}
CloseHandle(hFile);
}
//---------------------------------------------------------------
string convertLowerToUpper(string f){
string temp = "";
for (int i = 0; i < f.size(); i++){
temp += toupper(f[i]);
}
return temp;
}
//---------------------------------------------------------------
string getFileName(BYTE filename[11]){
string name = "";
for (int i = 0; i < 8; i++){
if (filename[i] != ' ')
name += filename[i];
}
return (name);
}
//------------------------------------------------------------------
int findEntryNumber(DirectoryEntry* root, int NumberOfEntries, string required){
string n;
int j = 0;
for (int i = 0; i < NumberOfEntries; i++){
if (strcmp((getFileName(root[i].Name).c_str()), convertLowerToUpper(required).c_str()) == 0){
return i;
}
}
return -1;
}
//---------------------------------------------------------------
void typeFunction(string fileName, string s){
string path = "\\\\.\\" + s + ":";
HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ|GENERIC_WRITE,
FILE_SHARE_WRITE, 0, OPEN_EXISTING, 0, 0);//open partition
BPB bootSector;//var from bootSector structure
DWORD readBytes = 0;
if (hFile == INVALID_HANDLE_VALUE)
{
cout << "Error " << GetLastError()<<endl;
return;
}
ReadFile(hFile, (BYTE*)&bootSector, sizeof(bootSector), &readBytes, 0);//read partition and load bootSector information inside our structure
LONG t = 0;
ULONG distance = bootSector.NumberOfReservedSectors +
bootSector.NumberOfFATs*bootSector.NumberOfSectorsPerFAT32;//distance from begine until Root Directory or content of partetion
distance *= bootSector.NumberOfBytesPerSector;//convert distance number to bytes value
SetFilePointer(hFile, distance, &t, FILE_BEGIN);//set pointer to root directory begine or begine of data
int clusterSize = bootSector.NumberOfBytesPerSector*bootSector.NumberOfSectorsPerCluster; //cluster size
int NumberOfEntries = clusterSize / sizeof(DirectoryEntry); //number of record inside cluster
DirectoryEntry* root = new DirectoryEntry[NumberOfEntries];//descripe the partetion
ReadFile(hFile, (BYTE*)root, clusterSize, &readBytes, 0);
DWORD clusterNumber;
int index = findEntryNumber(root, NumberOfEntries, fileName);
if (index == -1){
cout << "File is not found" << endl;
return;
}
if (((root[index].Attributes & 0x10) == 0x10) ){
cout << "Is not file name" << endl;
return;
}
clusterNumber = root[index].HiClusterNumber << 16;
clusterNumber |= root[index].LowClusterNumber;
ULONG temp = (clusterNumber - 2) * clusterSize;
distance += temp;
t = 0;
SetFilePointer(hFile, distance, &t, FILE_BEGIN);
BYTE* buffer = new BYTE[clusterSize];
readBytes = 0;
ReadFile(hFile, (BYTE*)buffer, clusterSize, &readBytes, 0);
for (int i = 0; i < root[index].FileSize; i++){
cout << buffer[i];
}
cout << endl;
CloseHandle(hFile);
}
//----------------------------------------------------------------------
void delFunction(string filename, string s){
string path = "\\\\.\\" + s + ":";
HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ|GENERIC_WRITE,
FILE_SHARE_WRITE, 0, OPEN_EXISTING, 0, 0);//open partition
BPB bootSector;//var from bootSector structure
DWORD readBytes = 0;
if (hFile == INVALID_HANDLE_VALUE)
{
cout << "Error " << GetLastError()<<endl;
return;
}
ReadFile(hFile, (BYTE*)&bootSector, sizeof(bootSector), &readBytes, 0);//read partition and load bootSector information inside our structure
LONG t = 0;
ULONG distance = bootSector.NumberOfReservedSectors +
bootSector.NumberOfFATs*bootSector.NumberOfSectorsPerFAT32;//distance from begine until Root Directory or content of partetion
distance *= bootSector.NumberOfBytesPerSector;//convert distance number to bytes value
SetFilePointer(hFile, distance, &t, FILE_BEGIN);//set pointer to root directory begine or begine of data
int clusterSize = bootSector.NumberOfBytesPerSector*bootSector.NumberOfSectorsPerCluster; //cluster size
int NumberOfEntries = clusterSize / sizeof(DirectoryEntry); //number of record inside cluster
DirectoryEntry* root = new DirectoryEntry[NumberOfEntries];//descripe the partetion
ReadFile(hFile, (BYTE*)root, clusterSize, &readBytes, 0);
DWORD clusterNumber;
readBytes = 0;
t = 0;
int index = findEntryNumber(root, NumberOfEntries, filename);
if (index == -1){
cout << "FIle is not found" << endl;
return;
}
if ((root[index].Attributes & 0x10) == 0x10){
cout << "Is not file name" << endl;
return;
}
//delete file
root[index].Name[0] = 0xE5;
SetFilePointer(hFile, distance, &t, FILE_BEGIN);
WriteFile(hFile, (BYTE*)root, clusterSize, &readBytes, 0);
cout<<filename<<" is deleted\n";
CloseHandle(hFile);
}
//----------------------------------------------------------------------
string removeExtention(string s){
string t = "";
for (int i = 0; i < s.size(); i++){
if (s[i] == '.')break;
t += s[i];
}
return t;
}
//-------------------------------------------------------------------
void main()
{
string swich_value;
string directory;
string file_name;
//dirFunction("G");
cout<<"plz, Enter single Partition character ------> example E or G\n\n";
cin>>directory;
string path = "\\\\.\\" + directory + ":";
cout<<"current directory is "<<path<<endl;
cout<<"Enter Options: \n1- dir \n2- type file_name.extention \n3- del file_name.extention\n\n";
again:
cin>>swich_value;
if(swich_value.at(1)!='i')
cin>>file_name;
string answer;
switch(swich_value.at(1)){
case 'i':
dirFunction(directory);
cout<<"\nare you want to do another process: y or n?";
cin>>answer;
if (answer.at(0)=='y')
goto again;
break;
case 'y':
typeFunction(removeExtention(file_name), directory);
cout<<"\nare you want to do another process: y or n?";
cin>>answer;
if (answer.at(0)=='y')
goto again;
break;
case 'e':
delFunction(removeExtention(file_name), directory);
cout<<"\nare you want to do another process: y or n?";
cin>>answer;
if (answer.at(0)=='y')
goto again;
break;
}
}
You can use "boost::filesystem"
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
unsigned long long int get_directory_size(const fs::path& directory){
if (!fs::exists(directory)) return 0;
if (fs::is_directory(directory)){
unsigned long long int ret_size = 0;
fs::directory_iterator m_dir_itr(directory);
for (m_dir_itr = fs::begin(m_dir_itr); m_dir_itr != fs::end(m_dir_itr); ++m_dir_itr){
fs::directory_entry m_dir_entry = *m_dir_itr;
if (fs::is_regular_file(m_dir_entry.path())){
ret_size += fs::file_size(m_dir_entry.path());
}else if (fs::is_directory(m_dir_entry.path())){
ret_size += get_directory_size(m_dir_entry.path());
}
}
return ret_size;
} else if (fs::is_regular_file(directory)){
return fs::file_size(directory);
}
return 0;
}
#include <stdio.h>
int main(int /*argc*/, char** /*argv*/) {
// Assuming 'C:/Folder' be any directory then its size can be found using
auto folder_size = get_directory_size("C:/Folder");
printf("Size of 'C:/Folder' is %d\n",folder_size);
return 0;
}
With the introduction of std::filesystem, you no more have to use any system APIs or any external libraries.
#include <filesystem>
namespace n_fs = ::std::filesystem;
double archive::getFolderSize(std::string path)
{
double r = 0.0;
try{
if (!n_fs::is_directory(path))
{
r += (double)n_fs::file_size(path);
}
else
{
for(auto entry: n_fs::directory_iterator(path))
getFolderSize(entry.path().string());
}
}
catch(exception& e)
{
std::cout << e.what() << std::endl();
}
return r;
}
int main(){
double folderSize = getFolderSize("~/dev/"); //Replace with your path
std::cout << "Size of Folder: " << folderSize;
}
Try using GetFileSizeEx function. Following is some sample code for this. You need to get the size from the LARGE_INTEGER union though.
#include <iostream>
#include <windows.h>
#include <stdio.h>
#include <io.h>
using namespace std;
int main()
{
FILE *fp;
fp = fopen("C:\test.txt","r");
int fileNo = _fileno(fp);
HANDLE cLibHandle = (HANDLE)_get_osfhandle(fileNo);
long int fileSize = 0;
LARGE_INTEGER fileSizeL;
GetFileSizeEx(cLibHandle, &fileSizeL);
return 0;
}
5 years and not a simple solution with standard C++, that's why I would like to contribute my solution to this question:
uint64_t GetDirSize(const std::string &path)
{
uint64_t size = 0;
for (const auto & entry : std::experimental::filesystem::directory_iterator(path))
{
if(entry.status().type() == std::experimental::filesystem::file_type::regular)
size += std::experimental::filesystem::file_size(entry.path());
if (entry.status().type() == std::experimental::filesystem::file_type::directory)
size += GetDirSize(entry.path().generic_string());
}
return size;
}
Use it for example by calling
GetDirSize("C:\\dir_name")
if you're using Windows.
Calculating a folder size in bytes on Windows.
size_t GetFolderSizeInBytes(std::wstring path)
{
size_t result = 0;
WIN32_FIND_DATA findData;
HANDLE hFileHandle;
std::wstring sourcePath(path);
if (GetFileAttributes(sourcePath.c_str()) & FILE_ATTRIBUTE_DIRECTORY)
sourcePath.push_back(L'\\');
std::wstring fileName(sourcePath);
fileName.append(L"*");
hFileHandle = FindFirstFileEx(
fileName.data(),
FindExInfoStandard,
&findData,
FindExSearchNameMatch,
NULL,
FIND_FIRST_EX_ON_DISK_ENTRIES_ONLY);
if (hFileHandle != INVALID_HANDLE_VALUE)
{
do
{
if (!wcscmp(findData.cFileName, L".") || !wcscmp(findData.cFileName, L".."))
continue;
if ((findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
{
// Folder
std::wstring newPath = path + L"\\" + findData.cFileName;
result += GetFolderSizeInBytes(newPath);
}
else
{
// File
unsigned long high = findData.nFileSizeHigh;
unsigned long low = findData.nFileSizeLow;
size_t size = size_t(high * (MAXWORD + 1)) + low;
result += size;
}
} while (FindNextFile(hFileHandle, &findData));
FindClose(hFileHandle);
}
return result;
}

Generate SHA hash in C++ using OpenSSL library

How can I generate SHA1 or SHA2 hashes using the OpenSSL libarary?
I searched google and could not find any function or example code.
From the command line, it's simply:
printf "compute sha1" | openssl sha1
You can invoke the library like this:
#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>
int main()
{
unsigned char ibuf[] = "compute sha1";
unsigned char obuf[20];
SHA1(ibuf, strlen(ibuf), obuf);
int i;
for (i = 0; i < 20; i++) {
printf("%02x ", obuf[i]);
}
printf("\n");
return 0;
}
OpenSSL has a horrible documentation with no code examples, but here you are:
#include <openssl/sha.h>
bool simpleSHA256(void* input, unsigned long length, unsigned char* md)
{
SHA256_CTX context;
if(!SHA256_Init(&context))
return false;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return false;
if(!SHA256_Final(md, &context))
return false;
return true;
}
Usage:
unsigned char md[SHA256_DIGEST_LENGTH]; // 32 bytes
if(!simpleSHA256(<data buffer>, <data length>, md))
{
// handle error
}
Afterwards, md will contain the binary SHA-256 message digest. Similar code can be used for the other SHA family members, just replace "256" in the code.
If you have larger data, you of course should feed data chunks as they arrive (multiple SHA256_Update calls).
Adaptation of #AndiDog version for big file:
static const int K_READ_BUF_SIZE{ 1024 * 16 };
std::optional<std::string> CalcSha256(std::string filename)
{
// Initialize openssl
SHA256_CTX context;
if(!SHA256_Init(&context))
{
return std::nullopt;
}
// Read file and update calculated SHA
char buf[K_READ_BUF_SIZE];
std::ifstream file(filename, std::ifstream::binary);
while (file.good())
{
file.read(buf, sizeof(buf));
if(!SHA256_Update(&context, buf, file.gcount()))
{
return std::nullopt;
}
}
// Get Final SHA
unsigned char result[SHA256_DIGEST_LENGTH];
if(!SHA256_Final(result, &context))
{
return std::nullopt;
}
// Transform byte-array to string
std::stringstream shastr;
shastr << std::hex << std::setfill('0');
for (const auto &byte: result)
{
shastr << std::setw(2) << (int)byte;
}
return shastr.str();
}
correct syntax at command line should be
echo -n "compute sha1" | openssl sha1
otherwise you'll hash the trailing newline character as well.
Here is OpenSSL example of calculating sha-1 digest using BIO:
#include <openssl/bio.h>
#include <openssl/evp.h>
std::string sha1(const std::string &input)
{
BIO * p_bio_md = nullptr;
BIO * p_bio_mem = nullptr;
try
{
// make chain: p_bio_md <-> p_bio_mem
p_bio_md = BIO_new(BIO_f_md());
if (!p_bio_md) throw std::bad_alloc();
BIO_set_md(p_bio_md, EVP_sha1());
p_bio_mem = BIO_new_mem_buf((void*)input.c_str(), input.length());
if (!p_bio_mem) throw std::bad_alloc();
BIO_push(p_bio_md, p_bio_mem);
// read through p_bio_md
// read sequence: buf <<-- p_bio_md <<-- p_bio_mem
std::vector<char> buf(input.size());
for (;;)
{
auto nread = BIO_read(p_bio_md, buf.data(), buf.size());
if (nread < 0) { throw std::runtime_error("BIO_read failed"); }
if (nread == 0) { break; } // eof
}
// get result
char md_buf[EVP_MAX_MD_SIZE];
auto md_len = BIO_gets(p_bio_md, md_buf, sizeof(md_buf));
if (md_len <= 0) { throw std::runtime_error("BIO_gets failed"); }
std::string result(md_buf, md_len);
// clean
BIO_free_all(p_bio_md);
return result;
}
catch (...)
{
if (p_bio_md) { BIO_free_all(p_bio_md); }
throw;
}
}
Though it's longer than just calling SHA1 function from OpenSSL, but it's more universal and can be reworked for using with file streams (thus processing data of any length).
C version of #Nayfe code, generating SHA1 hash from file:
#include <stdio.h>
#include <openssl/sha.h>
static const int K_READ_BUF_SIZE = { 1024 * 16 };
unsigned char* calculateSHA1(char *filename)
{
if (!filename) {
return NULL;
}
FILE *fp = fopen(filename, "rb");
if (fp == NULL) {
return NULL;
}
unsigned char* sha1_digest = malloc(sizeof(char)*SHA_DIGEST_LENGTH);
SHA_CTX context;
if(!SHA1_Init(&context))
return NULL;
unsigned char buf[K_READ_BUF_SIZE];
while (!feof(fp))
{
size_t total_read = fread(buf, 1, sizeof(buf), fp);
if(!SHA1_Update(&context, buf, total_read))
{
return NULL;
}
}
fclose(fp);
if(!SHA1_Final(sha1_digest, &context))
return NULL;
return sha1_digest;
}
It can be used as follows:
unsigned char *sha1digest = calculateSHA1("/tmp/file1");
The res variable contains the sha1 hash.
You can print it on the screen using the following for-loop:
char *sha1hash = (char *)malloc(sizeof(char) * 41);
sha1hash[40] = '\0';
int i;
for (i = 0; i < SHA_DIGEST_LENGTH; i++)
{
sprintf(&sha1hash[i*2], "%02x", sha1digest[i]);
}
printf("SHA1 HASH: %s\n", sha1hash);