I have a zip file created by another application (written in Java) that compresses files into a single archive using the deflate method. I verified that metadata such as "last modified" is stored in the archive and was not reset to the current date: when I unzip it with Ubuntu's default archive manager, the timestamps stay intact.
However, using libzip to decompress loses that data. Is there any way to avoid that behavior, or another library that guarantees metadata persistence?
Decompression code:
void decompress_zip(const std::string& zip, const std::string& out_dir, std::function<void(const std::string&)> fileListener) {
std::string finput = zip;
std::string foutput = out_dir;
if(!boost::filesystem::create_directories(foutput) && !fileExists(foutput))
throw "Failed to create directory for unzipping";
foutput += "/tmp.zip";
if (rename(finput.c_str(), foutput.c_str()))
throw "Failed to move zip to new dir";
finput = foutput;
struct zip *za;
struct zip_file *zf;
struct zip_stat sb;
char buf[100];
int err;
int i, len;
int fd;
long long sum;
if ((za = zip_open(finput.c_str(), 0, &err)) == NULL) {
zip_error_to_str(buf, sizeof(buf), err, errno);
throw "can't open zip! (" + finput + ")";
}
for (i = 0; i < zip_get_num_entries(za, 0); i++) {
if (zip_stat_index(za, i, 0, &sb) == 0) {
len = strlen(sb.name);
if (sb.name[len - 1] == '/') {
safe_create_dir(sb.name);
} else {
zf = zip_fopen_index(za, i, 0);
if (!zf) {
throw "failed to open file in zip! Probably corrupted!!!";
}
std::string cFile = out_dir + "/" + std::string(sb.name);
fd = open(cFile.c_str(), O_RDWR | O_TRUNC | O_CREAT, 0644);
if (fd < 0) {
throw "failed to create output file!";
}
sum = 0;
while (sum != sb.size) {
len = zip_fread(zf, buf, 100);
if (len < 0) {
throw "failed to read file in zip!";
}
write(fd, buf, len);
sum += len;
}
close(fd);
zip_fclose(zf);
fileListener(cFile);
}
}
}
if (zip_close(za) == -1) {
throw "Failed to close zip archive! " + finput;
}
if ( std::remove(foutput.c_str()) )
throw "Failed to remove temporary zip file! " + foutput;
}
I think libzip only stores the data, not metadata. It's your responsibility to store metadata separately if you require it.
In other words, it's a feature of the archive manager application, not libzip itself.
Related
When trying to unzip an image file using libzip, I have run across an issue: when I hit a null byte in the image data, libzip's zip_fread appears to treat it as EOF and stops reading the file, resulting in a corrupted image. What is the best way to handle null bytes when reading an image so that the full image is extracted?
To clarify, text only files extract perfectly fine.
Below is the code used:
int FileHandler::ExtractFiles(std::string& path, std::string& file, bool is_test)
{
int err = 0;
std::string fullPath = path + "\\" + file;
zip* za = zip_open(fullPath.c_str(), 0, &err);
struct zip_stat st;
zip_stat_init(&st);
int number_of_entries = zip_get_num_entries(za, NULL);
for (zip_uint64_t i = 0; i < number_of_entries; ++i)
{
const char* name = zip_get_name(za, i, NULL);
std::string s_name = name;
size_t pos;
std::string backsl = "\\";
while ((pos = s_name.find(47)) != std::string::npos)
{
s_name.replace(pos, 1, backsl);
}
std::string fullFilePath = path + "\\" + s_name;
if(!is_test)
printf("Extracting: %s...\n", s_name.c_str());
std::string fullDir;
size_t found;
found = fullFilePath.find_last_of("\\");
if (found != std::string::npos)
{
fullDir = fullFilePath.substr(0, found);
}
struct zip_stat ist;
zip_stat_init(&ist);
zip_stat(za, name, 0, &ist);
char* contents = new char[ist.size];
zip_file* f = zip_fopen(za, name, 0);
// zip_fread to contents buffer
zip_fread(f, contents, ist.size);
if (CreateDirectory(fullDir.c_str(), NULL) || ERROR_ALREADY_EXISTS == GetLastError())
{
// writing buffer to file
if (!std::ofstream(fullFilePath).write(contents, ist.size))
{
return EXIT_FAILURE;
}
}
zip_fclose(f);
}
zip_close(za);
return EXIT_SUCCESS;
}
gerum was able to point me in the right direction. For anyone that is wondering or has the same issue, I had to open the ofstream in binary mode and that resolved the issue.
Original code:
// writing buffer to file
// The stream is opened without std::ios::binary here, i.e. in text mode.
if (!std::ofstream(fullFilePath).write(contents, ist.size))
{
return EXIT_FAILURE;
}
Solution:
// writing buffer to file
// std::ios::binary opens the stream in binary mode, so the bytes in
// `contents` are written without text-mode newline translation.
if (!std::ofstream(fullFilePath, std::ios::binary).write(contents, ist.size))
{
return EXIT_FAILURE;
}
Update
The example below does work. I had misread the intention of the flag ARCHIVE_EXTRACT_TIME and was expecting to see the newly created file. Instead the file was being extracted correctly but with its original creation date. It was a long week! :-)
I have the following data stored in a Tar archive:
opt/
fw.bin
ad/
bin/
installer.sh
I have the following code, running on Ubuntu (g++ version 5.2.1), to extract the contents of the archive in memory using libarchive, largely taken from the examples:
int copy_data(struct archive *ar, struct archive *aw)
{
int r;
const void *buff;
size_t size;
#if ARCHIVE_VERSION_NUMBER >= 3000000
int64_t offset;
#else
off_t offset;
#endif
for (;;) {
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF)
return (ARCHIVE_OK);
if (r != ARCHIVE_OK)
return (r);
r = archive_write_data(aw, buff, size);
if (size != r)
{
std::cerr << "Failed to write " << (size - r) << " Bytes\n";
}
}
}
}
int main(int argc, char** argv)
{
std::ifstream input("archive.tar.gz", std::ios::binary | std::ios::ate );
auto const size = input.tellg();
char* buffer = new char[size];
input.seekg(0);
input.read(buffer, size);
input.close();
struct archive *a;
struct archive *ext;
struct archive_entry *entry;
int retCode;
int flags = ARCHIVE_EXTRACT_TIME;
a = archive_read_new();
archive_read_support_format_all(a);
ext = archive_write_disk_new();
archive_write_disk_set_options(ext, flags);
archive_write_disk_set_standard_lookup(ext);
if ((retCode = archive_read_open_memory(a, buffer, size)))
for (;;) {
retCode = archive_read_next_header(a, &entry);
if (retCode == ARCHIVE_EOF)
{
break;
}
if (retCode != ARCHIVE_OK)
{
return -1;
}
retCode = archive_write_header(ext, entry);
if (retCode != ARCHIVE_OK)
{
return -1;
}
else
{
if (archive_entry_size(entry) > 0)
{
copy_data(a, ext);
}
retCode = archive_write_finish_entry(ext);
if (retCode != ARCHIVE_OK)
{
return -1;
}
}
}
archive_read_close(a);
archive_read_free(a);
archive_write_close(ext);
archive_write_free(ext);
return 0;
}
It unpacks the directories correctly as above and writes the installer.sh file. However, the binary file (fw.bin) is missing.
Are there specific settings I am missing in order to write binary files to disk? Am I using the wrong API calls?
I am a beginner programmer trying to inflate text streams from PDFs. I have adopted and slightly altered some open source code which uses zlib, and generally it works very well. However, I have been testing it on some different PDFs lately, and some of the inflated streams come back blank. Could anybody advise me as to why?
I have come across this question below which seems to address the same problem but does not really give a definitive answer
zLib inflate has empty result in some cases
#include <iostream>
#include <fstream>
#include <string>
#include "zlib.h"
int main()
{
//Discard existing output:
//Open the PDF source file:
std::ifstream filei("C:\\Users\\dpbowe\\Desktop\\PIDSearch\\P&ID.PDF", std::ios::in|std::ios::binary|std::ios::ate);
if (!filei) std::cout << "Error Opening Input File" << std::endl;
//decoded output
std::ofstream fileo;
fileo.open("C:\\Users\\dpbowe\\Desktop\\Decoded.txt", std::ios::binary | std::ofstream::out);
if (!fileother) std::cout << "Error opening output file" << std::endl;
if (filei && fileo)
{
//Get the file length:
long filelen = filei.tellg(); //fseek==0 if ok
filei.seekg(0, std::ios::beg);
//Read the entire file into memory (!):
char* buffer = new char [filelen];
if (buffer == NULL) {fputs("Memory error", stderr); exit(EXIT_FAILURE);}
filei.read(buffer,filelen);
if (buffer == '\0') {fputs("Reading error", stderr); exit(EXIT_FAILURE);}
bool morestreams = true;
//Now search the buffer repeated for streams of data
while (morestreams)
{
//Search for stream, endstream. Should check the filter of the object to make sure it if FlateDecode, but skip that for now!
size_t streamstart = FindStringInBuffer (buffer, "stream", filelen); //This is my own search function
size_t streamend = FindStringInBuffer (buffer, "endstream", filelen); //This is my own search function
if (streamstart>0 && streamend>streamstart)
{
//Skip to beginning and end of the data stream:
streamstart += 6;
if (buffer[streamstart]==0x0d && buffer[streamstart+1]==0x0a) streamstart+=2;
else if (buffer[streamstart]==0x0a) streamstart++;
if (buffer[streamend-2]==0x0d && buffer[streamend-1]==0x0a) streamend-=2;
else if (buffer[streamend-1]==0x0a) streamend--;
//Assume output will fit into 10 times input buffer:
size_t outsize = (streamend - streamstart)*10;
char* output = new char [outsize]; ZeroMemory(output, outsize);
//Now use zlib to inflate:
z_stream zstrm; ZeroMemory(&zstrm, sizeof(zstrm));
zstrm.avail_in = streamend - streamstart + 1;
zstrm.avail_out = outsize;
zstrm.next_in = (Bytef*)(buffer + streamstart);
zstrm.next_out = (Bytef*)output;
int rsti = inflateInit(&zstrm);
if (rsti == Z_OK)
{
int rst2 = inflate (&zstrm, Z_FINISH);
if (rst2 >= 0)
{
size_t totout = zstrm.total_out;
//Write inflated output to file "Decoded.txt"
fileother<<output;
fileother<<"\r\nStream End\r\n\r\n";
}
else std::cout<<"output uncompressed stream is blank"<<std::endl;
}
delete[] output; output=0;
buffer+= streamend + 7;
filelen = filelen - (streamend+7);
}
else
{
morestreams = false;
std::cout<<"End of File"<<std::endl;
}
}
filei.close();
}
else
{
std::cout << "File Could Not Be Accessed\n";
}
if (fileo) fileo.close();
}
I'm currently using libzip in a C++11 program to extract the contents of a compressed file and store them into a data structure that will also hold metadata related to the file.
I'm using the following method to explode the zip file and get the content of each file in it:
void explodeArchive(const string& path, vector<ZipFileModel>& files) {
int error = 0;
zip *zip = zip_open(path.c_str(), 0, &error);
if (zip == nullptr) {
throw logic_error("Could not extract content of file " + path);
}
const zip_int64_t n_entries = zip_get_num_entries(zip, ZIP_FL_UNCHANGED);
for (zip_int64_t i = 0; i < n_entries; i++) {
const char *file_name = zip_get_name(zip, i, ZIP_FL_ENC_GUESS);
struct zip_stat st;
zip_stat_init(&st);
zip_stat(zip, file_name, ZIP_FL_NOCASE, &st);
char *content = new char[st.size];
std::cerr << file_name << std::endl;
zip_file *file = zip_fopen(zip, file_name, ZIP_FL_NOCASE);
const zip_int64_t did_read = zip_fread(file, content, st.size);
if (did_read <= 0) {
continue;
}
if (strlen(content) < st.size) {
LOG(WARNING)<< "File " << file_name << " is truncated.";
}
if (strlen(content) > st.size) {
content[st.size] = '\0';
}
ZipFileModel model;
model.name = string(file_name);
model.content = string(content);
model.order = -1;
files.push_back(model);
zip_fclose(file);
delete[] content;
}
zip_close(zip);
}
My problem is that I get random segmentation faults with gdb pointing to zip_fclose(file);:
Program received signal SIGSEGV, Segmentation fault.
0x00000001001ef8a0 in zip_source_close (src=0x105001b00) at /Users/xxx/Projects/xxx/xxx/src/libzip/zip_source_close.c:48
48 (void)src->cb.l(src->src, src->ud, NULL, 0, ZIP_SOURCE_CLOSE);
What's the best way to debug this? As I said it happens intermittently so it's hard to pin down the exact cause.
You aren't closing the zip_file when there's nothing to read.
First you open the file inside:
zip_file *file = zip_fopen(zip, file_name, ZIP_FL_NOCASE);
Then try to read something:
const zip_int64_t did_read = zip_fread(file, content, st.size);
and if there's nothing to read you continue and the file is never closed.
if (did_read <= 0) {
continue;
}
So, just add:
if (did_read <= 0) {
// Close the entry opened by zip_fopen before moving on to the next one.
zip_fclose(file);
continue;
}
Can someone explain how I can extract files from directories inside a zip file?
I use C++ and miniz (code.google.com/p/miniz/). Thank you in advance.
Here is the code I am using right now:
size_t uncomp_size;
mz_bool status;
mz_zip_archive zip_archive;
memset(&zip_archive, 0, sizeof(zip_archive));
status = mz_zip_reader_init_file(&zip_archive, "data.zip", 0);
if (!status){
puts("failed to open zip file\n");
return 0;
}
try{
void* p = NULL;
std::string file_to_extract = "data//test.txt";
int file_index = mz_zip_reader_locate_file(&zip_archive, file_to_extract.c_str(), NULL, MZ_ZIP_FLAG_IGNORE_PATH);
if (file_index < 0)
{
mz_bool is_dir = mz_zip_reader_is_file_a_directory(&zip_archive,file_index);
if(is_dir){
throw std::exception("file_index = folder");
}else{
throw std::exception("cannot find file in zip(0)");
}
}
p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, &uncomp_size, NULL);
if(!p){
throw std::exception("cannot find file in zip(1)");
}
std::fstream fp1("test.txt",ios::binary|ios::out);
fp1.write(reinterpret_cast<char*>(p),uncomp_size);
fp1.close();
delete p;
}catch(std::exception ex){
cout << ex.what() << endl;
}
mz_zip_reader_end(&zip_archive);
This code is working for me using miniz from here.
string str_zip; // The zip archive in string form.
string str_unzip; // The uncompressed contents of the first file in the zip archive.
// Read in or assign zip contents to the string.
// In my case I receive the zip file via a web service.
// The processing all takes place in memory.
// But you can easily read a file's contents into the zipfile string, as well.
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint;
mz_zip_archive zip_archive;
mz_bool status;
// Now try to open the archive.
memset(&zip_archive, 0, sizeof(zip_archive));
// You can provide the zip data in memory as I did...
status = mz_zip_reader_init_mem(&zip_archive, str_zip.c_str(), str_zip.size(), 0);
// Or you can just give a filename...
// status = mz_zip_reader_init_file(&zip_archive, "myfile.zip", 0);
if (!status)
{
cout << "zip file appears invalid..." << endl;
return;
}
// Get the first file in the archive.
if (mz_zip_reader_get_num_files(&zip_archive) != 1)
{
cout << "zip file does not contain our 1 file..." << endl;
return;
}
mz_zip_archive_file_stat file_stat;
if (!mz_zip_reader_file_stat(&zip_archive, 0, &file_stat))
{
cout << "zip file read error..." << endl;
mz_zip_reader_end(&zip_archive);
return;
}
// Unzip the file to heap.
size_t uncompressed_size = file_stat.m_uncomp_size;
void* p = mz_zip_reader_extract_file_to_heap(&zip_archive, file_stat.m_filename, &uncompressed_size, 0);
if (!p)
{
cout << "mz_zip_reader_extract_file_to_heap() failed..." << endl;
mz_zip_reader_end(&zip_archive);
return;
}
str_unzip.assign((const char*)p,uncompressed_size);
// Close the archive, freeing any resources it was using
mz_free(p);
mz_zip_reader_end(&zip_archive);
I came up with a solution for this.
Let's say we have the following layout
on your C: drive:
+-- ZipFile /
+-- folder1 /
+-- file1
+-- folder2 /
+--file2.1
+--file2.2
+--file2.3
bool decompress_folders_inside_zip()
{
const std::string archive_name = "ZipFile.zip";
const std::string decompress_path = "C:\\";
mz_zip_archive archive {};
boost::filesystem::path dec_path { decompress_path + archive_name };
if (!mz_zip_reader_init_file(&archive, dec_path.string().c_str(), 0))
{
return false;
}
const int file_cnt = (int)mz_zip_reader_get_num_files(&archive);
if (0 == file_cnt)
{
return false;
}
mz_zip_archive_file_stat file_stat;
if (!mz_zip_reader_file_stat(&archive, 0, &file_stat))
{
mz_zip_reader_end(&archive);
return false;
}
for (int i = 0; i < file_cnt; ++i)
{
mz_zip_reader_file_stat(&archive, i, &file_stat);
if (mz_zip_reader_is_file_a_directory(&archive, i))
{
boost::filesystem::path dir(decompress_path + file_stat.m_filename);
boost::filesystem::create_directories(dir.parent_path());
continue;
}
boost::filesystem::path file_out(decompress_path + file_stat.m_filename);
boost::filesystem::path out_file(file_out.parent_path().generic_string() + "/" + file_out.filename().string());
if (!mz_zip_reader_extract_to_file(&archive, i, out_file.string().c_str(), 0))
{
mz_zip_reader_end(&archive);
return false;
}
}
if (!mz_zip_reader_end(&archive))
{
return false;
}
std::cout << "Completed" << std::endl;
return true;
}