Extracting .tar.gz using libarchive on UNC path? - c++

I'm trying to use libarchive to extract .tar.gz file to current folder. Test machine if win7 64bit running under virtual box. It works fine if I do it in C:\ (.vdi disk), however it fails on E:\ (shared folder from host). Based on this
Can't create '\\?\e:\folder\file'
error message I assume the issue is UNC path. Is it possible to handle it with libarchive?
Here is my code:
#include <iostream>
#include <cstdio>
#include <stdexcept>
#include <archive.h>
#include <archive_entry.h>
#include "utils.h"
int copy_data(struct archive * ar, struct archive * aw) {
int r;
size_t size;
const void *buff;
int64_t offset;
for (;;) {
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF) {
return (ARCHIVE_OK);
} else if (r < ARCHIVE_OK) {
return (r);
}
r = archive_write_data_block(aw, buff, size, offset);
if (r < ARCHIVE_OK) {
std::cerr << archive_error_string(aw) << std::endl;
return (r);
}
}
}
void handle_errors(archive * a, int r, const char * msg) {
if (r < ARCHIVE_OK) {
std::cerr << archive_error_string(a) << std::endl;
}
if (r < ARCHIVE_WARN) {
throw std::runtime_error(msg);
}
};
void extract(std::string target_file_name) {
struct archive * a = nullptr;
struct archive * ext = nullptr;
struct archive_entry * entry = nullptr;
int flags, r;
/* Select which attributes we want to restore. */
flags = ARCHIVE_EXTRACT_TIME;
flags |= ARCHIVE_EXTRACT_UNLINK;
flags |= ARCHIVE_EXTRACT_SECURE_NODOTDOT;
a = archive_read_new();
if (!a) { throw std::runtime_error("Cannot archive_read_new"); }
On_Scope_Exit([&] { archive_read_free(a); });
archive_read_support_format_tar(a);
archive_read_support_filter_gzip(a);
ext = archive_write_disk_new();
if (!ext) { throw std::runtime_error("Cannot archive_write_disk_new"); }
On_Scope_Exit([&] { archive_write_close(ext); archive_write_free(ext); });
archive_write_disk_set_options(ext, flags);
archive_write_disk_set_standard_lookup(ext);
if ((r = archive_read_open_filename(a, target_file_name.c_str(), 10240))) {
throw std::runtime_error("Cannot archive_read_open_filename");
}
On_Scope_Exit([&] { archive_read_close(a); });
for (;;) {
r = archive_read_next_header(a, &entry);
if (r == ARCHIVE_EOF) {
break;
}
handle_errors(a, r, "Encountered error while reading header.");
r = archive_write_header(ext, entry);
if (r < ARCHIVE_OK) {
std::cerr << archive_error_string(ext) << std::endl;
}
else if (archive_entry_size(entry) > 0) {
r = copy_data(a, ext);
handle_errors(ext, r, "Encountered error while copy data.");
}
r = archive_write_finish_entry(ext);
handle_errors(ext, r, "Encountered error while finishing entry.");
}
}
int main(int, char**) {
try {
std::string tmp_file_name = "file.tar.gz";
extract(tmp_file_name);
return 0;
} catch (std::exception & e) {
std::cerr << e.what() << std::endl;
return 1;
}
}
I guess I could use https://stackoverflow.com/a/2324777/781743 and chdir to non-unc path first (if it exists), that should work? But is there a way to make libarchive support UNC paths directly?

This worked for me
[verbose initial value is 0 and do_extract is 1 and flag set is ARCHIVE_EXTRACT_TIME]
struct archive *a;
struct archive *ext;
struct archive_entry *entry;
int r;
a = archive_read_new();
ext = archive_write_disk_new();
archive_write_disk_set_options(ext, flags);
/*
* Note: archive_write_disk_set_standard_lookup() is useful
* here, but it requires library routines that can add 500k or
* more to a static executable.
*/
archive_read_support_format_tar(a);
archive_read_support_filter_gzip(a);
/*
* On my system, enabling other archive formats adds 20k-30k
* each. Enabling gzip decompression adds about 20k.
* Enabling bzip2 is more expensive because the libbz2 library
* isn't very well factored.
*/
if (filename != NULL && strcmp(filename, "-") == 0)
filename = NULL;
if ((r = archive_read_open_filename(a, filename, 10240)))
fail("archive_read_open_filename()",
archive_error_string(a), r);
for (;;) {
r = archive_read_next_header(a, &entry);
if (r == ARCHIVE_EOF)
break;
if (r != ARCHIVE_OK)
fail("archive_read_next_header()",
archive_error_string(a), 1);
if (verbose && do_extract)
msg("x ");
if (verbose || !do_extract)
msg(archive_entry_pathname(entry));
if (do_extract) {
r = archive_write_header(ext, entry);
if (r != ARCHIVE_OK)
warn("archive_write_header()",
archive_error_string(ext));
else {
copy_data(a, ext);
r = archive_write_finish_entry(ext);
if (r != ARCHIVE_OK)
fail("archive_write_finish_entry()",
archive_error_string(ext), 1);
}
}
if (verbose || !do_extract)
msg("\n");
}
archive_read_close(a);
archive_read_free(a);
archive_write_close(ext);
archive_write_free(ext);
exit(0);

Related

Enumerate removable drives on Linux and macOS

On Windows I am able to enumerate removable drives using FindFirstVolume, FindNextVolume, GetVolumePathNamesForVolumeName and the GetDriveType functions. How can I achieve this on Linux and macOS?
On macOS you need to work with IOKit: https://developer.apple.com/documentation/iokit
It can looks like the following code:
#include <cstdlib>
#include <iostream>
#include <mach/mach_port.h>
#include <IOKit/IOKitLib.h>
#include <IOKit/IOTypes.h>
#include <IOKit/IOKitKeys.h>
#include <IOKit/IOCFPlugIn.h>
#include <IOKit/usb/IOUSBLib.h>
std::string get_descriptor_idx(IOUSBDeviceInterface **dev, UInt8 idx);
int main(int argc, char *argv[]) {
/*
* Get the IO registry
*/
auto entry = IORegistryGetRootEntry(kIOMasterPortDefault);
if (entry == 0)
return EXIT_FAILURE;
io_iterator_t iter{};
auto kret = IORegistryEntryCreateIterator(entry, kIOUSBPlane, kIORegistryIterateRecursively, &iter);
if (kret != KERN_SUCCESS || iter == 0)
return EXIT_FAILURE;
io_service_t service {};
std::cout << std::endl;
while ((service = IOIteratorNext(iter))) {
IOCFPlugInInterface **plug = nullptr;
IOUSBDeviceInterface **dev = nullptr;
io_string_t path;
SInt32 score = 0;
IOReturn ioret;
kret = IOCreatePlugInInterfaceForService(service, kIOUSBDeviceUserClientTypeID, kIOCFPlugInInterfaceID, &plug, &score);
IOObjectRelease(service);
if (kret != KERN_SUCCESS || plug == nullptr) {
continue;
}
/*
* USB
*/
ioret = (*plug)->QueryInterface(plug, CFUUIDGetUUIDBytes(kIOUSBDeviceInterfaceID),
static_cast<LPVOID *>((void *) &dev));
(*plug)->Release(plug);
if (ioret != kIOReturnSuccess || dev == nullptr) {
continue;
}
if (IORegistryEntryGetPath(service, kIOServicePlane, path) != KERN_SUCCESS) {
(*dev)->Release(dev);
continue;
}
std::cout << "USB Path: " << path << std::endl;
UInt8 si;
UInt16 u16v;
if ((*dev)->GetDeviceVendor(dev, &u16v) == kIOReturnSuccess)
std::cout << "VID: "<< u16v << std::endl;
if ((*dev)->GetDeviceProduct(dev, &u16v) == kIOReturnSuccess)
std::cout << "PID: "<< u16v << std::endl;
if ((*dev)->USBGetManufacturerStringIndex(dev, &si) == kIOReturnSuccess) {
std::cout << "Manufacturer: " << get_descriptor_idx(dev, si) << std::endl;
}
if ((*dev)->USBGetProductStringIndex(dev, &si) == kIOReturnSuccess) {
std::cout << "Product: " << get_descriptor_idx(dev, si) << std::endl;
}
(*dev)->Release(dev);
std::cout << std::endl;
}
IOObjectRelease(iter);
return 0;
}
/* a quick version */
std::string get_descriptor_idx(IOUSBDeviceInterface **dev, UInt8 idx)
{
IOUSBDevRequest request;
IOReturn ioret;
char buffer[4086] = { 0 };
CFStringRef cfstr;
CFIndex len;
request.bmRequestType = USBmakebmRequestType(kUSBIn, kUSBStandard, kUSBDevice);
request.bRequest = kUSBRqGetDescriptor;
request.wValue = (kUSBStringDesc << 8) | idx;
request.wIndex = 0x409;
request.wLength = sizeof(buffer);
request.pData = buffer;
ioret = (*dev)->DeviceRequest(dev, &request);
if (ioret != kIOReturnSuccess)
return "n/a";
if (request.wLenDone <= 2)
return "n/a";
cfstr = CFStringCreateWithBytes(nullptr, (const UInt8 *)buffer+2, request.wLenDone-2, kCFStringEncodingUTF16LE, 0);
len = CFStringGetMaximumSizeForEncoding(CFStringGetLength(cfstr), kCFStringEncodingUTF8) + 1;
if (len < 0) {
CFRelease(cfstr);
return "n/a";
}
std::string str; str.resize(len);
CFStringGetCString(cfstr, str.data(), len, kCFStringEncodingUTF8);
CFRelease(cfstr);
return str;
}
In my case the result ıs:
USB Path: IOService:/AppleARMPE/arm-io/xxxx/USB2.1 Hub#01100000
VID: 1507
PID: 1552
Manufacturer: GenesysLogic
Product: USB2.1 USB

strange behaviour saving file

I use lzmaspec.cpp from 7zip library lzma1900/cpp/7zip/Bundles/Lzmaspec to decompress lzma file.
it works fine and save file correctly.
when I take main2 function and and make header file to use it in my project,it does not save the whole file.
this is lzmaspec.h
/*
* LzmaSpec.h
*
* Created on: Oct 14, 2020
* Author: za2ponubuntu1844
*/
#ifndef LZMASPEC_H_
#define LZMASPEC_H_
int main2(const char *,const char *);
#endif /* LZMASPEC_H_ */
and this is my modified main2 function
/* LzmaSpec.cpp -- LZMA Reference Decoder
2015-06-14 : Igor Pavlov : Public domain */
// This code implements LZMA file decoding according to LZMA specification.
// This code is not optimized for speed.
#include <stdio.h>
#ifdef _MSC_VER
#pragma warning(disable : 4710) // function not inlined
#pragma warning(disable : 4996) // This function or variable may be unsafe
#endif
typedef unsigned char Byte;
typedef unsigned short UInt16;
#ifdef _LZMA_UINT32_IS_ULONG
typedef unsigned long UInt32;
#else
typedef unsigned int UInt32;
#endif
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef unsigned __int64 UInt64;
#else
typedef unsigned long long int UInt64;
#endif
struct CInputStream
{
FILE *File;
UInt64 Processed;
void Init() { Processed = 0; }
Byte ReadByte()
{
int c = getc(File);
if (c < 0)
throw "Unexpected end of file";
Processed++;
return (Byte)c;
}
};
struct COutStream
{
FILE *File;
UInt64 Processed;
void Init() { Processed = 0; }
void WriteByte(Byte b)
{
if (putc(b, File) == EOF)
throw "File writing error";
Processed++;
}
};
class COutWindow
{
Byte *Buf;
UInt32 Pos;
UInt32 Size;
bool IsFull;
public:
unsigned TotalPos;
COutStream OutStream;
COutWindow(): Buf(NULL) {}
~COutWindow() { delete []Buf; }
void Create(UInt32 dictSize)
{
Buf = new Byte[dictSize];
Pos = 0;
Size = dictSize;
IsFull = false;
TotalPos = 0;
}
void PutByte(Byte b)
{
TotalPos++;
Buf[Pos++] = b;
if (Pos == Size)
{
Pos = 0;
IsFull = true;
}
OutStream.WriteByte(b);
}
Byte GetByte(UInt32 dist) const
{
return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
}
void CopyMatch(UInt32 dist, unsigned len)
{
for (; len > 0; len--)
PutByte(GetByte(dist));
}
bool CheckDistance(UInt32 dist) const
{
return dist <= Pos || IsFull;
}
bool IsEmpty() const
{
return Pos == 0 && !IsFull;
}
};
#define kNumBitModelTotalBits 11
#define kNumMoveBits 5
typedef UInt16 CProb;
#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
#define INIT_PROBS(p) \
{ for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
class CRangeDecoder
{
UInt32 Range;
UInt32 Code;
void Normalize();
public:
CInputStream *InStream;
bool Corrupted;
bool Init();
bool IsFinishedOK() const { return Code == 0; }
UInt32 DecodeDirectBits(unsigned numBits);
unsigned DecodeBit(CProb *prob);
};
bool CRangeDecoder::Init()
{
Corrupted = false;
Range = 0xFFFFFFFF;
Code = 0;
Byte b = InStream->ReadByte();
for (int i = 0; i < 4; i++)
Code = (Code << 8) | InStream->ReadByte();
if (b != 0 || Code == Range)
Corrupted = true;
return b == 0;
}
#define kTopValue ((UInt32)1 << 24)
void CRangeDecoder::Normalize()
{
if (Range < kTopValue)
{
Range <<= 8;
Code = (Code << 8) | InStream->ReadByte();
}
}
UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
{
UInt32 res = 0;
do
{
Range >>= 1;
Code -= Range;
UInt32 t = 0 - ((UInt32)Code >> 31);
Code += Range & t;
if (Code == Range)
Corrupted = true;
Normalize();
res <<= 1;
res += t + 1;
}
while (--numBits);
return res;
}
unsigned CRangeDecoder::DecodeBit(CProb *prob)
{
unsigned v = *prob;
UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
unsigned symbol;
if (Code < bound)
{
v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
Range = bound;
symbol = 0;
}
else
{
v -= v >> kNumMoveBits;
Code -= bound;
Range -= bound;
symbol = 1;
}
*prob = (CProb)v;
Normalize();
return symbol;
}
unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
{
unsigned m = 1;
unsigned symbol = 0;
for (unsigned i = 0; i < numBits; i++)
{
unsigned bit = rc->DecodeBit(&probs[m]);
m <<= 1;
m += bit;
symbol |= (bit << i);
}
return symbol;
}
template <unsigned NumBits>
class CBitTreeDecoder
{
CProb Probs[(unsigned)1 << NumBits];
public:
void Init()
{
INIT_PROBS(Probs);
}
unsigned Decode(CRangeDecoder *rc)
{
unsigned m = 1;
for (unsigned i = 0; i < NumBits; i++)
m = (m << 1) + rc->DecodeBit(&Probs[m]);
return m - ((unsigned)1 << NumBits);
}
unsigned ReverseDecode(CRangeDecoder *rc)
{
return BitTreeReverseDecode(Probs, NumBits, rc);
}
};
#define kNumPosBitsMax 4
#define kNumStates 12
#define kNumLenToPosStates 4
#define kNumAlignBits 4
#define kStartPosModelIndex 4
#define kEndPosModelIndex 14
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
#define kMatchMinLen 2
class CLenDecoder
{
CProb Choice;
CProb Choice2;
CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
CBitTreeDecoder<8> HighCoder;
public:
void Init()
{
Choice = PROB_INIT_VAL;
Choice2 = PROB_INIT_VAL;
HighCoder.Init();
for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
{
LowCoder[i].Init();
MidCoder[i].Init();
}
}
unsigned Decode(CRangeDecoder *rc, unsigned posState)
{
if (rc->DecodeBit(&Choice) == 0)
return LowCoder[posState].Decode(rc);
if (rc->DecodeBit(&Choice2) == 0)
return 8 + MidCoder[posState].Decode(rc);
return 16 + HighCoder.Decode(rc);
}
};
unsigned UpdateState_Literal(unsigned state)
{
if (state < 4) return 0;
else if (state < 10) return state - 3;
else return state - 6;
}
unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; }
unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; }
unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
#define LZMA_DIC_MIN (1 << 12)
class CLzmaDecoder
{
public:
CRangeDecoder RangeDec;
COutWindow OutWindow;
bool markerIsMandatory;
unsigned lc, pb, lp;
UInt32 dictSize;
UInt32 dictSizeInProperties;
void DecodeProperties(const Byte *properties)
{
unsigned d = properties[0];
if (d >= (9 * 5 * 5))
throw "Incorrect LZMA properties";
lc = d % 9;
d /= 9;
pb = d / 5;
lp = d % 5;
dictSizeInProperties = 0;
for (int i = 0; i < 4; i++)
dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
dictSize = dictSizeInProperties;
if (dictSize < LZMA_DIC_MIN)
dictSize = LZMA_DIC_MIN;
}
CLzmaDecoder(): LitProbs(NULL) {}
~CLzmaDecoder() { delete []LitProbs; }
void Create()
{
OutWindow.Create(dictSize);
CreateLiterals();
}
int Decode(bool unpackSizeDefined, UInt64 unpackSize);
private:
CProb *LitProbs;
void CreateLiterals()
{
LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
}
void InitLiterals()
{
UInt32 num = (UInt32)0x300 << (lc + lp);
for (UInt32 i = 0; i < num; i++)
LitProbs[i] = PROB_INIT_VAL;
}
void DecodeLiteral(unsigned state, UInt32 rep0)
{
unsigned prevByte = 0;
if (!OutWindow.IsEmpty())
prevByte = OutWindow.GetByte(1);
unsigned symbol = 1;
unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
CProb *probs = &LitProbs[(UInt32)0x300 * litState];
if (state >= 7)
{
unsigned matchByte = OutWindow.GetByte(rep0 + 1);
do
{
unsigned matchBit = (matchByte >> 7) & 1;
matchByte <<= 1;
unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
symbol = (symbol << 1) | bit;
if (matchBit != bit)
break;
}
while (symbol < 0x100);
}
while (symbol < 0x100)
symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
OutWindow.PutByte((Byte)(symbol - 0x100));
}
CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
CBitTreeDecoder<kNumAlignBits> AlignDecoder;
CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
void InitDist()
{
for (unsigned i = 0; i < kNumLenToPosStates; i++)
PosSlotDecoder[i].Init();
AlignDecoder.Init();
INIT_PROBS(PosDecoders);
}
unsigned DecodeDistance(unsigned len)
{
unsigned lenState = len;
if (lenState > kNumLenToPosStates - 1)
lenState = kNumLenToPosStates - 1;
unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
if (posSlot < 4)
return posSlot;
unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
if (posSlot < kEndPosModelIndex)
dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
else
{
dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
dist += AlignDecoder.ReverseDecode(&RangeDec);
}
return dist;
}
CProb IsMatch[kNumStates << kNumPosBitsMax];
CProb IsRep[kNumStates];
CProb IsRepG0[kNumStates];
CProb IsRepG1[kNumStates];
CProb IsRepG2[kNumStates];
CProb IsRep0Long[kNumStates << kNumPosBitsMax];
CLenDecoder LenDecoder;
CLenDecoder RepLenDecoder;
void Init()
{
InitLiterals();
InitDist();
INIT_PROBS(IsMatch);
INIT_PROBS(IsRep);
INIT_PROBS(IsRepG0);
INIT_PROBS(IsRepG1);
INIT_PROBS(IsRepG2);
INIT_PROBS(IsRep0Long);
LenDecoder.Init();
RepLenDecoder.Init();
}
};
#define LZMA_RES_ERROR 0
#define LZMA_RES_FINISHED_WITH_MARKER 1
#define LZMA_RES_FINISHED_WITHOUT_MARKER 2
int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
{
if (!RangeDec.Init())
return LZMA_RES_ERROR;
Init();
UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
unsigned state = 0;
for (;;)
{
if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
if (RangeDec.IsFinishedOK())
return LZMA_RES_FINISHED_WITHOUT_MARKER;
unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
{
if (unpackSizeDefined && unpackSize == 0)
return LZMA_RES_ERROR;
DecodeLiteral(state, rep0);
state = UpdateState_Literal(state);
unpackSize--;
continue;
}
unsigned len;
if (RangeDec.DecodeBit(&IsRep[state]) != 0)
{
if (unpackSizeDefined && unpackSize == 0)
return LZMA_RES_ERROR;
if (OutWindow.IsEmpty())
return LZMA_RES_ERROR;
if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
{
if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
{
state = UpdateState_ShortRep(state);
OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
unpackSize--;
continue;
}
}
else
{
UInt32 dist;
if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
dist = rep1;
else
{
if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
dist = rep2;
else
{
dist = rep3;
rep3 = rep2;
}
rep2 = rep1;
}
rep1 = rep0;
rep0 = dist;
}
len = RepLenDecoder.Decode(&RangeDec, posState);
state = UpdateState_Rep(state);
}
else
{
rep3 = rep2;
rep2 = rep1;
rep1 = rep0;
len = LenDecoder.Decode(&RangeDec, posState);
state = UpdateState_Match(state);
rep0 = DecodeDistance(len);
if (rep0 == 0xFFFFFFFF)
return RangeDec.IsFinishedOK() ?
LZMA_RES_FINISHED_WITH_MARKER :
LZMA_RES_ERROR;
if (unpackSizeDefined && unpackSize == 0)
return LZMA_RES_ERROR;
if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
return LZMA_RES_ERROR;
}
len += kMatchMinLen;
bool isError = false;
if (unpackSizeDefined && unpackSize < len)
{
len = (unsigned)unpackSize;
isError = true;
}
OutWindow.CopyMatch(rep0 + 1, len);
unpackSize -= len;
if (isError)
return LZMA_RES_ERROR;
}
}
static void Print(const char *s)
{
fputs(s, stdout);
}
static void PrintError(const char *s)
{
fputs(s, stderr);
}
#define CONVERT_INT_TO_STR(charType, tempSize) \
void ConvertUInt64ToString(UInt64 val, char *s)
{
char temp[32];
unsigned i = 0;
while (val >= 10)
{
temp[i++] = (char)('0' + (unsigned)(val % 10));
val /= 10;
}
*s++ = (char)('0' + (unsigned)val);
while (i != 0)
{
i--;
*s++ = temp[i];
}
*s = 0;
}
void PrintUInt64(const char *title, UInt64 v)
{
Print(title);
Print(" : ");
char s[32];
ConvertUInt64ToString(v, s);
Print(s);
Print(" bytes \n");
}
int main2(const char * infile, const char *outfile)
{
try
{
Print("\nLZMA Reference Decoder 15.00 : Igor Pavlov : Public domain : 2015-04-16\n");
//if (numArgs == 1)
// Print("\nUse: lzmaSpec a.lzma outFile");
//if (numArgs != 3)
// throw "you must specify two parameters";
CInputStream inStream;
inStream.File = fopen(infile, "rb");
inStream.Init();
if (inStream.File == 0)
throw "Can't open input file";
CLzmaDecoder lzmaDecoder;
lzmaDecoder.OutWindow.OutStream.File = fopen(outfile, "wb+");
lzmaDecoder.OutWindow.OutStream.Init();
if (inStream.File == 0)
throw "Can't open output file";
Byte header[13];
int i;
for (i = 0; i < 13; i++)
header[i] = inStream.ReadByte();
lzmaDecoder.DecodeProperties(header);
printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
printf("\nDictionary Size for decoding = %u", lzmaDecoder.dictSize);
UInt64 unpackSize = 0;
bool unpackSizeDefined = false;
for (i = 0; i < 8; i++)
{
Byte b = header[5 + i];
if (b != 0xFF)
unpackSizeDefined = true;
unpackSize |= (UInt64)b << (8 * i);
}
lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
Print("\n");
if (unpackSizeDefined)
PrintUInt64("Uncompressed Size", unpackSize);
else
Print("End marker is expected\n");
lzmaDecoder.RangeDec.InStream = &inStream;
Print("\n");
lzmaDecoder.Create();
int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
PrintUInt64("Read ", inStream.Processed);
PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
if (res == LZMA_RES_ERROR)
throw "LZMA decoding error";
else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
Print("Finished without end marker");
else if (res == LZMA_RES_FINISHED_WITH_MARKER)
{
if (unpackSizeDefined)
{
if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
throw "Finished with end marker before than specified size";
Print("Warning: ");
}
Print("Finished with end marker");
}
else
throw "Internal Error";
Print("\n");
if (lzmaDecoder.RangeDec.Corrupted)
{
Print("\nWarning: LZMA stream is corrupted\n");
}
//15-10-2020
}
catch (const char *s)
{
PrintError("\nError:\n");
PrintError(s);
PrintError("\n");
return 1;
}
catch(...)
{
PrintError("\nError\n");
return 1;
}
return 0;
}
/*
int
#ifdef _MSC_VER
__cdecl
#endif
main(int numArgs, const char *args[])
{
try { return main2(numArgs, args); }
catch (const char *s)
{
PrintError("\nError:\n");
PrintError(s);
PrintError("\n");
return 1;
}
catch(...)
{
PrintError("\nError\n");
return 1;
}
}
*/
when i debug it i find that function
void WriteByte(Byte b)
works fine but the resulting file does not have correct size.
i used this file
13h_ticks.bi5 from dukascopy
i uploaded it here
https://github.com/ahmed-allam/starting_coding
I do not know why this happens.
when i try to decompress same file using lzmaspec.cpp outside project ,it works fine.
I need guide line where the error could be?

Fail to extract binary file from Tar archive using libarchive

Update
The example below does work. I had misread the intention of the flag ARCHIVE_EXTRACT_TIME and was expecting to see the newly created file. Instead the file was being extracted correctly but with its original creation date. It was a long week! :-)
I have the following data stored in a Tar archive:
opt/
fw.bin
ad/
bin/
installer.sh
I have the following code, running on Ubuntu (g++ version 5.2.1), to extract the contents of the archive in memory using libarchive, largely taken from the examples:
int copy_data(struct archive *ar, struct archive *aw)
{
int r;
const void *buff;
size_t size;
#if ARCHIVE_VERSION_NUMBER >= 3000000
int64_t offset;
#else
off_t offset;
#endif
for (;;) {
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF)
return (ARCHIVE_OK);
if (r != ARCHIVE_OK)
return (r);
r = archive_write_data(aw, buff, size);
if (size != r)
{
std::cerr << "Failed to write " << (size - r) << " Bytes\n";
}
}
}
}
int main(int argc, char** argv)
{
std::ifstream input("archive.tar.gz", std::ios::binary | std::ios::ate );
auto const size = input.tellg();
char* buffer = new char[size];
input.seekg(0);
input.read(buffer, size);
input.close();
struct archive *a;
struct archive *ext;
struct archive_entry *entry;
int retCode;
int flags = ARCHIVE_EXTRACT_TIME;
a = archive_read_new();
archive_read_support_format_all(a);
ext = archive_write_disk_new();
archive_write_disk_set_options(ext, flags);
archive_write_disk_set_standard_lookup(ext);
if ((retCode = archive_read_open_memory(a, buffer, size)))
for (;;) {
retCode = archive_read_next_header(a, &entry);
if (retCode == ARCHIVE_EOF)
{
break;
}
if (retCode != ARCHIVE_OK)
{
return -1;
}
retCode = archive_write_header(ext, entry);
if (retCode != ARCHIVE_OK)
{
return -1;
}
else
{
if (archive_entry_size(entry) > 0)
{
copy_data(a, ext);
}
retCode = archive_write_finish_entry(ext);
if (retCode != ARCHIVE_OK)
{
return -1;
}
}
}
archive_read_close(a);
archive_read_free(a);
archive_write_close(ext);
archive_write_free(ext);
return 0;
}
It unpacks the directories correctly as above and writes the installer.sh file. However, the binary file (fw.bin) is missing.
Are there specific settings I am missing in order to write binary files to disk? Am I using the wrong API calls?

Getting a "vector subscript out of range" error when passing a vector into a function: c++ [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 7 years ago.
Improve this question
I am trying to write a c++ program that assembles MIPS instructions. While debugging, it keeps throwing an error at line 74 of my main:
myassembler.add(lexed[i].labels[0], lexed[i].name, tokens, i);
my main is here:
#include <fstream>
#include <iostream>
#include <iomanip>
#include <memory>
#include <stdexcept>
#include <string>
#include <sstream>
#include <vector>
#include "exceptions.h"
#include "lexer.h"
#include "util.h"
#include "assembler.h"
std::string read_file(const std::string& name) {
std::ifstream file(name);
if (!file.is_open()) {
std::string error = "Could not open file: ";
error += name;
throw std::runtime_error(error);
}
std::stringstream stream;
stream << file.rdbuf();
return std::move(stream.str());
}
int main(int argc, char** argv) {
// Adjusting -- argv[0] is always filename.
--argc;
++argv;
if (argc == 0) {
std::cerr << "Need a file" << std::endl;
return 1;
}
assembler myassembler;
for (int i = 0; i < argc; ++i) {
std::string asmName(argv[i]);
if (!util::ends_with_subseq(asmName, std::string(".asm"))) {
std::cerr << "Need a valid file name (that ends in .asm)" << std::endl;
std::cerr << "(Bad name: " << asmName << ")" << std::endl;
return 1;
}
// 4 is len(".asm")
auto length = asmName.size() - string_length(".asm");
std::string baseName(asmName.begin(), asmName.begin() + length);
std::string objName = baseName + ".obj";
try {
auto text = read_file(asmName);
try {
auto lexed = lexer::analyze(text); // Parses the entire file and returns a vector of instructions
for (int i =0; i < (int)lexed.size(); i++){
if(lexed[i].labels.size() > 0) // Checking if there is a label in the current instruction
std::cout << "label = " << lexed[i].labels[0] << "\n"; // Prints the label
std::cout<< "instruction name = " << lexed[i].name<< "\n"; // Prints the name of instruction
std::cout << "tokens = ";
std::vector<lexer::token> tokens = lexed[i].args;
for(int j=0; j < (int)tokens.size(); j++){ // Prints all the tokens of this instruction like $t1, $t2, $t3
if (tokens[j].type == lexer::token::Integer)
std::cout << tokens[j].integer() << " ";
else
std::cout << tokens[j].string() << " ";
}
myassembler.add(lexed[i].labels[0], lexed[i].name, tokens, i);
myassembler.p();
std::cout << "\n\n\n";
}
} catch(const bad_asm& e) {
std::stringstream error;
error << "Cannot assemble the assembly code at line " << e.line;
throw std::runtime_error(error.str());
} catch(const bad_label& e) {
std::stringstream error;
error << "Undefined label " << e.what() << " at line " << e.line;
throw std::runtime_error(error.str());
}
} catch (const std::runtime_error& err) {
std::cout << err.what() << std::endl;
return 1;
}
}
/*getchar();*/
return 0;
}
assembler.h:
#include "lexer.h"
#include <fstream>
#include <vector>
#include <string>
struct symbol
{
std::string label = "";
int slinenum;
};
struct relocation
{
std::string instruct = "";
std::string label = "";
int rlinenum;
int rt = 0;
int rs = 0;
};
struct opcode
{
std::string instruct = "";
int opc = 0;
bool isloadstore = false;
int extType = 0;
bool isbranch = false;
};
struct function
{
std::string instruct = "";
int funct = 0;
bool isjr = false;
bool isshift = false;
};
struct regs
{
std::string name;
int num;
};
enum instrtype
{
R, I, neither
};
class assembler
{
public:
assembler();
void oinit(void);
void finit(void);
void rinit(void);
void printToFile(std::fstream &file);
void savesymb(std::string label, int line);
void saverel(std::string instr, std::string label, int line, int rt, int rs);
std::vector<int> formatr(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line);
int formatr(std::string instr, lexer::token toke, int line);
std::vector<int> formati(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line);
std::vector<int> formati(std::string instr, lexer::token toke1, lexer::token toke2, int line);
int findnum(std::string regname);
void add(std::string label, std::string instr, const std::vector<lexer::token> &tokens, int linen);
void secAdd(void);
int rassemble(std::string instr, int rd, int rs, int rt, int shamt);
int iassemble(std::string instr, int rt, int rs, int imm);
void p();
private:
std::vector<int> results;
std::vector<symbol> symbtable;
std::vector<relocation> reloctable;
std::vector<opcode> ops;
std::vector<function> functions;
std::vector<regs> registers;
instrtype type = neither;
};
and assembler.cpp:
// ECE 2500
// Project 1: myAssembler
// assembler.cpp
// Sheila Zhu
#include "lexer.h"
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include "assembler.h"
assembler::assembler()
{
oinit();
finit();
rinit();
}
void assembler::oinit()
{
opcode myop;
myop.instruct = "addi";
myop.opc = 8;
myop.extType = 1;
ops.push_back(myop);
// more of the same
}
void assembler::finit()
{
function myfunc;
myfunc.instruct = "add";
myfunc.funct = 32;
functions.push_back(myfunc);
// more of the same
}
void assembler::rinit()
{
regs myreg;
myreg.name = "$zero";
myreg.num = 0;
registers.push_back(myreg);
//more of the same
}
void assembler::printToFile(std::fstream &file)
{
for (int i = 0; i < (int)results.size(); i++)
file << results.at(i) << std::endl;
}
void assembler::savesymb(std::string label, int line)
{
symbol symb;
symb.label = label;
symb.slinenum = line * 4;
symbtable.push_back(symb);
}
void assembler::saverel(std::string instr, std::string label, int line, int rt, int rs)
{
relocation re;
re.instruct = instr;
re.label = label;
re.rlinenum = line;
re.rt = rt;
re.rs = rs;
}
int assembler::findnum(std::string regname)
{
for (int i = 0; i < (int)registers.size(); i++)
{
if (regname == registers.at(i).name)
return registers.at(i).num;
}
return -1;
}
std::vector<int> assembler::formatr(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line)
{
int rd = 0, rs = 0, rt = 0, shamt = 0;
std::vector<int> x;
function currf;
for (int i = 0; i < (int)functions.size(); i++)
{
if (instr == functions.at(i).instruct)
currf = functions.at(i);
}
try
{
if (currf.isshift)
{
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rd = findnum(toke1.string());
if (rd == -1)
throw 2;
}
if (toke2.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke2.string());
if (rs == -1)
throw 2;
}
if (toke3.type == lexer::token::Integer)
{
shamt = toke3.integer();
if (shamt < 0)
throw 3;
}
else
throw 1;
}
else
{
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rd = findnum(toke1.string());
if (rd == -1)
throw 2;
}
if (toke2.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke2.string());
if (rs == -1)
throw 2;
}
if (toke3.type == lexer::token::Integer)
throw 1;
else
{
rt = findnum(toke3.string());
if (rt == -1)
throw 2;
}
}
}
catch (int e)
{
if (e == 1)
std::cerr << "Wrong argument in line " << line << std::endl;
else if (e == 2)
std::cerr << "Invalid register name in line " << line << std::endl;
else
std::cerr << "Shift amount cannot be negative in line " << line << std::endl;
}
x.push_back(rd);
x.push_back(rs);
x.push_back(rt);
x.push_back(shamt);
return x;
}
int assembler::formatr(std::string instr, lexer::token toke, int line)
{
int rs = 0;
try
{
if (toke.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke.string());
if (rs == -1)
throw 2;
}
}
catch (int e)
{
if (e == 1)
std::cerr << "Wrong argument in line " << line << std::endl;
else
std::cerr << "Invalid register name in line " << line << std::endl;
}
return rs;
}
std::vector<int> assembler::formati(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line)
{
int rt = 0, rs = 0, imm = 0;
std::vector<int> x;
opcode currop;
for (int i = 0; i < (int)ops.size(); i++)
{
if (instr == ops.at(i).instruct)
currop = ops.at(i);
}
try
{
if (currop.isbranch)
{
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rt = findnum(toke1.string());
if (rt == -1)
throw 2;
}
if (toke2.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke2.string());
if (rs == -1)
throw 2;
}
if (toke3.type == lexer::token::Integer)
imm = toke3.integer();
else
saverel(instr, toke3.string(), line, rt, rs);
}
else if (currop.isloadstore)
{
if ((instr == "lbu") || (instr == "sb"))
{
if (toke2.type == lexer::token::String)
throw 1;
else
{
if (toke2.integer() < 0)
imm = (0xFFFF << 16) + (0xFF << 8) + toke2.integer();
else
imm = toke2.integer();
}
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rt = findnum(toke1.string());
if (rt == -1)
throw 2;
}
if (toke3.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke2.string());
if (rs == -1)
throw 2;
}
}
else
{
if (toke2.type == lexer::token::String)
throw 1;
else
{
if (toke2.integer() < 0)
imm = (0xFFFF << 16) + toke2.integer();
else
imm = toke2.integer();
}
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rt = findnum(toke1.string());
if (rt == -1)
throw 2;
}
if (toke3.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke2.string());
if (rs == -1)
throw 2;
}
}
}
else
{
if ((instr == "andi") || (instr == "ori"))
{
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rt = findnum(toke1.string());
if (rt == -1)
throw 2;
}
if (toke2.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke2.string());
if (rs == -1)
throw 2;
}
if (toke3.type == lexer::token::Integer)
imm = toke3.integer();
else
throw 1;
}
else
{
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rt = findnum(toke1.string());
if (rt == -1)
throw 2;
}
if (toke2.type == lexer::token::Integer)
throw 1;
else
{
rs = findnum(toke2.string());
if (rs == -1)
throw 2;
}
if (toke3.type == lexer::token::Integer)
{
if (toke3.integer() < 0)
imm = (0xFFFF << 16) + toke2.integer();
else
imm = toke3.integer();
}
else
throw 1;
}
}
}
catch (int e)
{
if (e == 1)
std::cerr << "Wrong argument in line " << line << std::endl;
else
std::cerr << "Invalid register name in line " << line << std::endl;
}
x.push_back(rt);
x.push_back(rs);
x.push_back(imm);
return x;
}
std::vector<int> assembler::formati(std::string instr, lexer::token toke1, lexer::token toke2, int line)
{
int rt = 0, imm = 0;
std::vector<int> rval;
try
{
if (toke1.type == lexer::token::Integer)
throw 1;
else
{
rt = findnum(toke1.string());
if (rt == -1)
throw 2;
}
if (toke2.type == lexer::token::String)
throw 1;
else
imm = toke2.integer();
}
catch (int e)
{
if (e == 1)
std::cerr << "Wrong argument in line " << line << std::endl;
else
std::cerr << "Invalid register name in line " << line << std::endl;
}
rval.push_back(rt);
rval.push_back(imm);
return rval;
}
void assembler::add(std::string label, std::string instr, const std::vector<lexer::token> &token, int linen)
{
int assembled = 0, rd = 0, rt = 0;
std::vector<int> argh;
int arg;
if (label.length() > 0)
savesymb(label, linen);
for (int i = 0; i < (int)functions.size(); i++)
{
if (instr == functions.at(i).instruct)
type = R;
}
for (int i = 0; i < (int)ops.size(); i++)
{
if (instr == ops.at(i).instruct)
type = I;
}
if (type == R)
{
try
{
if (instr == "jr")
{
if ((int)token.size() == 1)
{
arg = formatr(instr, token.at(0), linen);
assembled = rassemble(instr, rd, arg, rt, 0);
}
else
throw 1;
}
else
{
if ((int)token.size() == 3)
{
argh = formatr(instr, token.at(0), token.at(2), token.at(3), linen);
assembled = rassemble(instr, argh[0], argh[1], argh[2], argh[3]);
}
else
throw 1;
}
}
catch (int e)
{
if (e == 1)
std::cerr << "Wrong number of arguments at line " << linen << std::endl;
}
}
else if (type == I)
{
try
{
if (instr == "lui")
{
if ((int)token.size() == 2)
{
argh = formati(instr, token.at(0), token.at(1), linen);
assembled = iassemble(instr, argh[0], 0, argh[1]);
}
else
throw 1;
}
else
{
if ((int)token.size() == 3)
{
argh = formati(instr, token.at(0), token.at(1), token.at(2), linen);
assembled = iassemble(instr, argh[0], argh[1], argh[2]);
}
else
throw 1;
}
}
catch (int e)
{
if (e == 1)
std::cout << "Wrong number of arguments at line " << linen << std::endl;
}
}
else
std::cerr << "Instruction not recognized at line " << linen << std::endl;
results.push_back(assembled);
}
void assembler::secAdd(void)
{
std::vector<int>::iterator iter = results.begin();
for (int i = 0; i < (int)reloctable.size(); i++)
{
for (unsigned int j = 0; j < symbtable.size(); j++)
{
if (reloctable.at(i).label == symbtable.at(j).label)
{
int assembled = 0;
iter += (reloctable.at(i).rlinenum / 4);
for (unsigned int k = 0; k < ops.size(); k++)
{
if (reloctable.at(i).instruct == ops.at(k).instruct)
type = I;
}
if (type == I)
assembled = iassemble(reloctable.at(i).instruct, reloctable.at(i).rt, reloctable.at(i).rs, symbtable.at(i).slinenum);
else
std::cerr << "Instruction not recognized at line " << reloctable.at(i).rlinenum << std::endl;
results.erase(iter);
results.insert(iter, assembled);
}
}
}
}
int assembler::rassemble(std::string instr, int rd, int rs, int rt, int shamt)
{
int func = 0;
int code = 0;
for (int i = 0; i < (int)functions.size(); i++)
{
if (instr == functions.at(i).instruct)
{
func = functions.at(i).funct;
break;
}
else
{
if (i == (functions.size() - 1))
return -1;
}
}
code = (rs << 21) + (rt << 16) + (rd << 11) + (shamt << 6) + func;
return code;
}
int assembler::iassemble(std::string instr, int rt, int rs, int imm)
{
int op = 0;
int code = 0;
for (int i = 0; i < (int)ops.size(); i++)
{
if (instr == ops.at(i).instruct)
{
op = ops.at(i).opc;
break;
}
else
{
if (i == (ops.size() - 1))
return -1;
}
}
code = (op << 26) + (rs << 21) + (rt << 16) + imm;
return code;
}
void assembler::p()
{
for (int i = 0; i < (int)results.size(); i++)
std::cout << results.at(i) << " ";
std::cout << std::endl;
}
When debugging, the tokens parameter triggers the error, and the this pointer in the vector code shows that the vector size changes to 0 at these lines:
#if _ITERATOR_DEBUG_LEVEL == 2
if (size() <= _Pos)
What exactly is happening?
Sorry if my formatting is bad/wrong, etc., and please let me know if I should make any edits/provide more code.
Thanks in advance.
The error is caused by accessing by index vector element that does not exist.
In you case lexed[i] should be valid. So, the only possible issue may be with empty labels vector. Validate this vector before accessing its elements, for example
myassembler.add(lexed[i].labels.empty() ? "" : lexed[i].labels[0],
lexed[i].name, tokens, i);
Actually there is one more bug for very large lexed arrays when integer index may overflow. You should not cast result of .size() to int. Instead proper type should be used for i:
for (size_t i = 0; i < lexed.size(); i++)

How to use LZMA SDK in C++?

i have difficulties in using LZMA SDK in my application.
I would like to create a kind of single file compression tool. I dont need any directory support, just need only the LZMA2 stream. But i have no idea on how LZMA SDK is to be used for this.
Please can anyone give me a little example on how the LZMA SDK can be used under C++?
I think that it's a properly little example to use LZMA SDK.
/* LzmaUtil.c -- Test application for LZMA compression
2008-08-05
Igor Pavlov
public domain */
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../LzmaDec.h"
#include "../LzmaEnc.h"
#include "../Alloc.h"
const char *kCantReadMessage = "Can not read input file";
const char *kCantWriteMessage = "Can not write output file";
const char *kCantAllocateMessage = "Can not allocate memory";
const char *kDataErrorMessage = "Data error";
static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
static void SzFree(void *p, void *address) { p = p; MyFree(address); }
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
#define kInBufferSize (1 << 15)
#define kOutBufferSize (1 << 15)
unsigned char g_InBuffer[kInBufferSize];
unsigned char g_OutBuffer[kOutBufferSize];
size_t MyReadFile(FILE *file, void *data, size_t size)
{ return fread(data, 1, size, file); }
int MyReadFileAndCheck(FILE *file, void *data, size_t size)
{ return (MyReadFile(file, data, size) == size); }
size_t MyWriteFile(FILE *file, const void *data, size_t size)
{
if (size == 0)
return 0;
return fwrite(data, 1, size, file);
}
int MyWriteFileAndCheck(FILE *file, const void *data, size_t size)
{ return (MyWriteFile(file, data, size) == size); }
long MyGetFileLength(FILE *file)
{
long length;
fseek(file, 0, SEEK_END);
length = ftell(file);
fseek(file, 0, SEEK_SET);
return length;
}
void PrintHelp(char *buffer)
{
strcat(buffer, "\nLZMA Utility 4.58 Copyright (c) 1999-2008 Igor Pavlov 2008-04-11\n"
"\nUsage: lzma <e|d> inputFile outputFile\n"
" e: encode file\n"
" d: decode file\n");
}
int PrintError(char *buffer, const char *message)
{
strcat(buffer, "\nError: ");
strcat(buffer, message);
strcat(buffer, "\n");
return 1;
}
int PrintErrorNumber(char *buffer, SRes val)
{
sprintf(buffer + strlen(buffer), "\nError code: %x\n", (unsigned)val);
return 1;
}
int PrintUserError(char *buffer)
{
return PrintError(buffer, "Incorrect command");
}
#define IN_BUF_SIZE (1 << 16)
#define OUT_BUF_SIZE (1 << 16)
static int Decode(FILE *inFile, FILE *outFile, char *rs)
{
UInt64 unpackSize;
int thereIsSize; /* = 1, if there is uncompressed size in headers */
int i;
int res = 0;
CLzmaDec state;
/* header: 5 bytes of LZMA properties and 8 bytes of uncompressed size */
unsigned char header[LZMA_PROPS_SIZE + 8];
/* Read and parse header */
if (!MyReadFileAndCheck(inFile, header, sizeof(header)))
return PrintError(rs, kCantReadMessage);
unpackSize = 0;
thereIsSize = 0;
for (i = 0; i < 8; i++)
{
unsigned char b = header[LZMA_PROPS_SIZE + i];
if (b != 0xFF)
thereIsSize = 1;
unpackSize += (UInt64)b << (i * 8);
}
LzmaDec_Construct(&state);
res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);
if (res != SZ_OK)
return res;
{
Byte inBuf[IN_BUF_SIZE];
Byte outBuf[OUT_BUF_SIZE];
size_t inPos = 0, inSize = 0, outPos = 0;
LzmaDec_Init(&state);
for (;;)
{
if (inPos == inSize)
{
inSize = MyReadFile(inFile, inBuf, IN_BUF_SIZE);
inPos = 0;
}
{
SizeT inProcessed = inSize - inPos;
SizeT outProcessed = OUT_BUF_SIZE - outPos;
ELzmaFinishMode finishMode = LZMA_FINISH_ANY;
ELzmaStatus status;
if (thereIsSize && outProcessed > unpackSize)
{
outProcessed = (SizeT)unpackSize;
finishMode = LZMA_FINISH_END;
}
res = LzmaDec_DecodeToBuf(&state, outBuf + outPos, &outProcessed,
inBuf + inPos, &inProcessed, finishMode, &status);
inPos += (UInt32)inProcessed;
outPos += outProcessed;
unpackSize -= outProcessed;
if (outFile != 0)
MyWriteFile(outFile, outBuf, outPos);
outPos = 0;
if (res != SZ_OK || thereIsSize && unpackSize == 0)
break;
if (inProcessed == 0 && outProcessed == 0)
{
if (thereIsSize || status != LZMA_STATUS_FINISHED_WITH_MARK)
res = SZ_ERROR_DATA;
break;
}
}
}
}
LzmaDec_Free(&state, &g_Alloc);
return res;
}
typedef struct _CFileSeqInStream
{
ISeqInStream funcTable;
FILE *file;
} CFileSeqInStream;
static SRes MyRead(void *p, void *buf, size_t *size)
{
if (*size == 0)
return SZ_OK;
*size = MyReadFile(((CFileSeqInStream*)p)->file, buf, *size);
/*
if (*size == 0)
return SZE_FAIL;
*/
return SZ_OK;
}
typedef struct _CFileSeqOutStream
{
ISeqOutStream funcTable;
FILE *file;
} CFileSeqOutStream;
static size_t MyWrite(void *pp, const void *buf, size_t size)
{
return MyWriteFile(((CFileSeqOutStream *)pp)->file, buf, size);
}
static SRes Encode(FILE *inFile, FILE *outFile, char *rs)
{
CLzmaEncHandle enc;
SRes res;
CFileSeqInStream inStream;
CFileSeqOutStream outStream;
CLzmaEncProps props;
enc = LzmaEnc_Create(&g_Alloc);
if (enc == 0)
return SZ_ERROR_MEM;
inStream.funcTable.Read = MyRead;
inStream.file = inFile;
outStream.funcTable.Write = MyWrite;
outStream.file = outFile;
LzmaEncProps_Init(&props);
res = LzmaEnc_SetProps(enc, &props);
if (res == SZ_OK)
{
Byte header[LZMA_PROPS_SIZE + 8];
size_t headerSize = LZMA_PROPS_SIZE;
UInt64 fileSize;
int i;
res = LzmaEnc_WriteProperties(enc, header, &headerSize);
fileSize = MyGetFileLength(inFile);
for (i = 0; i < 8; i++)
header[headerSize++] = (Byte)(fileSize >> (8 * i));
if (!MyWriteFileAndCheck(outFile, header, headerSize))
return PrintError(rs, "writing error");
if (res == SZ_OK)
res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable,
NULL, &g_Alloc, &g_Alloc);
}
LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
return res;
}
int main2(int numArgs, const char *args[], char *rs)
{
FILE *inFile = 0;
FILE *outFile = 0;
char c;
int res;
int encodeMode;
if (numArgs == 1)
{
PrintHelp(rs);
return 0;
}
if (numArgs < 3 || numArgs > 4 || strlen(args[1]) != 1)
return PrintUserError(rs);
c = args[1][0];
encodeMode = (c == 'e' || c == 'E');
if (!encodeMode && c != 'd' && c != 'D')
return PrintUserError(rs);
{
size_t t4 = sizeof(UInt32);
size_t t8 = sizeof(UInt64);
if (t4 != 4 || t8 != 8)
return PrintError(rs, "LZMA UTil needs correct UInt32 and UInt64");
}
inFile = fopen(args[2], "rb");
if (inFile == 0)
return PrintError(rs, "Can not open input file");
if (numArgs > 3)
{
outFile = fopen(args[3], "wb+");
if (outFile == 0)
return PrintError(rs, "Can not open output file");
}
else if (encodeMode)
PrintUserError(rs);
if (encodeMode)
{
res = Encode(inFile, outFile, rs);
}
else
{
res = Decode(inFile, outFile, rs);
}
if (outFile != 0)
fclose(outFile);
fclose(inFile);
if (res != SZ_OK)
{
if (res == SZ_ERROR_MEM)
return PrintError(rs, kCantAllocateMessage);
else if (res == SZ_ERROR_DATA)
return PrintError(rs, kDataErrorMessage);
else
return PrintErrorNumber(rs, res);
}
return 0;
}
int MY_CDECL main(int numArgs, const char *args[])
{
char rs[800] = { 0 };
int res = main2(numArgs, args, rs);
printf(rs);
return res;
}
Also you can see it at:
http://read.pudn.com/downloads151/sourcecode/zip/656407/7z460/C/LzmaUtil/LzmaUtil.c__.htm
http://read.pudn.com/downloads157/sourcecode/zip/698262/LZMA/LzmaUtil.c__.htm
I recently found a nice example, written in C++. Credit goes to GH user Treeki who published the original gist:
// note: -D_7ZIP_ST is required when compiling on non-Windows platforms
// g++ -o lzma_sample -std=c++14 -D_7ZIP_ST lzma_sample.cpp LzmaDec.c LzmaEnc.c LzFind.c
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <memory>
#include "LzmaEnc.h"
#include "LzmaDec.h"
static void *_lzmaAlloc(ISzAllocPtr, size_t size) {
return new uint8_t[size];
}
static void _lzmaFree(ISzAllocPtr, void *addr) {
if (!addr)
return;
delete[] reinterpret_cast<uint8_t *>(addr);
}
static ISzAlloc _allocFuncs = {
_lzmaAlloc, _lzmaFree
};
std::unique_ptr<uint8_t[]> lzmaCompress(const uint8_t *input, uint32_t inputSize, uint32_t *outputSize) {
std::unique_ptr<uint8_t[]> result;
// set up properties
CLzmaEncProps props;
LzmaEncProps_Init(&props);
if (inputSize >= (1 << 20))
props.dictSize = 1 << 20; // 1mb dictionary
else
props.dictSize = inputSize; // smaller dictionary = faster!
props.fb = 40;
// prepare space for the encoded properties
SizeT propsSize = 5;
uint8_t propsEncoded[5];
// allocate some space for the compression output
// this is way more than necessary in most cases...
// but better safe than sorry
// (a smarter implementation would use a growing buffer,
// but this requires a bunch of fuckery that is out of
/// scope for this simple example)
SizeT outputSize64 = inputSize * 1.5;
if (outputSize64 < 1024)
outputSize64 = 1024;
auto output = std::make_unique<uint8_t[]>(outputSize64);
int lzmaStatus = LzmaEncode(
output.get(), &outputSize64, input, inputSize,
&props, propsEncoded, &propsSize, 0,
NULL,
&_allocFuncs, &_allocFuncs);
*outputSize = outputSize64 + 13;
if (lzmaStatus == SZ_OK) {
// tricky: we have to generate the LZMA header
// 5 bytes properties + 8 byte uncompressed size
result = std::make_unique<uint8_t[]>(outputSize64 + 13);
uint8_t *resultData = result.get();
memcpy(resultData, propsEncoded, 5);
for (int i = 0; i < 8; i++)
resultData[5 + i] = (inputSize >> (i * 8)) & 0xFF;
memcpy(resultData + 13, output.get(), outputSize64);
}
return result;
}
std::unique_ptr<uint8_t[]> lzmaDecompress(const uint8_t *input, uint32_t inputSize, uint32_t *outputSize) {
if (inputSize < 13)
return NULL; // invalid header!
// extract the size from the header
UInt64 size = 0;
for (int i = 0; i < 8; i++)
size |= (input[5 + i] << (i * 8));
if (size <= (256 * 1024 * 1024)) {
auto blob = std::make_unique<uint8_t[]>(size);
ELzmaStatus lzmaStatus;
SizeT procOutSize = size, procInSize = inputSize - 13;
int status = LzmaDecode(blob.get(), &procOutSize, &input[13], &procInSize, input, 5, LZMA_FINISH_END, &lzmaStatus, &_allocFuncs);
if (status == SZ_OK && procOutSize == size) {
*outputSize = size;
return blob;
}
}
return NULL;
}
void hexdump(const uint8_t *buf, int size) {
int lines = (size + 15) / 16;
for (int i = 0; i < lines; i++) {
printf("%08x | ", i * 16);
int lineMin = i * 16;
int lineMax = lineMin + 16;
int lineCappedMax = (lineMax > size) ? size : lineMax;
for (int j = lineMin; j < lineCappedMax; j++)
printf("%02x ", buf[j]);
for (int j = lineCappedMax; j < lineMax; j++)
printf(" ");
printf("| ");
for (int j = lineMin; j < lineCappedMax; j++) {
if (buf[j] >= 32 && buf[j] <= 127)
printf("%c", buf[j]);
else
printf(".");
}
printf("\n");
}
}
void testIt(const uint8_t *input, int size) {
printf("Test Input:\n");
hexdump(input, size);
uint32_t compressedSize;
auto compressedBlob = lzmaCompress(input, size, &compressedSize);
if (compressedBlob) {
printf("Compressed:\n");
hexdump(compressedBlob.get(), compressedSize);
} else {
printf("Nope, we screwed it\n");
return;
}
// let's try decompressing it now
uint32_t decompressedSize;
auto decompressedBlob = lzmaDecompress(compressedBlob.get(), compressedSize, &decompressedSize);
if (decompressedBlob) {
printf("Decompressed:\n");
hexdump(decompressedBlob.get(), decompressedSize);
} else {
printf("Nope, we screwed it (part 2)\n");
return;
}
printf("----------\n");
}
void testIt(const char *string) {
testIt((const uint8_t *)string, strlen(string));
}
int main(int argc, char **argv) {
testIt("a");
testIt("here is a cool string");
testIt("here's something that should compress pretty well: abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef");
return 0;
}
You can refer to this file on how to use lzma2。
https://github.com/Tencent/libpag/blob/aab6391e455193c8ec5b8e2031b495b3fe77b034/test/framework/utils/LzmaUtil.cpp
/////////////////////////////////////////////////////////////////////////////////////////////////
//
// Tencent is pleased to support the open source community by making libpag available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// unless required by applicable law or agreed to in writing, software distributed under the
// license is distributed on an "as is" basis, without warranties or conditions of any kind,
// either express or implied. see the license for the specific language governing permissions
// and limitations under the license.
//
/////////////////////////////////////////////////////////////////////////////////////////////////
#include "LzmaUtil.h"
#include "test/framework/lzma/Lzma2DecMt.h"
#include "test/framework/lzma/Lzma2Enc.h"
namespace pag {
static void* LzmaAlloc(ISzAllocPtr, size_t size) {
return new uint8_t[size];
}
static void LzmaFree(ISzAllocPtr, void* address) {
if (!address) {
return;
}
delete[] reinterpret_cast<uint8_t*>(address);
}
static ISzAlloc gAllocFuncs = {LzmaAlloc, LzmaFree};
class SequentialOutStream {
public:
virtual ~SequentialOutStream() = default;
virtual bool write(const void* data, size_t size) = 0;
};
class SequentialInStream {
public:
virtual ~SequentialInStream() = default;
virtual bool read(void* data, size_t size, size_t* processedSize) = 0;
};
struct CSeqInStreamWrap {
ISeqInStream vt;
std::unique_ptr<SequentialInStream> inStream;
};
struct CSeqOutStreamWrap {
ISeqOutStream vt;
std::unique_ptr<SequentialOutStream> outStream;
};
class BuffPtrInStream : public SequentialInStream {
public:
explicit BuffPtrInStream(const uint8_t* buffer, size_t bufferSize)
: buffer(buffer), bufferSize(bufferSize) {
}
bool read(void* data, size_t size, size_t* processedSize) override {
if (processedSize) {
*processedSize = 0;
}
if (size == 0 || position >= bufferSize) {
return true;
}
auto remain = bufferSize - position;
if (remain > size) {
remain = size;
}
memcpy(data, static_cast<const uint8_t*>(buffer) + position, remain);
position += remain;
if (processedSize) {
*processedSize = remain;
}
return true;
}
private:
const uint8_t* buffer = nullptr;
size_t bufferSize = 0;
size_t position = 0;
};
class VectorOutStream : public SequentialOutStream {
public:
explicit VectorOutStream(std::vector<uint8_t>* buffer) : buffer(buffer) {
}
bool write(const void* data, size_t size) override {
auto oldSize = buffer->size();
buffer->resize(oldSize + size);
memcpy(&(*buffer)[oldSize], data, size);
return true;
}
private:
std::vector<uint8_t>* buffer;
};
class BuffPtrSeqOutStream : public SequentialOutStream {
public:
BuffPtrSeqOutStream(uint8_t* buffer, size_t size) : buffer(buffer), bufferSize(size) {
}
bool write(const void* data, size_t size) override {
auto remain = bufferSize - position;
if (remain > size) {
remain = size;
}
if (remain != 0) {
memcpy(buffer + position, data, remain);
position += remain;
}
return remain != 0 || size == 0;
}
private:
uint8_t* buffer = nullptr;
size_t bufferSize = 0;
size_t position = 0;
};
static const size_t kStreamStepSize = 1 << 31;
static SRes MyRead(const ISeqInStream* p, void* data, size_t* size) {
CSeqInStreamWrap* wrap = CONTAINER_FROM_VTBL(p, CSeqInStreamWrap, vt);
auto curSize = (*size < kStreamStepSize) ? *size : kStreamStepSize;
if (!wrap->inStream->read(data, curSize, &curSize)) {
return SZ_ERROR_READ;
}
*size = curSize;
return SZ_OK;
}
static size_t MyWrite(const ISeqOutStream* p, const void* buf, size_t size) {
auto* wrap = CONTAINER_FROM_VTBL(p, CSeqOutStreamWrap, vt);
if (wrap->outStream->write(buf, size)) {
return size;
}
return 0;
}
class Lzma2Encoder {
public:
Lzma2Encoder() {
encoder = Lzma2Enc_Create(&gAllocFuncs, &gAllocFuncs);
}
~Lzma2Encoder() {
Lzma2Enc_Destroy(encoder);
}
std::shared_ptr<Data> code(const std::shared_ptr<Data>& inputData) {
if (encoder == nullptr || inputData == nullptr || inputData->size() == 0) {
return nullptr;
}
auto inputSize = inputData->size();
CLzma2EncProps lzma2Props;
Lzma2EncProps_Init(&lzma2Props);
lzma2Props.lzmaProps.dictSize = inputSize;
lzma2Props.lzmaProps.level = 9;
lzma2Props.numTotalThreads = 4;
Lzma2Enc_SetProps(encoder, &lzma2Props);
std::vector<uint8_t> outBuf;
outBuf.resize(1 + 8);
outBuf[0] = Lzma2Enc_WriteProperties(encoder);
for (int i = 0; i < 8; i++) {
outBuf[1 + i] = static_cast<uint8_t>(inputSize >> (8 * i));
}
CSeqInStreamWrap inWrap = {};
inWrap.vt.Read = MyRead;
inWrap.inStream = std::make_unique<BuffPtrInStream>(
static_cast<const uint8_t*>(inputData->data()), inputSize);
CSeqOutStreamWrap outStream = {};
outStream.vt.Write = MyWrite;
outStream.outStream = std::make_unique<VectorOutStream>(&outBuf);
auto status =
Lzma2Enc_Encode2(encoder, &outStream.vt, nullptr, nullptr, &inWrap.vt, nullptr, 0, nullptr);
if (status != SZ_OK) {
return nullptr;
}
return Data::MakeWithCopy(&outBuf[0], outBuf.size());
}
private:
CLzma2EncHandle encoder = nullptr;
};
std::shared_ptr<Data> LzmaUtil::Compress(const std::shared_ptr<Data>& pixelData) {
Lzma2Encoder encoder;
return encoder.code(pixelData);
}
class Lzma2Decoder {
public:
Lzma2Decoder() {
decoder = Lzma2DecMt_Create(&gAllocFuncs, &gAllocFuncs);
}
~Lzma2Decoder() {
if (decoder) {
Lzma2DecMt_Destroy(decoder);
}
}
std::shared_ptr<Data> code(const std::shared_ptr<Data>& inputData) {
if (decoder == nullptr || inputData == nullptr || inputData->size() == 0) {
return nullptr;
}
auto input = static_cast<const uint8_t*>(inputData->data());
auto inputSize = inputData->size() - 9;
Byte prop = static_cast<const Byte*>(input)[0];
CLzma2DecMtProps props;
Lzma2DecMtProps_Init(&props);
props.inBufSize_ST = inputSize;
props.numThreads = 1;
UInt64 outBufferSize = 0;
for (int i = 0; i < 8; i++) {
outBufferSize |= (input[1 + i] << (i * 8));
}
auto outBuffer = new uint8_t[outBufferSize];
CSeqInStreamWrap inWrap = {};
inWrap.vt.Read = MyRead;
inWrap.inStream = std::make_unique<BuffPtrInStream>(input + 9, inputSize);
CSeqOutStreamWrap outWrap = {};
outWrap.vt.Write = MyWrite;
outWrap.outStream = std::make_unique<BuffPtrSeqOutStream>(outBuffer, outBufferSize);
UInt64 inProcessed = 0;
int isMT = false;
auto res = Lzma2DecMt_Decode(decoder, prop, &props, &outWrap.vt, &outBufferSize, 1, &inWrap.vt,
&inProcessed, &isMT, nullptr);
if (res == SZ_OK && inputSize == inProcessed) {
return Data::MakeAdopted(outBuffer, outBufferSize, Data::DeleteProc);
}
delete[] outBuffer;
return nullptr;
}
private:
CLzma2DecMtHandle decoder = nullptr;
};
std::shared_ptr<Data> LzmaUtil::Decompress(const std::shared_ptr<Data>& data) {
Lzma2Decoder decoder;
return decoder.code(data);
}
} // namespace pag