How to strip newlines from a char-array? - c++

I've put the contents of a file in a char-array using this function:
/**
 * Loads the file named by this->filename into a newly allocated,
 * NUL-terminated char array and stores the array in this->data.
 *
 * If the file cannot be opened or its size cannot be determined, the
 * function returns early and leaves this->data untouched.
 */
void Read::readFile(){
    FILE * fp = fopen(this->filename,"rt");
    if (fp == NULL) {
        return; // the fseek/ftell calls below must not see a null FILE*
    }
    fseek(fp, 0, SEEK_END);
    long size = ftell(fp);
    if (size < 0) {
        fclose(fp);
        return; // ftell failed; a negative size must not reach new[]
    }
    fseek(fp, 0, SEEK_SET);
    char *pData = new char[size + 1];
    // fread may deliver fewer bytes than ftell reported (short read, or
    // newline translation in text mode), so terminate after what was read.
    size_t bytesRead = fread(pData, sizeof(char), size, fp);
    pData[bytesRead] = '\0';
    fclose(fp);
    this->data = pData;
}
Now I want to strip all line-endings from the char-array.
How do I do this without casting the char-array into a string first?
btw. this is part of a homework where we aren't allowed to use the string-library.

#include <algorithm>
// std::remove moves every character other than '\n' to the front of the range
// and returns the new logical end; subtracting pData turns that into a length.
size = std::remove(pData, pData + size, '\n') - pData;
pData[size] = 0; // optional: re-terminate the shortened text
For some C++11 lambda fun:
#include <algorithm>
// Same compaction as std::remove, with the "is this a newline?" test written
// as a lambda predicate; the result is again converted to the new length.
size = std::remove_if(pData, pData + size, [](char c) { return c == '\n'; }) - pData;
pData[size] = 0; // optional: re-terminate the shortened text

The easiest approach is to make a second buffer the size of the original array.
int len = size;
char* newBufer = calloc(len,sizeof(char));
int i = 0;
int j = 0;
int nlCount = 0;
for(i=0; i<len; i++) {
if(pData[i] != '\n') {
newBuffer[j++] = pData[i];
} else {
nlCount++;
}
}
printf("Finished copying array without newlines. Total newlines removed: %d",nlCount);
An added benefit: because the buffer comes from calloc rather than malloc, every byte starts out as zero. Once copying is done, the bytes from index (len - nlCount) up to (len - 1) are still '\0', so the result is NUL-terminated like a C string, provided at least one newline was removed (allocate len + 1 bytes to guarantee a terminator in every case). Don't forget to free() the buffer when you are done.

In place removal:
/**
 * Removes every '\n' from the NUL-terminated string p, in place.
 *
 * The remaining characters keep their order and the result is
 * NUL-terminated. A null pointer is accepted and ignored.
 *
 * @param p String to compact; may be null.
 */
void strip_newlines(char* p) {
    if (p == 0) {
        return; // nothing to terminate; writing through q would be a null write
    }
    char* q = p; // write cursor; never gets ahead of the read cursor p
    for (; *p != '\0'; ++p) {
        if (*p != '\n') {
            *q++ = *p;
        }
    }
    *q = '\0';
}

Something like this:
/**
 * Reads the file named by this->filename, removes every line ending
 * ("\n" and "\r\n") from the contents, and stores the resulting buffer in
 * this->data and its length in this->size.
 *
 * The buffer is not NUL-terminated. If the file cannot be opened, neither
 * member is modified. If the size cannot be determined, data is set to NULL
 * and size to the negative value returned by ftell.
 */
void Read::readFile()
{
    FILE * fp = fopen(this->filename,"rt");
    if (fp)
    {
        char *pData = NULL;
        fseek(fp, 0, SEEK_END);
        long size = ftell(fp);
        if (size != -1L)
        {
            pData = new char[size];
            if (size > 0)
            {
                fseek(fp, 0, SEEK_SET);
                // Use the count actually read; it can be smaller than the file size.
                size = fread(pData, sizeof(char), size, fp);
            }
        }
        fclose(fp);
        if (size < 0)
        {
            delete[] pData;
            pData = NULL;
        }
        else if (size > 0)
        {
            char *start = pData;
            char *end = start + size;
            char *ptr = (char*) memchr(pData, '\n', size);
            while (ptr)
            {
                int len = 1;
                // Look at the character BEFORE the newline. The previous form,
                // (*ptr-1), subtracted 1 from '\n' itself and never equalled '\r'.
                if ((ptr > start) && (*(ptr-1) == '\r'))
                {
                    --ptr;
                    ++len;
                }
                // Close the gap left by the line ending and shrink the logical end.
                memmove(ptr, ptr+len, end - (ptr+len));
                end -= len;
                ptr = (char*) memchr(ptr, '\n', end - ptr);
            }
            size = (end - start);
        }
        this->data = pData;
        this->size = size;
    }
}

Related

Find a Pattern in a Process Heap Memory

I am trying to scan all heap memory regions from a Process and scan a pattern in them.
I am using x64, and Windows 10. I am inside the target Process just for testing purposes.
My code is:
std::vector<__int64> matches; // Holds all pattern matches
/**
 * Walks the address space of the current process region by region and appends
 * the address of every pattern match to the global `matches` vector.
 *
 * Each committed region is temporarily switched to PAGE_EXECUTE_READWRITE
 * while it is scanned, and its previous protection is restored afterwards.
 *
 * @param patternAddress Address of the reference pattern bytes.
 * @param mask           NUL-terminated string whose length is the pattern
 *                       length; it is forwarded to ComparePattern.
 * @return Always 0.
 */
int FindPattern(__int64 patternAddress, char * mask) {
    SYSTEM_INFO sysInfo; // Holds System Information
    GetSystemInfo(&sysInfo);
    __int64 procMin = (__int64)sysInfo.lpMinimumApplicationAddress; // Minimum memory address of process
    __int64 procMax = (__int64)sysInfo.lpMaximumApplicationAddress; // Maximum memory address of process
    MEMORY_BASIC_INFORMATION mBI, mBINext;
    DWORD firstOldProtect = 0;
    DWORD secondOldProtect = 0;
    DWORD discardedProtect = 0; // VirtualProtect fails if its last argument is NULL
    DWORD patternSize = (DWORD)strlen(mask);
    while (procMin < procMax) { // While still scanning memory
        if (VirtualQueryEx(GetCurrentProcess(), (LPVOID)procMin, &mBI, sizeof(MEMORY_BASIC_INFORMATION)) == 0) {
            break; // Query failed: mBI is stale, so there is no valid size to advance by
        }
        if (mBI.State == MEM_COMMIT) {
            // Full-width arithmetic: casting the base address to UINT dropped the upper 32 bits on x64.
            const __int64 regionEnd = (__int64)mBI.BaseAddress + (__int64)mBI.RegionSize;
            const __int64 nextRegion = procMin + (__int64)mBI.RegionSize;
            bool secondChanged = false; // True once the following region's protection was changed
            const BOOL firstChanged = VirtualProtect((LPVOID)procMin, mBI.RegionSize, PAGE_EXECUTE_READWRITE, &firstOldProtect);
            for (__int64 n = (__int64)mBI.BaseAddress; n < regionEnd; n += 0x01) { // For each byte in this region
                if (n + patternSize > procMax) { // Pattern would extend past the maximum address
                    break;
                }
                if (*(char*)n == (*(char*)patternAddress)) { // First byte of pattern matches current byte
                    if (n + patternSize < regionEnd) { // Entire pattern lies within this region
                        if (ComparePattern(n, patternAddress, mask)) {
                            matches.push_back(n);
                        }
                    }
                    else { // Pattern would spill into the following region
                        if (VirtualQueryEx(GetCurrentProcess(), (LPVOID)nextRegion, &mBINext, sizeof(MEMORY_BASIC_INFORMATION)) != 0
                            && mBINext.State == MEM_COMMIT) {
                            if (!secondChanged) {
                                // Change the protection only once, so that secondOldProtect
                                // keeps the ORIGINAL protection of the following region.
                                secondChanged = VirtualProtect((LPVOID)nextRegion, mBINext.RegionSize, PAGE_EXECUTE_READWRITE, &secondOldProtect) != FALSE;
                            }
                            if (ComparePattern(n, patternAddress, mask)) {
                                matches.push_back(n);
                            }
                        }
                    }
                }
            }
            if (firstChanged) { // Restore protection of the scanned region
                VirtualProtect((LPVOID)procMin, mBI.RegionSize, firstOldProtect, &discardedProtect);
            }
            if (secondChanged) { // Restore protection of the FOLLOWING region (not procMin)
                VirtualProtect((LPVOID)nextRegion, mBINext.RegionSize, secondOldProtect, &discardedProtect);
            }
        }
        procMin = procMin + (__int64) mBI.RegionSize; // Start scanning next region
    }
    return 0;
}
Then the ComparePattern function is:
/**
 * Compares the bytes at `address` with the bytes at `patternAddress`.
 *
 * @param address        Address of the candidate bytes.
 * @param patternAddress Address of the reference pattern bytes.
 * @param mask           NUL-terminated string, one character per pattern
 *                       byte; a '?' makes the byte at that index a wildcard.
 * @return true when every non-wildcard byte matches and `address` is not the
 *         pattern's own address; false otherwise.
 */
bool ComparePattern(__int64 address, __int64 patternAddress, char * mask) {
    if (address == patternAddress) { // Never report the pattern buffer itself as a match
        return false;
    }
    const __int64 patternLen = (__int64)strlen(mask);
    // Start at index 0: not every caller compares the first byte before
    // calling, and skipping it produced matches that differ in byte 0.
    for (__int64 i = 0; i < patternLen; i++) {
        if (mask[i] != '?' && *(char*)(address + i) != *(char*)(patternAddress + i)) {
            return false;
        }
    }
    return true;
}
I retrieve several memory blocks, but with this VirtualQueryEx code I am not able to find the specific memory region/block where the pattern is located.
The weird part is that when I test with the Heap APIs instead, I am able to identify the allocated memory and the specified pattern:
/**
 * Searches the heaps of the current process for the given pattern using
 * GetProcessHeaps / HeapWalk.
 *
 * @param patternAddress Address of the reference pattern bytes.
 * @param mask           NUL-terminated mask forwarded to ComparePattern; its
 *                       length is used as the pattern length.
 * @return Address of the first match, or 0 when nothing matched or the
 *         process has more than 250 heaps.
 */
__int64 ReturnMachHeapAPI(__int64 patternAddress, char * mask) {
    HANDLE hHeaps[250];
    DWORD numHeaps = GetProcessHeaps(250, hHeaps);
    if (numHeaps > 250) {
        return 0; // The array was too small, so hHeaps does not hold the heap list
    }
    const __int64 patternLen = (__int64)strlen(mask);
    for (DWORD i = 0; i < numHeaps; i++) {
        const BOOL locked = HeapLock(hHeaps[i]);
        PROCESS_HEAP_ENTRY entry;
        memset(&entry, '\0', sizeof entry);
        __int64 match = 0;
        while (match == 0 && HeapWalk(hHeaps[i], &entry) != FALSE) {
            if (entry.wFlags & PROCESS_HEAP_UNCOMMITTED_RANGE) {
                continue; // lpData refers to uncommitted memory; it must not be read
            }
            const __int64 first = (__int64)entry.lpData;
            const __int64 limit = first + (__int64)entry.cbData;
            // Stop while a whole pattern still fits inside this entry.
            for (__int64 ii = first; ii + patternLen <= limit; ii += 0x01) {
                if (ComparePattern(ii, patternAddress, mask)) {
                    match = ii;
                    break;
                }
            }
        }
        // Unlock on every path; returning from inside the walk left the heap locked.
        if (locked) {
            HeapUnlock(hHeaps[i]);
        }
        if (match != 0) {
            return match;
        }
    }
    return 0;
}
I would appreciate any hints on why the VirtualQueryEx code is not working as expected. One point worth mentioning is that my process has several modules (DLLs) along with the main executable.
Thanks so much.
EDIT:
I rewrote the VirtualQueryEx loop to use ReadProcessMemory, and it works perfectly now.
The working code is:
/**
 * Searches a buffer for the first occurrence of a masked byte pattern.
 *
 * @param pattern Pattern bytes; only the positions where mask is not '?'
 *                are compared.
 * @param mask    NUL-terminated string with one character per pattern byte.
 *                Its length defines the pattern length (the pattern itself
 *                may contain zero bytes, so it cannot be measured by strlen).
 * @param begin   Start of the buffer to search.
 * @param size    Number of readable bytes at `begin`.
 * @return Pointer to the first match inside the buffer, or 0 if there is none.
 */
char* InScan(char* pattern, char* mask, char* begin, unsigned int size)
{
    const unsigned int patternLength = (unsigned int)strlen(mask);
    // Without this guard, size - patternLength wraps around for short buffers
    // and the loop reads far beyond the end of the buffer.
    if (size == 0 || patternLength > size)
    {
        return 0;
    }
    // Inclusive bound: the window that ends exactly at the buffer end is valid too.
    const unsigned int lastStart = size - patternLength;
    for (unsigned int i = 0; i <= lastStart; i++)
    {
        bool found = true;
        for (unsigned int j = 0; j < patternLength; j++)
        {
            if (mask[j] != '?' && pattern[j] != *(begin + i + j))
            {
                found = false;
                break;
            }
        }
        if (found)
        {
            return (begin + i);
        }
    }
    return 0;
}
char * PatternScan(char* pattern, char* mask)
{
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
__int64 end = (__int64)sysInfo.lpMaximumApplicationAddress;
char* currentChunk = 0;
char* match = nullptr;
SIZE_T bytesRead;
while (currentChunk < (char *) end)
{
MEMORY_BASIC_INFORMATION mbi;
HANDLE process = GetCurrentProcess();
int hr = GetLastError();
if (!VirtualQueryEx(process, currentChunk, &mbi, sizeof(mbi)))
{
return 0;
}
char* buffer = 0;
if (mbi.State == MEM_COMMIT && mbi.Protect != PAGE_NOACCESS)
{
buffer = new char[mbi.RegionSize];
DWORD oldprotect;
if (VirtualProtectEx(process, mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &oldprotect))
{
ReadProcessMemory(process, mbi.BaseAddress, buffer, mbi.RegionSize, &bytesRead);
VirtualProtectEx(process, mbi.BaseAddress, mbi.RegionSize, oldprotect, &oldprotect);
char* internalAddress = InScan(pattern, mask, buffer, bytesRead);
if (internalAddress != 0)
{
//calculate from internal to external
__int64 offsetFromBuffer = internalAddress - buffer;
match = currentChunk + offsetFromBuffer;
delete[] buffer;
break;
}
}
}
currentChunk = currentChunk + mbi.RegionSize;
if (buffer) delete[] buffer;
buffer = 0;
}
return match;
}
I rewrote the VirtualQueryEx loop to use ReadProcessMemory, and it now works perfectly.
/**
 * Searches a buffer for the first occurrence of a masked byte pattern.
 *
 * @param pattern Pattern bytes; only the positions where mask is not '?'
 *                are compared.
 * @param mask    NUL-terminated string with one character per pattern byte;
 *                its length defines the pattern length.
 * @param begin   Start of the buffer to search.
 * @param size    Number of readable bytes at `begin`.
 * @return Pointer to the first match inside the buffer, or 0 if there is none.
 */
char* InScan(char* pattern, char* mask, char* begin, unsigned int size)
{
    const unsigned int patternLength = (unsigned int)strlen(mask);
    // Without this guard, size - patternLength wraps around for short buffers
    // and the loop reads far beyond the end of the buffer.
    if (size == 0 || patternLength > size)
    {
        return 0;
    }
    // Inclusive bound: the window that ends exactly at the buffer end is valid too.
    const unsigned int lastStart = size - patternLength;
    for (unsigned int i = 0; i <= lastStart; i++)
    {
        bool found = true;
        for (unsigned int j = 0; j < patternLength; j++)
        {
            if (mask[j] != '?' && pattern[j] != *(begin + i + j))
            {
                found = false;
                break;
            }
        }
        if (found)
        {
            return (begin + i);
        }
    }
    return 0;
}
char * PatternScan(char* pattern, char* mask)
{
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
__int64 end = (__int64)sysInfo.lpMaximumApplicationAddress;
char* currentChunk = 0;
char* match = nullptr;
SIZE_T bytesRead;
while (currentChunk < (char *) end)
{
MEMORY_BASIC_INFORMATION mbi;
HANDLE process = GetCurrentProcess();
int hr = GetLastError();
if (!VirtualQueryEx(process, currentChunk, &mbi, sizeof(mbi)))
{
return 0;
}
char* buffer = 0;
if (mbi.State == MEM_COMMIT && mbi.Protect != PAGE_NOACCESS)
{
buffer = new char[mbi.RegionSize];
DWORD oldprotect;
if (VirtualProtectEx(process, mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &oldprotect))
{
ReadProcessMemory(process, mbi.BaseAddress, buffer, mbi.RegionSize, &bytesRead);
VirtualProtectEx(process, mbi.BaseAddress, mbi.RegionSize, oldprotect, &oldprotect);
char* internalAddress = InScan(pattern, mask, buffer, bytesRead);
if (internalAddress != 0)
{
//calculate from internal to external
__int64 offsetFromBuffer = internalAddress - buffer;
match = currentChunk + offsetFromBuffer;
delete[] buffer;
break;
}
}
}
currentChunk = currentChunk + mbi.RegionSize;
if (buffer) delete[] buffer;
buffer = 0;
}
return match;
}

How does one locate a pointer error?

I am attempting to create a program to create a Markov chain but I am having pointer problems. When I run the Program I get a segmentation fault.
#include <stdio.h>
#include <cstring>
#include <cstdlib>
struct word;

// List node that refers to a word. Both pointers start out null.
struct nextword
{
    word* sourceword = 0; // initialized so a fresh node never holds a wild pointer
    word* next = 0;
};

int wordcount;

// One word read from the input text, chained to other words through `next`.
struct word
{
    char* wordstr = 0;          // text of the word; null until a string is attached
    struct word* next = 0;      // following node in the word list
    nextword* followingword = 0;
    int nextwordcount = 0;
};
int main()
{
word* firstword = 0;
char * buffer = 0;
long length;
FILE * f = fopen ("alice.txt", "rb");
if (f)
{
fseek (f, 0, SEEK_END);
length = ftell (f);
fseek (f, 0, SEEK_SET);
buffer = (char *)malloc (length);
if (buffer)
{
fread (buffer, 1, length, f);
}
fclose (f);
}
if (buffer)
{
char wordbuffer[500];
int fileindex = 0;
while(fileindex < length-1)
{
int wordindex = 0;
while(buffer[fileindex] != ' ')
{
wordbuffer[wordindex] = buffer[fileindex];
wordindex++;
fileindex++;
}
if(wordindex != 0)
{
wordbuffer[wordindex] = '\0';
word* newword = (word*)malloc(sizeof(word));
char* newwordstr = (char*)malloc((strlen(wordbuffer)+1)*sizeof(char));
strcpy(newword->wordstr, newwordstr);
if(!firstword)
{
firstword = newword;
}
else
{
word* testword = firstword;
while(!testword->next)
{
testword = (testword->next);
}
testword->next = newword;
printf(newword->wordstr);
}
}
return 0;
}
}
else
{
return 1;
}
}
I attempted to remove the file reading part and replace it with a hard coded string, but the problem remained.
You might want to read about STL and use a list. Or use a C list, see a couple of examples,
Adding node in front of linklist
How to pop element from tail in linked list?
Trying to make linkedlist in C
There are several problems; I fixed some of them, and the code now compiles.
I have annotated the code at the places where you need to fix bounds checking. The big problem was most likely the strcpy into the uninitialized char* word->wordstr:
#include <stdio.h>
#include <cstring>
#include <cstdlib>
struct word;

// List node that refers to a word. Both pointers start out null.
struct nextword
{
    word* sourceword = 0; // initialized so a fresh node never holds a wild pointer
    word* next = 0;
};

int wordcount;

// One word read from the input text, chained to other words through `next`.
struct word
{
    char* wordstr = 0;          // text of the word; null until a string is attached
    struct word* next = 0;      // following node in the word list
    nextword* followingword = 0;
    int nextwordcount = 0;
};
int main()
{
FILE* fh = NULL;
word* firstword = 0;
char* buffer = 0;
char* fname = "alice.txt";
long length = 0; //you did not initialize length
if ( (fh = fopen ("alice.txt", "rb")) )
{
//why not use fstat to get file size?
//why not use mmap to read file?
fseek (fh, 0, SEEK_END);
length = ftell (fh); //ok, length set here
fseek (fh, 0, SEEK_SET);
if( (buffer = (char *)malloc (length)) )
{
fread (buffer, 1, length, fh);
}
fclose (fh);
}
else
{
printf("error: cannot open %s",fname);
exit(1);
}
printf("read %s, %ld\n",fname,length);
if (!buffer)
{
printf("error: cannot open %s",fname);
exit(1);
//use exit, to return from main() //return 1;
}
//already checked buffer
{
int fileindex = 0;
//put wordbuffer after fileindex, avoids stackoverflow overwrite
char wordbuffer[500]; //500 bytes on stack, initialize?
memset(wordbuffer,0,sizeof(wordbuffer));
while(fileindex < length-1)
{
int wordindex = 0;
//several errors in this line, check for null terminator,
//check for newline, tab, basically any whitespace
//while(buffer[fileindex] != ' ')
while( buffer[fileindex] && buffer[fileindex] != ' ' )
{
wordbuffer[wordindex] = buffer[fileindex];
wordindex++;
fileindex++;
//here is another error, do not overflow your stack based buffer
if( wordindex>sizeof(buffer)-1 ) break; //do not overflow buffer
}
wordbuffer[wordindex] = '\0'; //terminate wordbuffer
//since you chose wordindex signed, you want it > 0
if(wordindex > 0)
{
//use a constructor
word* newword = (word*)malloc(sizeof(word));
//use a constructor
//or just use strdup, since it is just a cstring
char* newwordstr = strdup(wordbuffer);
//no, just set pointer to the above allocated string
//strcpy(newword->wordstr, newwordstr);
newword->wordstr = newwordstr;
if(!firstword)
{
firstword = newword;
}
else
{
word* testword = firstword;
while(!testword->next)
{
testword = (testword->next);
}
testword->next = newword;
printf(newword->wordstr);
}
}
return 0;
}
}
exit(0); //done
}
This compiles and runs without error, but you still need to look up linked-list handling: implement a proper linked list, and then add the word elements to that list.

How to use LZMA SDK in C++?

I am having difficulty using the LZMA SDK in my application.
I would like to create a kind of single-file compression tool. I don't need any directory support, only the LZMA2 stream, but I have no idea how the LZMA SDK is supposed to be used for this.
Can anyone give me a small example of how the LZMA SDK can be used from C++?
I think this is a suitably small example of how to use the LZMA SDK.
/* LzmaUtil.c -- Test application for LZMA compression
2008-08-05
Igor Pavlov
public domain */
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../LzmaDec.h"
#include "../LzmaEnc.h"
#include "../Alloc.h"
const char *kCantReadMessage = "Can not read input file";
const char *kCantWriteMessage = "Can not write output file";
const char *kCantAllocateMessage = "Can not allocate memory";
const char *kDataErrorMessage = "Data error";
/* ISzAlloc allocation callback: forwards to MyAlloc; the context pointer is unused. */
static void *SzAlloc(void *p, size_t size)
{
  (void)p;
  return MyAlloc(size);
}
/* ISzAlloc release callback: forwards to MyFree; the context pointer is unused. */
static void SzFree(void *p, void *address)
{
  (void)p;
  MyFree(address);
}
static ISzAlloc g_Alloc = { SzAlloc, SzFree };
#define kInBufferSize (1 << 15)
#define kOutBufferSize (1 << 15)
unsigned char g_InBuffer[kInBufferSize];
unsigned char g_OutBuffer[kOutBufferSize];
/* Reads up to `size` bytes from `file` into `data`; returns the number of bytes read. */
size_t MyReadFile(FILE *file, void *data, size_t size)
{
  const size_t bytesRead = fread(data, 1, size, file);
  return bytesRead;
}
/* Returns 1 when exactly `size` bytes could be read from `file`, otherwise 0. */
int MyReadFileAndCheck(FILE *file, void *data, size_t size)
{
  if (MyReadFile(file, data, size) != size)
    return 0;
  return 1;
}
/* Writes `size` bytes from `data` to `file` and returns the number of bytes
   written. A zero-length request returns 0 without calling fwrite. */
size_t MyWriteFile(FILE *file, const void *data, size_t size)
{
  return (size != 0) ? fwrite(data, 1, size, file) : 0;
}
/* Returns 1 when all `size` bytes were written to `file`, otherwise 0. */
int MyWriteFileAndCheck(FILE *file, const void *data, size_t size)
{
  if (MyWriteFile(file, data, size) != size)
    return 0;
  return 1;
}
/* Returns the end offset of `file` as reported by ftell and leaves the stream
   positioned at the beginning. */
long MyGetFileLength(FILE *file)
{
  long endOffset;
  fseek(file, 0, SEEK_END);
  endOffset = ftell(file);
  rewind_to_start:
  fseek(file, 0, SEEK_SET);
  return endOffset;
}
/* Appends the program banner and the usage text to the string in `buffer`. */
void PrintHelp(char *buffer)
{
  static const char kHelpText[] =
    "\nLZMA Utility 4.58 Copyright (c) 1999-2008 Igor Pavlov 2008-04-11\n"
    "\nUsage: lzma <e|d> inputFile outputFile\n"
    " e: encode file\n"
    " d: decode file\n";
  strcat(buffer, kHelpText);
}
/* Appends "\nError: <message>\n" to the string in `buffer`; always returns 1. */
int PrintError(char *buffer, const char *message)
{
  char *tail = buffer + strlen(buffer);
  sprintf(tail, "\nError: %s\n", message);
  return 1;
}
/* Appends "\nError code: <val in hex>\n" to the string in `buffer`; always returns 1. */
int PrintErrorNumber(char *buffer, SRes val)
{
  char *tail = buffer + strlen(buffer);
  const unsigned code = (unsigned)val;
  sprintf(tail, "\nError code: %x\n", code);
  return 1;
}
/* Reports a command-line usage error through PrintError and returns its result. */
int PrintUserError(char *buffer)
{
  static const char kIncorrectCommand[] = "Incorrect command";
  return PrintError(buffer, kIncorrectCommand);
}
#define IN_BUF_SIZE (1 << 16)
#define OUT_BUF_SIZE (1 << 16)
static int Decode(FILE *inFile, FILE *outFile, char *rs)
{
UInt64 unpackSize;
int thereIsSize; /* = 1, if there is uncompressed size in headers */
int i;
int res = 0;
CLzmaDec state;
/* header: 5 bytes of LZMA properties and 8 bytes of uncompressed size */
unsigned char header[LZMA_PROPS_SIZE + 8];
/* Read and parse header */
if (!MyReadFileAndCheck(inFile, header, sizeof(header)))
return PrintError(rs, kCantReadMessage);
unpackSize = 0;
thereIsSize = 0;
for (i = 0; i < 8; i++)
{
unsigned char b = header[LZMA_PROPS_SIZE + i];
if (b != 0xFF)
thereIsSize = 1;
unpackSize += (UInt64)b << (i * 8);
}
LzmaDec_Construct(&state);
res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);
if (res != SZ_OK)
return res;
{
Byte inBuf[IN_BUF_SIZE];
Byte outBuf[OUT_BUF_SIZE];
size_t inPos = 0, inSize = 0, outPos = 0;
LzmaDec_Init(&state);
for (;;)
{
if (inPos == inSize)
{
inSize = MyReadFile(inFile, inBuf, IN_BUF_SIZE);
inPos = 0;
}
{
SizeT inProcessed = inSize - inPos;
SizeT outProcessed = OUT_BUF_SIZE - outPos;
ELzmaFinishMode finishMode = LZMA_FINISH_ANY;
ELzmaStatus status;
if (thereIsSize && outProcessed > unpackSize)
{
outProcessed = (SizeT)unpackSize;
finishMode = LZMA_FINISH_END;
}
res = LzmaDec_DecodeToBuf(&state, outBuf + outPos, &outProcessed,
inBuf + inPos, &inProcessed, finishMode, &status);
inPos += (UInt32)inProcessed;
outPos += outProcessed;
unpackSize -= outProcessed;
if (outFile != 0)
MyWriteFile(outFile, outBuf, outPos);
outPos = 0;
if (res != SZ_OK || thereIsSize && unpackSize == 0)
break;
if (inProcessed == 0 && outProcessed == 0)
{
if (thereIsSize || status != LZMA_STATUS_FINISHED_WITH_MARK)
res = SZ_ERROR_DATA;
break;
}
}
}
}
LzmaDec_Free(&state, &g_Alloc);
return res;
}
/* Input stream object handed to the encoder: the ISeqInStream callback table
   followed by the stdio file the callbacks read from. MyRead casts the pointer
   it receives to CFileSeqInStream*, so funcTable has to stay the first member. */
typedef struct _CFileSeqInStream
{
ISeqInStream funcTable;
FILE *file;
} CFileSeqInStream;
static SRes MyRead(void *p, void *buf, size_t *size)
{
if (*size == 0)
return SZ_OK;
*size = MyReadFile(((CFileSeqInStream*)p)->file, buf, *size);
/*
if (*size == 0)
return SZE_FAIL;
*/
return SZ_OK;
}
/* Output stream object handed to the encoder: the ISeqOutStream callback table
   followed by the stdio file the callbacks write to. MyWrite casts the pointer
   it receives to CFileSeqOutStream*, so funcTable has to stay the first member. */
typedef struct _CFileSeqOutStream
{
ISeqOutStream funcTable;
FILE *file;
} CFileSeqOutStream;
static size_t MyWrite(void *pp, const void *buf, size_t size)
{
return MyWriteFile(((CFileSeqOutStream *)pp)->file, buf, size);
}
static SRes Encode(FILE *inFile, FILE *outFile, char *rs)
{
CLzmaEncHandle enc;
SRes res;
CFileSeqInStream inStream;
CFileSeqOutStream outStream;
CLzmaEncProps props;
enc = LzmaEnc_Create(&g_Alloc);
if (enc == 0)
return SZ_ERROR_MEM;
inStream.funcTable.Read = MyRead;
inStream.file = inFile;
outStream.funcTable.Write = MyWrite;
outStream.file = outFile;
LzmaEncProps_Init(&props);
res = LzmaEnc_SetProps(enc, &props);
if (res == SZ_OK)
{
Byte header[LZMA_PROPS_SIZE + 8];
size_t headerSize = LZMA_PROPS_SIZE;
UInt64 fileSize;
int i;
res = LzmaEnc_WriteProperties(enc, header, &headerSize);
fileSize = MyGetFileLength(inFile);
for (i = 0; i < 8; i++)
header[headerSize++] = (Byte)(fileSize >> (8 * i));
if (!MyWriteFileAndCheck(outFile, header, headerSize))
return PrintError(rs, "writing error");
if (res == SZ_OK)
res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable,
NULL, &g_Alloc, &g_Alloc);
}
LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
return res;
}
/* Command-line driver: parses "lzma <e|d> inputFile [outputFile]", runs the
   encoder or decoder, and appends help or error text to rs.

   numArgs, args: the program arguments as received by main.
   rs:            message buffer; it must already hold a NUL-terminated string.

   Returns 0 on success (or after printing the help text) and a non-zero value
   after any error. */
int main2(int numArgs, const char *args[], char *rs)
{
  FILE *inFile = 0;
  FILE *outFile = 0;
  char c;
  int res;
  int encodeMode;
  if (numArgs == 1)
  {
    PrintHelp(rs);
    return 0;
  }
  if (numArgs < 3 || numArgs > 4 || strlen(args[1]) != 1)
    return PrintUserError(rs);
  c = args[1][0];
  encodeMode = (c == 'e' || c == 'E');
  if (!encodeMode && c != 'd' && c != 'D')
    return PrintUserError(rs);
  {
    size_t t4 = sizeof(UInt32);
    size_t t8 = sizeof(UInt64);
    if (t4 != 4 || t8 != 8)
      return PrintError(rs, "LZMA UTil needs correct UInt32 and UInt64");
  }
  inFile = fopen(args[2], "rb");
  if (inFile == 0)
    return PrintError(rs, "Can not open input file");
  if (numArgs > 3)
  {
    outFile = fopen(args[3], "wb+");
    if (outFile == 0)
    {
      fclose(inFile); /* do not leak the input handle on this error path */
      return PrintError(rs, "Can not open output file");
    }
  }
  else if (encodeMode)
  {
    /* Encoding without an output file used to continue and write through a
       null FILE pointer; stop here instead. */
    fclose(inFile);
    return PrintUserError(rs);
  }
  if (encodeMode)
  {
    res = Encode(inFile, outFile, rs);
  }
  else
  {
    res = Decode(inFile, outFile, rs);
  }
  if (outFile != 0)
    fclose(outFile);
  fclose(inFile);
  if (res != SZ_OK)
  {
    if (res == SZ_ERROR_MEM)
      return PrintError(rs, kCantAllocateMessage);
    else if (res == SZ_ERROR_DATA)
      return PrintError(rs, kDataErrorMessage);
    else
      return PrintErrorNumber(rs, res);
  }
  return 0;
}
/* Program entry point: runs main2 and prints the message text it collected. */
int MY_CDECL main(int numArgs, const char *args[])
{
  char rs[800] = { 0 };
  int res = main2(numArgs, args, rs);
  /* rs can contain text that should not be interpreted as a format string,
     so it is written out verbatim. */
  fputs(rs, stdout);
  return res;
}
Also you can see it at:
http://read.pudn.com/downloads151/sourcecode/zip/656407/7z460/C/LzmaUtil/LzmaUtil.c__.htm
http://read.pudn.com/downloads157/sourcecode/zip/698262/LZMA/LzmaUtil.c__.htm
I recently found a nice example, written in C++. Credit goes to GH user Treeki who published the original gist:
// note: -D_7ZIP_ST is required when compiling on non-Windows platforms
// g++ -o lzma_sample -std=c++14 -D_7ZIP_ST lzma_sample.cpp LzmaDec.c LzmaEnc.c LzFind.c
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <memory>
#include "LzmaEnc.h"
#include "LzmaDec.h"
// ISzAlloc allocation callback: hands out a new[]-allocated byte array of the
// requested size. The allocator context argument is unused.
static void *_lzmaAlloc(ISzAllocPtr, size_t size) {
    uint8_t *block = new uint8_t[size];
    return block;
}
// ISzAlloc release callback: frees a block obtained from _lzmaAlloc.
// Null pointers are ignored.
static void _lzmaFree(ISzAllocPtr, void *addr) {
    if (addr) {
        delete[] reinterpret_cast<uint8_t *>(addr);
    }
}
static ISzAlloc _allocFuncs = {
_lzmaAlloc, _lzmaFree
};
/**
 * Compresses a memory block with LzmaEncode and prepends the 13-byte header
 * (5 bytes of encoded properties + 8 bytes little-endian uncompressed size).
 *
 * @param input      Data to compress.
 * @param inputSize  Number of bytes at `input`.
 * @param outputSize Receives the compressed size plus the 13 header bytes.
 * @return Header and compressed data, or an empty pointer if LzmaEncode did
 *         not return SZ_OK.
 */
std::unique_ptr<uint8_t[]> lzmaCompress(const uint8_t *input, uint32_t inputSize, uint32_t *outputSize) {
    std::unique_ptr<uint8_t[]> result;
    // set up properties
    CLzmaEncProps props;
    LzmaEncProps_Init(&props);
    if (inputSize >= (1u << 20))
        props.dictSize = 1 << 20; // 1mb dictionary
    else
        props.dictSize = inputSize; // smaller dictionary = faster!
    props.fb = 40;
    // prepare space for the encoded properties
    SizeT propsSize = 5;
    uint8_t propsEncoded[5];
    // Allocate 1.5x the input size (at least 1 KiB) for the output. This is
    // far more than needed in most cases; a smarter implementation would use
    // a growing buffer, which is out of scope for this simple example.
    // Integer arithmetic gives the same result as the former `* 1.5`.
    SizeT outputSize64 = static_cast<SizeT>(inputSize) + inputSize / 2;
    if (outputSize64 < 1024)
        outputSize64 = 1024;
    auto output = std::make_unique<uint8_t[]>(outputSize64);
    int lzmaStatus = LzmaEncode(
        output.get(), &outputSize64, input, inputSize,
        &props, propsEncoded, &propsSize, 0,
        NULL,
        &_allocFuncs, &_allocFuncs);
    *outputSize = outputSize64 + 13;
    if (lzmaStatus == SZ_OK) {
        // tricky: we have to generate the LZMA header
        // 5 bytes properties + 8 byte uncompressed size
        result = std::make_unique<uint8_t[]>(outputSize64 + 13);
        uint8_t *resultData = result.get();
        memcpy(resultData, propsEncoded, 5);
        // Widen before shifting: shifting a 32-bit value by 32 or more bits
        // is undefined and could put garbage into the upper four size bytes.
        const uint64_t unpackedSize = inputSize;
        for (int i = 0; i < 8; i++)
            resultData[5 + i] = static_cast<uint8_t>((unpackedSize >> (i * 8)) & 0xFF);
        memcpy(resultData + 13, output.get(), outputSize64);
    }
    return result;
}
std::unique_ptr<uint8_t[]> lzmaDecompress(const uint8_t *input, uint32_t inputSize, uint32_t *outputSize) {
if (inputSize < 13)
return NULL; // invalid header!
// extract the size from the header
UInt64 size = 0;
for (int i = 0; i < 8; i++)
size |= (input[5 + i] << (i * 8));
if (size <= (256 * 1024 * 1024)) {
auto blob = std::make_unique<uint8_t[]>(size);
ELzmaStatus lzmaStatus;
SizeT procOutSize = size, procInSize = inputSize - 13;
int status = LzmaDecode(blob.get(), &procOutSize, &input[13], &procInSize, input, 5, LZMA_FINISH_END, &lzmaStatus, &_allocFuncs);
if (status == SZ_OK && procOutSize == size) {
*outputSize = size;
return blob;
}
}
return NULL;
}
/**
 * Prints `size` bytes of `buf` to stdout, 16 bytes per row: the row offset in
 * hex, the byte values in hex, and a character column in which bytes outside
 * the range 32..127 are shown as '.'.
 */
void hexdump(const uint8_t *buf, int size) {
    const int rowCount = (size + 15) / 16;
    for (int row = 0; row < rowCount; ++row) {
        const int first = row * 16;
        const int limit = first + 16;
        const int last = (limit > size) ? size : limit; // end of real data in this row
        printf("%08x | ", first);
        // Hex column: real bytes first, then filler for a short final row.
        for (int k = first; k < limit; ++k) {
            if (k < last)
                printf("%02x ", buf[k]);
            else
                printf(" ");
        }
        printf("| ");
        // Character column.
        for (int k = first; k < last; ++k) {
            const bool shown = !(buf[k] < 32 || buf[k] > 127);
            if (shown)
                printf("%c", buf[k]);
            else
                printf(".");
        }
        printf("\n");
    }
}
/**
 * Round-trip demo: dumps the input, compresses it, dumps the compressed
 * block, decompresses it again and dumps the result. Stops with a message as
 * soon as one of the two steps fails.
 */
void testIt(const uint8_t *input, int size) {
    printf("Test Input:\n");
    hexdump(input, size);

    uint32_t packedSize;
    auto packed = lzmaCompress(input, size, &packedSize);
    if (!packed) {
        printf("Nope, we screwed it\n");
        return;
    }
    printf("Compressed:\n");
    hexdump(packed.get(), packedSize);

    // let's try decompressing it now
    uint32_t unpackedSize;
    auto unpacked = lzmaDecompress(packed.get(), packedSize, &unpackedSize);
    if (!unpacked) {
        printf("Nope, we screwed it (part 2)\n");
        return;
    }
    printf("Decompressed:\n");
    hexdump(unpacked.get(), unpackedSize);

    printf("----------\n");
}
// Convenience overload: runs the round-trip demo on the characters of a C
// string (without the terminating NUL).
void testIt(const char *string) {
    const uint8_t *bytes = (const uint8_t *)string;
    testIt(bytes, strlen(string));
}
// Runs the round-trip demo on three sample strings of increasing length.
int main(int argc, char **argv) {
    const char *samples[] = {
        "a",
        "here is a cool string",
        "here's something that should compress pretty well: abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdef",
    };
    for (const char *sample : samples) {
        testIt(sample);
    }
    return 0;
}
You can refer to this file for an example of how to use LZMA2.
https://github.com/Tencent/libpag/blob/aab6391e455193c8ec5b8e2031b495b3fe77b034/test/framework/utils/LzmaUtil.cpp
/////////////////////////////////////////////////////////////////////////////////////////////////
//
// Tencent is pleased to support the open source community by making libpag available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// unless required by applicable law or agreed to in writing, software distributed under the
// license is distributed on an "as is" basis, without warranties or conditions of any kind,
// either express or implied. see the license for the specific language governing permissions
// and limitations under the license.
//
/////////////////////////////////////////////////////////////////////////////////////////////////
#include "LzmaUtil.h"
#include "test/framework/lzma/Lzma2DecMt.h"
#include "test/framework/lzma/Lzma2Enc.h"
namespace pag {
// ISzAlloc allocation callback: hands out a new[]-allocated byte array of the
// requested size. The allocator context argument is unused.
static void* LzmaAlloc(ISzAllocPtr, size_t size) {
  uint8_t* block = new uint8_t[size];
  return block;
}
// ISzAlloc release callback: frees a block obtained from LzmaAlloc.
// Null pointers are ignored.
static void LzmaFree(ISzAllocPtr, void* address) {
  if (address) {
    delete[] reinterpret_cast<uint8_t*>(address);
  }
}
static ISzAlloc gAllocFuncs = {LzmaAlloc, LzmaFree};
// Abstract byte sink. write() receives `size` bytes at `data`; the MyWrite
// callback treats a true return value as "all bytes were accepted".
class SequentialOutStream {
public:
virtual ~SequentialOutStream() = default;
virtual bool write(const void* data, size_t size) = 0;
};
// Abstract byte source. read() is asked for up to `size` bytes into `data`
// and reports the number delivered through `processedSize`; the MyRead
// callback maps a false return value to SZ_ERROR_READ.
class SequentialInStream {
public:
virtual ~SequentialInStream() = default;
virtual bool read(void* data, size_t size, size_t* processedSize) = 0;
};
// Couples the SDK's ISeqInStream callback table with the C++ stream that
// serves it. MyRead recovers this wrapper from a pointer to `vt` via
// CONTAINER_FROM_VTBL.
struct CSeqInStreamWrap {
ISeqInStream vt;
std::unique_ptr<SequentialInStream> inStream;
};
// Couples the SDK's ISeqOutStream callback table with the C++ stream that
// serves it. MyWrite recovers this wrapper from a pointer to `vt` via
// CONTAINER_FROM_VTBL.
struct CSeqOutStreamWrap {
ISeqOutStream vt;
std::unique_ptr<SequentialOutStream> outStream;
};
// Input stream that serves bytes from a caller-owned memory block. The block
// is not copied; only the pointer and the size are stored.
class BuffPtrInStream : public SequentialInStream {
 public:
  explicit BuffPtrInStream(const uint8_t* buffer, size_t bufferSize)
      : buffer(buffer), bufferSize(bufferSize) {
  }

  // Copies up to `size` of the remaining bytes into `data`, advances the read
  // position and stores the number of bytes copied in `processedSize` (when
  // it is not null). At the end of the data 0 bytes are reported. Always
  // returns true.
  bool read(void* data, size_t size, size_t* processedSize) override {
    size_t copied = 0;
    if (size != 0 && position < bufferSize) {
      const size_t available = bufferSize - position;
      copied = (available > size) ? size : available;
      memcpy(data, static_cast<const uint8_t*>(buffer) + position, copied);
      position += copied;
    }
    if (processedSize) {
      *processedSize = copied;
    }
    return true;
  }

 private:
  const uint8_t* buffer = nullptr;
  size_t bufferSize = 0;
  size_t position = 0;  // offset of the next unread byte
};
// Output stream that appends everything written to it to a caller-owned
// std::vector<uint8_t>. The vector has to outlive the stream.
class VectorOutStream : public SequentialOutStream {
 public:
  explicit VectorOutStream(std::vector<uint8_t>* buffer) : buffer(buffer) {
  }

  // Appends `size` bytes from `data` to the vector. Always returns true.
  bool write(const void* data, size_t size) override {
    if (size == 0) {
      // Nothing to append. Indexing the vector at its current size, as the
      // previous memcpy-based version did, is out of range.
      return true;
    }
    const auto* bytes = static_cast<const uint8_t*>(data);
    buffer->insert(buffer->end(), bytes, bytes + size);
    return true;
  }

 private:
  std::vector<uint8_t>* buffer;
};
// Output stream that writes into a fixed-size, caller-owned memory block.
class BuffPtrSeqOutStream : public SequentialOutStream {
 public:
  BuffPtrSeqOutStream(uint8_t* buffer, size_t size) : buffer(buffer), bufferSize(size) {
  }

  // Copies as many of the `size` bytes as still fit into the block.
  // Returns true only when all of them were stored. A truncated write used
  // to be reported as success, which made the MyWrite callback claim that
  // every byte had been written.
  bool write(const void* data, size_t size) override {
    auto remain = bufferSize - position;
    if (remain > size) {
      remain = size;
    }
    if (remain != 0) {
      memcpy(buffer + position, data, remain);
      position += remain;
    }
    return remain == size;
  }

 private:
  uint8_t* buffer = nullptr;
  size_t bufferSize = 0;
  size_t position = 0;  // offset of the next free byte
};
// Upper bound for a single read request (2 GiB). The shift is done in size_t:
// `1 << 31` overflows int, and the resulting negative value turns into a huge
// size_t on 64-bit targets instead of 2^31.
static const size_t kStreamStepSize = static_cast<size_t>(1) << 31;

// ISeqInStream read callback. Forwards the request, capped at
// kStreamStepSize, to the wrapped SequentialInStream and stores the number of
// bytes delivered in *size. Returns SZ_ERROR_READ when the stream reports a
// failure, SZ_OK otherwise.
static SRes MyRead(const ISeqInStream* p, void* data, size_t* size) {
  CSeqInStreamWrap* wrap = CONTAINER_FROM_VTBL(p, CSeqInStreamWrap, vt);
  auto curSize = (*size < kStreamStepSize) ? *size : kStreamStepSize;
  if (!wrap->inStream->read(data, curSize, &curSize)) {
    return SZ_ERROR_READ;
  }
  *size = curSize;
  return SZ_OK;
}
// ISeqOutStream write callback. Forwards the bytes to the wrapped
// SequentialOutStream and returns `size` when it reports success, 0 otherwise.
static size_t MyWrite(const ISeqOutStream* p, const void* buf, size_t size) {
  auto* wrap = CONTAINER_FROM_VTBL(p, CSeqOutStreamWrap, vt);
  const bool accepted = wrap->outStream->write(buf, size);
  return accepted ? size : 0;
}
// Owns an LZMA2 encoder handle and compresses a Data block into
// [1 property byte][8-byte little-endian uncompressed size][LZMA2 stream].
class Lzma2Encoder {
 public:
  Lzma2Encoder() {
    encoder = Lzma2Enc_Create(&gAllocFuncs, &gAllocFuncs);
  }

  ~Lzma2Encoder() {
    // Creation can fail (code() checks for that), so guard the destroy call
    // in the same way Lzma2Decoder does.
    if (encoder) {
      Lzma2Enc_Destroy(encoder);
    }
  }

  // The handle has a single owner; a copy would destroy it twice.
  Lzma2Encoder(const Lzma2Encoder&) = delete;
  Lzma2Encoder& operator=(const Lzma2Encoder&) = delete;

  // Compresses `inputData`. Returns nullptr when no encoder is available,
  // the input is null or empty, the properties are rejected, or encoding
  // fails.
  std::shared_ptr<Data> code(const std::shared_ptr<Data>& inputData) {
    if (encoder == nullptr || inputData == nullptr || inputData->size() == 0) {
      return nullptr;
    }
    auto inputSize = inputData->size();
    CLzma2EncProps lzma2Props;
    Lzma2EncProps_Init(&lzma2Props);
    lzma2Props.lzmaProps.dictSize = inputSize;
    lzma2Props.lzmaProps.level = 9;
    lzma2Props.numTotalThreads = 4;
    if (Lzma2Enc_SetProps(encoder, &lzma2Props) != SZ_OK) {
      return nullptr;
    }
    std::vector<uint8_t> outBuf;
    outBuf.resize(1 + 8);
    outBuf[0] = Lzma2Enc_WriteProperties(encoder);
    // Widen before shifting so that the shift stays defined where size_t has
    // only 32 bits.
    const uint64_t unpackedSize = inputSize;
    for (int i = 0; i < 8; i++) {
      outBuf[1 + i] = static_cast<uint8_t>(unpackedSize >> (8 * i));
    }
    CSeqInStreamWrap inWrap = {};
    inWrap.vt.Read = MyRead;
    inWrap.inStream = std::make_unique<BuffPtrInStream>(
        static_cast<const uint8_t*>(inputData->data()), inputSize);
    CSeqOutStreamWrap outStream = {};
    outStream.vt.Write = MyWrite;
    outStream.outStream = std::make_unique<VectorOutStream>(&outBuf);
    auto status =
        Lzma2Enc_Encode2(encoder, &outStream.vt, nullptr, nullptr, &inWrap.vt, nullptr, 0, nullptr);
    if (status != SZ_OK) {
      return nullptr;
    }
    return Data::MakeWithCopy(&outBuf[0], outBuf.size());
  }

 private:
  CLzma2EncHandle encoder = nullptr;
};
// Compresses `pixelData` with a temporary Lzma2Encoder and returns whatever
// its code() method returns.
std::shared_ptr<Data> LzmaUtil::Compress(const std::shared_ptr<Data>& pixelData) {
  return Lzma2Encoder().code(pixelData);
}
class Lzma2Decoder {
public:
Lzma2Decoder() {
decoder = Lzma2DecMt_Create(&gAllocFuncs, &gAllocFuncs);
}
~Lzma2Decoder() {
if (decoder) {
Lzma2DecMt_Destroy(decoder);
}
}
std::shared_ptr<Data> code(const std::shared_ptr<Data>& inputData) {
if (decoder == nullptr || inputData == nullptr || inputData->size() == 0) {
return nullptr;
}
auto input = static_cast<const uint8_t*>(inputData->data());
auto inputSize = inputData->size() - 9;
Byte prop = static_cast<const Byte*>(input)[0];
CLzma2DecMtProps props;
Lzma2DecMtProps_Init(&props);
props.inBufSize_ST = inputSize;
props.numThreads = 1;
UInt64 outBufferSize = 0;
for (int i = 0; i < 8; i++) {
outBufferSize |= (input[1 + i] << (i * 8));
}
auto outBuffer = new uint8_t[outBufferSize];
CSeqInStreamWrap inWrap = {};
inWrap.vt.Read = MyRead;
inWrap.inStream = std::make_unique<BuffPtrInStream>(input + 9, inputSize);
CSeqOutStreamWrap outWrap = {};
outWrap.vt.Write = MyWrite;
outWrap.outStream = std::make_unique<BuffPtrSeqOutStream>(outBuffer, outBufferSize);
UInt64 inProcessed = 0;
int isMT = false;
auto res = Lzma2DecMt_Decode(decoder, prop, &props, &outWrap.vt, &outBufferSize, 1, &inWrap.vt,
&inProcessed, &isMT, nullptr);
if (res == SZ_OK && inputSize == inProcessed) {
return Data::MakeAdopted(outBuffer, outBufferSize, Data::DeleteProc);
}
delete[] outBuffer;
return nullptr;
}
private:
CLzma2DecMtHandle decoder = nullptr;
};
// Decompresses data with a short-lived Lzma2Decoder and returns whatever its code() returns.
std::shared_ptr<Data> LzmaUtil::Decompress(const std::shared_ptr<Data>& data) {
  return Lzma2Decoder().code(data);
}
} // namespace pag

Reading Multiple Files Parallel into a buffer

I am working on a project where I have to read a set of files into a buffer. The list contains small as well as large files. To read them more efficiently, I tried using multiple threads: each thread takes a file name from a vector of file names, reads that file into a buffer, and the buffers are then put into a queue. My program has an error, but I don't know exactly where it occurs or why. Please help me find out whether there is a mistake in my logic or in my code, and how to correct it. Thanks in advance.
using namespace std;
// Number of reader threads that main() creates.
#define MAX_THREADS 2
// Size in bytes of the buffer each reader thread allocates (8 * 1024 * 1024).
#define BUFFER_SIZE 8388608
// File names still waiting to be read; main() fills it and ReadThread removes entries from the front.
vector<string>files;
// Queue that ReadThread appends to after reading file data.
deque<string>bufferq;
// Critical section that ReadThread enters while taking the next name from `files`.
CRITICAL_SECTION Readlock;
// Incremented by ReadThread each time it writes a file's name/size header into its buffer.
int count = 0;
// Thread entry point passed to CreateThread in main(); defined below.
DWORD WINAPI ReadThread(LPVOID s);
int main(int argc,char *argv[])
{
HANDLE ReadT[MAX_THREADS];
char *filelist[5];
DWORD threadid;
filelist[0] = "1.txt";
filelist[1] = "cloudy.jpg";
filelist[2] = "connectify.exe";
filelist[3] = "VMware.exe";
filelist[4] = "Sherlock.mp4";
for(int i=0;i<5;i++)
files.push_back(filelist[i]);
InitializeCriticalSection(&Readlock);
long t1 = GetTickCount();
for(int k = 0; k< MAX_THREADS; k++)
ReadT[k] = CreateThread(NULL,0,ReadThread,NULL,NULL,&threadid);
WaitForMultipleObjects(MAX_THREADS,ReadT,TRUE,INFINITE);
cout << " Time Taken "<< GetTickCount()-t1 << "ms" ;
system("pause");
return 0;
}
DWORD WINAPI ReadThread(LPVOID s)
{
long pending = 0;
//int freespace = BUFFER_SIZE;
char *filename = new char[50];
char fsize[10];
string file;
char *buf;
buf = new char[BUFFER_SIZE];
long filesize = 0;
int numfiles = files.size();
int filled = 0;
int i = 0;
FILE *fp;
char* ptr;
ptr = buf;
while(true)
{
EnterCriticalSection(&Readlock);
if(files.empty())
{
LeaveCriticalSection(&Readlock);
break;
}
else
{
file = files.front();
files.erase(files.begin());
LeaveCriticalSection(&Readlock);
}
bool buff_full = false;
buf = ptr;
int freespace = BUFFER_SIZE;
memset(buf,0,BUFFER_SIZE);
if(!buff_full)
{
if(pending == 0)
{
fp = fopen(file.c_str(),"rb");
if(!fp)
{
cout<<"\nNo such file";
cout<<files[i];
system("pause");
return 0;
}
int r1 =fseek(fp, 0L, SEEK_END);
filesize = ftell(fp);
int r2 =fseek(fp, 0L, SEEK_SET);
sprintf(fsize, "%ld", filesize);
if(freespace >= (strlen(fsize) + strlen(file.c_str()) + 2))
{
count++;
memcpy(buf, file.c_str(), strlen(file.c_str())+1);
freespace = freespace - strlen(file.c_str()) - 1;
buf += strlen(file.c_str()) + 1;
memcpy(buf,fsize,strlen(fsize)+1);
buf += strlen(fsize) + 1;
freespace = freespace - strlen(fsize) - 1;
cout<<"Files read is "<<count<<"\n";
if(freespace == 0)
{
buff_full = true;
pending = filesize;
break;
}
}
else
{
filled = BUFFER_SIZE - freespace;
fclose(fp);
break;
}
if(freespace >= filesize)
{
fread(buf, 1, filesize, fp);
buf += filesize;
freespace = freespace - filesize;
bufferq.push_back(buf);
//cout << "pop"<<bufferq.size();
//i++;
if(files.empty())
{
filled = BUFFER_SIZE - freespace;
fclose(fp);
break;
}
fclose(fp);
}
else
{
fread(buf, 1, freespace, fp);
bufferq.push_back(buf);
//cout <<"pop "<<bufferq.size();
buff_full = true;
}
}
else
{
if(freespace >= pending)
{
fread(buf, 1, pending, fp);
bufferq.push_back(buf);
freespace = freespace - pending;
pending = 0;
//i++;
if(files.empty())
{
filled = BUFFER_SIZE - freespace;
fclose(fp);
break;
}
if(freespace > 0)
buf += pending;
else
buff_full = true;
fclose(fp);
}
else
{
fread(buf, 1, freespace, fp);
bufferq.push_back(buf);
cout << bufferq.size();
pending = pending - freespace;
buff_full = true;
}
}
}
if(buff_full)
{
buf = ptr;
cout << "popping buffer " << bufferq.size();
//bufferq.pop_back();
}
}
return 0;
}
Given that the bug occurs with big files, I suspect that this line is causing the problem:
sprintf(fsize, "%ld", filesize);
fsize is a char[10], so if filesize is >= 1,000,000,000 (ten digits), sprintf writes the terminating '\0' past the end of the fsize array. This causes the "Run-Time Check Failure #2 - Stack around the variable 'fsize' was corrupted." error that you described. Please check the sizes of your test files.
In addition, you fill files in a loop over i, and later you wrote:
files.erase(files.begin());
// ...
cout<<"\nNo such file";
cout<<files[i];
By that point files[i] refers to a different element, because you have been erasing elements from the front of the vector; and if files is empty on the last iteration, accessing files[i] will crash.
Also, why do you copy file and fsize into buf if you never copy them into bufferq?
Because bufferq is written to and is shared between threads, access to it should be protected by a lock, such as the critical section you already chose.
That's my little code review.

c++ socket binary file

I have this function to get the contents of a file:
#define BUFSIZE 512
// Reads the whole file at pFile in binary mode and returns its bytes, one per element,
// each in the range 0..255.
//
// Returns an empty vector when the file cannot be opened, its size cannot be determined,
// or it is empty. If the file turns out to be shorter than the size reported, only the bytes
// actually read are returned.
std::vector<int> getContFile(char* pFile) {
    std::vector<int> vTmp;
    std::ifstream vCin(pFile, std::ios::binary);
    if (!vCin)
        return vTmp;

    // Determine the file length by seeking to the end.
    vCin.seekg(0, std::ios::end);
    const std::streamoff size = vCin.tellg();
    vCin.seekg(0, std::ios::beg);
    if (size <= 0 || !vCin)
        return vTmp;

    vTmp.reserve(static_cast<std::size_t>(size));
    // Count with streamoff so sizes beyond the range of int do not overflow the counter.
    for (std::streamoff i = 0; i < size; ++i) {
        const int vByte = vCin.get();
        if (!vCin)
            break;  // do not store the EOF marker as if it were data
        vTmp.push_back(vByte);
    }
    return vTmp;
}
and this one to send it to the server:
void SendFile() {
SendS("upFileUser");
int i;
vector<int> vTmp = getContFile("/usr/home/alex/Desktop/eval.tar");
for (i = 0; i < vTmp.size(); i += BUFSIZE) {
char *vBuff = new char[BUFSIZE];
for (int j = i; j < BUFSIZE; j++)
vBuff[j] = (char(vTmp[i]));
SendS(vBuff);
}
if (i < (vTmp.size() - 1)) {
char *vBuff = new char[vTmp.size() - i];
for (int j = 0; j < vTmp.size() - i; j++)
vBuff[j + i] = (char(vTmp[j + i]));
SendS(vBuff);
}
sendS("endOfFileTransmision");
}
void SendS(char* pSir) {
int vLen = strlen(pSir);
write(pSocket, &vLen, sizeof (int));
write(pSocket, pSir, vLen);
}
This is the receive function:
// Reads exactly vLen bytes from vFd into pData, retrying after partial reads.
// Returns false when read() reports an error or end of stream before vLen bytes arrived.
static bool readAllBytes(int vFd, char* pData, size_t vLen) {
    while (vLen > 0) {
        const ssize_t vGot = read(vFd, pData, vLen);
        if (vGot <= 0)
            return false;
        pData += vGot;
        vLen -= static_cast<size_t>(vGot);
    }
    return true;
}

// Receives one length-prefixed block: an int byte count followed by that many bytes.
// Returns a new[]-allocated buffer holding the payload plus one extra '\0', so it is safe to
// treat as a C string; the caller must delete[] it. When pLen is not NULL it receives the
// payload length. Returns NULL (length 0) for an empty block or on any read failure.
// NOTE(review): the original read the length from pSocket but the payload from vSocket;
// pSocket is used for both here — confirm which descriptor the receiver should use.
static char* reciveBlock(int* pLen) {
    if (pLen != NULL)
        *pLen = 0;
    int vCt = 0;
    if (!readAllBytes(pSocket, reinterpret_cast<char*>(&vCt), sizeof(vCt)) || vCt <= 0)
        return NULL;
    char* vTmp = new char[static_cast<size_t>(vCt) + 1];
    if (!readAllBytes(pSocket, vTmp, static_cast<size_t>(vCt))) {
        delete[] vTmp;
        return NULL;
    }
    vTmp[vCt] = '\0';
    if (pLen != NULL)
        *pLen = vCt;
    return vTmp;
}

// Receives one block and returns it as a NUL-terminated, new[]-allocated buffer,
// or NULL for an empty block or a read failure. The caller must delete[] the result.
char* reciveS() {
    return reciveBlock(NULL);
}

// Receives a file transfer: expects an "upFileUser" block, then writes every following block
// to "data2.tar" until an "endOfFileTransmision" block arrives.
// Returns true only when the end marker was received and the output file was written without
// error; returns false for any other first block, a failed read, or a file error.
bool receveFile(void) {
    static const char kBegin[] = "upFileUser";
    static const char kEnd[] = "endOfFileTransmision";

    char* vReceve = reciveS();
    const bool vIsUpload = vReceve != NULL && strcmp(vReceve, kBegin) == 0;
    delete[] vReceve;
    if (!vIsUpload)
        return false;

    std::ofstream vCoutFile;
    vCoutFile.open("data2.tar", std::ios::out | std::ios::binary);
    if (!vCoutFile)
        return false;

    bool vComplete = false;
    while (true) {
        int vLen = 0;
        char* vTmp = reciveBlock(&vLen);
        if (vTmp == NULL)
            break;  // connection closed or read error before the end marker
        const bool vIsEnd = static_cast<size_t>(vLen) == sizeof(kEnd) - 1 &&
                            memcmp(vTmp, kEnd, sizeof(kEnd) - 1) == 0;
        if (!vIsEnd)
            vCoutFile.write(vTmp, vLen);  // write(), not <<, so embedded '\0' bytes are kept
        delete[] vTmp;
        if (vIsEnd) {
            vComplete = true;
            break;
        }
    }
    vCoutFile.close();
    return vComplete && !vCoutFile.fail();
}
The result is a broken pipe (I run this on FreeBSD 6.4 amd with the g++ compiler). What am I missing? The connection is good: I can transfer text from client to server and back. The problem is only with binary files.
I see two problems with your code:
You are making a lot of allocations (new) but you never free the memory.
In the SendS function you are taking the string length, but the data in that "string" is from a vector of integers and is binary. This means that the data can contain the string-terminating '\0' character (the integer 0).
Besides that, I really don't follow what you are doing. Instead of reading into a vector, create a char buffer, allocate enough memory to hold the whole file in it (char *buffer = new char[length_of_file]), and send it, preceded by the length of the buffer.
Something like this:
// Reads the whole file at pFile in binary mode into a new[]-allocated buffer.
//
// Returns (number of bytes read, buffer); the caller owns the buffer and must delete[] it.
// Returns (0, NULL) when the file cannot be opened or its size cannot be determined.
std::pair<size_t, char *> getContFile(const char *pFile)
{
    std::ifstream vCin(pFile, std::ios::binary);
    if (!vCin)
        return std::make_pair(static_cast<size_t>(0), static_cast<char *>(NULL));

    vCin.seekg(0, std::ios::end);
    const std::streamoff size = vCin.tellg();
    vCin.seekg(0, std::ios::beg);
    // tellg() reports -1 on failure; never pass that on as an allocation size.
    if (size < 0 || !vCin)
        return std::make_pair(static_cast<size_t>(0), static_cast<char *>(NULL));

    char *buffer = new char[static_cast<size_t>(size)];
    vCin.read(buffer, static_cast<std::streamsize>(size));
    // Report what was actually read in case the file is shorter than the size reported.
    return std::make_pair(static_cast<size_t>(vCin.gcount()), buffer);
}
// Sends the "upFileUser" marker and then the whole file, each through SendS with an explicit
// length, and frees the file buffer afterwards.
void SendFile()
{
    // SendS takes a non-const char*, so pass a writable array rather than a string literal
    // (a literal does not convert to char* since C++11).
    char vCommand[] = "upFileUser";
    SendS(vCommand, sizeof(vCommand) - 1);

    std::pair<size_t, char *> vTmp = getContFile("/usr/home/alex/Desktop/eval.tar");
    SendS(vTmp.second, vTmp.first);
    delete [] vTmp.second;
}
// Sends `length` bytes from `buffer` on pSocket, preceded by the length as a 4-byte value in
// network byte order. Partial writes are retried; sending stops at the first write error.
// Nothing is sent when the length does not fit in 32 bits or when buffer is NULL with a
// non-zero length.
void SendS(char *buffer, size_t length)
{
    if (length > 0xFFFFFFFFu || (buffer == NULL && length > 0))
        return;

    // htonl works on 32-bit values: keep the prefix in a uint32_t so exactly four bytes go out,
    // whatever sizeof(size_t) is on this platform.
    const uint32_t tmp = htonl(static_cast<uint32_t>(length));
    const char *header = reinterpret_cast<const char *>(&tmp);
    size_t headerLeft = sizeof(tmp);
    while (headerLeft > 0)
    {
        ssize_t sent = write(pSocket, header, headerLeft);
        if (sent <= 0)
        {
            // Some kind of error
            return;
        }
        header += sent;
        headerLeft -= sent;
    }

    // Send the buffer
    while (length > 0)
    {
        ssize_t sent = write(pSocket, buffer, length);
        if (sent <= 0)
        {
            // Some kind of error
            break;
        }
        buffer += sent;
        length -= sent;
    }
}
Do something similar on the receiving side.