ifstream read reading random char for the whole stream - c++

I'm trying to implement the function from listing 5.1 here
but when copying into a buffer with read from a file I just get the same character (Í) for the whole array, where string.txt is a copy and paste from the previous link content.
Here is my code:
#include <iostream>
#include <fstream>
#include <string>
#include <cinttypes>
#include <cstdio>
#include <cstring>
const int block_size = 0x4000; //16KB
/**
 * Reports whether the byte sequence stringToSearch occurs inside buffer.
 *
 * Only the first searchLength bytes of buffer are examined; a candidate that
 * would run past that limit is never compared, so no byte outside the
 * searched region is read.
 *
 * @param buffer               start of the region to scan
 * @param searchLength         number of valid bytes in buffer
 * @param stringToSearch       bytes to look for
 * @param stringToSearchLength number of bytes in stringToSearch
 * @return 1 when a complete match lies inside the region, 0 otherwise
 *         (also 0 when either length is not positive)
 */
int search(char* buffer, int searchLength, char* stringToSearch, int stringToSearchLength) {
    if (searchLength <= 0 || stringToSearchLength <= 0)
        return 0;
    while (searchLength >= stringToSearchLength) {
        // Look for the first byte only where a full match can still fit.
        const int candidates = searchLength - stringToSearchLength + 1;
        char *potentialMatch = static_cast<char *>(memchr(buffer, *stringToSearch, candidates));
        if (potentialMatch == NULL)
            break;
        // The first byte already matched; compare the rest (zero bytes when length is 1).
        if (memcmp(potentialMatch + 1, stringToSearch + 1, stringToSearchLength - 1) == 0)
            return 1;
        // Resume just after the failed candidate.
        searchLength -= static_cast<int>(potentialMatch - buffer) + 1;
        buffer = potentialMatch + 1;
    }
    return 0;
}
/**
 * Scans string.txt block by block for a fixed phrase and reports whether it
 * was found.
 *
 * The file is read in block_size pieces. The last (needle length - 1) bytes
 * of every piece are carried to the front of the buffer before the next read
 * so that a match straddling two pieces is still seen.
 *
 * @return 1 when the file cannot be opened, 0 otherwise
 */
int main(int argc, char* argv[]) {
    char toSearch[] = "Interpreting Where"; // array: search() takes a non-const char*
    const int toSearchLength = static_cast<int>(strlen(toSearch));
    const int overlap = toSearchLength - 1;

    std::ifstream myFile("string.txt");
    if (myFile.fail()) {
        std::cout << "Cannot open file" << std::endl;
        return 1;
    }
    myFile.seekg(0, std::ios::end); //Get file's size
    const int64_t fileSize = myFile.tellg();
    myFile.seekg(0, std::ios::beg); // rewind, otherwise every read starts at end-of-file

    int found = 0;
    if (fileSize >= toSearchLength) {
        // Layout: [carried tail | fresh block | zero padding]. The padding keeps
        // any comparison that begins near the end of the valid data away from
        // stale bytes left over from an earlier, longer read.
        char *buffer = new char[overlap + block_size + overlap];
        int carried = 0;
        while (!found) {
            myFile.read(buffer + carried, block_size);
            const int got = static_cast<int>(myFile.gcount());
            if (got == 0)
                break; // end of file (or read error)
            const int valid = carried + got;
            memset(buffer + valid, 0, overlap);
            found = search(buffer, valid, toSearch, toSearchLength);
            // Keep the tail for the next round; memmove because the ranges may overlap.
            carried = valid < overlap ? valid : overlap;
            memmove(buffer, buffer + valid - carried, carried);
        }
        delete[] buffer;
    }

    if (found) {
        std::cout << "String found" << std::endl;
    } else {
        std::cout << "String not found" << std::endl;
    }
    myFile.close();
    return 0;
}
I hope you can help me see what I'm doing wrong, thanks!

You are setting myFile's position to ios_base::end with seekg:
myFile.seekg(0, ios::end);
Then trying to read from it:
myFile.read(buffer, block_size);
Clearly no data will be read, since myFile is already positioned at ios_base::end, so you end up looking at whatever uninitialized data was already in buffer.
What you probably intended to do was to set your myFile position back to the beginning by doing this before reading:
myFile.seekg(0, ios::beg);

Related

How to use mmap for integer input?

I have coded a program that uses mmap as input to fill a integer 2D vector from a .txt file. The code is part of a larger program and will be submitted to a competition. Is there a way to improve the speed using mmap, or by using a different way all together? Here is the code:
#include <algorithm>
#include <cctype>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// for mmap:
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
using namespace std;
const char* map_file(const char* fname, size_t& length);
int main()
{
auto start = std::chrono::high_resolution_clock::now();
size_t length;
auto f = map_file("erasmus.in", length);
auto l = f + length;
int i = 0;
bool flag = false;
string lines;
vector<vector<int> > students(10000); //The number of lines is predefined
const char* temp;
while (f && f!=l) {
string element = "";
temp = static_cast<const char*>(memchr(f, '\n', l-f));
for(f = f; f<=temp; f++)
{
if(!isspace(*f))
{
element += *f;
flag = true;
}
if(isspace(*f) && flag == true)
{
flag = false;
int assigned_element = stoi(element);
students[i].push_back(assigned_element);
element = "";
}
}
i++;
temp++;
}
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed1 = finish - start;
FILE* output = fopen("erasmus.out", "w");
for (int i = 0; i < students.size(); i++)
{
for (int j = 0; j < students[i].size(); j++)
{
fprintf(output, "%d ", students[i][j]);
}
fprintf(output, "\n");
}
std::cout << "Elapsed time: " << elapsed1.count() << " s\n";
return 0;
}
/**
 * Prints msg through perror and terminates the process with exit status 255.
 *
 * @param msg text handed to perror
 */
void handle_error(const char* msg)
{
    perror(msg);
    exit(255); // does not return
}
/**
 * Maps a file read-only into memory.
 *
 * @param directory path of the file to map
 * @param length    receives the file size in bytes
 * @return pointer to the first mapped byte, or NULL (with length == 0) for an
 *         empty file, which mmap cannot map. Failures of open, fstat or mmap
 *         are reported through handle_error.
 */
const char* map_file(const char* directory, size_t& length)
{
    int fileDirectory = open(directory, O_RDONLY);
    if (fileDirectory == -1)
        handle_error("open");
    // obtain file size
    struct stat sb;
    if (fstat(fileDirectory, &sb) == -1)
        handle_error("fstat");
    length = sb.st_size;
    if (length == 0) {
        close(fileDirectory);
        return NULL;
    }
    void* mapping = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fileDirectory, 0u);
    if (mapping == MAP_FAILED)
        handle_error("mmap");
    // The mapping stays valid after the descriptor is closed.
    close(fileDirectory);
    return static_cast<const char*>(mapping);
}
The file will be executed on a linux system, if this helps to find the optimal answer. At the end of each line of the .txt
there is a space character (' ') and a newline('\n')

Adding values to char* var from txt file

Example file:
[16bpp] Ponete el cinturon *-*
arfield
Nothing (cumpleanios):
Alkon^
~~|Tampon)
[16bpp] Chico Tonto.
Budin
16bpp] Leooooooooo!!!!!
Ev
16bpp] fedee
etamod
:) mAnKeAno
I want each name on a different array position...
I tried this code:
// Attempt to split bots.txt into one name per line (does not compile as written).
int c;
FILE *file;
file = fopen("bots.txt", "r");
if (file){
// NOTE(review): buffer is never pointed at any storage before it is written to below.
char *buffer;
// A single array of 1000 chars, not an array of 1000 strings.
char *jugadores = new char[1000];
// NOTE(review): p and pos are read below without ever being initialised.
int p;
int pos;
while ((c = getc(file)) != EOF){
if (c == '\n'){
// jugadores[p] is a single char, so it cannot be passed to strcpy as a destination.
strcpy(jugadores[p], buffer);
p++;
// Rebinds buffer to a string literal instead of emptying it.
buffer = "";
pos = 0;
} else {
buffer[pos] = c;
pos++;
}
}
fclose(file);
}
But it doesn't even compile...
In php the right code would be something like this:
// Read the whole file into one string.
$data = file_get_contents("file.txt");
// Split that string at every newline into an array of names.
$names = explode("\n", $data);
Your code has several flaws in it. You need something more like this instead:
// Reads bots.txt one character at a time and collects every line as a
// separately allocated, NUL-terminated string in a growable array.
FILE *file = fopen("bots.txt", "r");
if (file)
{
    // Array of line pointers, grown in steps of 1000.
    char** lines = new char*[1000];
    int maxlines = 1000;
    int numlines = 0;
    // Scratch buffer for the line being read, grown in steps of 1024.
    char *buffer = new char[1024];
    int maxbuf = 1024;
    int buflen = 0;
    char *line;
    int c;
    while ((c = getc(file)) != EOF)
    {
        if (c == '\n')
        {
            // End of line: make room in the pointer array if it is full.
            if (numlines == maxlines)
            {
                char** tmplines = new char*[maxlines+1000];
                memcpy(tmplines, lines, sizeof(char*)*maxlines);
                delete[] lines;
                lines = tmplines;
                maxlines += 1000;
            }
            // Copy the scratch buffer into its own terminated string.
            line = new char[buflen+1];
            memcpy(line, buffer, sizeof(char)*buflen);
            line[buflen] = 0;
            lines[numlines] = line;
            numlines++;
            buflen = 0;
        }
        else
        {
            // Ordinary character: enlarge the scratch buffer when it is full.
            if (buflen == maxbuf)
            {
                char* tmpbuf = new char[maxbuf+1024];
                memcpy(tmpbuf, buffer, sizeof(char)*maxbuf);
                delete[] buffer;
                buffer = tmpbuf;
                maxbuf += 1024;
            }
            buffer[buflen] = c;
            buflen++;
        }
    }
    fclose(file);
    // The last line may not end with '\n'; store whatever is left.
    if (buflen > 0)
    {
        if (numlines == maxlines)
        {
            char** tmplines = new char*[maxlines+1000];
            memcpy(tmplines, lines, sizeof(char*)*maxlines);
            delete[] lines;
            lines = tmplines;
            maxlines += 1000;
        }
        line = new char[buflen+1];
        memcpy(line, buffer, sizeof(char)*buflen);
        line[buflen] = 0;
        lines[numlines] = line;
        numlines++;
    }
    delete[] buffer;
    // use lines up to numlines elements as needed...
    for (int i = 0; i < numlines; i++)
        printf("%s\n", lines[i]);
    // Release every line, then the pointer array itself.
    for (int i = 0; i < numlines; ++i)
        delete[] lines[i];
    delete[] lines;
}
With that said, since you are using C++, you should use C++ classes that will help manage everything for you. Try something more like this instead:
#include <cstddef>
#include <fstream>
#include <iostream>
#include <ostream>
#include <string>
#include <vector>
// Reads bots.txt line by line into a vector of strings and prints each line.
std::ifstream file("bots.txt");
if (file.is_open())
{
    std::string line;
    std::vector<std::string> lines;
    while (std::getline(file, line))
        lines.push_back(line);
    file.close();
    // use lines as needed...
    // size_t index: matches the type returned by lines.size()
    for (std::size_t i = 0; i < lines.size(); i++)
        std::cout << lines[i] << std::endl;
}

Reading Multiple Files Parallel into a buffer

I am working on a project where I have to read a set of files and put them into a buffer. The list comprises small as well as large files. To make reading more efficient I tried implementing it with multiple threads: each thread takes a file from a vector of file names, reads it into a buffer, and these buffers have to be put in a queue. There is an error somewhere in my program, but I don't know exactly where it occurs or why. Please tell me whether there is a mistake in my logic or in my code, and how to correct it. Thanks in advance.
using namespace std;
#define MAX_THREADS 2
#define BUFFER_SIZE 8388608
vector<string>files;
deque<string>bufferq;
CRITICAL_SECTION Readlock;
int count = 0;
DWORD WINAPI ReadThread(LPVOID s);
int main(int argc,char *argv[])
{
HANDLE ReadT[MAX_THREADS];
char *filelist[5];
DWORD threadid;
filelist[0] = "1.txt";
filelist[1] = "cloudy.jpg";
filelist[2] = "connectify.exe";
filelist[3] = "VMware.exe";
filelist[4] = "Sherlock.mp4";
for(int i=0;i<5;i++)
files.push_back(filelist[i]);
InitializeCriticalSection(&Readlock);
long t1 = GetTickCount();
for(int k = 0; k< MAX_THREADS; k++)
ReadT[k] = CreateThread(NULL,0,ReadThread,NULL,NULL,&threadid);
WaitForMultipleObjects(MAX_THREADS,ReadT,TRUE,INFINITE);
cout << " Time Taken "<< GetTickCount()-t1 << "ms" ;
system("pause");
return 0;
}
/**
 * Worker thread: repeatedly takes one file name from the shared `files`
 * list, reads that file and appends its contents to the shared `bufferq`
 * in pieces of at most BUFFER_SIZE bytes.
 *
 * The first piece of every file starts with a header: the file name, a NUL,
 * the size as decimal text, a NUL. Every access to files, bufferq, count and
 * the progress output happens while Readlock is held.
 *
 * NOTE(review): the original queued a pointer positioned after the data it
 * had just read; queuing the filled part of the buffer is assumed to be the
 * intent - confirm against whatever consumes bufferq.
 *
 * @param s unused
 * @return always 0; the thread stops early when a file cannot be opened
 */
DWORD WINAPI ReadThread(LPVOID s)
{
    char *buf = new char[BUFFER_SIZE];
    while(true)
    {
        // Take the next file name under the lock.
        string file;
        EnterCriticalSection(&Readlock);
        if(files.empty())
        {
            LeaveCriticalSection(&Readlock);
            break;
        }
        file = files.front();
        files.erase(files.begin());
        LeaveCriticalSection(&Readlock);

        FILE *fp = fopen(file.c_str(),"rb");
        if(!fp)
        {
            cout<<"\nNo such file";
            cout<<file; // the name was already removed from `files`, so print the local copy
            system("pause");
            delete[] buf;
            return 0;
        }
        fseek(fp, 0L, SEEK_END);
        long filesize = ftell(fp); // NOTE(review): a long may be too small for files over 2 GB
        fseek(fp, 0L, SEEK_SET);
        char fsize[32]; // large enough for any long plus sign and terminator
        sprintf(fsize, "%ld", filesize);

        const size_t nameBytes = file.size() + 1;
        const size_t sizeBytes = strlen(fsize) + 1;
        if(nameBytes + sizeBytes > BUFFER_SIZE)
        {
            fclose(fp); // header cannot fit; skip this file
            continue;
        }
        memcpy(buf, file.c_str(), nameBytes);
        memcpy(buf + nameBytes, fsize, sizeBytes);
        size_t used = nameBytes + sizeBytes;

        // Fill the buffer, queue it whenever it is full or the file ends.
        bool more = true;
        while(more)
        {
            const size_t want = BUFFER_SIZE - used;
            const size_t got = fread(buf + used, 1, want, fp);
            used += got;
            more = (got == want); // a short read means end of file or an error
            if(used > 0 && (!more || used == BUFFER_SIZE))
            {
                EnterCriticalSection(&Readlock);
                bufferq.push_back(string(buf, used)); // explicit length keeps embedded NUL bytes
                LeaveCriticalSection(&Readlock);
                used = 0;
            }
        }
        fclose(fp);

        EnterCriticalSection(&Readlock);
        count++;
        cout<<"Files read is "<<count<<"\n";
        LeaveCriticalSection(&Readlock);
    }
    delete[] buf;
    return 0;
}
In the context that bug occurs on big files, I suppose that this line can cause problems
sprintf(fsize, "%ld", filesize);
fsize is char[10], and if filesize is >= 1,000,000,000 you'll overwrite fsize array with trailing 0. This will cause "Run-Time Check Failure #2 - Stack around the variable 'fsize' was corrupted.", as you wrote. Please check the sizes of your test files.
Among others, you are filling files in loop on i, and then you wrote:
files.erase(files.begin());
// ...
cout<<"\nNo such file";
cout<<files[i];
files[i] already points to another element as you erased them, and if files are empty on the last iteration it will cause crash.
And what for are you copying file and fsize to buf if you do not copy it to the bufferq?
As bufferq is writable and is shared between threads the access to it should be protected by lock, critical section as you chose.
That's my little code review.

How can I find the size of all files located inside a folder?

Is there any API in c++ for getting the size of a specified folder?
If not, how can I get the total size of a folder including all subfolders and files?
How about letting OS do it for you:
/**
 * Returns the size in bytes of everything below path, as reported by `du -sb`.
 *
 * @param path directory (or file) to measure
 * @return the number printed by du, or -1 when the command could not be
 *         started or produced no output
 */
long long int getFolderSize(string path)
{
    // Wrap the path in single quotes so the shell treats it as one literal
    // argument; an embedded ' is written as '\''.
    std::string quoted("'");
    for (std::string::size_type i = 0; i < path.size(); ++i) {
        if (path[i] == '\'')
            quoted += "'\\''";
        else
            quoted += path[i];
    }
    quoted += "'";

    // command to be executed ("--" stops a leading '-' being read as an option)
    std::string cmd("du -sb -- ");
    cmd.append(quoted);
    cmd.append(" | cut -f1 2>&1");

    // execute above command and get the output
    long long int result = -1; // error value
    FILE *stream = popen(cmd.c_str(), "r");
    if (stream) {
        const int max_size = 256;
        char readbuf[max_size];
        if (fgets(readbuf, max_size, stream) != NULL)
            result = atoll(readbuf);
        pclose(stream); // always close, also on the success path
    }
    return result;
}
Actually I don't want to use any third party library. Just want to
implement in pure c++.
If you use MSVC++ you have <filesystem> "as standard C++".
But using boost or MSVC - both are "pure C++".
If you don’t want to use boost, and only the C++ std:: library this answer is somewhat close. As you can see here, there is a Filesystem Library Proposal (Revision 4). Here you can read:
The Boost version of the library has been in widespread use for ten
years. The Dinkumware version of the library, based on N1975
(equivalent to version 2 of the Boost library), ships with Microsoft
Visual C++ 2012.
To illustrate the use, I adapted the answer of #Nayana Adassuriya , with very minor modifications (OK, he forgot to initialize one variable, and I use unsigned long long, and most important was to use: path filePath(complete (dirIte->path(), folderPath)); to restore the complete path before the call to other functions). I have tested and it work well in windows 7.
#include <iostream>
#include <string>
#include <filesystem>
using namespace std;
using namespace std::tr2::sys;
// Adds the size of every file below rootFolder (recursively) to f_size.
// Does nothing when rootFolder does not exist; an exception raised for one
// entry is printed and that entry is skipped.
void getFoldersize(string rootFolder,unsigned long long & f_size)
{
    path folderPath(rootFolder);
    if (!exists(folderPath))
        return;

    directory_iterator lastEntry;
    directory_iterator current(rootFolder);
    while (current != lastEntry)
    {
        // Rebuild the complete path before handing it to other functions.
        path absolutePath(complete (current->path(), folderPath));
        try{
            if (is_directory(current->status()) )
                getFoldersize(absolutePath,f_size);
            else
                f_size = f_size + file_size(absolutePath);
        }catch(exception& e){ cout << e.what() << endl; }
        ++current;
    }
}
// Prints the total size of C:\Silvio in bytes and waits for a key press.
int main()
{
    unsigned long long totalBytes = 0; // accumulator, must start at zero
    getFoldersize("C:\\Silvio", totalBytes);
    cout << totalBytes << endl;
    system("pause");
    return 0;
}
You may use boost in this way. You can try to optimize it some deeper.
#include <iostream>
#include <string>
#include <boost/filesystem.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/algorithm/string.hpp>
using namespace std;
namespace bsfs = boost::filesystem;
// Adds the size of every file below rootFolder (recursively) to file_size.
// Doubled backslashes in rootFolder are collapsed first; a missing folder is
// ignored, and an exception raised for one entry is printed and skipped.
void getFoldersize(string rootFolder,long & file_size){
    boost::replace_all(rootFolder, "\\\\", "\\");
    bsfs::path folderPath(rootFolder);
    if (!bsfs::exists(folderPath))
        return;

    bsfs::directory_iterator lastEntry;
    bsfs::directory_iterator current(rootFolder);
    while (current != lastEntry)
    {
        bsfs::path entryPath(current->path());
        try{
            if (bsfs::is_directory(current->status()) )
                getFoldersize(entryPath.string(),file_size);
            else
                file_size = file_size + bsfs::file_size(entryPath);
        }catch(exception& e){
            cout << e.what() << endl;
        }
        ++current;
    }
}
// Prints the total size of C:\logs in bytes and waits for a key press.
int main()
{
    long totalBytes = 0; // accumulator passed by reference
    getFoldersize("C:\\logs", totalBytes);
    cout << totalBytes << endl;
    system("pause");
    return 0;
}
Something like this would be better to avoid adding symbolic(soft) links:
/**
 * Sums the sizes of all regular files below directory, recursively.
 * Symbolic links are not counted.
 *
 * Never throws for filesystem errors: a directory that does not exist or
 * cannot be opened yields 0, unreadable sub-directories are skipped, and an
 * entry whose size cannot be read is left out of the total.
 *
 * @param directory root of the tree to measure
 * @return total size in bytes of the regular files that could be read
 */
std::uintmax_t directorySize(const std::filesystem::path& directory)
{
    namespace fs = std::filesystem;
    std::uintmax_t size{ 0 };
    std::error_code walkError;
    fs::recursive_directory_iterator it(
        directory, fs::directory_options::skip_permission_denied, walkError);
    const fs::recursive_directory_iterator end;
    while (!walkError && it != end)
    {
        const fs::directory_entry& entry = *it;
        std::error_code entryError;
        if (!entry.is_symlink(entryError) && entry.is_regular_file(entryError))
        {
            const std::uintmax_t bytes = entry.file_size(entryError);
            if (!entryError)
                size += bytes;
        }
        it.increment(walkError); // non-throwing advance
    }
    return size;
}
Size of files in a folder
Please have a look at this link
#include <iostream>
#include <windows.h>
#include <string>
using namespace std;
// Returns the combined size in bytes of all files below path, descending
// into every sub-directory. Returns 0 when the search cannot be started.
__int64 TransverseDirectory(string path)
{
    WIN32_FIND_DATA found;
    __int64 total = 0;
    string pattern = path + "\\*.*";
    HANDLE search = FindFirstFile(pattern.c_str(),&found);
    if(search == INVALID_HANDLE_VALUE)
        return total;

    do {
        if( !(found.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) )
        {
            // Plain file: the size arrives as two DWORD halves; LARGE_INTEGER
            // joins them into one 64-bit value.
            LARGE_INTEGER bytes;
            bytes.LowPart = found.nFileSizeLow;
            bytes.HighPart = found.nFileSizeHigh;
            total += bytes.QuadPart;
            continue;
        }
        // Skip "." and "..": a full comparison is needed because ordinary
        // names may also begin with a dot.
        if( strcmp(found.cFileName,".") == 0 || strcmp(found.cFileName,"..") == 0)
            continue;
        // Sub-directory: add the size of everything inside it.
        pattern = path + "\\" + found.cFileName;
        total += TransverseDirectory(pattern);
    }while( FindNextFile(search,&found) != 0);
    FindClose(search);
    return total;
}
int main(int argc, char* argv[])
{
__int64 size = 0;
string path;
size = TransverseDirectory("c:\\dvlp");
cout << "\n\nDirectory Size = " << size << "\n";
cin.ignore();
return 0;
}
For more detail PLease CLick Here
File system functions are an integral part of each operating system and are written mostly in C and assembler, not C++; every C++ library implementation of them is, in one way or another, a wrapper around these functions. Taking the effort into account, and if you will not use your implementation on a different OS, it may be a good idea to use these functions directly and save some overhead and time.
Best regards.
I have my types definition file with:
typedef std::wstring String;
typedef std::vector<String> StringVector;
typedef unsigned long long uint64_t;
and code is:
// Returns `size` plus the sizes of all files below `path`, descending into
// sub-directories.
//   path    - directory to scan
//   errVect - optional; every directory that could not be listed (at any
//             depth) is appended to it
//   size    - running total used by the recursion; callers leave the default
uint64_t CalculateDirSize(const String &path, StringVector *errVect = NULL, uint64_t size = 0)
{
    bool IsBrowsePath(const String& path); // defined further down in the file

    WIN32_FIND_DATA data;
    HANDLE sh = FindFirstFile((path + L"\\*").c_str(), &data);
    if (sh == INVALID_HANDLE_VALUE )
    {
        //if we want, store all happened error
        if (errVect != NULL)
            errVect ->push_back(path);
        return size;
    }
    do
    {
        // skip current and parent
        if (IsBrowsePath(data.cFileName))
            continue;
        if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY)
        {
            // directory, then search it recursively (keep collecting failures)
            size = CalculateDirSize(path + L"\\" + data.cFileName, errVect, size);
        }
        else
        {
            // The size is split into two 32-bit halves; widen before shifting so
            // that files of 4 GB and more are combined correctly.
            size += (static_cast<uint64_t>(data.nFileSizeHigh) << 32)
                  | static_cast<uint64_t>(data.nFileSizeLow);
        }
    } while (FindNextFile(sh, &data)); // do
    FindClose(sh);
    return size;
}
bool IsBrowsePath(const String& path)
{
return (path == _T(".") || path == _T(".."));
}
This uses UNICODE and returns failed dirs if you want that.
To call use:
StringVector vect;
CalculateDirSize(L"C:\\boost_1_52_0", &vect);
CalculateDirSize(L"C:\\boost_1_52_0");
But never pass size
//use FAT32
#undef UNICODE // to flag window deactive unicode
#include<Windows.h> //to use windows api
#include<iostream>
#include<iomanip>
#include<cstring>
#include<string>
#include<algorithm>
using namespace std;
#pragma pack(1) // pack members on 1-byte boundaries so the structs below contain no padding
// Layout that the first sizeof(BPB) bytes of the partition are read into.
// With 1-byte packing the members add up to 512 bytes.
// NOTE(review): field meanings are taken from the member names only; the
// packing set above is never restored, so it also applies to later structs.
struct BPB
{
BYTE JMP[3];
BYTE OEM[8];
WORD NumberOfBytesPerSector; // used to turn sector counts into byte offsets
BYTE NumberOfSectorsPerCluster; // times bytes per sector gives the cluster size
WORD NumberOfReservedSectors; // part of the offset to the root directory
BYTE NumberOfFATs; // multiplied by NumberOfSectorsPerFAT32 in the same offset
WORD NumberOfRootEntries16;
WORD LowNumbferOfSectors;
BYTE MediaDescriptor;
WORD NumberOfSectorsPerFAT16;
WORD NumberOfSectorsPerTrack;
WORD NumberOfHeads;
DWORD NumberOfHiddenSectors;
DWORD HighNumberOfSectors;
DWORD NumberOfSectorsPerFAT32;
WORD Flags;
WORD FATVersionNumber;
DWORD RootDirectoryClusterNumber;
WORD FSInfoSector;
WORD BackupSector;
BYTE Reserver[12];
BYTE BiosDrive;
BYTE WindowsNTFlag;
BYTE Signature;
DWORD VolumeSerial;
BYTE VolumeLabel[11];
BYTE SystemID[8];
BYTE CODE[420];
WORD BPBSignature;
};
//-----------------------------------------------------------
// One record of a directory cluster as read from disk; the members add up to
// 32 bytes under the 1-byte packing set earlier in the file.
struct DirectoryEntry
{
BYTE Name[11]; // bytes 0-7 are printed as the name, bytes 8-10 as the extension
BYTE Attributes; // 0x10 is tested for "folder"; low nibble 0xF entries are skipped
BYTE Reserved;
BYTE CreationTimeTenth;
WORD CreationTime;
WORD CreationDate;
WORD LastAccessTime;
WORD HiClusterNumber; // upper 16 bits of the entry's cluster number
WORD WriteTime;
WORD WriteDate;
WORD LowClusterNumber; // lower 16 bits of the entry's cluster number
DWORD FileSize; // actual size of the file
};
//---------------------------------------------------
// Lists the entries stored in the first cluster of the root directory of
// partition `s` (a drive letter such as "E"): name, <Folder>/<File>, size
// and starting cluster. Prints "Error <code>" when the partition cannot be
// opened or read.
void dirFunction(string s){
    string path = "\\\\.\\" + s + ":";
    HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ|GENERIC_WRITE,
        FILE_SHARE_WRITE, 0, OPEN_EXISTING, 0, 0);//open partition
    if (hFile == INVALID_HANDLE_VALUE)
    {
        cout << "Error " << GetLastError()<<endl;
        return;
    }
    BPB bootSector;//var from bootSector structure
    DWORD readBytes = 0;
    //read partition and load bootSector information inside our structure
    if (!ReadFile(hFile, (BYTE*)&bootSector, sizeof(bootSector), &readBytes, 0)
        || readBytes != sizeof(bootSector))
    {
        cout << "Error " << GetLastError()<<endl;
        CloseHandle(hFile);
        return;
    }
    //distance from the beginning of the partition to the root directory, in bytes;
    //kept in 64 bits because it can exceed 4 GB
    ULONGLONG distance = bootSector.NumberOfReservedSectors +
        (ULONGLONG)bootSector.NumberOfFATs*bootSector.NumberOfSectorsPerFAT32;
    distance *= bootSector.NumberOfBytesPerSector;
    LONG distanceHigh = (LONG)(distance >> 32);
    SetFilePointer(hFile, (LONG)(distance & 0xFFFFFFFF), &distanceHigh, FILE_BEGIN);
    int clusterSize = bootSector.NumberOfBytesPerSector*bootSector.NumberOfSectorsPerCluster; //cluster size
    int NumberOfEntries = clusterSize / sizeof(DirectoryEntry); //number of records inside a cluster
    if (NumberOfEntries <= 0)
    {
        cout << "Error " << GetLastError()<<endl;
        CloseHandle(hFile);
        return;
    }
    DirectoryEntry* root = new DirectoryEntry[NumberOfEntries];
    if (!ReadFile(hFile, (BYTE*)root, clusterSize, &readBytes, 0))
    {
        cout << "Error " << GetLastError()<<endl;
        delete[] root;
        CloseHandle(hFile);
        return;
    }
    //only look at records that were actually read
    NumberOfEntries = readBytes / sizeof(DirectoryEntry);
    for (int i = 0; i < NumberOfEntries; i++)
    {
        if (root[i].Name[0] == 0)//there is no entry after this
            break;
        if (root[i].Name[0] == 0xE5)
            continue;
        if ((root[i].Attributes & 0xF) == 0xF)
            continue;
        const bool isFolder = (root[i].Attributes & 0x10) == 0x10;
        for (int j = 0; j < 8; j++)
            cout << root[i].Name[j];
        if(!isFolder){
            cout<<".";
            for (int j = 8; j < 11; j++)
                cout << root[i].Name[j];
        }
        if (isFolder){
            cout << "\t<Folder>" ;
        }else{
            cout<<"\t<File>" ;
        }
        DWORD clusterNumber = (DWORD)root[i].HiClusterNumber << 16;
        clusterNumber |= root[i].LowClusterNumber;
        cout <<"\t"<<root[i].FileSize<<"bytes" << "\t" << clusterNumber<<"cluster" << endl;
    }
    delete[] root;
    CloseHandle(hFile);
}
//---------------------------------------------------------------
// Returns a copy of f with every character passed through toupper.
string convertLowerToUpper(string f){
    string temp;
    temp.reserve(f.size());
    for (string::size_type i = 0; i < f.size(); i++){
        // toupper requires a value representable as unsigned char (or EOF);
        // a plain char can be negative, so convert first.
        temp += static_cast<char>(toupper(static_cast<unsigned char>(f[i])));
    }
    return temp;
}
//---------------------------------------------------------------
// Builds a string from the first 8 bytes of filename, leaving out every
// space; bytes 8-10 are not looked at.
string getFileName(BYTE filename[11]){
    string trimmed;
    for (const BYTE* cursor = filename; cursor != filename + 8; ++cursor){
        if (*cursor == ' ')
            continue;
        trimmed += *cursor;
    }
    return trimmed;
}
//------------------------------------------------------------------
// Returns the index of the entry in root[0..NumberOfEntries) whose name
// (first 8 bytes, spaces removed) equals `required` converted to upper case,
// or -1 when there is none.
// Like dirFunction, the scan stops at the first entry whose name starts with
// 0 and skips entries starting with 0xE5 or whose attribute low nibble is 0xF.
int findEntryNumber(DirectoryEntry* root, int NumberOfEntries, string required){
    const string wanted = convertLowerToUpper(required); // converted once, not per entry
    for (int i = 0; i < NumberOfEntries; i++){
        if (root[i].Name[0] == 0)
            break;
        if (root[i].Name[0] == 0xE5 || (root[i].Attributes & 0xF) == 0xF)
            continue;
        if (strcmp(getFileName(root[i].Name).c_str(), wanted.c_str()) == 0){
            return i;
        }
    }
    return -1;
}
//---------------------------------------------------------------
// Prints the contents of file `fileName` (name without extension) found in
// the first cluster of the root directory of partition `s`.
// Only the file's first cluster is read, so at most one cluster of data is
// printed even when the recorded file size is larger.
void typeFunction(string fileName, string s){
    string path = "\\\\.\\" + s + ":";
    HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ|GENERIC_WRITE,
        FILE_SHARE_WRITE, 0, OPEN_EXISTING, 0, 0);//open partition
    if (hFile == INVALID_HANDLE_VALUE)
    {
        cout << "Error " << GetLastError()<<endl;
        return;
    }
    BPB bootSector;//var from bootSector structure
    DWORD readBytes = 0;
    //read partition and load bootSector information inside our structure
    if (!ReadFile(hFile, (BYTE*)&bootSector, sizeof(bootSector), &readBytes, 0)
        || readBytes != sizeof(bootSector))
    {
        cout << "Error " << GetLastError()<<endl;
        CloseHandle(hFile);
        return;
    }
    //byte offset of the root directory; 64 bits because it can exceed 4 GB
    ULONGLONG distance = bootSector.NumberOfReservedSectors +
        (ULONGLONG)bootSector.NumberOfFATs*bootSector.NumberOfSectorsPerFAT32;
    distance *= bootSector.NumberOfBytesPerSector;
    LONG distanceHigh = (LONG)(distance >> 32);
    SetFilePointer(hFile, (LONG)(distance & 0xFFFFFFFF), &distanceHigh, FILE_BEGIN);
    int clusterSize = bootSector.NumberOfBytesPerSector*bootSector.NumberOfSectorsPerCluster; //cluster size
    int NumberOfEntries = clusterSize / sizeof(DirectoryEntry); //number of records inside a cluster
    if (NumberOfEntries <= 0)
    {
        cout << "Error " << GetLastError()<<endl;
        CloseHandle(hFile);
        return;
    }
    DirectoryEntry* root = new DirectoryEntry[NumberOfEntries];
    BYTE* buffer = new BYTE[clusterSize];
    if (!ReadFile(hFile, (BYTE*)root, clusterSize, &readBytes, 0))
    {
        cout << "Error " << GetLastError()<<endl;
    }
    else
    {
        const int index = findEntryNumber(root, readBytes / sizeof(DirectoryEntry), fileName);
        if (index == -1){
            cout << "File is not found" << endl;
        }
        else if ((root[index].Attributes & 0x10) == 0x10){
            cout << "Is not file name" << endl;
        }
        else
        {
            DWORD clusterNumber = (DWORD)root[index].HiClusterNumber << 16;
            clusterNumber |= root[index].LowClusterNumber;
            //data clusters are numbered from 2, starting at the root directory offset
            distance += (ULONGLONG)(clusterNumber - 2) * clusterSize;
            distanceHigh = (LONG)(distance >> 32);
            SetFilePointer(hFile, (LONG)(distance & 0xFFFFFFFF), &distanceHigh, FILE_BEGIN);
            readBytes = 0;
            if (!ReadFile(hFile, (BYTE*)buffer, clusterSize, &readBytes, 0))
            {
                cout << "Error " << GetLastError()<<endl;
            }
            else
            {
                //never print more than was read into the buffer
                DWORD printable = root[index].FileSize;
                if (printable > readBytes)
                    printable = readBytes;
                for (DWORD i = 0; i < printable; i++){
                    cout << buffer[i];
                }
                cout << endl;
            }
        }
    }
    delete[] buffer;
    delete[] root;
    CloseHandle(hFile);
}
//----------------------------------------------------------------------
// Marks file `filename` (name without extension) in the first cluster of the
// root directory of partition `s` as deleted by setting the first byte of its
// name to 0xE5 and writing the cluster back.
void delFunction(string filename, string s){
    string path = "\\\\.\\" + s + ":";
    HANDLE hFile = CreateFile(path.c_str(), GENERIC_READ|GENERIC_WRITE,
        FILE_SHARE_WRITE, 0, OPEN_EXISTING, 0, 0);//open partition
    if (hFile == INVALID_HANDLE_VALUE)
    {
        cout << "Error " << GetLastError()<<endl;
        return;
    }
    BPB bootSector;//var from bootSector structure
    DWORD readBytes = 0;
    //read partition and load bootSector information inside our structure
    if (!ReadFile(hFile, (BYTE*)&bootSector, sizeof(bootSector), &readBytes, 0)
        || readBytes != sizeof(bootSector))
    {
        cout << "Error " << GetLastError()<<endl;
        CloseHandle(hFile);
        return;
    }
    //byte offset of the root directory; 64 bits because it can exceed 4 GB
    ULONGLONG distance = bootSector.NumberOfReservedSectors +
        (ULONGLONG)bootSector.NumberOfFATs*bootSector.NumberOfSectorsPerFAT32;
    distance *= bootSector.NumberOfBytesPerSector;
    LONG distanceHigh = (LONG)(distance >> 32);
    SetFilePointer(hFile, (LONG)(distance & 0xFFFFFFFF), &distanceHigh, FILE_BEGIN);
    int clusterSize = bootSector.NumberOfBytesPerSector*bootSector.NumberOfSectorsPerCluster; //cluster size
    int NumberOfEntries = clusterSize / sizeof(DirectoryEntry); //number of records inside a cluster
    if (NumberOfEntries <= 0)
    {
        cout << "Error " << GetLastError()<<endl;
        CloseHandle(hFile);
        return;
    }
    DirectoryEntry* root = new DirectoryEntry[NumberOfEntries];
    if (!ReadFile(hFile, (BYTE*)root, clusterSize, &readBytes, 0) || readBytes != (DWORD)clusterSize)
    {
        //a partial cluster must not be written back
        cout << "Error " << GetLastError()<<endl;
    }
    else
    {
        const int index = findEntryNumber(root, NumberOfEntries, filename);
        if (index == -1){
            cout << "FIle is not found" << endl;
        }
        else if ((root[index].Attributes & 0x10) == 0x10){
            cout << "Is not file name" << endl;
        }
        else
        {
            //delete file
            root[index].Name[0] = 0xE5;
            distanceHigh = (LONG)(distance >> 32);
            SetFilePointer(hFile, (LONG)(distance & 0xFFFFFFFF), &distanceHigh, FILE_BEGIN);
            DWORD writtenBytes = 0;
            if (WriteFile(hFile, (BYTE*)root, clusterSize, &writtenBytes, 0))
                cout<<filename<<" is deleted\n";
            else
                cout << "Error " << GetLastError()<<endl;
        }
    }
    delete[] root;
    CloseHandle(hFile);
}
//----------------------------------------------------------------------
// Returns the part of s that precedes its first '.', or all of s when it
// contains no '.'.
string removeExtention(string s){
    const string::size_type firstDot = s.find('.');
    return s.substr(0, firstDot); // npos keeps the whole string
}
//-------------------------------------------------------------------
void main()
{
string swich_value;
string directory;
string file_name;
//dirFunction("G");
cout<<"plz, Enter single Partition character ------> example E or G\n\n";
cin>>directory;
string path = "\\\\.\\" + directory + ":";
cout<<"current directory is "<<path<<endl;
cout<<"Enter Options: \n1- dir \n2- type file_name.extention \n3- del file_name.extention\n\n";
again:
cin>>swich_value;
if(swich_value.at(1)!='i')
cin>>file_name;
string answer;
switch(swich_value.at(1)){
case 'i':
dirFunction(directory);
cout<<"\nare you want to do another process: y or n?";
cin>>answer;
if (answer.at(0)=='y')
goto again;
break;
case 'y':
typeFunction(removeExtention(file_name), directory);
cout<<"\nare you want to do another process: y or n?";
cin>>answer;
if (answer.at(0)=='y')
goto again;
break;
case 'e':
delFunction(removeExtention(file_name), directory);
cout<<"\nare you want to do another process: y or n?";
cin>>answer;
if (answer.at(0)=='y')
goto again;
break;
}
}
You can use "boost::filesystem"
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
// Returns the size in bytes of `directory`: the file size for a regular
// file, the recursive sum of regular files for a directory, and 0 for a path
// that does not exist or is neither.
unsigned long long int get_directory_size(const fs::path& directory){
    if (!fs::exists(directory))
        return 0;
    if (fs::is_regular_file(directory))
        return fs::file_size(directory);
    if (!fs::is_directory(directory))
        return 0;

    unsigned long long int total = 0;
    const fs::directory_iterator last; // default-constructed iterator marks the end
    for (fs::directory_iterator it(directory); it != last; ++it){
        const fs::directory_entry entry = *it;
        const fs::path& child = entry.path();
        if (fs::is_regular_file(child))
            total += fs::file_size(child);
        else if (fs::is_directory(child))
            total += get_directory_size(child);
    }
    return total;
}
#include <stdio.h>
// Prints the size of C:/Folder as computed by get_directory_size.
int main(int /*argc*/, char** /*argv*/) {
    // Assuming 'C:/Folder' be any directory then its size can be found using
    auto folder_size = get_directory_size("C:/Folder");
    // %llu matches unsigned long long; %d would read the argument as an int
    printf("Size of 'C:/Folder' is %llu\n",folder_size);
    return 0;
}
With the introduction of std::filesystem, you no more have to use any system APIs or any external libraries.
#include <filesystem>
namespace n_fs = ::std::filesystem;
// Returns the size in bytes of `path`: the file size when it is not a
// directory, otherwise the sum over all of its entries, recursively.
// An exception is printed and whatever was accumulated so far is returned.
double archive::getFolderSize(std::string path)
{
    double r = 0.0;
    try{
        if (!n_fs::is_directory(path))
        {
            r += static_cast<double>(n_fs::file_size(path));
        }
        else
        {
            for(const auto& entry: n_fs::directory_iterator(path))
                r += getFolderSize(entry.path().string()); // add each child's total
        }
    }
    catch(const std::exception& e)
    {
        std::cout << e.what() << std::endl;
    }
    return r;
}
// Prints the size reported for the folder "~/dev/".
// NOTE(review): getFolderSize is defined above as a member of `archive`, so
// this unqualified call presumably needs an archive object (or a static
// member) - confirm against the class definition.
// NOTE(review): "~" is passed through as typed; nothing here expands it.
int main()
{
    const double folderSize = getFolderSize("~/dev/"); //Replace with your path
    std::cout << "Size of Folder: " << folderSize;
    return 0;
}
Try using GetFileSizeEx function. Following is some sample code for this. You need to get the size from the LARGE_INTEGER union though.
#include <iostream>
#include <windows.h>
#include <stdio.h>
#include <io.h>
using namespace std;
int main()
{
FILE *fp;
fp = fopen("C:\test.txt","r");
int fileNo = _fileno(fp);
HANDLE cLibHandle = (HANDLE)_get_osfhandle(fileNo);
long int fileSize = 0;
LARGE_INTEGER fileSizeL;
GetFileSizeEx(cLibHandle, &fileSizeL);
return 0;
}
5 years and not a simple solution with standard C++, that's why I would like to contribute my solution to this question:
// Returns the combined size in bytes of all regular files below path,
// descending into every sub-directory.
uint64_t GetDirSize(const std::string &path)
{
    namespace efs = std::experimental::filesystem;
    uint64_t total = 0;
    for (const auto & item : efs::directory_iterator(path))
    {
        // Regular file: add its size.
        if (item.status().type() == efs::file_type::regular)
        {
            total += efs::file_size(item.path());
        }
        // Directory: add the size of everything inside it.
        if (item.status().type() == efs::file_type::directory)
        {
            total += GetDirSize(item.path().generic_string());
        }
    }
    return total;
}
Use it for example by calling
GetDirSize("C:\\dir_name")
if you're using Windows.
Calculating a folder size in bytes on Windows.
size_t GetFolderSizeInBytes(std::wstring path)
{
size_t result = 0;
WIN32_FIND_DATA findData;
HANDLE hFileHandle;
std::wstring sourcePath(path);
if (GetFileAttributes(sourcePath.c_str()) & FILE_ATTRIBUTE_DIRECTORY)
sourcePath.push_back(L'\\');
std::wstring fileName(sourcePath);
fileName.append(L"*");
hFileHandle = FindFirstFileEx(
fileName.data(),
FindExInfoStandard,
&findData,
FindExSearchNameMatch,
NULL,
FIND_FIRST_EX_ON_DISK_ENTRIES_ONLY);
if (hFileHandle != INVALID_HANDLE_VALUE)
{
do
{
if (!wcscmp(findData.cFileName, L".") || !wcscmp(findData.cFileName, L".."))
continue;
if ((findData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
{
// Folder
std::wstring newPath = path + L"\\" + findData.cFileName;
result += GetFolderSizeInBytes(newPath);
}
else
{
// File
unsigned long high = findData.nFileSizeHigh;
unsigned long low = findData.nFileSizeLow;
size_t size = size_t(high * (MAXWORD + 1)) + low;
result += size;
}
} while (FindNextFile(hFileHandle, &findData));
FindClose(hFileHandle);
}
return result;
}

Is there a better way to search a file for a string?

I need to search a (non-text) file for the byte sequence "9µ}Æ" (or "\x39\xb5\x7d\xc6").
After 5 hours of searching online this is the best I could do. It works but I wanted to know if there is a better way:
// Reads the stream `in` one byte at a time, looking for '9' followed by the
// three other bytes of the sequence.
// NOTE(review): the fragment is unbalanced - the while loop opened below is
// never closed.
char buffer;
int pos=in.tellg();
// search file for string
// NOTE(review): eof() only becomes true after a read has failed, so the body
// runs once more with a stale `buffer`.
while(!in.eof()){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='9'){
in.read(&buffer, 1);
pos=in.tellg();
// NOTE(review): whether 'µ' and 'Æ' are single-byte literals depends on the
// source file encoding - confirm, or compare against '\xb5' and '\xc6'.
if(buffer=='µ'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='}'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='Æ'){
cout << "found";
}
}
}
}
// NOTE(review): a byte that breaks a partial match is consumed and never
// re-tested as a possible new start, so "99µ}Æ" is missed.
in.seekg((streampos) pos);
Note:
I can't use getline(). It's not a text file so there are probably not many line breaks.
Before I tried using a multi-character buffer and then copying the buffer to a C++ string, and then using string::find(). This didn't work because there are many '\0' characters throughout the file, so the sequence in the buffer would be cut very short when it was copied to the string.
Similar to what bames53 posted; I used a vector as a buffer:
std::ifstream ifs("file.bin");
ifs.seekg(0, std::ios::end);
std::streamsize f_size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::vector<unsigned char> buffer(f_size);
ifs.read(buffer.data(), f_size);
std::vector<unsigned char> seq = {0x39, 0xb5, 0x7d, 0xc6};
bool found = std::search(buffer.begin(), buffer.end(), seq.begin(), seq.end()) != buffer.end();
If you don't mind loading the entire file into an in-memory array (or using mmap() to make it look like the file is in memory), you could then search for your character sequence in-memory, which is a bit easier to do:
// Works much like strstr(), except it looks for a binary sub-sequence rather than a string sub-sequence.
//
// lookIn / numLookInBytes    : the bytes to search and how many of them there are.
// lookFor / numLookForBytes  : the byte sequence to find and its length.
//
// Returns a pointer to the first occurrence of lookFor inside lookIn, lookIn itself when
// numLookForBytes is 0, or NULL when the sequence does not occur (or cannot fit).
const char * MemMem(const char * lookIn, int numLookInBytes, const char * lookFor, int numLookForBytes)
{
    if (numLookForBytes == 0) return lookIn; // hmm, existential questions here
    if (numLookForBytes < 0 || numLookForBytes > numLookInBytes) return NULL;

    // Every candidate start position is checked in full. A single running match counter
    // is not enough: after a partial match fails, the byte that broke it may itself begin
    // the real match (e.g. "aab" inside "aaab"), and a plain counter reset would skip it.
    const int lastStart = numLookInBytes - numLookForBytes;
    for (int i = 0; i <= lastStart; i++)
    {
        // Cheap first-byte test before paying for the full comparison.
        if (lookIn[i] == lookFor[0] && memcmp(lookIn + i, lookFor, numLookForBytes) == 0)
            return lookIn + i;
    }
    return NULL;
}
.... then you can just call the above function on the in-memory data array:
// MemMem() returns a pointer-to-const into the searched array, so the result is held as const too.
const char * ret = MemMem(theInMemoryArrayContainingFilesBytes, numBytesInFile, myShortSequence, 4);
// The pointer difference is a ptrdiff_t; it cannot exceed the int length passed in above,
// so converting it to int for the %i conversion is safe.
if (ret != NULL) printf("Found it at offset %i\n", static_cast<int>(ret-theInMemoryArrayContainingFilesBytes));
else printf("It's not there.\n");
This program loads the entire file into memory and then uses std::search on it.
// Loads all of "file.dat" into memory and reports whether the byte sequence
// 39 b5 7d c6 occurs anywhere in it. Returns 1 if the file cannot be opened.
int main() {
    std::string filedata;
    {
        // Binary mode: the data is not text, so no newline translation may touch the bytes.
        std::ifstream fin("file.dat", std::ios::binary);
        if (!fin) {
            std::cerr << "cannot open file.dat\n";
            return 1;
        }
        // Streaming the underlying buffer copies the whole file, embedded '\0' bytes included.
        std::stringstream ss;
        ss << fin.rdbuf();
        filedata = ss.str();
    }
    const std::string key = "\x39\xb5\x7d\xc6";
    auto result = std::search(std::begin(filedata), std::end(filedata),
                              std::begin(key), std::end(key));
    if (std::end(filedata) != result) {
        std::cout << "found\n";
        // result is an iterator pointing at '\x39'
    }
}
// Byte sequence to locate. Character escapes are used instead of integer literals because
// values such as 0xb5 do not fit in a signed char, which makes the integer form an
// ill-formed narrowing conversion in C++11 and later.
const char delims[] = { '\x39', '\xb5', '\x7d', '\xc6' };
const size_t delim_size = sizeof delims;
const size_t last_index = delim_size - 1;
// Sliding window holding the most recent delim_size bytes of the stream.
char buffer[sizeof delims];
// Prime the window with all but its last byte.
for ( size_t i = 0; i < last_index; ++i )
{
    if ( ! ( is.get( buffer[i] ) ) )
        return false; // stream too short
}
// Each pass reads one new byte into the last slot and compares the whole window.
while ( is.get(buffer[last_index]) )
{
    if ( memcmp( buffer, delims, delim_size ) == 0 )
        break; // sequence found
    // Drop the oldest byte to make room for the next one.
    memmove( buffer, buffer + 1, last_index );
}
You are looking for 4 bytes:
// The wanted bytes 39 b5 7d c6 packed into one value, with the first byte of the
// sequence in the lowest 8 bits.
const unsigned int delim = 0xc67db539u;
// Window over the last four bytes read. It is assembled with shifts rather than by
// writing through a char* alias, so the result does not depend on the host byte order.
unsigned int window = 0;
char next;
// Prime the window with the first three bytes.
for ( size_t i = 0; i < 3; ++i )
{
    if ( ! ( is.get( next ) ) )
        return false; // stream too short
    window = ( window >> 8 ) | ( static_cast<unsigned int>( static_cast<unsigned char>( next ) ) << 24 );
}
while ( is.get( next ) )
{
    // Shift the oldest byte out of the low end and the newest byte into bits 24..31.
    window = ( window >> 8 ) | ( static_cast<unsigned int>( static_cast<unsigned char>( next ) ) << 24 );
    if ( window == delim )
        break; // sequence found
}
Because you said you cannot search the entire file because of null terminator characters in the string, here's an alternative for you, which reads the entire file in and uses recursion to find the first occurrence of a string inside of the whole file.
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
/**
 * Reads the whole content of a file into a string, byte for byte.
 *
 * The file is opened in binary mode so that no newline translation changes the data;
 * embedded '\0' bytes are kept because the string is built from an iterator range.
 *
 * @param fileName  path of the file to read
 * @return the file content, or an empty string if the file cannot be opened
 *         (an error message is written to std::cerr in that case)
 */
std::string readFile (const char *fileName) {
    std::ifstream fi (fileName, std::ios::binary);
    if (!fi) {
        std::cerr << "ERROR: Cannot open file" << std::endl;
        // An empty string, not NULL: building a std::string from a null pointer is undefined behaviour.
        return std::string();
    }
    return std::string ((std::istreambuf_iterator<char>(fi)), std::istreambuf_iterator<char>());
}
/**
 * Recursive helper: checks whether needle[needle_pos .. needle_len) matches the haystack
 * starting at haystack_pos.
 *
 * The haystack is taken by const reference so the (possibly file-sized) string is not
 * copied on every recursive step.
 *
 * @return true once every remaining needle byte has matched, false on the first mismatch
 *         or when the haystack ends before the needle does
 */
bool findFirstOccurrenceOf_r (const std::string &haystack, const char *needle, int haystack_pos, int needle_pos, int needle_len) {
    if (needle_pos == needle_len)
        return true;
    // Never look outside the haystack.
    if (haystack_pos < 0 || static_cast<std::string::size_type>(haystack_pos) >= haystack.length())
        return false;
    if (haystack[haystack_pos] == needle[needle_pos])
        return findFirstOccurrenceOf_r (haystack, needle, haystack_pos+1, needle_pos+1, needle_len);
    return false;
}

/**
 * Finds the first occurrence of a byte sequence inside a string that may contain '\0' bytes.
 *
 * @param haystack  the data to search
 * @param needle    the bytes to look for (need not be null-terminated)
 * @param length    number of bytes in needle
 * @return the index of the first match, or -1 if there is none, if length is negative,
 *         or if the needle is longer than the haystack; an empty needle matches at index 0
 */
int findFirstOccurrenceOf (const std::string &haystack, const char *needle, int length) {
    // Guard first: subtracting a larger length from the unsigned size would wrap around.
    if (length < 0 || haystack.length() < static_cast<std::string::size_type>(length))
        return -1;
    // Last index at which the needle still fits; inclusive, so a match at the very end is found.
    const std::string::size_type last_start = haystack.length() - static_cast<std::string::size_type>(length);
    for (std::string::size_type i = 0; i <= last_start; i++) {
        if (findFirstOccurrenceOf_r (haystack, needle, static_cast<int>(i), 0, length))
            return static_cast<int>(i);
    }
    return -1;
}
int main () {
char str_to_find[4] = {0x39, 0xB5, 0x7D, 0xC6};
string contents = readFile ("input");
int pos = findFirstOccurrenceOf (contents, str_to_find, 4);
cout << pos << endl;
}
If the file is not too large, your best solution would be to load the whole file into memory, so you don't need to keep reading from the drive. If the file is too large to load at once, load it in chunks instead. But if you do load in chunks, make sure you check the edges of the chunks: a chunk boundary may fall right in the middle of the string you're searching for.