Finding int value in large binary file c++ - c++

I tried to make a program that loads chunks of a large (We're speaking of a few MBs) of file, and searches for a value, and prints its address and value, except my program every few times throws a !myfile , doesn't give the value except a weird symbol (Although I've used 'hex' in cout), the addresses seem to loop sorta, and it doesn't seem to find all the values at all. I've tried for a long time and I gave up, so I'm asking experiences coders out there to find the issue.
I should note that I'm trying to find a 32 bit value in this file, but all I could make was a program that checks bytes, i'd require assistance for that too.
#include <iostream>
#include <fstream>
#include <climits>
#include <sstream>
#include <windows.h>
#include <math.h>
using namespace std;
int get_file_size(std::string filename) // path to file
{
FILE *p_file = NULL;
p_file = fopen(filename.c_str(),"rb");
fseek(p_file,0,SEEK_END);
int size = ftell(p_file);
fclose(p_file);
return size;
}
int main( void )
{
ifstream myfile;
myfile.open( "file.bin", ios::binary | ios::in );
char addr_start = 0,
addr_end = 0,
temp2 = 0x40000;
bool found = false;
cout << "\nEnter address start (Little endian, hex): ";
cin >> hex >> addr_start;
cout << "\nEnter address end (Little endian, hex): ";
cin >> hex >> addr_end;
unsigned long int fsize = get_file_size("file.bin");
char buffer[100];
for(int counter = fsize; counter != 0; counter--)
{
myfile.read(buffer,100);
if(!myfile)
{
cout << "\nAn error has occurred. Bytes read: " << myfile.gcount();
myfile.clear();
}
for(int x = 0; x < 100 - 1 ; x++)
{
if(buffer[x] >= addr_start && buffer[x] <= addr_end)
cout << "Addr: " << (fsize - counter * x) << " Value: " << hex << buffer[x] << endl;
}
}
myfile.close();
system("PAUSE"); //Don't worry about its inefficiency
}

A simple program to search for a 32-bit integer in a binary file:
int main(void)
{
ifstream data_file("my_file.bin", ios::binary);
if (!data_file)
{
cerr << "Error opening my_file.bin.\n";
EXIT_FAILURE;
}
const uint32_t search_key = 0x12345678U;
uint32_t value;
while (data_file.read((char *) &value, sizeof(value))
{
if (value == search_key)
{
cout << "Found value.\n";
break;
}
}
return EXIT_SUCCESS;
}
You could augment the performance by reading into a buffer and searching the buffer.
//...
const unsigned int BUFFER_SIZE = 1024;
static uint32_t buffer[BUFFER_SIZE];
while (data_file.read((char *)&(buffer[0]), sizeof(buffer) / sizeof(uint32_t))
{
int bytes_read = data_file.gcount();
if (bytes_read > 0)
{
values_read = ((unsigned int) bytes_read) / sizeof(uint32_t);
for (unsigned int index = 0U; index < values_read; ++index)
{
if (buffer[index] == search_key)
{
cout << "Value found.\n";
break;
}
}
}
}
With the above code, when the read fails, the number of bytes should be checked, and if any bytes were read, the buffer then searched.

Related

How do I read the whole 64 bytes of a binary file?

I am writing a little program that reads a disk image file in binary and then checks its partition entry tables to display each partition, it's type, start sector and size.
So far it reads the first 16 bytes accurately but the rest of the partition entries are not recognized or have some kind of error.
The result looks like this:
EDIT: The first line of the output is supposed to look like this:
`Partition 0: Type: FAT-16 Start: 63 Size: 518760`
What am I missing? How do I fix the code so that all the partition entries give the appropriate result?
using namespace std;
#include <iostream>
#include <fstream>
struct Partition { char type; int start_sect; int size; } part_entry[4]; // 4 x partition table entry
int main(int argc, char *argv[])
{
//DECLARATIONS
int i, offset = 26, not_exist = 0;
char buf_part_table[64], vol_type[12];
char* diskdata;
int n;
streampos begin, end;
ifstream diskimage;
diskimage.open("Sample_1.dd", ios::in | ios::binary | ios::out);
diskdata = new char[begin];
begin = diskimage.tellg();
diskdata = new char[begin];
diskimage.seekg(446, ios::beg);
diskimage.read(buf_part_table, 64);
for (i = 0; i < 4; i++)
{
part_entry[i].type = *(char*)(buf_part_table + 0x04 + (i * offset));
if (part_entry[i].type == 0) not_exist++;
part_entry[i].start_sect = *(int*)(buf_part_table + 0x08 + (i * offset));
part_entry[i].size = *(int*)(buf_part_table + 0x0C + (i * offset));
switch (part_entry[i].type)
{
case 00: strcpy(vol_type, "NOT-VALID");
break;
case 06: strcpy(vol_type, "FAT-16");
break;
case 07: strcpy(vol_type, "NTFS");
break;
case 0x0B: strcpy(vol_type, "FAT-32");
break;
default: strcpy(vol_type, "NOT-DECODED");
break;
}
cout << "Partition " << i << ":" << " Type:" << vol_type << " Start: " << part_entry[i].start_sect << " Size: " << part_entry[i].size << endl;
}
return 0;
}
You unnecesary made program unreadable and harder to debug.
You can read whole boot sector at once and than display desired content.
Here is my quick example (it does not check if file exists, some may complain it should use memcpy for some fields etc.)
#include <iostream>
#include <fstream>
#include <cstdint>
#include <cstddef>
#include <iomanip>
using namespace std;
struct partition_t {
uint8_t status;
uint8_t start_CHS[3];
uint8_t type;
uint8_t end_CHS[3];
uint32_t start_LBA;
uint32_t size_LBA;
} __attribute__((packed));
struct mbr_t
{
uint8_t bootstrap[446];
partition_t partitions[4];
uint16_t signature;
} __attribute__((packed));
int main(int argc, char *argv[])
{
mbr_t mbr;
ifstream diskimage;
diskimage.open( "/tmp/mbr.dd", ios::in | ios::binary );
diskimage.read( reinterpret_cast<char*>(&mbr), sizeof(mbr) );
diskimage.close();
for( int idx = 0 ; idx < 4 ; idx++ )
{
string bootable = (mbr.partitions[idx].status == 128) ? "yes" : "no";
cout << " bootable : " << setw(5) << bootable <<
" type : " << setw(5) << (int)mbr.partitions[idx].type <<
" start LBA : " << setw(10) << mbr.partitions[idx].start_LBA <<
" size : " << setw(10) << mbr.partitions[idx].size_LBA << endl;
}
return 0;
}
It is easier to read, right?

Frequency table in C++

This is what I have so far; I am trying to have an array with probability of all chars and space in a text file, but I have a problem with the data type.
int main()
{
float x[27];
unsigned sum = 0;
struct Count {
unsigned n;
void print(unsigned index, unsigned total) {
char c = (char)index;
if (isprint(c)) cout << "'" << c << "'";
else cout << "'\\" << index << "'";
cout << " occured " << n << "/" << total << " times";
cout << ", propability is " << (double)n / total << "\n";
}
Count() : n() {}
} count[256];
ifstream myfile("C:\\text.txt"); // one \ masks the other
while (!myfile.eof()) {
char c;
myfile.get(c);
if (!myfile) break;
sum++;
count[(unsigned char)c].n++;
}
for (unsigned i = 0; i<256; i++)
{
count[i].print(i, sum);
}
x[0] = count[33];
int j=68;
for(int i=1;i<27;i++)
{
x[i]=count[j];
j++;
}
return 0;
}
#include <iostream>
#include <fstream>
#include <cctype>
using namespace std;
double probabilities[256]; // now it can be accessed by Count
int main()
{
unsigned sum = 0;
struct Count {
unsigned n;
double prob;
void print ( unsigned index, unsigned total ) {
// if ( ! n ) return;
probabilities[index] = prob = (double)n/total;
char c = (char) index;
if ( isprint(c) ) cout << "'" << c << "'";
else cout << "'\\" << index << "'";
cout<<" seen "<<n<<"/"<<total<<" times, probability is "<<prob<<endl;
}
Count(): n(), prob() {}
operator double() const { return prob; }
operator float() const { return (float)prob; }
} count[256];
ifstream myfile("C:\\text.txt"); // one \ masks the other
while(!myfile.eof()) {
char c;
myfile.get(c);
if ( !myfile ) break;
sum++;
count[(unsigned char)c].n++;
}
for ( unsigned i=0; i<256; i++ ) count[i].print(i,sum);
return 0;
}
I incorporated various changes suggested - Thanks!
Now, who finds the 4 ways to access the actual probabilities?
you are allocating a buffer with size 1000000 1 million characters.
char file[1000000] = "C:\text.txt";
This is not good as the extra values in the buffer are not guaranteed to be zero, the can be anything.
For Windows to read a file you need something like this. I will not give you the solution, you need to learn using msdn and documentation to understand this fully::
you need to include the #include <windows.h> header from the SDK first.
Look at this example here: http://msdn.microsoft.com/en-us/library/windows/desktop/aa363778(v=vs.85).aspx
this example as appending a file to another. Your solution will be similar, instead of writing list to other file, process the buffer to increment your local variables and update the state of the table.
Do not set a large number you come up with for the buffer, as there will risk of not enough buffer space, and thus overflow. You should do like example:
read some bytes in buffer
process that buffer and increment the table
repeat until you reach end of file
while (ReadFile(hFile, buff, sizeof(buff), &dwBytesRead, NULL)
&& dwBytesRead > 0)
{
// write you logic here
}

Storing numerical value from a binary file into an array- C++

I am trying to read numerical data from a binary file containing 100 double values and store it in an array called myArray. When I print myArray nothing is being shown, as if myArray has never been filled. Any help is appreciated.
int main()
{
int file_len;
ifstream infile;
infile.open("mybinfile.bin", ios::binary | ios::in);
if (!infile.is_open())
{
cout << "Error in opening the file. " << endl;
}
else
{
const int file_len = 100;
std::vector<char> myVect;
myVect.resize(file_len);
int i = 0;
infile.read((char *) (&temp), sizeof(char));
myVect[i] = temp;
while(!infile.eof())
{
myVect[i] = temp;
i++;
infile.read((char *) (&temp), sizeof(char));
}
for (int i = 0; i < 100 ; i++)
{cout << i << ": " << myVect[i]<< endl;}
}
infile.close();
return 0;
}
Here
infile.read((char*)&myArray, file_len * sizeof(double));
You pass the pointer to the pointer to double. It should be
infile.read((char*)myArray, file_len * sizeof(double));
Makes me wonder why you didn't see a crash, since writing into random memory almost never turns out well.
Here is one example of brute-force serialization and de-serialization. Note that serialization in general is tricky, due to e.g. endianness, different floating-point formats, conversion between decimal text representations and binary and whatnot.
#include <fstream>
using std::string;
using std::ofstream;
using std::ifstream;
using std::cout;
string fname("/tmp/doubles.bin");
void write_doubles()
{
ofstream of;
of.open(fname.c_str(), std::ios::binary | std::ios::out);
double arr[100];
for (int i = 0; i < 100; i++)
{
arr[i] = static_cast<double>(i+100);
}
of.write((char*)arr, 100*sizeof(double));
}
void read_doubles()
{
ifstream ifs;
ifs.open(fname.c_str(), std::ios::binary | std::ios::in);
double arr[100];
ifs.read((char*)arr, 100*sizeof(double));
for (int i = 0; i < 100; i++)
{
cout << "Arr[i]: " << arr[i] << ", ";
}
cout << '\n';
}
int main()
{
write_doubles();
read_doubles();
return 0;
}

Program always producing same value?

the point of my program is to write the numbers 1 - 1,000,000 to a text file, generate a random number between 1 and 1,000,000, search for that line in the text file, take the value, and square it (this is just an exercise for me, it has no practical application). The problem is that whenever I run it, the value remains the same, but the rand() function is seeded by time(0). I suspect that it's a garbage value but I don't know where it's coming from (I have no experience with GDB or any other standalone debuggers). Here's my source code:
#include <fstream>
#include <ctime>
#include <iostream>
#include <string>
#include <cstdlib>
using namespace std;
int main(int argc, char** argv){
ofstream file("log.txt", ios::app);
ofstream programLog("programlog.dat", ios::app);
cout << "Test Start" << endl;
programLog << "Test Start" << endl;
cout << "Log file created" << endl;
programLog << "Log file created" << endl;
ifstream readFile("log.txt");
int foundNum;
std::string line = "";
unsigned int loopCount = 1000000;
unsigned int numToSearch;
const unsigned short min = 1;
const int max = 1000000;
unsigned int randomLine = 0;
for(int i = 0; i <= loopCount; i++){
file << i << endl;
}
//select random line
srand((unsigned)time(0));
while(!(randomLine > min) && !(randomLine < max)){
randomLine = (unsigned)rand();
programLog << randomLine;
int newlines = 0;
//size_t found;
while(getline(readFile, line)){
if(line.find("\n") != string::npos)
newlines++;
if(newlines == randomLine)
numToSearch = atoi(line.c_str());
}
}
programLog << "Random line selected" << endl;
//read line
while(std::getline(readFile,line)){
if(atoi(line.c_str()) == numToSearch){
foundNum = numToSearch;
break;
}
else
continue;
}
//square it
const unsigned int squared = foundNum*foundNum;
programLog << squared;
readFile.close(); //end read
file.close(); //end log
programLog.close(); //end programlog
return 0;
}
You never enter the while loop as you are using:
while(!(randomLine > min) && !(randomLine < max))
while immediately evaluates to false. You should use:
while(randomLine < min || randomLine > max)
Also, why do all your variables have different types? This could lead to unintended errors. You should change them to have the same type.
randomLineis initialized to 0, and still has that value once it reaches the while, so the loop body never executes.

How do I read a given number of lines from the end of the file using streams in C++?

For my implementation of tail shell command in Linux, I need to read certain amount of lines/bytes from the end of the file using stream input/output. Does anyone have suggestions how to do that? I suspect I need to open a file and pass some parameter to the ifstream constructor, but I don't know what exactly. Googling didn't find anything.
Since tail needs to work with pipes, that you can't rewind, you'll have to keep a rotating buffer of the last n lines you've read which you will dump on EOF.
This problem is analogous to the problem of getting the last n nodes of a singly-linked list. You have to go all the way to the end with a buffer of n lines, then spit out the lines from buffer.
I don't think there's an easy way to go about this, you'll probably need to seek to the end of the file, back up one 'chunk' (an arbitrary size, but a couple of kilobytes perhaps), read that 'chunk' of data and start looking through it for new line characters, if you didn't find enough, you back up twice your chunk size (remember, you read forward, so you need to back up the one you read, plus the one you want to read next), and read in another one.
HTH
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;
int main()
{
ifstream is("file.txt", ios::binary);
if (!is) {
cout << "Failed to open file" << endl;
return 1;
}
is.seekg(0, ios::end);
int len = is.tellg();
char c;
int n = 0;
ostringstream line;
int lines = 0;
for (int i = len - 1; i >= 0; --i) {
is.seekg(i, ios::beg);
is.get(c);
if (c == '\n' || i == 0) {
if (i < len - 1) {
if (i == 0) {
line << c;
}
string s = line.str();
cout << lines << ": " << string(s.rend() - n, s.rend()) << endl;
++lines;
n = 0;
line.seekp(0, ios::beg);
}
} else {
line << c;
++n;
}
}
is.close();
return 0;
}
this shows how you'd do it in c++... read successive chunks from the end of the file, then scan the chunks for new lines. if a newline isn't found, part of the chunk has to be kept around and combined with the next chunk read in...
//
// USAGE: lastln COUNT [FILE]
//
// Print at most COUNT lines from the end of FILE or standard input.
// If COUNT is -1, all lines are printed.
//
#include <errno.h>
#include <libgen.h>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;
int main(int argc, char **argv)
{
int ret = 0, maxLines = -1, len, count = 0, sz = 4096, lines = 0, rd;
istream *is;
ifstream ifs;
stringstream ss;
char *buf = NULL;
const char *prog = (argc > 0 && argv[0] ? basename(argv[0]) : "");
string line;
if (argc > 1) {
if ((maxLines = atoi(argv[1])) == 0) {
goto end;
}
}
if (argc > 2 && !(argv[2] && argv[2][0] == '-' && argv[2][1] == '\0')) {
ifs.open(argv[2], ios::in | ios::binary);
if (!ifs) {
ret = 1;
cerr << prog << ": " << argv[2] << ": " << strerror(errno) << endl;
goto end;
}
is = &ifs;
} else {
ss << cin.rdbuf();
if (!ss) {
ret = 1;
cerr << prog << ": failed to read input" << endl;
goto end;
}
is = &ss;
}
is->seekg(0, ios::end);
len = is->tellg();
buf = new char[sz + 1];
while (rd = min(len - count, sz)) {
is->seekg(0 - count - rd, ios::end);
is->read(buf, rd);
count += rd;
char *p = buf + rd, *q;
*p = '\0';
for (;;) {
q = (char *)memrchr(buf, '\n', p - buf);
if (q || count == len) {
if (q) *q = '\0';
if (lines || p - q - 1 > 0 || !q) {
++lines;
cout << lines << ": " << (q ? q + 1 : buf) << line << endl;
line.clear();
if (lines >= maxLines && maxLines != -1) break;
}
if (q) p = q; else break;
} else {
line = string(buf, p - buf) + line;
break;
}
}
}
end:
if (buf) delete[] buf;
return ret;
}