Hex value comparison and save to file in C++

I am reading an H.264 bitstream as a hex file in C++. I want to insert a string whenever a certain condition is met. As in the attached image, if the hex value 00 00 00 01 occurs anywhere in the file, I want to add a string such as ABC before the 00 00 00 01 and save the result as a new file. Right now my approach is to read the H.264 file as hex, convert it into a string, and do a string comparison. Is there a way I can do a straight hex comparison? Here is my current code:
#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <iomanip>
using namespace std;
int _tmain(int argc, _TCHAR* argv[])
{
unsigned char x;
string s1,s2,s3;
s2="Mushahid Hussain";
s3="0000000141";
std::ifstream input("d:\\Jm\\videos\\trying2.264", std::ios::binary);
input >> std::noskipws;
while (input >> x) {
long constant = 0x0000000168;
std::ostringstream buffer;
buffer << std::hex << std::setw(2) << std::setfill('0')
<< (int)x;
s1=buffer.str();
if (s1.find(s1) != std::string::npos) {
cout<<"hello";
s1+=s2;
}
std::ofstream outfile;
outfile.open("d:\\Jm\\bin\\trying5.264", std::ios_base::app);
outfile << s1;
}
return 0;
}
Edit 1
As answered by Tommylee2k, I am able to append the string. But the problem is that at the end of the file a hex CD value is being appended, as shown in the attached image.

Perhaps a better approach is to read the whole file into a memory buffer and search it there with a memcmp()-style comparison instead.
When you find your pattern, you write the block ahead of the match, then your "ABC" string, then the pattern itself, and continue searching the rest of the file:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

char pattern[4] = { 0x00, 0x00, 0x01, 0x67 };

/* find the first occurrence of p (plen bytes) inside s (len bytes), or NULL */
char *memfind(char *s, int len, char *p, int plen) {
    int n = 0;
    char *pos = s;
    while ((pos - s) <= (len - plen)) {
        while (n < plen && *(pos + n) == *(p + n)) n++;
        if (n == plen)
            return pos;
        pos++;
        n = 0;
    }
    return NULL;
}

int main() {
    FILE *in  = fopen("in.vid", "rb");   /* binary mode */
    FILE *out = fopen("out.vid", "wb");
    if (!in || !out) return 1;

    // get file size
    size_t size = 0;
    fseek(in, 0L, SEEK_END);
    size = ftell(in);

    // read whole file in
    char *buffer = malloc(size);
    fseek(in, 0L, SEEK_SET);
    fread(buffer, size, 1, in);

    char *currentPos = buffer;
    char *found;
    if (buffer) {
        while (1) {
            found = memfind(currentPos, size - (currentPos - buffer), pattern, sizeof(pattern));
            if (found == NULL) break;
            fwrite(currentPos, 1, (size_t)(found - currentPos), out);
            fwrite("ABC", 1, 3, out);                 /* write only the 3 characters, not the trailing '\0' */
            fwrite(pattern, sizeof(pattern), 1, out);
            currentPos = found + sizeof(pattern);
        }
        fwrite(currentPos, 1, (size_t)(size - (currentPos - buffer)), out);
        free(buffer);
    }
    fclose(in);
    fclose(out);
    return 0;
}

Related

Can't copy the whole text file to a char array

I am trying to copy a whole text file into a char array using fstream, but even after increasing the size of the array it only reads the file up to the same limit. I am bound to save it in a char array, and it would be good if it is not a dynamic one. Any solution, please?
// smallGrams.cpp : Defines the entry point for the console application.
//
//#include "stdafx.h"
#include <iostream>
using namespace std;
#include <string>
#include <fstream>

void readInput(const char* Path);
void removePunctucationMarks();
void removeSpacing();
void insertDots();
char * getText();
void generateUnigrams();
void generateBigrams();
void generateTrigrams();
double validateSentance(string str);
string sentenceCreation(int position);

int main()
{
    char *path = "alice.txt";
    readInput(path);
    return 0;
}

void readInput(const char* Path)
{
    ifstream infile;
    infile.open(Path);
    if (!infile.fail())
        cout << "File opened successfully" << endl;
    else
        cout << "File failed to open" << endl;

    int arrSize = 100000000;
    char *arr = new char[arrSize];
    int i = 0;
    while (!infile.eof() && i < arrSize)
    {
        infile.get(arr[i]);
        i++;
    }
    arr[i - 1] = '\0';

    for (short i = 0; i < arrSize && arr[i] != '\0'; i++)
    {
        cout << arr[i];
    }
}
This is a C-style solution that works. It checks the file size, then allocates the necessary memory for the array and reads the whole content of the file in one call. fread() returns the number of elements it actually read; if that differs from what you requested, an error (or end of file) occurred (check the fread() reference).
#include <cstring>
#include <cstdlib>
#include <cstdio>

int main(int argc, char *argv[]) {
    char *data;
    long data_len;
    FILE *fd;

    fd = fopen("file.txt", "r");
    if (fd == NULL) {
        // error
        return -1;
    }

    fseek(fd, 0, SEEK_END);
    data_len = ftell(fd);
    rewind(fd);

    data = (char *) malloc((data_len + 1) * sizeof(char));
    memset(data, 0, data_len + 1);  // zero the buffer so the text is NUL-terminated

    if (fread(data, sizeof(char), data_len, fd) != (size_t) data_len) {
        // error
        return -1;
    }

    printf("%s\n", data);

    fclose(fd);
    free(data);
    return 0;
}
Here is a simple doubling method:
#include <iostream>
#include <string>
#include <fstream>
#include <cstdint>
#include <cstring>
using namespace std;

void readInput(const char* Path)
{
    ifstream infile;
    infile.open(Path);
    if (!infile.fail())
        cout << "File opened successfully" << endl;
    else {
        cout << "File failed to open" << endl;
        return;
    }

    int capacity = 1000;
    char *arr = new char[capacity];
    char *temp;
    int i = 0;
    while (infile >> arr[i])
    {
        i++;
        if (i >= capacity) {
            // double the buffer when it fills up
            temp = new char[capacity * 2];
            std::memcpy(temp, arr, capacity);
            delete [] arr;
            arr = temp;
            capacity *= 2;
        }
    }
    arr[i] = '\0';  // terminate the buffer (the loop always leaves room for this)
    delete [] arr;  // free the buffer when you are done with it
}

int main()
{
    const char *path = "alice.txt";
    readInput(path);
    return 0;
}
The error is likely in the for loop where you read and display the array contents, not in reading the data from the file.
Use int instead of short in the for loop, as a short can only count up to 32,767.
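For illustration, a minimal, self-contained sketch of the corrected display loop; the buffer size and contents here are just stand-ins, the point is the int index:
#include <iostream>
using namespace std;

int main()
{
    const int arrSize = 100000;        // stand-in for the large buffer size used above
    char *arr = new char[arrSize];
    // ... fill arr from the file and terminate it with '\0', as in readInput() ...
    arr[0] = '\0';

    // use int (not short) so the index can cover the whole buffer;
    // a short wraps around long before reaching a large arrSize
    for (int i = 0; i < arrSize && arr[i] != '\0'; i++)
        cout << arr[i];

    delete[] arr;
    return 0;
}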

C++ reading a file prints nothing to console

I'm having trouble printing the contents of a file to console.
file.bin contents are "abc".
data holds a value, but it just doesn't print it...
#include <Windows.h>
#include <iostream>

int main()
{
    wchar_t *data;
    FILE* file;
    int err = _wfopen_s(&file, L"file.bin", L"rb");
    if (err != 0)
    {
        std::cout << "Error";
        return 0;
    }
    fseek(file, 0, SEEK_END);
    long lSize;
    lSize = ftell(file);
    rewind(file);
    data = (wchar_t *)malloc(lSize + 1);
    fread(data, 1, lSize, file);
    // dereference pointer
    wchar_t data2 = *data;
    std::wcout << data2; // prints nothing...
    system("PAUSE");
    return 0;
}
EDIT
I know about fstream but I would really prefer C style opening/reading files.
#include <fstream>
#include <string>
#include <iostream>

int main()
{
    std::ifstream ifs("file.bin");
    std::string content( (std::istreambuf_iterator<char>(ifs) ),
                         (std::istreambuf_iterator<char>() ) );
    std::cout << content;
    return 0;
}
Use std::ifstream if you're using C++. You're making this much more complicated than you need to. See this former answer.

Possible memory leak in string memory allocation

This is my code:
#include <string>
#include <iostream>
#include <cstdio>
#include <cstdlib>

std::string & fileread(const char * name)
{
    FILE *fp = fopen(name, "rb");
    size_t sz;
    int i;
    char *buff;
    fseek(fp, 0, SEEK_END);
    sz = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    buff = (char *)malloc(sizeof(char) * (sz + 1));
    buff[sz] = '\0';
    fread(buff, sz, 1, fp);
    std::string * rtstr = new std::string(buff);
    free(buff);
    fclose(fp);
    return * rtstr;
}

int main(int argc, char * argv[])
{
    std::string file_info(fileread(argv[1]));
    std::cout << file_info << std::endl;
    return 0;
}
It simply reads one file and prints its content to the screen.
In the function fileread, I use new std::string(buff); to get a std::string *, and return the pointer. Will this cause a memory leak? And if the answer is 'yes', how do I avoid it?
About using C in C++: fread is much faster than ifstream (tested with 1 billion random numbers).
My problem is about the memory leak.
Return std::string by value. Don't worry, C++ will take care of not copying the object redundantly (unless you have a very old compiler).
Here is the code, fixed:
#include <string>
#include <iostream>
#include <cstdio>
#include <cstdlib>

std::string fileread(const char * name)
{
    FILE *fp = fopen(name, "rb");
    size_t sz;
    char *buff;
    fseek(fp, 0, SEEK_END);
    sz = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    buff = (char *)malloc(sizeof(char) * (sz + 1));
    buff[sz] = '\0';
    fread(buff, sz, 1, fp);
    std::string rtstr(buff);
    free(buff);
    fclose(fp);
    return rtstr;  // return by value; no new/delete needed
}

int main(int argc, char * argv[])
{
    std::string file_info(fileread(argv[1]));
    std::cout << file_info << std::endl;
    return 0;
}
I made only the small change necessary and ignored any other problems you might have in your code. Take care.
As Nawaz rightly commented: "DONT code C in C++. Use std::ifstream and std::string (not std::string*)". Here is the code in C++ to avoid all the issues:
#include <iostream>
#include <fstream>
#include <string>
using namespace std;

int main () {
    string line;
    ifstream myfile ("myfile.txt");
    if (myfile.is_open())
    {
        while ( getline (myfile, line) )
        {
            cout << line << '\n';
        }
        myfile.close();
    }
    else cout << "Unable to open file";
    return 0;
}

C++ Read large data, parse, then write data

I'm trying to read a large dataset, format it the way I need, and then write it to another file. I'm trying to use C++ over SAS or STATA for the speed advantage. The data files are usually around 10 gigabytes, and my current code takes over an hour to run (and then I kill it, because I'm sure something in my code is very inefficient).
Is there a more efficient way to do this? Maybe read the file into memory and then analyze it using the switch statements? (I have 32 GB of RAM on 64-bit Linux.) Is it possible that reading and then writing within the loop slows it down, since it is constantly reading, then writing? I tried reading from one drive and writing to another in an attempt to speed this up.
Are the switch cases slowing it down?
The process I have now reads the data using getline, uses the switch statement to parse it correctly, and then writes it to my outfile, repeating for 300 million lines. There are about 10 more cases in the switch statement, but I didn't copy them for brevity's sake.
The code is probably very ugly all being in the main function, but I wanted to get it working before I worked on attractiveness.
I've tried using read() but without any success. Please let me know if I need to clarify anything.
Thank you for the help!
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <stdio.h>
//#include <cstring>
//#include <boost/algorithm/string.hpp>
#include <vector>
using namespace std;
//using namespace boost;

struct dataline
{
    char type[0];
    double second;
    short mill;
    char event[1];
    char ticker[6];
    char marketCategory[1];
    char financialStatus[1];
    int roundLotSize;
    short roundLotOnly;
    char tradingState[1];
    char reserved[1];
    char reason[4];
    char mpid[4];
    char primaryMarketMaker[1];
    char primaryMarketMode[1];
    char marketParticipantState[1];
    unsigned long orderNumber;
    char buySell[0];
    double shares;
    float price;
    int executedShares;
    double matchNumber;
    char printable[1];
    double executionPrice;
    int canceledShares;
    double sharesBig;
    double crossPrice;
    char crossType[0];
    double pairedShares;
    double imbalanceShares;
    char imbalanceDirection[1];
    double fairPrice;
    double nearPrice;
    double currentReferencePrice;
    char priceVariationIndicator[1];
};

int main ()
{
    string a;
    string b;
    string c;
    string d;
    string e;
    string f;
    string g;
    string h;
    string k;
    string l;
    string times;
    string smalltimes;
    short time;      // counter to keep second filled
    short smalltime; // counter to keep millisecond filled
    double N;
    double NN;
    double NNN;
    int length;
    char M;
    //vector<> fout;
    string line;
    ofstream fout ("/media/3tb/test.txt");
    ifstream myfile;
    myfile.open("S050508-v3.txt");
    dataline oneline;
    if (myfile.is_open())
    {
        while ( myfile.good() )
        {
            getline (myfile, line);
            // cout << line << endl;
            a = line.substr(0,1);
            stringstream ss(a);
            char type;
            ss >> type;
            switch (type)
            {
            case 'T':
            {
                if (type == 'T')
                {
                    times = line.substr(1,5);
                    stringstream s(times);
                    s >> time;
                    //oneline.second=time;
                    //oneline.second;
                    //cout<<time<<endl;
                }
                else
                {
                    time = time;
                }
                break;
            }
            case 'M':
            {
                if (type == 'M')
                {
                    smalltimes = line.substr(1,3);
                    stringstream ss(smalltimes);
                    ss >> smalltime; //oneline.mill;
                    // cout<<smalltime<<endl; //smalltime=oneline.mill;
                }
                else
                {
                    smalltime = smalltime;
                }
                break;
            }
            case 'R':
            {
                oneline.second = time;
                oneline.mill = smalltime;
                a = line.substr(0,1);
                stringstream ss(a);
                ss >> oneline.type;
                b = line.substr(1,6);
                stringstream sss(b);
                sss >> oneline.ticker;
                c = line.substr(7,1);
                stringstream ssss(c);
                ssss >> oneline.marketCategory;
                d = line.substr(8,1);
                stringstream sssss(d);
                sssss >> oneline.financialStatus;
                e = line.substr(9,6);
                stringstream ssssss(e);
                ssssss >> oneline.roundLotSize;
                f = line.substr(15,1);
                stringstream sssssss(f);
                sssssss >> oneline.roundLotOnly;
                *oneline.tradingState = 0;
                *oneline.reserved = 0;
                *oneline.reason = 0;
                *oneline.mpid = 0;
                *oneline.primaryMarketMaker = 0;
                *oneline.primaryMarketMode = 0;
                *oneline.marketParticipantState = 0;
                oneline.orderNumber = 0;
                *oneline.buySell = 0;
                oneline.shares = 0;
                oneline.price = 0;
                oneline.executedShares = 0;
                oneline.matchNumber = 0;
                *oneline.printable = 0;
                oneline.executionPrice = 0;
                oneline.canceledShares = 0;
                oneline.sharesBig = 0;
                oneline.crossPrice = 0;
                *oneline.crossType = 0;
                oneline.pairedShares = 0;
                oneline.imbalanceShares = 0;
                *oneline.imbalanceDirection = 0;
                oneline.fairPrice = 0;
                oneline.nearPrice = 0;
                oneline.currentReferencePrice = 0;
                *oneline.priceVariationIndicator = 0;
                break;
            } // End Case
            } // End Switch
        } // End While
        myfile.close();
    } // End If
    else cout << "Unable to open file";
    cout << "Junk" << endl;
    return 0;
}
UPDATE: So I've been trying to use a memory map, but now I'm getting a segmentation fault.
I've been trying to follow different examples to piece together something that would work for my case. Why would I be getting a segmentation fault? I've taken the first part of my code, which looks like this:
int main (int argc, char** path)
{
    long i;
    int fd;
    char *map;
    char *FILEPATH = path;
    unsigned long FILESIZE;
    FILE* fp = fopen(FILEPATH, "/home/brian/Desktop/S050508-v3.txt");
    fseek(fp, 0, SEEK_END);
    FILESIZE = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    fclose(fp);

    fd = open(FILEPATH, O_RDONLY);
    map = (char *) mmap(0, FILESIZE, PROT_READ, MAP_SHARED, fd, 0);

    char z;
    stringstream ss;
    for (long i = 0; i <= FILESIZE; ++i)
    {
        z = map[i];
        if (z != '\n')
        {
            ss << z;
        }
        else
        {
            // c style tokenizing
            ss.str("");
        }
    }
    if (munmap(map, FILESIZE) == -1) perror("Error un-mmapping the file");
    close(fd);
The data files are usually around 10 gigabytes.
...
Are the switch cases slowing it down?
Almost certainly not; it smells like you're I/O bound. But you should consider measuring it. Modern CPUs have performance counters which are pretty easy to leverage with the right tools. Let's start by partitioning the problem into some major domains: I/O to devices, load/store to memory, and CPU. You can place markers in your code where you read a clock in order to understand how long each of the operations is taking. On Linux you can use clock_gettime() or the rdtsc instruction to access a clock with higher precision than the OS tick; a minimal sketch of such markers follows the suggestions below.
Consider mmap/CreateFileMapping, either of which might provide better efficiency/throughput to the pages you're accessing.
Consider large/huge pages if streaming through large amounts of data which has already been paged in.
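As a rough illustration of those timing markers, here is a minimal sketch using clock_gettime() on Linux/glibc; the section being timed is only a placeholder comment standing in for your own read/parse/write code:
#include <stdio.h>
#include <time.h>

int main()
{
    struct timespec t0, t1;
    clock_gettime(CLOCK_MONOTONIC, &t0);   // marker before the section of interest

    // ... the read / parse / write section you want to measure goes here ...

    clock_gettime(CLOCK_MONOTONIC, &t1);   // marker after the section
    double seconds = (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9;
    printf("section took %.3f s\n", seconds);
    return 0;
}
Placing a pair of markers around the read, the parse, and the write separately tells you which domain dominates before you optimize anything.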
From the manual for mmap():
Description
mmap() creates a new mapping in the virtual address space of the
calling process. The starting address for the new mapping is specified
in addr. The length argument specifies the length of the mapping.
Here's an mmap() example:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

#define FILEPATH "/tmp/mmapped.bin"
#define NUMINTS  (1000)
#define FILESIZE (NUMINTS * sizeof(int))

int main(int argc, char *argv[])
{
    int i;
    int fd;
    int *map; /* mmapped array of int's */

    fd = open(FILEPATH, O_RDONLY);
    if (fd == -1) {
        perror("Error opening file for reading");
        exit(EXIT_FAILURE);
    }

    map = mmap(0, FILESIZE, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        close(fd);
        perror("Error mmapping the file");
        exit(EXIT_FAILURE);
    }

    /* Read the file int-by-int from the mmap; valid indices are 0..NUMINTS-1 */
    for (i = 0; i < NUMINTS; ++i) {
        printf("%d: %d\n", i, map[i]);
    }

    if (munmap(map, FILESIZE) == -1) {
        perror("Error un-mmapping the file");
    }

    close(fd);
    return 0;
}

Extract hexdump or RAW data of a file to text

I was wondering if there is a way to output the hex dump or raw data of a file to a text file.
For example:
I have a file, let's say "data.jpg" (the file type is irrelevant). How can I export the hex dump (14ed 5602 etc.) to a file "output.txt"?
Also, how can I specify the format of the output, for example Unicode or UTF?
In C++.
You can use a loop with fread and fprintf: with fread you get the byte value of each byte, and with fprintf you can use %x to print it as hexadecimal to a file.
http://www.cplusplus.com/reference/clibrary/cstdio/fread/
http://www.cplusplus.com/reference/clibrary/cstdio/fprintf/
If you want this to be fast, load whole machine words (int or long long) instead of single bytes. If you want it to be even faster, fread a whole array, sprintf it into another array, then fprintf that array to the file.
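A minimal sketch of that fread()/fprintf() approach, using the "data.jpg" and "output.txt" names from the question and a buffered read (buffer size and error handling are arbitrary choices here):
#include <cstdio>

int main()
{
    FILE *in  = fopen("data.jpg", "rb");   // the input file from the question
    FILE *out = fopen("output.txt", "w");  // the text file receiving the hex dump
    if (!in || !out)
        return 1;

    unsigned char buffer[4096];
    size_t n;
    while ((n = fread(buffer, 1, sizeof buffer, in)) > 0)
    {
        for (size_t i = 0; i < n; ++i)
            fprintf(out, "%02x ", buffer[i]);  // each byte as two hex digits
    }

    fclose(in);
    fclose(out);
    return 0;
}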
Maybe something like this?
#include <sstream>
#include <iostream>
#include <iomanip>
#include <iterator>
#include <algorithm>

int main()
{
    std::stringstream buffer( "testxzy" );
    std::istreambuf_iterator<char> it( buffer.rdbuf( ) );
    std::istreambuf_iterator<char> end; // eof
    std::cout << std::hex << std::showbase;
    std::copy(it, end, std::ostream_iterator<int>(std::cout));
    std::cout << std::endl;
    return 0;
}
You just have to replace buffer with an ifstream that reads the binary file, and write the output to a text file using an ofstream instead of cout, for example as in the sketch below.
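One possible way that substitution might look, assuming the same "data.jpg" and "output.txt" names from the question; the rest mirrors the snippet above:
#include <fstream>
#include <iterator>
#include <algorithm>
#include <iomanip>

int main()
{
    std::ifstream in("data.jpg", std::ios::binary);  // binary file instead of the stringstream
    std::ofstream out("output.txt");                 // text file instead of std::cout

    std::istreambuf_iterator<char> it(in);
    std::istreambuf_iterator<char> end;              // eof

    out << std::hex << std::showbase;
    // note: as in the snippet above, bytes over 0x7f may print as negative
    // values if char is signed on your platform
    std::copy(it, end, std::ostream_iterator<int>(out, " "));
    return 0;
}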
This is pretty old -- if you want Unicode, you'll have to add that yourself.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

int main(int argc, char **argv) {
    unsigned long offset = 0;
    FILE *input;
    int bytes, i, j;
    unsigned char buffer[16];
    char outbuffer[60];

    if ( argc < 2 ) {
        fprintf(stderr, "\nUsage: dump filename [filename...]");
        return EXIT_FAILURE;
    }

    for (j = 1; j < argc; ++j) {
        if ( NULL == (input = fopen(argv[j], "rb")))
            continue;
        printf("\n%s:\n", argv[j]);
        while (0 < (bytes = fread(buffer, 1, 16, input))) {
            sprintf(outbuffer, "%8.8lx: ", offset);  /* offset of this line */
            offset += bytes;
            for (i = 0; i < bytes; i++) {
                sprintf(outbuffer + 10 + 3 * i, "%2.2X ", buffer[i]);
                if (!isprint(buffer[i]))
                    buffer[i] = '.';
            }
            printf("%-60s %*.*s\n", outbuffer, bytes, bytes, buffer);
        }
        fclose(input);
    }
    return 0;
}