fscanf line with condition - c++

my goal is to read in a data file consisting of just one number per line and write the data into a histogram. There are some comments in the file behind # characters. I want to skip these lines.
I have started writing:
TH1F *hist = new TH1F("hist","",4096, -0.5,4095.5);
//TF1 *fitfunc;
char filename[100];
double val;
int i;
char line[256];
sprintf(filename,"test.dat");
FILE* pfile = fopen(filename, "r");
for (i=0;i<=14;i++) {
fgets(line,256,pfile);
cout<<line<<endl;
fscanf(pfile, "%lf /n", &val);
hist->SetBinContent(i,val);
}
But only every other line gets written as "line" while the others are fscanfed.
Would be very nice, if someone could give me a hint.
...so this will obviously not work properly:
TH1F *hist = new TH1F("hist","",4096, -0.5,4095.5);
//TF1 *fitfunc;
char filename[100];
double val;
int i;
char zeile[256];
sprintf(filename,"test.dat");
FILE* pfile = fopen(filename, "r");
for (i=0;i<=14;i++)
{
fgets(zeile,256,pfile);
cout<<"fgets: "<<zeile<<endl;
if (zeile[0]!='#')
{
fscanf(pfile, "%lf /n", &val);
cout<<"val: "<<val<<endl;
hist->SetBinContent(i,val);
}
}

You need to use sscanf() instead of fscanf() after you've read the line with fgets():
TH1F *hist = new TH1F("hist", "", 4096, -0.5, 4095.5);
char filename[100];
char zeile[256];
sprintf(filename, "test.dat");
FILE *pfile = fopen(filename, "r");
if (pfile == 0)
…handle error; do not continue…
for (int i = 0; i < 14 && fgets(zeile, sizeof(zeile), pfile) != 0; i++)
{
cout << "fgets: " << zeile << endl;
if (zeile[0] != '#')
{
double val;
if (sscanf(zeile, "%lf", &val) == 1)
{
cout << "val: " << val << endl;
hist->SetBinContent(i, val);
}
// else … optionally report that line was erroneous
}
}
I left the sprintf() for the file name in place, but it provides marginal value. I'd be tempted to use const char *filename = "test.dat"; so that the error message can report the file name that failed to open without repeating the string literal.
Converted into a standalone test program:
#include <cstdlib>
#include <iostream>
using namespace std;
int main()
{
char filename[100];
char zeile[256];
sprintf(filename, "test.dat");
FILE *pfile = fopen(filename, "r");
if (pfile != 0)
{
for (int i = 0; i < 14 && fgets(zeile, sizeof(zeile), pfile) != 0; i++)
{
cout << "fgets: " << zeile;
if (zeile[0] != '#')
{
double val;
if (sscanf(zeile, "%lf", &val) == 1)
cout << "val: " << val << endl;
}
}
fclose(pfile);
}
return 0;
}
and given a test data file test.dat containing:
1.234
2.345
#3.456
#4.567
5.678
the output from the program shown is:
fgets: 1.234
val: 1.234
fgets: 2.345
val: 2.345
fgets: #3.456
fgets: #4.567
fgets: 5.678
val: 5.678
This generates the three expected val lines and reads but ignores the two comment lines.

Related

Splitting csv large files into small files with dynamic names using C++

I am a beginner, so I apologise if my question looks childish. I have 38 large files in a folder. I want to split each of the files into smaller parts with dynamic name. Line 1 to line 13 works well. The challenge is in line 16-19. The output shows that the whole data from the ifstream is not appearing as char. This error makes it difficult to split the files. Please what am I getting wrong
#define SEGMENT 728300 //approximate target size of small file
using namespace std;
long file_size(char *name);//function definition below
int main(int argc, char **argv)
{
char input_file_1[100]; // input file
strcpy(input_file_1,argv[1]);
string PathToData = "path to the files";
TString name = PathToData+input_file_1;
std::cout << "Reading file " << name << endl;
char getdata[35000];
ifstream csv_db(name);
while(csv_db.getline(getdata,sizeof(csv_db)))
if (csv_db.eof())
csv_db.close();
int segments=0, i, accum;
FILE *fp1, *fp2;
unsigned int huga=strlen(getdata);
char largeFileName[huga + 100]; // Make sure there's enough space
strcpy(largeFileName, getdata);
std::cout << largeFileName << endl;
std::cout << largeFileName << endl;
long sizeFile = file_size(largeFileName);
segments = sizeFile/SEGMENT + 1980;//ensure end of file
char filename[360]={"path to folder where to keep the result"};
char smallFileName[360];
char line[1080];
fp1 = fopen(largeFileName, "r");
if(fp1)
{
for(i=1980;i<segments;i++)
{
accum = 0;
sprintf(smallFileName, "%s%d.csv", filename, i);
fp2 = fopen(smallFileName, "w");
if(fp2)
{
while(fgets(line, 1080, fp1) && accum <= SEGMENT)
{
accum += strlen(line);//track size of growing file
fputs(line, fp2);
}
fclose(fp2);
}
}
fclose(fp1);
}
return 0;
}
long file_size(char *name)
{
FILE *fp = fopen(name, "rb"); //must be binary read to get bytes
long size=-1;
if(fp)
{
fseek (fp, 0, SEEK_END);
size = ftell(fp)+1;
fclose(fp);
}
return size;
}

ifstream read reading random char for the whole stream

I'm trying to implement the function from listing 5.1 here
but when copying into a buffer with read from a file I just get the same character (Í) for the whole array, where string.txt is a copy and paste from the previous link content.
Here is my code:
#include <iostream>
#include <fstream>
#include <string>
#include <cinttypes>
#include <cstdio>
#include <cstring>
const int block_size = 0x4000; //16KB
int search(char* buffer, int searchLength, char* stringToSearch, int stringToSearchLength) {
char * potentialMatch;
while (searchLength) {
potentialMatch = reinterpret_cast<char *>(memchr(buffer, *stringToSearch, searchLength));
if (potentialMatch == NULL)
break;
if (stringToSearchLength == 1) {
return 1;
} else {
if (!memcmp(potentialMatch + 1, stringToSearch + 1, stringToSearchLength - 1))
return 1;
}
searchLength -= potentialMatch - buffer + 1;
buffer = potentialMatch + 1;
}
return 0;
}
int main(int argc, char* argv[]) {
char *toSearch = "Interpreting Where";
int done = 0;
int found = 0;
char *buffer;
int64_t fileSizeLeft = 0;
std::ifstream myFile("string.txt");
if (!myFile.fail()) {
buffer = new char[block_size];
myFile.seekg(0, std::ios::end); //Get file's size
fileSizeLeft = myFile.tellg();
} else {
std::cout << "Cannot open file" << std::endl;
return 1;
}
int toSearchLength = strlen(toSearch);
int stringLeft = toSearchLength - 1;
int first_time = 1;
while (!done && fileSizeLeft > toSearchLength) {
if (first_time) {
myFile.read(buffer, block_size);
found = search(buffer, block_size, toSearch, toSearchLength);
} else {
memcpy(buffer, buffer + stringLeft, stringLeft);
myFile.read(buffer+stringLeft, fileSizeLeft-stringLeft);
found = search(buffer, block_size, toSearch, toSearchLength);
}
fileSizeLeft = fileSizeLeft - block_size;
first_time = 0;
}
if (found) {
std::cout << "String found" << std::endl;
} else {
std::cout << "String not found" << std::endl;
}
myFile.close();
delete[] buffer;
return 0;
}
I hope you can help me see what I'm doing wrong, thanks!
You are setting myFile's position to ios_base::end with seekg:
myFile.seekg(0, ios::end);
Then trying to read from it:
myFile.read(buffer, block_size);
Clearly no data will be read since myFile is already at ios_base::end. And you'll be reading whatever uninitialized data that was already in buffer
What you probably intended to do was to set your myFile position back to the beginning by doing this before reading:
myFile.seekg(0, ios::beg);

how to make 10 copies of initial file, if first file is as-1.txt second should be as-2.txt and so on

Loop isn't making 10 copies and i have no idea how to change file names
#include "iostream"
#include "fstream"
#include "windows.h"
using namespace std;
void main()
{
char str[200];
ifstream myfile("as-1.txt");
if (!myfile)
{
cerr << "file not opening";
exit(1);
}
for (int i = 0; i < 10; i++)
{
ofstream myfile2("as-2.txt");
while (!myfile.eof())
{
myfile.getline(str, 200);
myfile2 << str << endl;
}
}
system("pause");
}
Solution using plain C API from <cstdio>. Easily customizable.
const char* file_name_format = "as-%d.txt"; //Change that if you need different name pattern
const char* original_file_name = "as-1.txt"; //Original file
const size_t max_file_name = 255;
FILE* original_file = fopen(original_file_name, "r+");
if(!original_file)
//file not found, handle error
fseek(original_file, 0, SEEK_END); //(*)
long file_size = ftell(original_file);
fseek(original_file, 0, SEEK_SET);
char* original_content = (char*)malloc(file_size);
fread(original_content, file_size, 1, original_file);
fclose(original_file);
size_t copies_num = 10;
size_t first_copy_number = 2;
char file_name[max_file_name];
for(size_t n = first_copy_number; n < first_copy_number + copies_num; ++n)
{
snprintf(file_name, max_file_name, file_name_format, n);
FILE* file = fopen(file_name, "w");
fwrite(original_content, file_size, 1, file);
fclose(file);
}
free(original_content);
(*) As noted on this page, SEEK_END may not necessarily be supported (i.e. it is not a portable solution). However most POSIX-compliant systems (including the most popular Linux distros), Windows family and OSX support this without any problems.
Oh, and one more thing. This line
while (!myfile.eof())
is not quite correct. Read this question - it explains why you shouldn't write such code.
int main()
{
const int copies_of_file = 10;
for (int i = 1; i <= copies_of_file; ++i)
{
std::ostringstream name;
name << "filename as-" << i << ".txt";
std::ofstream ofile(name.str().c_str());
ofile.close();
}
return 0;
}
That will make 10 copies of a blank .txt file named "filename as-1.txt" "filename as-2.txt" etc.
Note also the use of int main: main always has a return of int, never void

Junk values in ofstream using strncpy

I am running the following program below. I am taking the first 63 char values in B.txt and then attaching the float values in A.txt, beginning at the 62nd column in A.txt, at the end of the lines of B.txt
So if B.txt contains:
I am running the following program below. I am taking the firstXXXXXXXX
and A.txt contains:
I am running the following program below. I am taking the fir3.14
I want B.txt to look like:
I am running the following program below. I am taking the first3.14
However, the output I'm getting instead is:
I am running the following program below. I am taking the firstBUNCH OF JUNK3.14
int main()
{
loadfileB("B.txt");
return 0;
}
void loadfileB(char* fileName)
{
FILE* fp = fopen(fileName, "r");
char line[82];
vector<int> rownum;
vector<float> temp;
temp = loadfileA("A.txt");
int i = 0;
ofstream fout("output.txt");
while (fgets(line, 81, fp) != 0)
{
radius=temp[i];
char buffer[64];
strncpy(buffer, line, 63);
fout << buffer<< " " << radius << endl;
i++;
}
fclose(fp);
}
vector<float> loadfileA(char* fileName)
{
FILE* fp = fopen(fileName, "r");
char line[82];
vector<int> rownum;
vector <float> tempvec;
int i = 0;
while (fgets(line, 81, fp) != 0)
{
float temp;
getFloat(line, &temp, 60, 6);
tempvec.push_back(temp);
}
fclose(fp);
return tempvec;
}
void getFloat(char* line, float* d, int pos, int len)
{
char buffer[80];
*d = -1;
strncpy(buffer, &line[pos], len);
buffer[len] = '\0';
sscanf(buffer, "%f", d);
}
strncpy is a bad function to use. This is because it does not null-terminate its output if the input did not fit in the buffer. The garbage you are seeing is the result of passing a non-null-terminated buffer to a function that expected a null-terminated string.
The simplest fix is to replace:
char buffer[64];
strncpy(buffer, line, 63);
with:
std::string buffer = line;
buffer.resize(63);
In your other usage you do null-terminate, however you never check that len is smaller than 80 either. Again the simpler fix would be:
std::string buffer( line + pos, len );
sscanf(buffer.c_str(), "%f", d);
The getFloat function should have some way of signaling error (either a return value; or throw an exception if sscanf does not return 1).
Of course, you could replace a lot of your other C-style code with C++-style code too and avoid buffer size issues entirely.

Is there a better way to search a file for a string?

I need to search a (non-text) file for the byte sequence "9µ}Æ" (or "\x39\xb5\x7d\xc6").
After 5 hours of searching online this is the best I could do. It works but I wanted to know if there is a better way:
char buffer;
int pos=in.tellg();
// search file for string
while(!in.eof()){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='9'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='µ'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='}'){
in.read(&buffer, 1);
pos=in.tellg();
if(buffer=='Æ'){
cout << "found";
}
}
}
}
in.seekg((streampos) pos);
Note:
I can't use getline(). It's not a text file so there are probably not many line breaks.
Before I tried using a multi-character buffer and then copying the buffer to a C++ string, and then using string::find(). This didn't work because there are many '\0' characters throughout the file, so the sequence in the buffer would be cut very short when it was copied to the string.
Similar to what bames53 posted; I used a vector as a buffer:
std::ifstream ifs("file.bin");
ifs.seekg(0, std::ios::end);
std::streamsize f_size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::vector<unsigned char> buffer(f_size);
ifs.read(buffer.data(), f_size);
std::vector<unsigned char> seq = {0x39, 0xb5, 0x7d, 0xc6};
bool found = std::search(buffer.begin(), buffer.end(), seq.begin(), seq.end()) != buffer.end();
If you don't mind loading the entire file into an in-memory array (or using mmap() to make it look like the file is in memory), you could then search for your character sequence in-memory, which is a bit easier to do:
// Works much like strstr(), except it looks for a binary sub-sequence rather than a string sub-sequence
const char * MemMem(const char * lookIn, int numLookInBytes, const char * lookFor, int numLookForBytes)
{
if (numLookForBytes == 0) return lookIn; // hmm, existential questions here
else if (numLookForBytes == numLookInBytes) return (memcmp(lookIn, lookFor, numLookInBytes) == 0) ? lookIn : NULL;
else if (numLookForBytes < numLookInBytes)
{
const char * startedAt = lookIn;
int matchCount = 0;
for (int i=0; i<numLookInBytes; i++)
{
if (lookIn[i] == lookFor[matchCount])
{
if (matchCount == 0) startedAt = &lookIn[i];
if (++matchCount == numLookForBytes) return startedAt;
}
else matchCount = 0;
}
}
return NULL;
}
.... then you can just call the above function on the in-memory data array:
char * ret = MemMem(theInMemoryArrayContainingFilesBytes, numBytesInFile, myShortSequence, 4);
if (ret != NULL) printf("Found it at offset %i\n", ret-theInMemoryArrayContainingFilesBytes);
else printf("It's not there.\n");
This program loads the entire file into memory and then uses std::search on it.
int main() {
std::string filedata;
{
std::ifstream fin("file.dat");
std::stringstream ss;
ss << fin.rdbuf();
filedata = ss.str();
}
std::string key = "\x39\xb5\x7d\xc6";
auto result = std::search(std::begin(filedata), std::end(filedata),
std::begin(key), std::end(key));
if (std::end(filedata) != result) {
std::cout << "found\n";
// result is an iterator pointing at '\x39'
}
}
const char delims[] = { 0x39, 0xb5, 0x7d, 0xc6 };
char buffer[4];
const size_t delim_size = 4;
const size_t last_index = delim_size - 1;
for ( size_t i = 0; i < last_index; ++i )
{
if ( ! ( is.get( buffer[i] ) ) )
return false; // stream to short
}
while ( is.get(buffer[last_index]) )
{
if ( memcmp( buffer, delims, delim_size ) == 0 )
break; // you are arrived
memmove( buffer, buffer + 1, last_index );
}
You are looking for 4 bytes:
unsigned int delim = 0xc67db539;
unsigned int uibuffer;
char * buffer = reinterpret_cast<char *>(&uibuffer);
for ( size_t i = 0; i < 3; ++i )
{
if ( ! ( is.get( buffer[i] ) ) )
return false; // stream to short
}
while ( is.get(buffer[3]) )
{
if ( uibuffer == delim )
break; // you are arrived
uibuffer >>= 8;
}
Because you said you cannot search the entire file because of null terminator characters in the string, here's an alternative for you, which reads the entire file in and uses recursion to find the first occurrence of a string inside of the whole file.
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
string readFile (char *fileName) {
ifstream fi (fileName);
if (!fi)
cerr << "ERROR: Cannot open file" << endl;
else {
string str ((istreambuf_iterator<char>(fi)), istreambuf_iterator<char>());
return str;
}
return NULL;
}
bool findFirstOccurrenceOf_r (string haystack, char *needle, int haystack_pos, int needle_pos, int needle_len) {
if (needle_pos == needle_len)
return true;
if (haystack[haystack_pos] == needle[needle_pos])
return findFirstOccurrenceOf_r (haystack, needle, haystack_pos+1, needle_pos+1, needle_len);
return false;
}
int findFirstOccurrenceOf (string haystack, char *needle, int length) {
int pos = -1;
for (int i = 0; i < haystack.length() - length; i++) {
if (findFirstOccurrenceOf_r (haystack, needle, i, 0, length))
return i;
}
return pos;
}
int main () {
char str_to_find[4] = {0x39, 0xB5, 0x7D, 0xC6};
string contents = readFile ("input");
int pos = findFirstOccurrenceOf (contents, str_to_find, 4);
cout << pos << endl;
}
If the file is not too large, your best solution would be to load the whole file into memory, so you don't need to keep reading from the drive. If the file is too large to load in at once, you would want to load in chunks of the file at a time. But if you do load in chucks, make sure you check to edges of the chunks. It's possible that your chunk happens to split right in the middle of the string you're searching for.