using fwrite() & fread() for binary in/output - c++

I'm using a large array of floats. After a lot of fiddling I've managed to write it to a binary file. When opening that file at a later time, the reading process only reads a couple of handfuls of floats (according to the return-value of fread(), and it's all values 0.0f). The reading is supposed to put the floats into an (the original) array, and it does not contain the original values.
I'm using Code::Blocks and MinGW doing a program in the 32bit realm on a 64bit pc .. and I'm not very proficient on c/c++ and pointers.
#include<...>
const int mSIZE = 6000 ;
static float data[mSIZE*mSIZE] ;
void myUseFunc(){
const char *chN = "J:/path/flt_632_55.bin" ;
FILE *outFile=NULL ;
# .. sucessfully filling and using data[]
..
size_t st = mSIZE*mSIZE ;
outFile = fopen( chN , "w" ) ;
if(!outFile){ printf("error opening file %s \n", chN); exit(0);}
else{
size_t indt;
indt = fwrite( data , sizeof(float), st , outFile );
std::cout << "floats written to file: " << indt << std::endl ;
#.. value shows that all values ar written
# and a properly sized file has appeared at the proper place
}
fclose( outFile ) ;
}
void myLoadFunc( const char *fileName){
FILE *inFile = NULL ;
inFile = fopen( fileName, "r");
if(!inFile){ printf("error opening file %s \n", fileName); exit(0); }
size_t blok = mSIZE*mSIZE ;
size_t out;
out = fread( dataOne, sizeof(GLfloat), blok , inFile);
fclose(inFile);
if(out != blok){
std::cout<< out << std::endl ;
fputs ("Reading error",stderr);
# no stderr presented at the console ..
printf("some error\n") ;
exit(0);
# .. program exits at out=14
}
...
}
int main( ){
...
const char *FileName = "J:/path/flt_632_55.bin" ;
myLoadFunc( FileName ) ;
...
}

You are not writing to/reading from a binary file, you open the files as text files.
You need to add the "b" to the open mode, like
outFile = fopen( chN , "wb" ) ;

Related

Splitting csv large files into small files with dynamic names using C++

I am a beginner, so I apologise if my question looks childish. I have 38 large files in a folder. I want to split each of the files into smaller parts with dynamic name. Line 1 to line 13 works well. The challenge is in line 16-19. The output shows that the whole data from the ifstream is not appearing as char. This error makes it difficult to split the files. Please what am I getting wrong
#define SEGMENT 728300 //approximate target size of small file
using namespace std;
long file_size(char *name);//function definition below
int main(int argc, char **argv)
{
char input_file_1[100]; // input file
strcpy(input_file_1,argv[1]);
string PathToData = "path to the files";
TString name = PathToData+input_file_1;
std::cout << "Reading file " << name << endl;
char getdata[35000];
ifstream csv_db(name);
while(csv_db.getline(getdata,sizeof(csv_db)))
if (csv_db.eof())
csv_db.close();
int segments=0, i, accum;
FILE *fp1, *fp2;
unsigned int huga=strlen(getdata);
char largeFileName[huga + 100]; // Make sure there's enough space
strcpy(largeFileName, getdata);
std::cout << largeFileName << endl;
std::cout << largeFileName << endl;
long sizeFile = file_size(largeFileName);
segments = sizeFile/SEGMENT + 1980;//ensure end of file
char filename[360]={"path to folder where to keep the result"};
char smallFileName[360];
char line[1080];
fp1 = fopen(largeFileName, "r");
if(fp1)
{
for(i=1980;i<segments;i++)
{
accum = 0;
sprintf(smallFileName, "%s%d.csv", filename, i);
fp2 = fopen(smallFileName, "w");
if(fp2)
{
while(fgets(line, 1080, fp1) && accum <= SEGMENT)
{
accum += strlen(line);//track size of growing file
fputs(line, fp2);
}
fclose(fp2);
}
}
fclose(fp1);
}
return 0;
}
long file_size(char *name)
{
FILE *fp = fopen(name, "rb"); //must be binary read to get bytes
long size=-1;
if(fp)
{
fseek (fp, 0, SEEK_END);
size = ftell(fp)+1;
fclose(fp);
}
return size;
}

Convert the Linux open, read, write, close functions to work on Windows

The code below was written for Linux and uses open, read, write and close. I am working on a Windows computer where I normally use fopen, fgets, fputs, fclose. Right now I get a no prototype error for open, read, write and close. Is there a header file I can include to make this work on a Windows computer or do I need to convert the code? Can you show how to convert it so it works the same on Windows or at least point me to an online document which shows how to convert it?
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifdef unix
#include <unistd.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifndef O_BINARY
#define O_BINARY 0
#endif
#define NB 8192
char buff[NB];
int
main(argc,argv)
int argc;
char **argv;
{
int fdi, fdo, i, n, m;
char *p, *q;
char c;
if( argc > 0 )
printf( "%s: Reverse bytes in 8-byte values \n", argv[0] );
if( argc > 1 )
strcpy( buff, argv[1] );
else
{
printf( "Input file name ? " );
gets( buff );
}
fdi = open( buff, O_BINARY | O_RDONLY, S_IREAD );
if( fdi <= 0 )
{
printf( "Can't open <%s>\n", buff );
exit(2);
}
if( argc > 2 )
strcpy( buff, argv[2] );
else
{
printf( "Output file name ? " );
gets( buff );
}
fdo = open( buff, O_BINARY | O_RDWR | O_CREAT | O_TRUNC,
S_IREAD | S_IWRITE );
if( fdo <= 0 )
{
printf( "Can't open <%s>\n", buff );
exit(2);
}
while( (n = read( fdi, buff, NB )) > 0 )
{
m = n / 8;
p = buff;
q = buff+7;
for( i=0; i<m; i++ )
{
c = *p;
*p++ = *q;
*q-- = c;
c = *p;
*p++ = *q;
*q-- = c;
c = *p;
*p++ = *q;
*q-- = c;
c = *p;
*p++ = *q;
*q-- = c;
p += 4;
q += 12;
}
write( fdo, buff, n );
}
close( fdo );
close( fdi );
exit(0);
}
Microsoft directly supports POSIX-style low-level IO calls such as open(), read(), , write(), and close(); although with what appears to be a misleading "deprecated" characterization.
The required header is <io.h>.
The calls correspond to functions named with a preceeding underscore, so open() maps to _open().
The full list of supported "low-level" IO functions Microsoft supports are:
Low-Level I/O
Low-Level I/O Functions
Function Use
_close Close file
_commit Flush file to disk
_creat, _wcreat Create file
_dup Return next available file descriptor for given file
_dup2 Create second descriptor for given file
_eof Test for end of file
_lseek, _lseeki64 Reposition file pointer to given location
_open, _wopen Open file
_read Read data from file
_sopen, _wsopen, _sopen_s, _wsopen_s Open file for file sharing
_tell, _telli64 Get current file-pointer position
_umask, _umask_s Set file-permission mask
_write Write data to file
Some of the low-level functions may not have a non-underscore, POSIX-style equivalent name.
The corresponding functions in Windows use the same name but with an underscore (_) prepended to the name.
open -> _open
close -> _close
etc.
They are declared in the header io.h. See https://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx for the list of all supported functions.
Borland C++ Builder encapsulated binary file access functions into:
FileOpen
FileCreate
FileRead
FileWrite
FileSeek
FileClose
Here simple example of loading text file:
BYTE *txt=NULL; int hnd=-1,siz=0;
hnd = FileOpen("textfile.txt",fmOpenRead);
if (hnd!=-1)
{
siz=FileSeek(hnd,0,2); // position to end of file (0 bytes from end) and store the offset to siz which means size of file
FileSeek(hnd,0,0); // position to start of file (0 bytes from start)
txt = new BYTE[siz];
FileRead(hnd,txt,siz); // read siz bytes to txt buffer
FileClose(hnd);
}
if (txt!=NULL)
{
// here do your stuff with txt[siz] I save it to another file
hnd = FileCreate("output.txt");
if (hnd!=-1)
{
FileWrite(hnd,txt,siz); // write siz bytes to txt buffer
FileClose(hnd);
}
delete[] txt;
}
IIRC All these are part of VCL so in case you are using console you need to set VCL include check during the project creation or include it manually.

Binary (de-)serialization of a matrix using fwrite/fread doesn't work

I'm trying to write/read a double matrix to file as binary data but I'm not getting the correct values when reading.
I'm not sure if this is the correct procedure to do it with matrices.
Here's the code that I'm using to write it:
void writeMatrixToFileBin(double **myMatrix, int rows, int colums){
cout << "\nWritting matrix A to file as bin..\n";
FILE * pFile;
pFile = fopen ( matrixOutputName.c_str() , "wb" );
fwrite (myMatrix , sizeof(double) , colums*rows , pFile );
fclose (pFile);
}
Here's the code that I'm using to read it:
double** loadMatrixBin(){
double **A; //Our matrix
cout << "\nLoading matrix A from file as bin..\n";
//Initialize matrix array (too big to put on stack)
A = new double*[nRows];
for(int i=0; i<nRows; i++){
A[i] = new double[nColumns];
}
FILE * pFile;
pFile = fopen ( matrixFile.c_str() , "rb" );
if (pFile==NULL){
cout << "Error opening file for read matrix (BIN)";
}
// copy the file into the buffer:
fread (A,sizeof(double),nRows*nColumns,pFile);
// terminate
fclose (pFile);
return A;
}
It doesn't work because myMatrix is not a single continuous memory area, it's an array of pointers. You have to write (and load) in a loop:
void writeMatrixToFileBin(double **myMatrix, int rows, int colums){
cout << "\nWritting matrix A to file as bin..\n";
FILE * pFile;
pFile = fopen ( matrixOutputName.c_str() , "wb" );
for (int i = 0; i < rows; i++)
fwrite (myMatrix[i] , sizeof(double) , colums , pFile );
fclose (pFile);
}
Similar when reading.

Bus Error on fopen()?

I am using Totalview to debug some code, and it is stopping with a Bus Error.
What is this, and how do I fix it? here is the code snip
In my main:
char *infilename = "/home/dcole/Images/lena1024s.jpg";
/* Try opening a jpeg*/
if( read_jpeg_file( infilename ) > 0 )
{
//do some stuff
}
The function:
int read_jpeg_file( const char *filename )
{
/* these are standard libjpeg structures for reading(decompression) */
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
/* libjpeg data structure for storing one row, that is, scanline of an image */
JSAMPROW row_pointer[1];
FILE *infile = fopen( filename, "rb" ); //this line is where the debugger stops with a Bus Error
unsigned long location = 0;
int i = 0;
if ( !infile )
{
printf("Error opening jpeg file %s\n!", filename );
return -1;
}
return 1;
}
The file I am passing in does exist. I can see that it shows up as the right sting to the full path in the debugger.
I think you're having a memory access problem because of the filename variable...
Instead of:
char *infilename = "/home/dcole/Images/lena1024s.jpg";
try using:
char infilename[] = "/home/dcole/Images/lena1024s.jpg";

How to read a .gz file line-by-line in C++?

I have 3 terabyte .gz file and want to read its uncompressed content line-by-line in a C++ program. As the file is quite huge, I want to avoid loading it completely in memory.
Can anyone post a simple example of doing it?
You most probably will have to use ZLib's deflate, example is available from their site
Alternatively you may have a look at BOOST C++ wrapper
The example from BOOST page (decompresses data from a file and writes it to standard output)
#include <fstream>
#include <iostream>
#include <boost/iostreams/filtering_streambuf.hpp>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/zlib.hpp>
int main()
{
using namespace std;
ifstream file("hello.z", ios_base::in | ios_base::binary);
filtering_streambuf<input> in;
in.push(zlib_decompressor());
in.push(file);
boost::iostreams::copy(in, cout);
}
For something that is going to be used regularly, you probably want to use one of the previous suggestions. Alternatively, you can do
gzcat file.gz | yourprogram
and have yourprogram read from cin. This will decompress parts of the file in memory as it is needed, and send the uncompressed output to yourprogram.
Using zlib, I'm doing something along these lines:
// return a line in a std::vector< char >
std::vector< char > readline( gzFile f ) {
std::vector< char > v( 256 );
unsigned pos = 0;
for ( ;; ) {
if ( gzgets( f, &v[ pos ], v.size() - pos ) == 0 ) {
// end-of-file or error
int err;
const char *msg = gzerror( f, &err );
if ( err != Z_OK ) {
// handle error
}
break;
}
unsigned read = strlen( &v[ pos ] );
if ( v[ pos + read - 1 ] == '\n' ) {
if ( pos + read >= 2 && v[ pos + read - 2 ] == '\r' ) {
pos = pos + read - 2;
} else {
pos = pos + read - 1;
}
break;
}
if ( read == 0 || pos + read < v.size() - 1 ) {
pos = read + pos;
break;
}
pos = v.size() - 1;
v.resize( v.size() * 2 );
}
v.resize( pos );
return v;
}
EDIT: Removed two mis-copied * in the example above.
EDIT: Corrected out of bounds read on v[pos + read - 2]
The zlib library supports decompressing files in memory in blocks, so you don't have to decompress the entire file in order to process it.
Here is some code with which you can read normal and zipped files line by line:
char line[0x10000];
FILE *infile=open_file(file);
bool gzipped=endsWith(file, ".gz");
if(gzipped)
init_gzip_stream(infile,&line[0]);
while (readLine(infile,line,gzipped)) {
if(line[0]==0)continue;// skip gzip new_block
printf(line);
}
#include <zlib.h>
#define CHUNK 0x100
#define OUT_CHUNK CHUNK*100
unsigned char gzip_in[CHUNK];
unsigned char gzip_out[OUT_CHUNK];
///* These are parameters to inflateInit2. See http://zlib.net/manual.html for the exact meanings. */
#define windowBits 15
#define ENABLE_ZLIB_GZIP 32
z_stream strm = {0};
z_stream init_gzip_stream(FILE* file,char* out){// unsigned
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.next_in = gzip_in;
strm.avail_in = 0;
strm.next_out = gzip_out;
inflateInit2 (& strm, windowBits | ENABLE_ZLIB_GZIP);
return strm;
}
bool inflate_gzip(FILE* file, z_stream strm,size_t bytes_read){
strm.avail_in = (int)bytes_read;
do {
strm.avail_out = OUT_CHUNK;
inflate (& strm, Z_NO_FLUSH);
// printf ("%s",gzip_out);
}while (strm.avail_out == 0);
if (feof (file)) {
inflateEnd (& strm);
return false;
}
return true;// all OK
}
char* first_line=(char*)&gzip_out[0];
char* current_line=first_line;
char* next_line=first_line;
char hangover[1000];
bool readLine(FILE* infile,char* line,bool gzipped){
if(!gzipped)
return fgets(line, sizeof(line), infile) != NULL;
else{
bool ok=true;
current_line=next_line;
if(!current_line || strlen(current_line)==0 || next_line-current_line>OUT_CHUNK){
current_line=first_line;
size_t bytes_read = fread (gzip_in, sizeof (char), CHUNK, infile);
ok=inflate_gzip(infile,strm,bytes_read);
strcpy(line,hangover);
}
if(ok){
next_line=strstr(current_line,"\n");
if(next_line){
next_line[0]=0;
next_line++;
strcpy(line+strlen(hangover),current_line);
hangover[0]=0;
}else{
strcpy(hangover,current_line);
line[0]=0;// skip that one!!
}
}
return ok;
}
}
You can't do that, because *.gz doesn't have "lines".
If compressed data has newlines, you'll have to decompress it. You don't have to decompress all data at once, you know, you can do it in chunks, and send strings back to main program when you encounter newline characters. *.gz can be decompressed using zlib.
Chilkat (http://www.chilkatsoft.com/) has libraries to read compressed files from a C++, .Net, VB, ... application.