I have the following C++ code, which tries to read a binary file and print out the resulting 32-bit values as hex:
// hello.cpp file
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

int main()
{
    int file_size; // font file size in bytes
    int i;

    std::cout << "Hello World\n";

    std::string binary_data_file("font.dat");
    struct stat statbuff;
    stat(binary_data_file.c_str(), &statbuff);
    file_size = statbuff.st_size;

    void *data_buffer;
    posix_memalign(&data_buffer, 4096, file_size);

    std::ifstream data_input_file(binary_data_file.c_str(), std::ios::in | std::ios::binary);
    data_input_file.read((char *) data_buffer, file_size);
    data_input_file.close();

    int *debug_buffer = (int *) data_buffer;
    for (int j = 0; j < 148481; j++) {
        std::cout << "Data size: " << std::dec << file_size << std::endl;
        std::cout << "Element: " << j << " Value: " << std::hex << *(debug_buffer + j) << std::endl;
    }
    return 0;
}
This code causes a Segmentation Fault when j == 148480
Data size: 211200
Element: 148477 Value: 0
Data size: 211200
Element: 148478 Value: 0
Data size: 211200
Element: 148479 Value: 0
Data size: 211200
Segmentation fault (core dumped)
Why is this the case? Surely the buffer size should be equal to 211200, right, so j should be able to go up to 211200?
You allocated 211200 bytes, but your loop reads 148481 * sizeof(int) = 593924 bytes, which is far past the end of the buffer (and past the end of the file content). The buffer holds 211200 / 4 = 52800 32-bit values, so the only valid indices are 0 through 52799; reading beyond that is undefined behaviour, and the crash simply happens at the point where the allocator's mapped pages run out.
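A minimal sketch of the fix, keeping your buffer setup, is to derive the element count from file_size instead of hard-coding it:

    int num_elements = file_size / sizeof(int); // 211200 / 4 == 52800 complete 32-bit values

    for (int j = 0; j < num_elements; j++) {
        std::cout << "Data size: " << std::dec << file_size << std::endl;
        std::cout << "Element: " << std::dec << j << " Value: " << std::hex << debug_buffer[j] << std::endl;
    }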
The format of the file is:
ITEM: TIMESTEP
0
ITEM: NUMBER OF ATOMS
32768
ITEM: BOX BOUNDS pp pp ff
0.0000000000000000e+00 3.2000000000000000e+01
0.0000000000000000e+00 3.2000000000000000e+01
0.0000000000000000e+00 3.2000000000000000e+01
ITEM: ATOMS type x y z
1 0.292418 1.13983 1.28999
......
I read the header for each timestep into a dummy string, and the value of the time step into an array. My code can read two timesteps correctly (65554 lines), but then tellg() starts returning -1 and I only get the last read values in my output. Also, the file never reaches EOF, so my code runs forever.
#include <bits/stdc++.h>
using namespace std;

int main(int argc, char** argv)
{
    string f = argv[1];
    int ens = 1;
    string file;
    fstream xyz("readas.xyz", ios_base::out); // to write data to cross-check what I read
    fstream* Pxyz = &xyz;
    float x, y, z, type;
    int* time = (int*) malloc(100 * sizeof(int));
    int step;
    string dummy;

    while (ens < atoi(argv[2])) // this is to open different files to read
    {
        file = f + to_string(ens);
        cout << "Reading " + file << endl;
        fstream fobj(file, ios_base::in);
        fstream* f = &fobj; // reading from this file
        if (f->is_open())
        {
            *time = 0; step = 0;
            while (true)
            {
                *f >> dummy >> dummy;
                if (f->fail()) break;
                *f >> *(time + (++step));
                *Pxyz << "32768" << "\n" << *(time + step) << endl;
                *f >> dummy >> dummy >> dummy >> dummy;
                *f >> dummy;
                *f >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy;
                *f >> dummy >> dummy;
                *f >> dummy >> dummy;
                *f >> dummy >> dummy;
                *f >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy;
                for (int i = 0; i < 32768; i++)
                {
                    *f >> type >> x >> y >> z;
                    *Pxyz << f->tellg() << " " << *(time + step) << " " << i << " "
                          << type << " " << x << " " << y << " " << z << " " << endl;
                }
            }
            f->close(); // closing this file
        }
        ++ens;
    }
    return 0;
}
The point at which the problem starts:
tellg time i type x y z
1754887 10 32766 2 31.3309 31.9485 31.6061
1754914 10 32767 1 31.6358 31.1965 30.9986
32768
10
-1 10 0 0 31.6358 31.1965 30.9986
-1 10 1 0 31.6358 31.1965 30.9986
.......
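For reference, here is a minimal sketch (not the original code; the helper name read_header is made up) of reading one header block with explicit per-line checks, so that a format mismatch fails loudly instead of silently shifting every later extraction:

    #include <istream>
    #include <limits>
    #include <string>

    // Reads one "ITEM: TIMESTEP ... ITEM: ATOMS type x y z" header block.
    // Returns false as soon as a line does not look like it should.
    bool read_header(std::istream& in, int& timestep, int& natoms)
    {
        std::string line;
        if (!std::getline(in, line) || line.rfind("ITEM: TIMESTEP", 0) != 0) return false;
        if (!(in >> timestep)) return false;
        in.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
        if (!std::getline(in, line) || line.rfind("ITEM: NUMBER OF ATOMS", 0) != 0) return false;
        if (!(in >> natoms)) return false;
        in.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
        std::getline(in, line);                                // ITEM: BOX BOUNDS pp pp ff
        for (int i = 0; i < 3; ++i) std::getline(in, line);    // the three bounds lines
        std::getline(in, line);                                // ITEM: ATOMS type x y z
        return static_cast<bool>(in);
    }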
I would like to read a wave file and process it with an FFT. This is my current working code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sndfile.h>
#include <iostream>
#include <vector>
#include "fftw-3.3.8/api/fftw3.h"

using namespace std;

#define ARRAY_LEN(x) ((int) (sizeof (x) / sizeof (x [0])))
#define MAX(x,y) ((x) > (y) ? (x) : (y))
#define MIN(x,y) ((x) < (y) ? (x) : (y))

vector<double> read_audio_vector(const char* filePath) {
    SNDFILE *infile;
    SF_INFO sfinfo;
    double buffer[8192] = {};
    vector<double> output_buffer(8192, 0);
    sf_count_t count;

    cout << "Reading from : " << filePath << endl;
    memset(&sfinfo, 0, sizeof(sfinfo));
    if ((infile = sf_open(filePath, SFM_READ, &sfinfo)) == NULL) {
        printf("Error : Not able to open input file '%s'\n", filePath);
        sf_close(infile);
        exit(1);
    }

    count = sf_read_double(infile, buffer, ARRAY_LEN(buffer));
    for (int j = 0; j < 8192; ++j) {
        output_buffer[j] = buffer[j];
    }
    sf_close(infile);
    return output_buffer;
}

vector<vector<double> > computeFullFFT_vector(int frameSize, int numFrames, vector<double> buffer) {
    vector<double> audioFrame(frameSize, 0);
    vector<double> magnitudeSpectrum(frameSize / 2, 0);
    vector<vector<double> > Spectrogram(numFrames, vector<double>(frameSize / 2));
    int startidx;

    for (int frameidx = 0; frameidx < numFrames; ++frameidx) {
        // Extract frame from buffer, with a hop of 128
        startidx = frameidx * 128;
        for (int i = 0; i < frameSize; i++) {
            audioFrame[i] = buffer[startidx + i];
        }
        // performFFT && Update -> Spectrogram
    }
    return Spectrogram;
}

int main(int argc, char **argv) {
    // Init
    SNDFILE *infile;
    SF_INFO sfinfo;
    int frameSize = 256;

    // Read Audio
    cout << "\n==== Read Audio ===== \n";
    vector<double> x = read_audio_vector(argv[1]);
    cout << "--x.size() : " << x.size() << endl;
    int i;
    i = 0;    cout << "x[" << i << "] : " << x[i] << endl;
    i = 7999; cout << "x[" << i << "] : " << x[i] << endl;
    i = 8000; cout << "x[" << i << "] : " << x[i] << endl;
    i = 8191; cout << "x[" << i << "] : " << x[i] << endl;

    // Process FFT here
    int numFrames = (8192 - frameSize) / 128 + 1;
    vector<vector<double> > Spectrogram(numFrames, vector<double>(frameSize / 2));
    Spectrogram = computeFullFFT_vector(frameSize, numFrames, x);

    cout << "Done" << endl;
    return 0;
}
However, the problem with this is that I assumed and pre-allocated 8192 samples.
In this case I have only 1 second at 8 kHz, meaning I only have 8000 samples.
Hence you see these values:
buffer[0] : 0.176361
buffer[7999] : 0.025177
buffer[8000] : 0
buffer[8191] : 0
As you can see, the values from index 8000 to 8191 are empty, so they are redundant.
The reason I set it to 8192 is that I want to pre-allocate Spectrogram with numFrames, and to do that I need to know the number of samples.
Problem:
I want to make this a general-purpose code that accepts a wave file of any length (1 s, 10 s, 3 minutes, etc.), so this pre-allocation no longer works.
Is there a way to find out the number of samples in a wave file, so I can change the fixed 8192 to a variable number that depends on the length of the wave file?
Alternatively, can I read the wave file in chunks, but with a hop length?
Currently this doesn't work because it doesn't read with a hop length:
int num_frames = 0;
while ((count = sf_read_double(infile, buffer, ARRAY_LEN(buffer))) > 0) {
    for (int i = 0; i < 256; i++) {
        buffer[i] *= 0.5;
    }
    num_frames++;
}
cout << "num_frames=" << num_frames; // this gives 32 frames, instead of the 63 frames that I desire
FYI: I compile with
g++ ./debug_tmp.cpp $(pkg-config --libs --cflags sndfile) ;
./a.out wav/test_1s.wav
To get the total number of samples in your .wav file, you need to look at the SF_INFO structure, in particular its frames and channels members. The total number of samples is the product of the two.
Yes, you can read your file by cutting it into chunks. Simply choose a chunk length and pass it as the third argument of sf_readf_double. Note that this argument counts frames, not samples. The return value is the number of frames actually read (at the end of the file it will be less than the number you asked for).
Here is a code example in C where I print the total number of samples, then reduce the volume of the audio by processing it in chunks of a deliberately odd length of my choice, 147 frames.
#include <stdio.h>
#include "sndfile.h"

#define MONO 1
#define DATA_BLOCK_LENGTH 147 // Frames

int main(void) {
    int i;

    // Input file
    char *inFileName;
    SNDFILE *inFile;
    SF_INFO inFileInfo;
    inFileName = "audioFiles/whiteNoise.wav";

    // Get total number of samples
    inFile = sf_open(inFileName, SFM_READ, &inFileInfo);
    if (inFile == NULL)
        printf("Audio file error.\n");
    int nFrames = (int) inFileInfo.frames;

    // Print results
    printf("Total number of frames: %d\n", nFrames);
    printf("Number of channels: %d\n", inFileInfo.channels);
    printf("Total number of samples: %d\n", nFrames * inFileInfo.channels);

    // Output file
    char *outFileName;
    SNDFILE *outFile;
    SF_INFO outFileInfo;
    outFileName = "audioFiles/outWhiteNoise.wav";
    outFileInfo.frames = inFileInfo.frames;
    outFileInfo.samplerate = inFileInfo.samplerate;
    outFileInfo.channels = inFileInfo.channels;
    outFileInfo.format = inFileInfo.format;
    outFile = sf_open(outFileName, SFM_WRITE, &outFileInfo);

    // Process
    int inDataBuffer[DATA_BLOCK_LENGTH * MONO];
    int outDataBuffer[DATA_BLOCK_LENGTH * MONO];
    int nReadFrames;
    while (nFrames > 0) {
        nReadFrames = sf_readf_int(inFile, inDataBuffer, DATA_BLOCK_LENGTH);
        for (i = 0; i < nReadFrames; i++) // only process the frames actually read
            outDataBuffer[i] = inDataBuffer[i] / 2;
        sf_writef_int(outFile, outDataBuffer, nReadFrames);
        nFrames -= nReadFrames;
    }

    sf_close(inFile);
    sf_close(outFile);
    return 0;
}
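Applying the same idea to the read_audio_vector function from the question, a rough sketch (the function name read_audio_vector_any_length is made up, and a mono file is assumed) that sizes the output from sfinfo.frames instead of a fixed 8192:

    #include <sndfile.h>
    #include <cstring>
    #include <vector>

    std::vector<double> read_audio_vector_any_length(const char* filePath)
    {
        SF_INFO sfinfo;
        std::memset(&sfinfo, 0, sizeof(sfinfo));

        SNDFILE* infile = sf_open(filePath, SFM_READ, &sfinfo);
        if (infile == NULL) {
            return std::vector<double>();          // caller can check for empty()
        }

        // Total samples = frames * channels (one sample per channel per frame).
        sf_count_t num_samples = sfinfo.frames * sfinfo.channels;
        std::vector<double> output(num_samples, 0.0);

        sf_count_t count = sf_read_double(infile, output.data(), num_samples);
        output.resize(count);                      // keep only what was actually read
        sf_close(infile);
        return output;
    }

With the buffer sized this way, numFrames can be computed at run time as (x.size() - frameSize) / 128 + 1 rather than from the hard-coded 8192.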
I'm writing a program where I need to build a large string in memory.
I used a stringstream object to do so, but something odd happens: even though the size of the underlying string buffer has not exceeded the maximum size of a string object, my program crashes with a BAD_ACCESS error.
I've created a test program like this:
#include <sstream>  // std::stringstream
#include <iostream> // std::cout

int main(int argc, const char * argv[]) {
    std::stringstream stream;
    std::string string;
    std::cout << "Max string size: " << string.max_size() << "\n";

    for (int i = 0; true; i++) {
        if (i >= 644245094) {
            stream.seekg(0, std::ios::end);
            std::stringstream::pos_type size = stream.tellg();
            stream.seekg(0, std::ios::beg);
            std::cout << "Size of stringstream: " << size << "\n";
        }
        stream << "hello";
    }
    return 0;
}
That if (i >= 644245094) inside the loop is only there to print the size of the stringstream buffer just before the program crashes. I used my debugger to find the number of the last iteration and then used it to print the size of the buffer just before the crash happens.
This is the output I get:
Max string size: 18446744073709551599
Size of stringstream: 3221225470
After this the program crashes.
I thought the cause might be that the program fills up my computer's RAM, but this program uses only about 6.01 GB, which is not enough to fill up my RAM. For the record, I have a 16 GB MacBook Pro.
What could be the problem? Am I missing something about how << operator works?
Thank you in advance!
The behaviour of a std::stringstream when it gets full and fails may not be consistent across all platforms.
I modified your code and ran it on Yocto 3.19.0-32 64-bit, with gcc 5.4.1. I did not get an exception thrown; rather, the stream set one of its failure-mode bits.
The code I ran was:
#include <sstream>  // std::stringstream
#include <iostream> // std::cout

std::stringstream::pos_type get_size(std::stringstream& stream)
{
    stream.seekg(0, std::ios::end);
    std::stringstream::pos_type size = stream.tellg();
    stream.seekg(0, std::ios::beg);
    return size;
}

int main(int argc, const char * argv[])
{
    std::stringstream stream;
    std::string string;
    std::cout << "Max string size: " << string.max_size() << std::endl;

    std::stringstream::pos_type size;
    for (unsigned long i = 0; true; ++i)
    {
        size = get_size(stream);
        stream.write("x", 1);
        if (stream.fail())
        {
            std::cout << "Fail after " << i + 1 << " insertions" << std::endl;
            std::cout << "Size of stringstream just before fail: " << size << std::endl;
            break;
        }
    }

    size = get_size(stream);
    std::cout << "Size of stringstream just after fail: " << size << std::endl;
    return 0;
}
And I got the following output, which shows that my stringstream filled and failed 56 bytes short of 8GB:
Max string size: 4611686018427387897
Fail after 8589934536 insertions
Size of stringstream just before fail: 8589934535
Size of stringstream just after fail: -1
Can you not use a different container and pre-allocate the memory, instead of using such a large stringstream?
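As a sketch of that suggestion (the iteration count is taken from the question; reserving the full ~3.2 GB up front is an assumption about the use case):

    #include <cstddef>
    #include <iostream>
    #include <string>

    int main()
    {
        const std::size_t iterations = 644245094;   // same count as in the question
        std::string big;
        big.reserve(iterations * 5);                 // one up-front allocation for all the "hello"s

        for (std::size_t i = 0; i < iterations; ++i) {
            big.append("hello");
        }
        std::cout << "Final size: " << big.size() << " bytes\n";
        return 0;
    }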
I wanted to read double values from a file into an array. I have about 128^3 values. My program worked just fine as long as I stayed at 128^2 values, but now I get a "segmentation fault" error, even though 128^3 ≈ 2,100,000 is far below the maximum value of an int. So how many values can you actually put into an array of doubles?
#include <iostream>
#include <fstream>

int LENGTH = 128;

int main(int argc, const char * argv[]) {
    // insert code here...
    const int arrLength = LENGTH * LENGTH * LENGTH;
    std::string filename = "density.dat";

    std::cout << "opening file" << std::endl;
    std::ifstream infile(filename.c_str());

    std::cout << "creating array with length " << arrLength << std::endl;
    double* densdata[arrLength];
    std::cout << "Array created" << std::endl;

    for (int i = 0; i < arrLength; ++i) {
        double a;
        infile >> a;
        densdata[i] = &a;
        std::cout << "read value: " << a << " at line " << (i + 1) << std::endl;
    }
    return 0;
}
You are allocating the array on the stack, and stack size is limited (by default, stack limit tends to be in single-digit megabytes).
You have several options:
increase the size of the stack (ulimit -s on Unix);
allocate the array on the heap using new;
move to using std::vector.
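A minimal sketch of the std::vector option, assuming the same density.dat format; note that it stores the values themselves rather than pointers to a loop-local variable:

    #include <cstddef>
    #include <fstream>
    #include <iostream>
    #include <vector>

    int main()
    {
        const int LENGTH = 128;
        const std::size_t arrLength =
            static_cast<std::size_t>(LENGTH) * LENGTH * LENGTH;

        std::ifstream infile("density.dat");
        std::vector<double> densdata;
        densdata.reserve(arrLength);                 // heap allocation, not stack

        double a;
        while (densdata.size() < arrLength && infile >> a) {
            densdata.push_back(a);                   // store the value, not its address
        }
        std::cout << "read " << densdata.size() << " values" << std::endl;
        return 0;
    }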
I have a program that currently generates large arrays and matrices that can be upwards of 10 GB in size. The program uses MPI to parallelize workloads, but is limited by the fact that each process needs its own copy of the array or matrix in order to perform its portion of the computation. The memory requirements make this problem infeasible with a large number of MPI processes, so I have been looking into Boost::Interprocess as a means of sharing data between MPI processes.
So far, I have come up with the following which creates a large vector and parallelizes the summation of its elements:
#include <cstdlib>
#include <ctime>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <unistd.h> // for sleep()
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/tuple/tuple_comparison.hpp>
#include <mpi.h>

typedef boost::interprocess::allocator<double, boost::interprocess::managed_shared_memory::segment_manager> ShmemAllocator;
typedef boost::interprocess::vector<double, ShmemAllocator> MyVector;

const std::size_t vector_size = 1000000000;
const std::string shared_memory_name = "vector_shared_test.cpp";

int main(int argc, char **argv) {
    int numprocs, rank;
    MPI::Init();
    numprocs = MPI::COMM_WORLD.Get_size();
    rank = MPI::COMM_WORLD.Get_rank();

    if (numprocs >= 2) {
        if (rank == 0) {
            std::cout << "On process rank " << rank << "." << std::endl;
            std::time_t creation_start = std::time(NULL);

            boost::interprocess::shared_memory_object::remove(shared_memory_name.c_str());
            boost::interprocess::managed_shared_memory segment(boost::interprocess::create_only, shared_memory_name.c_str(), size_t(12000000000));
            std::cout << "Size of double: " << sizeof(double) << std::endl;
            std::cout << "Allocated shared memory: " << segment.get_size() << std::endl;

            const ShmemAllocator alloc_inst(segment.get_segment_manager());
            MyVector *myvector = segment.construct<MyVector>("MyVector")(alloc_inst);
            std::cout << "myvector max size: " << myvector->max_size() << std::endl;

            for (int i = 0; i < vector_size; i++) {
                myvector->push_back(double(i));
            }

            std::cout << "Vector capacity: " << myvector->capacity() << " | Memory Free: " << segment.get_free_memory() << std::endl;
            std::cout << "Vector creation successful and took " << std::difftime(std::time(NULL), creation_start) << " seconds." << std::endl;
        }
        std::flush(std::cout);
        MPI::COMM_WORLD.Barrier();

        std::time_t summing_start = std::time(NULL);
        std::cout << "On process rank " << rank << "." << std::endl;

        boost::interprocess::managed_shared_memory segment(boost::interprocess::open_only, shared_memory_name.c_str());
        MyVector *myvector = segment.find<MyVector>("MyVector").first;

        double result = 0;
        for (int i = rank; i < myvector->size(); i = i + numprocs) {
            result = result + (*myvector)[i];
        }

        double total = 0;
        MPI::COMM_WORLD.Reduce(&result, &total, 1, MPI::DOUBLE, MPI::SUM, 0);
        std::flush(std::cout);
        MPI::COMM_WORLD.Barrier();

        if (rank == 0) {
            std::cout << "On process rank " << rank << "." << std::endl;
            std::cout << "Vector summing successful and took " << std::difftime(std::time(NULL), summing_start) << " seconds." << std::endl;
            std::cout << "The arithmetic sum of the elements in the vector is " << total << std::endl;
            segment.destroy<MyVector>("MyVector");
        }

        std::flush(std::cout);
        MPI::COMM_WORLD.Barrier();
        boost::interprocess::shared_memory_object::remove(shared_memory_name.c_str());
    }

    sleep(300);
    MPI::Finalize();
    return 0;
}
I noticed that this causes the entire shared object to be mapped into each process's virtual memory space, which is an issue on our computing cluster, as it limits virtual memory to be the same as physical memory. Is there a way to share this data structure without having to map the entire shared memory space, perhaps by sharing a pointer of some kind? Would trying to access unmapped shared memory even be defined behaviour? Unfortunately, the operations we are performing on the array mean that each process eventually needs to access every element in it (although not concurrently; I suppose it's possible to break up the shared array into pieces and trade portions of the array for those you need, but this is not ideal).
Since the data you want to share is so large, it may be more practical to treat the data as a true file and use file operations to read the data that you want. Then you do not need shared memory to share the file; just let each process read directly from the file system.
ifstream file("data.dat", ios::in | ios::binary);
file.seekg(someOffset, ios::beg);
file.read(reinterpret_cast<char*>(array), sizeof(array)); // read() takes a char*, so cast the destination buffer
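As a rough sketch of how that could look with MPI (the file name data.dat, the total element count, and the slicing scheme are assumptions for illustration), each rank could read only its own contiguous slice of a raw binary file of doubles:

    #include <algorithm>
    #include <cstddef>
    #include <fstream>
    #include <vector>
    #include <mpi.h>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, numprocs;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

        const std::size_t total = 1000000000;                      // total doubles in the file (assumed)
        const std::size_t chunk = (total + numprocs - 1) / numprocs;
        const std::size_t begin = std::min(static_cast<std::size_t>(rank) * chunk, total);
        const std::size_t count = std::min(chunk, total - begin);

        // Read only this rank's slice of the file.
        std::vector<double> slice(count);
        std::ifstream file("data.dat", std::ios::in | std::ios::binary);
        file.seekg(static_cast<std::streamoff>(begin * sizeof(double)), std::ios::beg);
        if (count > 0)
            file.read(reinterpret_cast<char*>(slice.data()), count * sizeof(double));

        double local = 0, sum = 0;
        for (double v : slice) local += v;
        MPI_Reduce(&local, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

        MPI_Finalize();
        return 0;
    }

Only one slice per rank is resident in memory at a time, so the memory footprint stays close to total / numprocs doubles per process rather than the whole array.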