Is there any memory leak between two threads (Buffering, Emptying) - c++

Consider the following code please, including two threads buffering_thread (fills up buffer pointer with one message) and sending_thread (empties the buffer):
#include "msg.cpp"
msg * buffer= NULL;
byte * send_bytes= NULL;
bool keep_buffering = true;
bool keep_sending = true;
int counter = 0;
void start_buffering() {
while (keep_buffering){
while (buffer != NULL && keep_buffering){}
msg m (counter);
buffer = &m;
counter ++;
}
std::cout << "Finished start_buffering" << std::endl;
}
void sending() {
while (keep_sending){
if (counter > 10){
keep_buffering = false;
break;
}
if (buffer != NULL){
HeaderType messageHeader = buffer -> getHeader();
print(messageHeader);
send_bytes = (byte *) realloc(send_bytes,DATASIZE + HEADER);
memcpy (send_bytes, &messageHeader, HEADER);
memcpy (send_bytes + HEADER,buffer -> getText(), DATASIZE);
// Do something, suppose for now NOTHING
free (buffer -> getText());
buffer = NULL;
}
}
std::cout << "Finished sending" << std::endl;
}
int main () {
std::thread sending_thread(sending);
std::thread buffering_thread(start_buffering);
buffering_thread.join();
keep_sending = false;
sending_thread.join();
//free (buffer);
free (send_bytes);
return 0;
}
where the class msg is as follows:
#include <iostream>
#include <stdlib.h>
#include <cstring>
#include <mutex>
#include <thread>
#define DATASIZE 10
#define HEADER sizeof(HeaderType)
class msg
{
private:
HeaderType header;
byte * text;
public:
msg(int ID);
HeaderType getHeader();
byte * getText();
};
msg::msg(int ID){
header.mID = ID;
text = (byte *)malloc (DATASIZE);
memset (text, '.', DATASIZE);
}
HeaderType msg::getHeader(){
return header;
}
void print(HeaderType header) {
std::cout << "Message ID: " << header.mID << std::endl;
}
byte * msg::getText(){
return text;
}
and this is HeaderType:
typedef struct {
int mID;
}HeaderType;
Valgrind reports:
==3809== 20 bytes in 2 blocks are definitely lost in loss record 1 of 1
==3809== at 0x4028876: malloc (vg_replace_malloc.c:236)
==3809== by 0x80492BD: msg::msg(int) (in /home/linux/LCR-write/src/test)
==3809== by 0x8049384: start_buffering() (in /home/linux/LCR-write/src/test)
Indeed, I guess there must not be any memory leak inside this code. The buffer should always be emptied by sending_thread. Would somebody point out the fault please.

The problem is in the start_buffering() function.
while (keep_buffering){
msg m (counter);
while (buffer != NULL && keep_buffering){}
buffer = &m;
counter ++;
}
This loop creates a msg and allocates the memory for text in it's constructor. Then it waits, and then it will do:
buffer = &m;
counter ++;
// end of loop
msg m (counter);
When the loop reaches the end, it starts the next iteration without waiting for anything. The 'old' msg m falls out of scope, and a new msg m is allocated at the exact same place on the stack than the old one. So the pointer buffer is still 'valid' (not really!), pointing to the new m. The result is a memory leak for the first msg m, and, more importantly, undefined behaviour. You may not use things that are not in scope anymore.
Additionally, I would recommend to use 'official' synchronisation and wait mechanisms.

you need a destructor for msg that frees the memory allocated for text in the constructor
class msg
{
private:
HeaderType header;
byte * text;
public:
msg(int ID);
HeaderType getHeader();
byte * getText();
~msg() {free(text);}
};

Related

asynchronous io is working but never ends in linux

In a previous question, I asked how to implement asynchronous I/O. This code now works, except that at the end it never stops. It seems that aio_read reads starting at offset, for length, and if it is past the end of the file, the operation succeeds? This code builds and runs on Ubuntu 20.04LTS and successfully reads blocks 1-5, each 512 bytes, then when it runs out of file it keeps oscillating between block 4 and 5. It never terminates.
Here is the code:
#include <aio.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <thread>
using namespace std;
using namespace std::chrono_literals;
constexpr uint32_t blockSize = 512;
mutex readMutex;
bool readReady = false;
condition_variable cv;
bool operation_completed = false;
int fh;
int bytesRead;
void process(char* buf, uint32_t bytesRead) {
cout << "processing..." << endl;
usleep(100000);
}
void aio_completion_handler(sigval_t sigval) {
struct aiocb* req = (struct aiocb*)sigval.sival_ptr;
// check whether asynch operation is complete
int status;
if ((status = aio_error(req)) != 0) {
cout << "Error: " << status << '\n';
return;
}
int ret = aio_return(req);
bytesRead = req->aio_nbytes;
cout << "ret == " << ret << endl;
cout << (char*)req->aio_buf << endl;
unique_lock<mutex> readLock(readMutex);
operation_completed = true;
cv.notify_one();
}
void thready() {
char* buf1 = new char[blockSize];
char* buf2 = new char[blockSize];
aiocb cb;
char* processbuf = buf1;
char* readbuf = buf2;
fh = open("smallfile.dat", O_RDONLY);
if (fh < 0) {
throw std::runtime_error("cannot open file!");
}
memset(&cb, 0, sizeof(aiocb));
cb.aio_fildes = fh;
cb.aio_nbytes = blockSize;
cb.aio_offset = 0;
// Fill in callback information
/*
Using SIGEV_THREAD to request a thread callback function as a notification
method
*/
cb.aio_sigevent.sigev_notify_attributes = nullptr;
cb.aio_sigevent.sigev_notify = SIGEV_THREAD;
cb.aio_sigevent.sigev_notify_function = aio_completion_handler;
/*
The context to be transmitted is loaded into the handler (in this case, a
reference to the aiocb request itself). In this handler, we simply refer to
the arrived sigval pointer and use the AIO function to verify that the request
has been completed.
*/
cb.aio_sigevent.sigev_value.sival_ptr = &cb;
int cursor = 0;
int currentBytesRead = read(fh, buf1, blockSize); // read the 1st block
while (true) {
cb.aio_buf = readbuf;
operation_completed = false; // set predicate to true and wait until asynch changes it
cb.aio_offset = cursor;
aio_read(&cb); // each next block is read asynchronously
process(processbuf, currentBytesRead); // process while waiting
{
unique_lock<mutex> readLock(readMutex);
cv.wait( readLock, []{ return operation_completed; } );
}
if (!operation_completed)
break;
currentBytesRead = bytesRead; // make local copy of global modified by the asynch code
cursor += bytesRead;
if (currentBytesRead < blockSize) {
break; // last time, get out
}
cout << "back from wait" << endl;
swap(processbuf, readbuf); // switch to other buffer for next time
currentBytesRead = bytesRead; // create local copy
}
delete[] buf1;
delete[] buf2;
}
int main() {
try {
thready();
} catch (std::exception& e) {
cerr << e.what() << '\n';
}
return 0;
}
First, is the above code an appropriate way to do this to get the length of the file and figure out exactly how many reads to do?
Second, if this is so, fine, but how can aio_read just return success if I try to read past the end of file? Error status is always zero. I am confused about what it is supposed to do.
with 512 bytes of each of 1,2,3,4,5

Using mmap memory for a circular buffer with very low overhead

I have a debugging tool which in order to register its acquired data uses a data structure called DiskPool (code follows). At start, this data structure mmaps a certain amount of data (backed by a file on disk). Clients can allocate memory via a simple bump pointer mechanism (implemented using std::atomic<size_t>.
As the volume of acquired data is massive I have decided to have a window over a time period instead of registering and keeping all the data. To fulfil such a purpose I have to change the disk pool into a circular buffer but this should not impose a considerable overhead as this overhead affects the measurement.
I wanted to ask you if anybody has any idea? (For example, using an atomic interface of STL).
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <atomic>
#include <memory>
#include <signal.h>
#include <chrono>
#include <thread>
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
class DiskPool {
char* addr_; // Initialized by mmap()
size_t len_; // Given by the user as many as memory pages as needed
std::atomic<size_t> top_; // Offset from address_
int fd_;
public:
DiskPool(size_t l, const char* file) : len_(l), top_(0),fd_(-1)
{
struct stat st;
fd_= open(file, O_CREAT|O_RDWR, S_IREAD | S_IWRITE);
if (fd_ == -1)
handle_error("open");
if (ftruncate(fd_, len_* sysconf(_SC_PAGE_SIZE)) != 0)
handle_error("ftruncate() error");
else {
fstat(fd_, &st);
printf("the file has %ld bytes\n", (long) st.st_size);
}
addr_ = static_cast<char*>( mmap(NULL, (len_* sysconf(_SC_PAGE_SIZE)),
PROT_READ | PROT_WRITE, MAP_SHARED|MAP_NORESERVE, fd_,0));
if (addr_ == MAP_FAILED)
handle_error("mmap failed.");
}
~DiskPool()
{
close(fd_);
if( munmap(addr_, len_)< 0) {
handle_error("Could not unmap file");
exit(1);}
std::cout << "Successfully unmapped the file. " << std::endl;
}
void* allocate(size_t s)
{
size_t t = std::atomic_fetch_add(&top_, s);
return addr_+t;
}
void flush() {madvise(addr_, len_, MADV_DONTNEED);}
};
As an example, I created sample code that uses this disk pool to record data at the creation and destruction of an object (AutomaticLifetimeCollector).
static const std::string RECORD_FILE = "Data.txt";
static const size_t DISK_POOL_NUMBER_OF_PAGES = 10000;
static std::shared_ptr<DiskPool> diskPool =
std::shared_ptr <DiskPool> (new DiskPool(DISK_POOL_NUMBER_OF_PAGES,RECORD_FILE.c_str()));
struct TaskRecord
{
uint64_t tid; // Thread id
uint64_t tag; // User-given identifier (“f1”)
uint64_t start_time; // nanoseconds
uint64_t stop_time;
uint64_t cpu_time;
TaskRecord(int depth, size_t tag, uint64_t start_time) :
tid(pthread_self()), tag(tag),
start_time(start_time), stop_time(0), cpu_time(0) {}
};
class AutomaticLifetimeCollector
{
TaskRecord* record_;
public:
AutomaticLifetimeCollector(size_t tag) :
record_(new(diskPool->allocate(sizeof(TaskRecord)))
TaskRecord(2, tag, (uint64_t)1000000004L))
{
}
~AutomaticLifetimeCollector() {
record_->stop_time = (uint64_t)1000000000L;
record_->cpu_time = (uint64_t)1000000002L;
}
};
inline void DelayMilSec(unsigned int pduration)
{
std::this_thread::sleep_until(std::chrono::system_clock::now() +
std::chrono::milliseconds(pduration));
}
std::atomic<bool> LoopsRunFlag {true};
void sigIntHappened(int signal)
{
std::cout<< "Application was terminated.";
LoopsRunFlag.store(false, std::memory_order_release);
}
int main()
{
signal(SIGINT, sigIntHappened);
unsigned int i = 0;
while(LoopsRunFlag)
{
AutomaticLifetimeCollector alc(i++);
DelayMilSec(2);
}
diskPool->flush();
return(0);
}
So accounting only for the handing out of variable-sized slices for a variable buffer, I believe a Compare-And-Swap loop should work.
The basic idea here is to read a value (which is atomic), do some computation with it, then write the value, if it did not change since reading. If it did change (another thread/process), the computation must be redone with the new value.
Since you have variable sized objects, I think actually simply slicing it into n array elements with (i + 1) % n won't work, as given (i + item_len) % capacity, it would split the allocation between the end and start of the buffer, and while that can be correct and working, I think maybe not what you wanted. So that means a condition, but I think the CPU should predict it pretty well.
#include <iostream>
#include <atomic>
std::atomic<size_t> next_index = 0;
const size_t len = 100; // small for demo purpose
size_t alloc(size_t required_size)
{
if (required_size > len) std::terminate(); // do something, would cause a buffer overflow
size_t i, ret_index, new_index;
i = next_index.load();
do
{
auto space = len - i;
ret_index = required_size <= space ? i : 0; // Wrap if needed
new_index = ret_index + required_size;
} while (next_index.compare_exchange_weak(i, new_index)); // succeed if value did of i not change
return ret_index;
}
int main()
{
std::cout << alloc(4) << std::endl; // 0 - 3
std::cout << alloc(8) << std::endl; // 4 - 11
std::cout << alloc(32) << std::endl; // 12 - 43
std::cout << alloc(32) << std::endl; // 44 - 75
std::cout << alloc(32) << std::endl; // 0 - 31 (76 - 107 would overflow)
std::cout << alloc(32) << std::endl; // 32 - 63
std::cout << alloc(32) << std::endl; // 64 - 95
std::cout << alloc(32) << std::endl; // 0 - 31 (96 - 127 would overflow)
}
Which should be fairly simple to plug in to your class:
void* allocate(size_t s)
{
if (s > len_ * sysconf(_SC_PAGE_SIZE)) std::terminate(); // do something, would cause a buffer overflow
size_t i, ret_index, new_index;
i = top_.load();
do
{
auto space = len_ * sysconf(_SC_PAGE_SIZE) - i;
ret_index = s <= space ? i : 0; // Wrap if needed
new_index = ret_index + s;
} while (top_.compare_exchange_weak(i, new_index)); // succeed if value did of i not change
return addr_+ ret_index;
}
len_ * sysconf(_SC_PAGE_SIZE) is in a few places, so might be the more useful value to store in len_ itself.

Can't read complete string when writing into IPC SHM in c++

I'm attempting to build a simple interface to use shm ipc in c++. For that, I've written the following code:
sharedmem.h:
#pragma once
#include <iostream>
#include <sstream>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>
using namespace std;
namespace IPC
{
void Send(const string filename, std::string msg,int size=1024 )
{
key_t key = ftok(filename.c_str(),65);
std::stringstream ss;
ss << msg.c_str();
int shmid = shmget(key,size,0666|IPC_CREAT);
char *str = (char*) shmat(shmid,(void*)0,0);
ss >> str;
shmdt(str);
}
string Receive(const string filename, int size=1024 )
{
key_t key = ftok(filename.c_str(),65);
int shmid = shmget(key,size,0666|IPC_CREAT);
char *str = (char*) shmat(shmid,(void*)0,0);
string ret(str);
shmdt(str);
shmctl(shmid,IPC_RMID,NULL);
return ret;
}
};
Outside, I use it like:
sender.cpp
#include "sharedmem.h"
int main()
{
IPC::Send("fila1", "hello ipc");
return 0;
}
receiver.cpp
#include "sharedmem.h"
int main()
{
std::string ret = IPC::Receive("fila1");
cout << "Recebi na fila: " << ret;
return 0;
}
CMakeLists.txt:
set (CMAKE_CXX_STANDARD 17)
add_executable(sender sender.cpp)
add_executable(receiver receiver.cpp)
and built with cmake . && make
In this example I write "hello ipc" but the other process reads only "hello". What could be wrong here? Thanks in advance.
In your send function:
void Send(const string filename, std::string msg,int size=1024 )
{
key_t key = ftok(filename.c_str(),65);
std::stringstream ss;
ss << msg.c_str();
int shmid = shmget(key,size,0666|IPC_CREAT); // this call could fail, what happens next is
// a likely a segmentation error.
// ... or worse.
char *str = (char*) shmat(shmid,(void*)0,0);
ss >> str; // <-- error is here. You extract from ss until the first whitespace character.
// what happens if input string is larger than the size of the allocated block?
shmdt(str);
}
The stringstream ss has no functional use in your function, except for adding a bug. I suggest you try this instead:
int Send(const string& filename, const std::string& msg) noexcept // if you have no return value,
// you should throw on error,
// let's avoid that
{
key_t key = ftok(filename.c_str(), 65); // you should maybe consider using a named constant
// for your project ID
if (key == -1)
return errno;
int shmid = shmget(key, msg.length() + 1, 0666 | IPC_CREAT); // allocate enough memory for the
// message, plus its NULL terminator
if (shmid == -1)
return errno;
void *shared_mem = shmat(shmid, nullptr, 0);
if (shared_mem == (void*)-1)
{
// the systeml failed to lock the allocated memory.
// do some cleanup by de-allocating the shared memory block.
int ret = errno; // keep original error for return.
shmctl(shmid , IPC_RMID, nullptr);
return ret;
}
// copy message string with its NULL terminator to shared memory
memcpy(shared_mem, msg.c_str(), msg.length() + 1); // using length() + 1 is ok here, result of
// c_str() always has a NULL terminator.
shmdt(shared_mem);
return 0;
}
Your receive function also lacks in error checking. That should be very similar to the Send() function.
Note that the strings are passed by const reference, that's to avoid copying them (and the potential errors associated with those unneeded memory allocations)

Why does Windows task manager show memory increases while writing very large files? Should I be worried? [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
I am developing a c++ application (in VS2012, Windows Server 2012 R2) that writes large volumes of binary data, from cyclical arrays of buffers that have been allocated, to raw files. The thing is that system RAM usage as reported by Windows Task Manager increases in a linear rate as fwrite writes the data in the files until it reaches a certain point where it remains almost constant (also see the following image). Also, the memory used by my application remains constant the whole time.
I call fflush periodically and it has no effect. Although it seems to be a harmless case, I am concerned about this issue in terms of performance, as another Java application will also be running in a nominal operation.
Therefore, I would like to ask if I should worry about this and if there is a way to avoid this issue towards achieving the best performance for a real-time data recording system.
Similar questions have been asked here and here for linux operating systems and it has been said that the system can devote an amount of memory for caching the data, as long as there is enough memory available.
A part of the application is presented next. In short, the application controls a pair of cameras and each of them acquires frames and store them in properly aligned allocated buffers. There are i) a CameraInterface class, which creates two "producer" threads, ii) a Recorder class, which creates two "consumer" threads and iii) a SharedMemoryManager class that provides a producer with an available buffer for storing data and a consumer with the next buffer to be written to the file. The SharedMemoryManager holds two arrays of buffers (one for each pair of producer-consumer) and two respective arrays of flags that indicate the status of the buffer. It also holds two std::queue objects for quick accessing of the next buffers to be recorder. Parts of the Recorder and the SharedMemoryManager are shown next.
// somewhere in file "atcore.h"...
typedef unsigned char AT_U8;
// File: SharedMemoryManager.h
#ifndef __MEM_MANAGER__
#define __MEM_MANAGER__
#pragma once
#include "atcore.h"
#include <queue>
#include <mutex>
#define NBUFFERS 128
#define BUFFER_AVAILABLE 0
#define BUFFER_QUEUED 1
#define BUFFER_FULL 2
#define BUFFER_RECORDING_PENDING 3
// the status flag cycle is
// EMPTY -> QUEUED -> FULL -> RECORDING_PENDING -> EMPTY
using namespace std;
typedef struct{
AT_U8** buffers;
int* flags;
int acquiredCounter;
int consumedCounter;
int queuedCounter;
mutex flagMtx;
} sharedMemory;
typedef struct{
AT_U8* buffer;
int bufSize;
int index;
} record;
class SharedMemoryManager
{
public:
SharedMemoryManager();
~SharedMemoryManager(void);
void enableRecording();
void disableRecording();
int setupMemory(int cameraIdentifier, int bufferSize);
void freeMemory();
void freeCameraMemory(int cameraIdentifier);
int getBufferSize(int cameraIdentifier);
AT_U8* getBufferForCameraQueue(int cameraIdentifier); // get pointer to the next available buffer for queueing in the camera
int hasFramesForRecording(int cameraIdentifier); // ask how many frames for recording are there in the respective queue
AT_U8* getNextFrameForRecording(int cameraIdentifier); // get pointer to the next buffer to be recorded to a file
void copyMostRecentFrame(unsigned char* buffer, int cameraIdentifier); // TODO // get a copy of the most recent frame on the buffer
void notifyAcquiredFrame(AT_U8* buffer, int bufSize, int cameraIdentifier); // use this function to notify the manager that the buffer has just been filled with data
void notifyRecordedFrame(AT_U8* buffer, int cameraIdentifier); // use this function to notify the manager that the buffer has just been written to file and can be used again
private:
bool useMem0, useMem1;
int bufSize0, bufSize1;
sharedMemory* memory0;
sharedMemory* memory1;
queue<record*> framesForRecording0;
queue<record*> framesForRecording1;
bool isRecording;
int allocateBuffers(sharedMemory* mem, int bufSize);
void freeBufferArray(sharedMemory* mem);
};
#endif // !__MEM_MANAGER
// File: SharedMemoryManager.cpp
...
int SharedMemoryManager::hasFramesForRecording(int cameraIdentifier){
if (cameraIdentifier!=0 && cameraIdentifier!=1){
cout << "Could not get the number of frames in the shared memory. Invalid camera id " << cameraIdentifier << endl;
return -1;
}
if (cameraIdentifier==0){
return (int)framesForRecording0.size();
}
else{
return (int)framesForRecording1.size();
}
}
AT_U8* SharedMemoryManager::getNextFrameForRecording(int cameraIdentifier){
if (cameraIdentifier!=0 && cameraIdentifier!=1){
cout << "Error in getNextFrameForRecording. Invalid camera id " << cameraIdentifier << endl;
return NULL;
}
sharedMemory* mem;
if (cameraIdentifier==0) mem=memory0;
else mem=memory1;
queue<record*>* framesQueuePtr;
if (cameraIdentifier==0) framesQueuePtr = &framesForRecording0;
else framesQueuePtr = &framesForRecording1;
if (framesQueuePtr->empty()){ // no frames to be recorded at the moment
return NULL;
}
record* item;
int idx;
AT_U8* buffer = NULL;
item = framesQueuePtr->front();
framesQueuePtr->pop();
idx = item->index;
delete item;
mem->flagMtx.lock();
if (mem->flags[idx] == BUFFER_FULL){
mem->flags[idx] = BUFFER_RECORDING_PENDING;
buffer = mem->buffers[idx];
}
else{
cout << "PROBLEM. Buffer in getBufferForRecording. Buffer flag is " << mem->flags[idx] << endl;
cout << "----- BUFFER FLAGS -----" << endl;
for (int i=0; i<NBUFFERS; i++){
cout << "[" << i << "] " << mem->flags[i] << endl;
}
cout << "----- -----" << endl;
}
mem->flagMtx.unlock();
return buffer;
}
int SharedMemoryManager::allocateBuffers(sharedMemory* mem, int bufSize){
// allocate the array for the buffers
mem->buffers = (AT_U8**)calloc(NBUFFERS,sizeof(AT_U8*));
if (mem->buffers==NULL){
cout << "Could not allocate array of buffers." << endl;
return -1;
}
// allocate the array for the respective flags
mem->flags = (int*)malloc(NBUFFERS*sizeof(int));
if (mem->flags==NULL){
cout << "Could not allocate array of flags for the buffers." << endl;
free(mem->buffers);
return -1;
}
int i;
for (i=0; i<NBUFFERS; i++){ // allocate the buffers
mem->buffers[i] = (AT_U8*)_aligned_malloc((size_t)bufSize,8);
if (mem->buffers[i] == NULL){
cout << "Could not allocate memory for buffer no. " << i << endl;
for (int j=0; j<i; j++){ // free the previously allocated buffers
_aligned_free(mem->buffers[j]);
}
free(mem->buffers);
free(mem->flags);
return -1;
}
else{
mem->flags[i]=BUFFER_AVAILABLE;
}
}
return 0;
}
void SharedMemoryManager::freeBufferArray(sharedMemory* mem){
if (mem!=NULL){
for(int i=0; i<NBUFFERS; i++){
_aligned_free(mem->buffers[i]);
mem->buffers[i]=NULL;
}
free(mem->buffers);
mem->buffers = NULL;
free(mem->flags);
mem->flags = NULL;
free(mem);
mem = NULL;
}
}
// File: Recorder.h
#ifndef __RECORDER__
#define __RECORDER__
#pragma once
#include <string>
#include <queue>
#include <future>
#include <thread>
#include "atcore.h"
#include "SharedMemoryManager.h"
using namespace std;
class Recorder
{
public:
Recorder(SharedMemoryManager* memoryManager);
~Recorder();
void recordBuffer(AT_U8 *buffer, int bufsize);
int setupRecording(string filename0, string filename1, bool open0, bool open1);
void startRecording();
void stopRecording();
int testWriteSpeed(string directoryPath, string filename);
void insertFrameItem(AT_U8* buffer, int bufSize, int chunkID);
private:
FILE *chunk0, *chunk1;
string chunkFilename0, chunkFilename1;
int frameCounter0, frameCounter1;
bool writes0, writes1;
int bufSize0, bufSize1;
static SharedMemoryManager* manager;
bool isRecording;
promise<int> prom0;
promise<int> prom1;
thread* recordingThread0;
thread* recordingThread1;
static void performRecording(promise<int>* exitCode, int chunkIdentifier);
void writeNextItem(int chunkIdentifier);
void closeFiles();
};
#endif //!__RECORDER__
// File: Recorder.cpp
#include "Recorder.h"
#include <ctime>
#include <iostream>
using namespace std;
Recorder* recorderInstance; // keep a pointer to the current instance, for accessing static functions from (non-static) objects in the threads
SharedMemoryManager* Recorder::manager; // the same reason
...
void Recorder::startRecording(){
if (isRecording == false){ // do not start new threads if some are still running
isRecording = true;
if (writes0==true) recordingThread0 = new thread(&Recorder::performRecording, &prom0, 0);
if (writes1==true) recordingThread1 = new thread(&Recorder::performRecording, &prom1, 1);
}
}
void Recorder::writeNextItem(int chunkIdentifier){
FILE* chunk;
AT_U8* buffer;
int* bufSize;
if (chunkIdentifier==0){
chunk = chunk0;
bufSize = &bufSize0;
buffer = manager->getNextFrameForRecording(0);
}
else {
chunk = chunk1;
bufSize = &bufSize1;
buffer = manager->getNextFrameForRecording(1);
}
size_t nbytes = fwrite(buffer, 1, (*bufSize)*sizeof(unsigned char), chunk);
if (nbytes<=0){
cout << "No data were written to file." << endl;
}
manager->notifyRecordedFrame(buffer,chunkIdentifier);
if (chunkIdentifier==0) frameCounter0++;
else frameCounter1++;
}
void Recorder::performRecording(promise<int>* exitCode, int chunkIdentifier){
bool flag = true;
int remaining = manager->hasFramesForRecording(chunkIdentifier);
while( recorderInstance->isRecording==true || remaining>0 ){
if (remaining>0){
if (recorderInstance->isRecording==false){
cout << "Acquisition stopped, still " << remaining << " frames are to be recorded in chunk " << chunkIdentifier << endl;
}
recorderInstance->writeNextItem(chunkIdentifier);
}
else{
this_thread::sleep_for(chrono::milliseconds(10));
}
remaining = manager->hasFramesForRecording(chunkIdentifier);
}
cout << "Done recording." << endl;
}
In the Windows memory use screen shot you show, the biggest chunk (45GB) is "cached" of which 27GB is "modified", meaning "dirty pages waiting to be written to disk". This is normal behavior because you are writing faster than the disk I/O can keep up. flush/fflush has no effect on this because it is not in your process. As you note: "the memory used by my application remains constant the whole time". Do not be concerned. However, if you really don't want the OS to buffer dirty output pages, consider using "unbuffered I/O" available on Windows, as it will write through immediately to disk.
Edit: Some links to unbuffered I/O on Windows. Note that unbuffered I/O places memory-alignment constraints on your reads and writes.
File Buffering
CreateFile function

Why does this code leak when the memory allocation is cross thread?

First off, this is running on OSX, compiled in xcode. I have tried both clang and g++, both show the same symptoms. Basically, there are two lines in this sample, one allocates and frees a buffer in the worker thread, and the other allocates the buffer in the main thread, and frees it in the worker thread. The in-thread alloc works fine, but the cross thread alloc causes the total memory used by the process (viewed using top) to increase by 4k bytes every time the buffer is retrieved on the worker thread. What gives here? The heap should be global to all threads in the process and no marshaling should be occuring here, so why is the process memory increasing?
Valgrind reports no leaks in both cases, and reports the same number of allocations and frees in both cases. However, the process memory is clearly increasing in one versus the other.
The sample code is stripped down to show the basics:
#include <iostream>
#include <stdio.h>
#include <list>
#include <assert.h>
#include <unistd.h>
class CNetworkBuffer
{
public:
uint32_t MaxSize;
uint32_t Size;
uint8_t *Data;
CNetworkBuffer()
{
printf("Creating new network buffer: %d %d\n", 0, 0);
Size = 0;
MaxSize = 0;
Data = NULL;
}
CNetworkBuffer(uint32_t size, uint32_t maxSize)
{
printf("Creating new network buffer: %d %d\n", size, maxSize);
MaxSize = maxSize;
Size = size;
Data = new uint8_t[MaxSize];
}
~CNetworkBuffer()
{
printf("Deleting network buffer of size %d (%d) %p\n", Size, MaxSize, Data);
if(Data != NULL)
{
delete[] Data;
Data = NULL;
}
}
};
class CNetworkBuffers
{
public:
//
// Public constructor
//
CNetworkBuffers()
{
}
//
// Public destructor
//
~CNetworkBuffers()
{
while(!m_buffers.empty())
{
CNetworkBuffer *p = m_buffers.front();
m_buffers.pop_front();
delete p;
}
}
//
// Initializes the total number of buffers in this cache and the size of
// each buffer
//
void Initialize(uint32_t numBuffers, uint32_t bufferSize)
{
for(int i = 0; i < numBuffers; ++i)
{
m_buffers.push_back(new CNetworkBuffer(0, bufferSize));
}
}
//
// Retrieves a new buffer from the list of free buffers. This call blocks if there
// are no free buffers.
//
std::shared_ptr<CNetworkBuffer> GetBuffer()
{
m_cs.lock();
if(m_buffers.size() > 0)
{
CNetworkBuffer *pBuffer = m_buffers.front();
m_buffers.pop_front();
printf("Retrieving buffer: %p\n", pBuffer);
m_cs.Unlock();
return std::shared_ptr<CNetworkBuffer>(pBuffer);
}
else
{
assert(false);
}
m_cs.unlock();
}
private:
//
// The list of free buffers in this class.
//
std::list<CNetworkBuffer*> m_buffers;
std::mutex m_cs;
};
static void *
ThreadFunc(void *p)
{
CNetworkBuffers *buffers = (CNetworkBuffers *)p;
for(int i = 0; i < 64; ++i)
{
//
// NOTE: This type of allocation works fine
//
std::shared_ptr<CNetworkBuffer> buffer(new CNetworkBuffer(0, 1024));
//
// NOTE: Comment out the previous line and uncomment out this one,
// and watch as the process memory grows.
//
//std::shared_ptr<CNetworkBuffer> buffer = buffers->GetBuffer();
printf("Retrieved buffer of size %d (%d) - %d\n", buffer->Size, buffer->MaxSize, i);
sprintf((char *)buffer->Data, "%s", "DEADBEEF");
sleep(1);
}
return NULL;
}
int main(int argc, const char * argv[]) {
printf("Allocating memory for buffers\n");
CNetworkBuffers buffers;
buffers.Initialize(64, 1024);
// Now create another thread to read use those buffers
pthread_t t;
pthread_create(&t, NULL, ThreadFunc, &buffers);
pthread_join(t, NULL);
return 0;
}