In trying to create an asynchronous I/O file reader in C++ under Linux. The example I have has two buffers. The first read blocks. Then, for each time around the main loop, I asynchronously launch the IO and call process() which runs the simulated processing of the current block. When processing is done, we wait for the condition variable. The idea is that the asynchronous handler should notify the condition variable.
Unfortunately the notify seems to happen before wait, and it seems like this is not the way the condition variable wait() function works. How should I rewrite the code so that the loop waits until the asynchronous io has completed?
#include <aio.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <thread>
using namespace std;
using namespace std::chrono_literals;
constexpr uint32_t blockSize = 512;
mutex readMutex;
condition_variable cv;
int fh;
int bytesRead;
void process(char* buf, uint32_t bytesRead) {
cout << "processing..." << endl;
usleep(100000);
}
void aio_completion_handler(sigval_t sigval) {
struct aiocb* req = (struct aiocb*)sigval.sival_ptr;
// check whether asynch operation is complete
if (aio_error(req) == 0) {
int ret = aio_return(req);
bytesRead = req->aio_nbytes;
cout << "ret == " << ret << endl;
cout << (char*)req->aio_buf << endl;
}
{
unique_lock<mutex> readLock(readMutex);
cv.notify_one();
}
}
void thready() {
char* buf1 = new char[blockSize];
char* buf2 = new char[blockSize];
aiocb cb;
char* processbuf = buf1;
char* readbuf = buf2;
fh = open("smallfile.dat", O_RDONLY);
if (fh < 0) {
throw std::runtime_error("cannot open file!");
}
memset(&cb, 0, sizeof(aiocb));
cb.aio_fildes = fh;
cb.aio_nbytes = blockSize;
cb.aio_offset = 0;
// Fill in callback information
/*
Using SIGEV_THREAD to request a thread callback function as a notification
method
*/
cb.aio_sigevent.sigev_notify_attributes = nullptr;
cb.aio_sigevent.sigev_notify = SIGEV_THREAD;
cb.aio_sigevent.sigev_notify_function = aio_completion_handler;
/*
The context to be transmitted is loaded into the handler (in this case, a
reference to the aiocb request itself). In this handler, we simply refer to
the arrived sigval pointer and use the AIO function to verify that the request
has been completed.
*/
cb.aio_sigevent.sigev_value.sival_ptr = &cb;
int currentBytesRead = read(fh, buf1, blockSize); // read the 1st block
while (true) {
cb.aio_buf = readbuf;
aio_read(&cb); // each next block is read asynchronously
process(processbuf, currentBytesRead); // process while waiting
{
unique_lock<mutex> readLock(readMutex);
cv.wait(readLock);
}
currentBytesRead = bytesRead; // make local copy of global modified by the asynch code
if (currentBytesRead < blockSize) {
break; // last time, get out
}
cout << "back from wait" << endl;
swap(processbuf, readbuf); // switch to other buffer for next time
currentBytesRead = bytesRead; // create local copy
}
delete[] buf1;
delete[] buf2;
}
int main() {
try {
thready();
} catch (std::exception& e) {
cerr << e.what() << '\n';
}
return 0;
}
A condition varible should generally be used for
waiting until it is possible that the predicate (for example a shared variable) has changed, and
notifying waiting threads that the predicate may have changed, so that waiting threads should check the predicate again.
However, you seem to be attempting to use the state of the condition variable itself as the predicate. This is not how condition variables are supposed to be used and may lead to race conditions such as those described in your question. Another reason to always check the predicate is that spurious wakeups are possible with condition variables.
In your case, it would probably be appropriate to create a shared variable
bool operation_completed = false;
and use that variable as the predicate for the condition variable. Access to that variable should always be controlled by the mutex.
You can then change the lines
{
unique_lock<mutex> readLock(readMutex);
cv.notify_one();
}
to
{
unique_lock<mutex> readLock(readMutex);
operation_completed = true;
cv.notify_one();
}
and change the lines
{
unique_lock<mutex> readLock(readMutex);
cv.wait(readLock);
}
to:
{
unique_lock<mutex> readLock(readMutex);
while ( !operation_completed )
cv.wait(readLock);
}
Instead of
while ( !operation_completed )
cv.wait(readLock);
you can also write
cv.wait( readLock, []{ return operation_completed; } );
which is equivalent. See the documentation of std::condition_varible::wait for further information.
Of course, operation_completed should also be set back to false when appropriate, while the mutex is locked.
Related
In a previous question, I asked how to implement asynchronous I/O. This code now works, except that at the end it never stops. It seems that aio_read reads starting at offset, for length, and if it is past the end of the file, the operation succeeds? This code builds and runs on Ubuntu 20.04LTS and successfully reads blocks 1-5, each 512 bytes, then when it runs out of file it keeps oscillating between block 4 and 5. It never terminates.
Here is the code:
#include <aio.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <thread>
using namespace std;
using namespace std::chrono_literals;
constexpr uint32_t blockSize = 512;
mutex readMutex;
bool readReady = false;
condition_variable cv;
bool operation_completed = false;
int fh;
int bytesRead;
void process(char* buf, uint32_t bytesRead) {
cout << "processing..." << endl;
usleep(100000);
}
void aio_completion_handler(sigval_t sigval) {
struct aiocb* req = (struct aiocb*)sigval.sival_ptr;
// check whether asynch operation is complete
int status;
if ((status = aio_error(req)) != 0) {
cout << "Error: " << status << '\n';
return;
}
int ret = aio_return(req);
bytesRead = req->aio_nbytes;
cout << "ret == " << ret << endl;
cout << (char*)req->aio_buf << endl;
unique_lock<mutex> readLock(readMutex);
operation_completed = true;
cv.notify_one();
}
void thready() {
char* buf1 = new char[blockSize];
char* buf2 = new char[blockSize];
aiocb cb;
char* processbuf = buf1;
char* readbuf = buf2;
fh = open("smallfile.dat", O_RDONLY);
if (fh < 0) {
throw std::runtime_error("cannot open file!");
}
memset(&cb, 0, sizeof(aiocb));
cb.aio_fildes = fh;
cb.aio_nbytes = blockSize;
cb.aio_offset = 0;
// Fill in callback information
/*
Using SIGEV_THREAD to request a thread callback function as a notification
method
*/
cb.aio_sigevent.sigev_notify_attributes = nullptr;
cb.aio_sigevent.sigev_notify = SIGEV_THREAD;
cb.aio_sigevent.sigev_notify_function = aio_completion_handler;
/*
The context to be transmitted is loaded into the handler (in this case, a
reference to the aiocb request itself). In this handler, we simply refer to
the arrived sigval pointer and use the AIO function to verify that the request
has been completed.
*/
cb.aio_sigevent.sigev_value.sival_ptr = &cb;
int cursor = 0;
int currentBytesRead = read(fh, buf1, blockSize); // read the 1st block
while (true) {
cb.aio_buf = readbuf;
operation_completed = false; // set predicate to true and wait until asynch changes it
cb.aio_offset = cursor;
aio_read(&cb); // each next block is read asynchronously
process(processbuf, currentBytesRead); // process while waiting
{
unique_lock<mutex> readLock(readMutex);
cv.wait( readLock, []{ return operation_completed; } );
}
if (!operation_completed)
break;
currentBytesRead = bytesRead; // make local copy of global modified by the asynch code
cursor += bytesRead;
if (currentBytesRead < blockSize) {
break; // last time, get out
}
cout << "back from wait" << endl;
swap(processbuf, readbuf); // switch to other buffer for next time
currentBytesRead = bytesRead; // create local copy
}
delete[] buf1;
delete[] buf2;
}
int main() {
try {
thready();
} catch (std::exception& e) {
cerr << e.what() << '\n';
}
return 0;
}
First, is the above code an appropriate way to do this to get the length of the file and figure out exactly how many reads to do?
Second, if this is so, fine, but how can aio_read just return success if I try to read past the end of file? Error status is always zero. I am confused about what it is supposed to do.
with 512 bytes of each of 1,2,3,4,5
In a previous questionTrying to write asynchronous I/O in C++ using locks and condition variables. This code calls terminate on the first lock() why?
,
we tried to use two mutexes to have asynchronous code that reads one block of a file into memory, then asynchronously tries to read the next block while processing the current one. Someone made a comment that using read was not the best way to do that. This is an attempt to use POSIX aio_read, but we are trying to wait on a condition_variable and do a notify on the condition variable in the callback after the I/O completes, and it's not working -- in the debugger we can see it blows right past the wait.
#include <aio.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <condition_variable>
#include <cstring>
#include <iostream>
#include <thread>
using namespace std;
using namespace std::chrono_literals;
constexpr uint32_t blockSize = 512;
mutex readMutex;
mutex procMutex;
condition_variable cv;
int fh;
int bytesRead;
void process(char* buf, uint32_t bytesRead) {
cout << "processing..." << endl;
usleep(100000);
}
void aio_completion_handler(sigval_t sigval) {
struct aiocb* req = (struct aiocb*)sigval.sival_ptr;
// check whether asynch operation is complete
if (aio_error(req) == 0) {
int ret = aio_return(req);
cout << "ret == " << ret << endl;
cout << (char*)req->aio_buf << endl;
}
cv.notify_one();
}
void thready() {
char* buf1 = new char[blockSize];
char* buf2 = new char[blockSize];
aiocb cb;
char* processbuf = buf1;
char* readbuf = buf2;
fh = open("smallfile.dat", O_RDONLY);
if (fh < 0) {
throw std::runtime_error("cannot open file!");
}
memset(&cb, 0, sizeof(aiocb));
cb.aio_fildes = fh;
cb.aio_nbytes = blockSize;
cb.aio_offset = 0;
// Fill in callback information
/*
Using SIGEV_THREAD to request a thread callback function as a notification
method
*/
cb.aio_sigevent.sigev_notify_attributes = nullptr;
cb.aio_sigevent.sigev_notify = SIGEV_THREAD;
cb.aio_sigevent.sigev_notify_function = aio_completion_handler;
/*
The context to be transmitted is loaded into the handler (in this case, a
reference to the aiocb request itself). In this handler, we simply refer to
the arrived sigval pointer and use the AIO function to verify that the request
has been completed.
*/
cb.aio_sigevent.sigev_value.sival_ptr = &cb;
int currentBytesRead = read(fh, buf1, blockSize); // read the 1st block
unique_lock<mutex> readLock(readMutex);
while (true) {
cb.aio_buf = readbuf;
aio_read(&cb); // each next block is read asynchronously
process(processbuf, currentBytesRead); // process while waiting
cv.wait(readLock);
if (currentBytesRead < blockSize) {
break; // last time, get out
}
cout << "back from wait" << endl;
swap(processbuf, readbuf); // switch to other buffer for next time
currentBytesRead = bytesRead; // create local copy
}
delete[] buf1;
delete[] buf2;
}
int main() {
try {
thready();
} catch (std::exception& e) {
cerr << e.what() << '\n';
}
return 0;
}
We were trying to create C++ code that would read a block from a file, and start a thread to asynchronously read the next block while processing the first block.
We started with condition_variable, but it was crashing so we went with straight locks.
The program dies on the first readLock.lock()
Ok, as some comments explained, the following code is wrong because we are not using RAII. Corrected code follows after, with delays put in for each function to force bugs. It still crashes though not on the first lock.
#include <fcntl.h>
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <thread>
using namespace std;
using namespace std::chrono_literals;
constexpr uint32_t blockSize = 32768;
char* buf1;
char* buf2;
char* processbuf = buf1;
char* readbuf = buf2;
mutex readMutex;
mutex procMutex;
//condition_variable readCV;
//condition_variable procCV;
int fh;
int bytesRead;
std::unique_lock<std::mutex> readLock;
std::unique_lock<std::mutex> procLock;
void nextRead() {
while (true) {
readLock.lock();
bytesRead = read(fh, readbuf, blockSize);
for (int i = 0; i < 5; i++) {
cerr << "reading..." << endl;
usleep(100000);
readLock.unlock();
}
cerr << "notifying..." << endl;
readLock.unlock();
if (bytesRead != blockSize) // last time, end here!
return;
procLock.lock();
procLock.unlock();
}
}
void process(char* buf, uint32_t bytesRead) { cout << "hit it!" << endl; }
int thready() {
buf1 = new char[blockSize];
buf2 = new char[blockSize];
fh = open("bigfile.dat", O_RDONLY);
if (fh < 0) {
throw std::runtime_error("cannot open file!");
}
int currentBytesRead = read(fh, buf1, blockSize);
thread reader(nextRead);
process(processbuf, currentBytesRead);
while (true) {
readLock.lock();
cout << "back from wait" << endl;
swap(processbuf, readbuf); // switch to other buffer for next time
currentBytesRead =
bytesRead; // copy locally so thread can do the other one
// TODO: is the above a problem? what if
readLock.unlock();
procLock.lock();
process(processbuf, currentBytesRead);
procLock.unlock();
}
reader.join();
delete[] buf1;
delete[] buf2;
}
int main() {
try {
thready();
}catch(std::exception& e) {
cerr << e.what() << '\n';
}
return 0;
}
Here is the corrected code using RAII which still does not work. Now it terminates on the 3rd read? The file is 1Mb of zeros.
I am considering changing block size to 1 byte and having the file be "123456789" for purposes of easy testing
:
#include <fcntl.h>
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <thread>
using namespace std;
using namespace std::chrono_literals;
constexpr uint32_t blockSize = 32768;
char* buf1;
char* buf2;
char* processbuf = buf1;
char* readbuf = buf2;
mutex readMutex;
mutex procMutex;
//condition_variable readCV;
//condition_variable procCV;
int fh;
int bytesRead;
void nextRead() {
while (true) {
{
unique_lock<mutex> readLock(readMutex);
bytesRead = read(fh, readbuf, blockSize);
for (int i = 0; i < 5; i++) {
cerr << "reading..." << endl;
usleep(100000);
readLock.unlock();
}
cerr << "notifying..." << endl;
}
if (bytesRead != blockSize) // last time, end here!
return;
// wait for process to complete
unique_lock<mutex> procLock(procMutex);
}
}
void process(char* buf, uint32_t bytesRead) {
cout << "processing..." << endl;
usleep(100000);
}
int thready() {
buf1 = new char[blockSize];
buf2 = new char[blockSize];
fh = open("bigfile.dat", O_RDONLY);
if (fh < 0) {
throw std::runtime_error("cannot open file!");
}
int currentBytesRead = read(fh, buf1, blockSize);
thread reader(nextRead);
process(processbuf, currentBytesRead);
while (true) {
{
unique_lock<mutex> readLock(readMutex);
cout << "back from wait" << endl;
swap(processbuf, readbuf); // switch to other buffer for next time
currentBytesRead = bytesRead; // create local copy
// TODO: is the above a problem? what if
}
unique_lock<mutex> procLock(procMutex);
process(processbuf, currentBytesRead);
}
reader.join();
delete[] buf1;
delete[] buf2;
}
int main() {
try {
thready();
} catch(std::exception& e) {
cerr << e.what() << '\n';
}
return 0;
}
The output is:
processing...
reading...
reading...back from wait
processing...
back from wait
processing...
terminate called after throwing an instance of 'std::system_error'
what(): Operation not permitted
Aborted (core dumped)
Your mutexes are shared between threads as globals. That's fine. But your locks need to be local variables owned by a single thread. Use the fact that unique_lock auto locks the mutex in its constructor and auto-releases it its destructor. Here's a modificaton of your code:
void nextRead() {
while (true) {
{
// acquire the read lock
std::unique_lock<std::mutex> readLock(readMutex);
bytesRead = read(fh, readbuf, blockSize);
for (int i = 0; i < 5; i++) {
cerr << "reading..." << endl;
usleep(100000);
}
// when readLock goes out of scope, the mutex associated with it is unlocked
}
cerr << "notifying..." << endl;
{
// acquire the process lock
std::unique_lock<std::mutex> procLock(procMutex);
if (bytesRead != blockSize) // last time, end here!
return;
// procMutex gets unlocked inmplicitly as procLock goes out of scope
}
}
You'll need to make similar changes in your thready function.
Lets say I have two processes (simulated in this example with two threads) in a producer-consumer set up. That is, one process writes data to a file, the other process consumes the data in the file, then clears said file.
The set up I currently have, based on bits and pieces I've thrown together from various resources online, is that I should use a lock file to ensure that only one process can access the data file at a time. The producer acquires the lock, writes to the file, then releases the lock. Meanwhile, the consumer waits for modify events with inotify at which point it acquires the lock, consumes the data, and empties the file.
This seems relatively straightforward, but the part that's tripping me up is that when I empty the file out in my consumer thread, it triggers inotify modify event again, which sets off the whole flow again, and ends with the data file being cleared again, thus repeating forever.
I've tried a few ways to work around this problem, but none of them seem quite right. I'm worried doing this wrong will introduce potential race conditions or I'll end up skipping modify events or something.
Here is my current code:
#include <fstream>
#include <iostream>
#include <string>
#include "pthread.h"
#include "sys/file.h"
#include "sys/inotify.h"
#include "sys/stat.h"
#include "unistd.h"
const char* lock_filename = "./test_lock_file";
const char* data_filename = "./test_data_file";
int AquireLock(char const* lockName) {
mode_t m = umask(0);
int fd = open(lockName, O_RDWR | O_CREAT, 0666);
umask(m);
bool success = false;
if (fd < 0 || flock(fd, LOCK_EX) < 0) {
close(fd);
return -1;
}
return fd;
}
void ReleaseLock(int fd, char const* lockName) {
if (fd < 0) return;
remove(lockName);
close(fd);
}
void* ConsumerThread(void*) {
// Set up inotify.
int file_descriptor = inotify_init();
if (file_descriptor < 0) return nullptr;
int watch_descriptor =
inotify_add_watch(file_descriptor, data_filename, IN_MODIFY);
if (watch_descriptor < 0) return nullptr;
char buf[4096] __attribute__((aligned(__alignof__(inotify_event))));
while (true) {
// Read new events.
const inotify_event* event;
ssize_t numRead = read(file_descriptor, buf, sizeof(buf));
if (numRead <= 0) return nullptr;
// For each event, do stuff.
for (int i = 0; i < numRead; i += sizeof(inotify_event) + event->len) {
event = reinterpret_cast<inotify_event*>(&buf[i]);
// Critical section!
int fd = AquireLock(lock_filename);
// Read from the file.
std::string line;
std::ifstream data_file(data_filename);
if (data_file.is_open()) {
while (getline(data_file, line)) {
std::cout << line << std::endl;
}
data_file.close();
// Clear the file by opening then closing without writing to it.
std::ofstream erase_data_file(data_filename);
erase_data_file.close();
std::cout << "file cleared." << std::endl;
}
ReleaseLock(fd, lock_filename);
// Critical section over!
}
}
return nullptr;
}
int main(int argv, char** argc) {
// Set up other thread.
pthread_t thread;
int rc = pthread_create(&thread, NULL, ConsumerThread, nullptr);
if (rc) return rc;
// Producer thread: Periodically write to a file.
while (true) {
sleep(3);
// Critical section!
int fd = AquireLock(lock_filename);
// Write some text to a file
std::ofstream data_file(data_filename);
int counter = 0;
if (data_file.is_open()) {
std::cout << "Writing to file.\n";
data_file << "This is some example data. " << counter++ << "\n";
data_file.close();
}
ReleaseLock(fd, lock_filename);
// Critical section over!
}
pthread_exit(NULL);
return 0;
}
One idea I had was to disable tracking of modify events at the start of the consumer thread's critical section with inotify_rm_watch, then re-add it right before leaving the critical section. This doesn't seem to work though. Even with the events disabled, modify events are still getting triggered and I'm not sure why.
I've also considered just using a boolean to see if there was any file contents while consuming the file, and only clearing the file if it wasn't empty. This felt kind of hacky since it's still doing a second unnecessary iteration of the loop, but if I can't find a better solution I might just go with that. Ideally there would be a way to have only the producer thread's modifications trigger events, while the consumer could have it's own file modifications somehow ignored or disabled, but I'm not sure how to achieve that effect.
The main() function creates a thread that is supposed to live until the user wishes to exit the program. The thread needs to return values to the main functions at periodic intervals. I tried doing something like this, but hasn't worked well -
std::queue<std::string> q;
void start_thread(int num)
{
std::string str;
//Do some processing
q.push(str);
}
int main()
{
//Thread initialization
int i;
//Start thread
pthread_create(&m_thread,NULL,start_thread,static_cast<void *>i);
while(true)
{
if(q.front())
{
std::cout<<q.front();
return 0;
}
}
//Destroy thread.....
return 0;
}
Any suggestions?
It is not safe to read and write from STL containers concurrently. You need a lock to synchronize access (see pthread_mutex_t).
Your thread pushes a single value into the queue. You seem to be expecting periodic values, so you'll want to modify start_thread to include a loop that calls queue.push.
The return 0; in the consumer loop will exit main() when it finds a value in the queue. You'll always read a single value and exit your program. You should remove that return.
Using if (q.front()) is not the way to test if your queue has values (front assumes at least one element exists). Try if (!q.empty()).
Your while(true) loop is gonna spin your processor somethin' nasty. You should look at condition variables to wait for values in the queue in a nice manner.
try locking a mutex before calling push() / front() on the queue.
Here is a working example of what it looks like you were trying to accomplish:
#include <iostream>
#include <queue>
#include <vector>
#include <semaphore.h>
#include <pthread.h>
struct ThreadData
{
sem_t sem;
pthread_mutex_t mut;
std::queue<std::string> q;
};
void *start_thread(void *num)
{
ThreadData *td = reinterpret_cast<ThreadData *>(num);
std::vector<std::string> v;
std::vector<std::string>::iterator i;
// create some data
v.push_back("one");
v.push_back("two");
v.push_back("three");
v.push_back("four");
i = v.begin();
// pump strings out until no more data
while (i != v.end())
{
// lock the resource and put string in the queue
pthread_mutex_lock(&td->mut);
td->q.push(*i);
pthread_mutex_unlock(&td->mut);
// signal activity
sem_post(&td->sem);
sleep(1);
++i;
}
// signal activity
sem_post(&td->sem);
}
int main()
{
bool exitFlag = false;
pthread_t m_thread;
ThreadData td;
// initialize semaphore to empty
sem_init(&td.sem, 0, 0);
// initialize mutex
pthread_mutex_init(&td.mut, NULL);
//Start thread
if (pthread_create(&m_thread, NULL, start_thread, static_cast<void *>(&td)) != 0)
{
exitFlag = true;
}
while (!exitFlag)
{
if (sem_wait(&td.sem) == 0)
{
pthread_mutex_lock(&td.mut);
if (td.q.empty())
{
exitFlag = true;
}
else
{
std::cout << td.q.front() << std::endl;
td.q.pop();
}
pthread_mutex_unlock(&td.mut);
}
else
{
// something bad happened
exitFlag = true;
}
}
return 0;
}