Boost::Process pipe chain - c++

Using the results of this question simultaneous read and write to child's stdio using boost.process, I am trying to modify the code so that a file is read, piped through gzip, the output of gzip piped through bzip2, and finally the output of bzip2 written to a file.
My first attempt was
/*
* ProcessPipe.cpp
*
* Created on: Apr 17, 2018
* Author: dbetz
*/
//#define BOOST_ASIO_ENABLE_HANDLER_TRACKING 1
#include <boost/asio.hpp>
#include <boost/asio/high_resolution_timer.hpp>
#include <boost/process.hpp>
#include <boost/process/async.hpp>
#include <iostream>
#include <fstream>
#include <functional>
namespace bp = boost::process;
using boost::system::error_code;
using namespace std::chrono_literals;
using Loop = std::function<void()>;
using Buffer = std::array<char, 500>;
int main() {
boost::asio::io_service svc;
auto gzip_exit = [](int code, std::error_code ec) {
std::cout << "gzip exited " << code << " (" << ec.message() << ")\n";
};
auto bzip2_exit = [](int code, std::error_code ec) {
std::cout << "bzip2 exited " << code << " (" << ec.message() << ")\n";
};
bp::async_pipe file_to_gzip_pipe{svc}, gzip_to_bzip_pipe{svc}, bzip_to_file_pipe{svc};
bp::child process_gzip("/usr/bin/gzip", "-c", bp::std_in < file_to_gzip_pipe, bp::std_out > gzip_to_bzip_pipe, svc, bp::on_exit(gzip_exit));
bp::child process_bzip2("/usr/bin/bzip2", "-c", bp::std_in < gzip_to_bzip_pipe, bp::std_out > bzip_to_file_pipe, svc, bp::on_exit(bzip2_exit));
std::ifstream ifs("src/ProcessPipe2.cpp");
Buffer file_to_gzip_buffer;
Loop file_to_gzip_loop;
file_to_gzip_loop = [&] {
if (!ifs.good())
{
error_code ec;
file_to_gzip_pipe.close(ec);
std::cout << "Read file, write gzip: closed stdin (" << ec.message() << ")\n";
return;
}
ifs.read(file_to_gzip_buffer.data(), file_to_gzip_buffer.size());
boost::asio::async_write(file_to_gzip_pipe, boost::asio::buffer(file_to_gzip_buffer.data(), ifs.gcount()),
[&](error_code ec, size_t transferred) {
std::cout << "Read file, write gzip: " << ec.message() << " sent " << transferred << " bytes\n";
if (!ec) {
file_to_gzip_loop(); // continue writing
}
});
};
Buffer gzip_to_bzip_buffer;
Loop gzip_to_bzip_loop;
gzip_to_bzip_loop=[&] {
gzip_to_bzip_pipe.async_read_some(boost::asio::buffer(gzip_to_bzip_buffer),
[&](error_code ec, size_t transferred){
// duplicate buffer
std::cout << "Read gzip, write bzip: " << ec.message() << " got " << transferred << " bytes\n";
if (!ec)
gzip_to_bzip_loop();
else
gzip_to_bzip_pipe.close();
}
);
};
std::ofstream ofs("src/ProcessPipe2.gz");
Buffer bzip_to_file_buffer;
Loop bzip_to_file_loop;
bzip_to_file_loop = [&] {
bzip_to_file_pipe.async_read_some(boost::asio::buffer(bzip_to_file_buffer),
[&](error_code ec, size_t transferred) {
std::cout << "Read bzip, write file: " << ec.message() << " got " << transferred << " bytes\n";
ofs << std::string(bzip_to_file_buffer.data(),transferred);
if (!ec)
bzip_to_file_loop(); // continue reading
});
};
file_to_gzip_loop(); // async
gzip_to_bzip_loop();
bzip_to_file_loop(); // async
svc.run(); // Await all async operations
}
but this gives an error:
Read gzip, write bzip: Bad file descriptor got 0 bytes
The problem seems to be that gzip_to_bzip_pipe is opened for writing by gzip and for reading by bzip. Any ideas?

I'd write that code simply like:
#include <boost/process.hpp>
#include <iostream>
namespace bp = boost::process;
int main() {
bp::pipe intermediate;
bp::child process_gzip("/bin/gzip", "-c", bp::std_in<"src/ProcessPipe2.cpp", bp::std_out> intermediate);
bp::child process_bzip2("/bin/bzip2", "-c", bp::std_in<intermediate, bp::std_out> "src/ProcessPipe2.gz.bz2");
process_bzip2.wait();
process_bzip2.wait();
}
BONUS
You can do without sub processes entirely and just use boost::iostreams::copy:
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/copy.hpp>
#include <iostream>
#include <fstream>
namespace io = boost::iostreams;
int main() {
std::ifstream ifs("src/ProcessPipe2.cpp");
io::filtering_stream<io::output> os;
os.push(io::gzip_compressor());
os.push(io::bzip2_compressor());
std::ofstream ofs("src/ProcessPipe2.gz.bz2");
os.push(ofs);
io::copy(ifs, os);
}

Related

Boost ASIO performing async write/read/write handshake with a timer

I have an application where I need to connect to a socket, send a handshake message (send command1, get response, send command2), and then receive data. It is set to expire after a timeout, stop the io_service, and then attempt to reconnect. There is no error message when I do my first async_write but the following async_read waits until the timer expires, and then reconnects in an infinite loop.
My code looks like:
#include <boost/asio.hpp>
#include <boost/bind/bind.hpp>
#include <iostream>
#include <string>
#include <memory>
#include <boost/date_time/posix_time/posix_time.hpp>
using namespace std;
using boost::asio::ip::tcp;
static shared_ptr<boost::asio::io_service> _ios;
static shared_ptr<boost::asio::deadline_timer> timer;
static shared_ptr<boost::asio::ip::tcp::socket> tcp_sock;
static shared_ptr<tcp::resolver> _resolver;
static boost::asio::ip::tcp::resolver::results_type eps;
string buffer(1024,0);
void handle_read(const boost::system::error_code& ec, size_t bytes)
{
if (ec)
{
cout << "error: " << ec.message() << endl;
_ios->stop();
return;
}
// got first response, send off reply
if (buffer == "response")
{
boost::asio::async_write(*tcp_sock, boost::asio::buffer("command2",7),
[](auto ec, auto bytes)
{
if (ec)
{
cout << "write error: " << ec.message() << endl;
_ios->stop();
return;
}
});
}
else
{
// parse incoming data
}
// attempt next read
timer->expires_from_now(boost::posix_time::seconds(10));
boost::asio::async_read(*tcp_sock, boost::asio::buffer(buffer,buffer.size()), handle_read);
}
void get_response()
{
timer->expires_from_now(boost::posix_time::seconds(10));
boost::asio::async_read(*tcp_sock, boost::asio::buffer(buffer,buffer.size()), handle_read);
}
void on_connected(const boost::system::error_code& ec, tcp::endpoint)
{
if (!tcp_sock->is_open())
{
cout << "socket is not open" << endl;
_ios->stop();
}
else if (ec)
{
cout << "error: " << ec.message() << endl;
_ios->stop();
return;
}
else
{
cout << "connected" << endl;
// do handshake (no errors?)
boost::asio::async_write(*tcp_sock, boost::asio::buffer("command1",7),
[](auto ec, auto bytes)
{
if (ec)
{
cout << "write error: " << ec.message() << endl;
_ios->stop();
return;
}
get_response();
});
}
}
void check_timer()
{
if (timer->expires_at() <= boost::asio::deadline_timer::traits_type::now())
{
tcp_sock->close();
timer->expires_at(boost::posix_time::pos_infin);
}
timer->async_wait(boost::bind(check_deadline));
}
void init(string ip, string port)
{
// set/reset data and connect
_resolver.reset(new tcp::resolver(*_ios));
eps = _resolver->resolve(ip, port);
timer.reset(new boost::asio::deadline_timer(*_ios));
tcp_sock.reset(new boost::asio::ip::tcp::socket(*_ios));
timer->expires_from_now(boost::posix_time::seconds(5));
// start async connect
boost::asio::async_connect(*tcp_sock, eps, on_connected);
timer->async_wait(boost::bind(check_timer));
}
int main(int argc, char** argv)
{
while (1)
{
// start new io context
_ios.reset(new boost::asio::io_service);
init(argv[1],argv[2]);
_ios->run();
cout << "try reconnect" << endl;
}
return 0;
}
Why would I be timing out? When I do a netcat and follow the same procedure things look ok. I get no errors from the async_write indicating that there are any errors and I am making sure to not call the async_read for the response until I am in the write handler.
Others have been spot on. You use "blanket" read, which means it only completes at error (like EOF) or when the buffer is full (docs)
Besides your code is over-complicated (excess dynamic allocation, manual new, globals, etc).
The following simplified/cleaned up version still exhibits your problem: http://coliru.stacked-crooked.com/a/8f5d0820b3cee186
Since it looks like you just want to limit over-all time of the request, I'd suggest dropping the timer and just limit the time to run the io_context.
Also showing how to use '\n' for message delimiter and avoid manually managing dynamic buffers:
Live On Coliru
#include <boost/asio.hpp>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
namespace asio = boost::asio;
using asio::ip::tcp;
using boost::system::error_code;
using namespace std::literals;
struct Client {
#define HANDLE(memfun) std::bind(&Client::memfun, this, std::placeholders::_1, std::placeholders::_2)
Client(std::string const& ip, std::string const& port) {
async_connect(_sock, tcp::resolver{_ios}.resolve(ip, port), HANDLE(on_connected));
}
void run() { _ios.run_for(10s); }
private:
asio::io_service _ios;
asio::ip::tcp::socket _sock{_ios};
std::string _buffer;
void on_connected(error_code ec, tcp::endpoint) {
std::cout << "on_connected: " << ec.message() << std::endl;
if (ec)
return;
async_write(_sock, asio::buffer("command1\n"sv), [this](error_code ec, size_t) {
std::cout << "write: " << ec.message() << std::endl;
if (!ec)
get_response();
});
}
void get_response() {
async_read_until(_sock, asio::dynamic_buffer(_buffer /*, 1024*/), "\n", HANDLE(on_read));
}
void on_read(error_code ec, size_t bytes) {
std::cout << "handle_read: " << ec.message() << " " << bytes << std::endl;
if (ec)
return;
auto cmd = _buffer.substr(0, bytes);
_buffer.erase(0, bytes);
// got first response, send off reply
std::cout << "Handling command " << quoted(cmd) << std::endl;
if (cmd == "response\n") {
async_write(_sock, asio::buffer("command2\n"sv), [](error_code ec, size_t) {
std::cout << "write2: " << ec.message() << std::endl;
});
} else {
// TODO parse cmd
}
get_response(); // attempt next read
}
};
int main(int argc, char** argv) {
assert(argc == 3);
while (1) {
Client(argv[1], argv[2]).run();
std::this_thread::sleep_for(1s); // for demo on COLIRU
std::cout << "try reconnect" << std::endl;
}
}
With output live on coliru:
on_connected: Connection refused
try reconnect
on_connected: Success
write: Success
command1
handle_read: Success 4
Handling command "one
"
handle_read: Success 9
Handling command "response
"
write2: Success
command2
handle_read: Success 6
Handling command "three
"
handle_read: End of file 0
try reconnect
on_connected: Success
write: Success
command1
Local interactive demo:
Sidenote: as long as resolve() isn't happening asynchronously it will not be subject to the timeouts.

How to use a boost async_pipe to send child process output across a fork?

I'm new to the boost::asio, and boost::process libraries and I've come across a problem which I'm struggling to find a solution for...
Consider that I have a small toy program that does the following:
Firstly, fork()s itself into a parent-branch and a child-branch.
The child-branch then uses the boost::process::child class to invoke the unix command ls in an asynchronous context.
The child-branch supplies the boost::process::child class with a boost::process::async_pipe to direct std_out to.
The parent-branch wishes to read what has been written to the pipe, line by line, and process it further.
Currently, my implementation of this works up to a point. However, the read_loop() call in the parent-branch does not terminate. It is almost as if it never reaches EOF, or is blocked. Why is this?
Here is my MWE:
#include <boost/process.hpp>
#include <boost/asio.hpp>
#include <iostream>
#include <string>
#include <unistd.h>
void read_loop(boost::process::async_pipe& pipe)
{
static boost::asio::streambuf buffer;
boost::asio::async_read_until(
pipe,
buffer,
'\n',
[&](boost::system::error_code error_code, std::size_t bytes) {
if (!error_code) {
std::istream is(&buffer);
if (std::string line; std::getline(is, line)) {
std::cout << "Read Line: " << line << "\n";
}
read_loop(pipe);
}
else {
std::cout << "Error in read_loop()!\n";
pipe.close();
}
}
);
}
int main(int argc, char* argv[])
{
boost::asio::io_context io_context{};
boost::process::async_pipe pipe{ io_context };
io_context.notify_fork(boost::asio::io_context::fork_prepare);
pid_t pid{ fork() };
if (pid == 0) {
io_context.notify_fork(boost::asio::io_context::fork_child);
boost::process::child child(
boost::process::args({ "/usr/bin/ls", "/etc/" }),
boost::process::std_out > pipe,
boost::process::on_exit([&](int exit, std::error_code error_code) { std::cout << "[Exited with code " << exit << " (" << error_code.message() << ")]\n"; }),
io_context
);
io_context.run();
}
else {
io_context.notify_fork(boost::asio::io_context::fork_parent);
read_loop(pipe);
io_context.run();
}
return 0;
}
Which will successfully give the (abridged) output, as expected:
Read Line: adduser.conf
...
[Exited with code 0 (Success)]
...
Read Line: zsh_command_not_found
but will then just hang until it is forcibly killed.
Which leaves the main question, why does my read_loop() function end up blocking/not exiting correctly?
Thanks in advance!
Chasing The Symptom
The process not "seeing" EOF makes me think you have to close either end of the pipe. This is somewhat hacky, but works:
Live On Coliru
#include <boost/asio.hpp>
#include <boost/process.hpp>
#include <iostream>
namespace bp = boost::process;
void read_loop(bp::async_pipe& pipe) {
static boost::asio::streambuf buffer;
using boost::system::error_code;
async_read_until( //
pipe, buffer, '\n', [&](error_code ec, [[maybe_unused]] size_t bytes) {
// std::cout << "Handler " << ec.message() << " bytes:" << bytes << " (" <<
// buffer.size() << ")" << std::endl;
if (!ec) {
std::istream is(&buffer);
if (std::string line; std::getline(is, line)) {
std::cout << "Read Line: " << line << "\n";
}
read_loop(pipe);
} else {
std::cout << "Loop exit (" << ec.message() << ")" << std::endl;
pipe.close();
}
});
}
int main() {
boost::asio::io_context ioc{};
bp::async_pipe pipe{ioc};
ioc.notify_fork(boost::asio::io_context::fork_prepare);
pid_t pid{fork()};
if (pid == 0) {
ioc.notify_fork(boost::asio::io_context::fork_child);
bp::child child( //
bp::args({"/usr/bin/ls", "/etc/"}), bp::std_out > pipe, bp::std_in.close(),
bp::on_exit([&](int exit, std::error_code ec) {
std::cout << "[Exited with code " << exit << " (" << ec.message() << ")]\n";
pipe.close();
}),
ioc);
ioc.run();
} else {
ioc.notify_fork(boost::asio::io_context::fork_parent);
std::move(pipe).sink().close();
read_loop(pipe);
ioc.run();
}
}
Side note: I guess it would be nice to have a more unhacky way to specify this, like (bp::std_in < pipe).close() or so.
Fixing The Root Cause
When using Boost Process, the fork is completely redundant. Boost Process literally does the fork for you, complete with correct service notification and file descriptor handling.
You'll find the code becomes a lot simpler and also handles the closing correctly (likely because some assumptions within Boost Process implementation details):
Live On Coliru
#include <boost/asio.hpp>
#include <boost/process.hpp>
#include <iostream>
namespace bp = boost::process;
void read_loop(bp::async_pipe& pipe) {
static boost::asio::streambuf buffer;
static std::string line; // re-used because we can
async_read_until( //
pipe, buffer, '\n',
[&](boost::system::error_code ec, size_t /*bytes*/) {
if (ec) {
std::cout << "Loop exit (" << ec.message() << ")" << std::endl;
return;
}
if (getline(std::istream(&buffer), line))
std::cout << "Read Line: " << line << "\n";
read_loop(pipe);
});
}
int main() {
boost::asio::io_context ioc{};
bp::async_pipe pipe{ioc};
bp::child child( //
bp::args({"/bin/ls", "/etc/"}), bp::std_out > pipe,
bp::on_exit([&](int exit, std::error_code ec) {
std::cout << "[Exited with " << exit << " (" << ec.message()
<< ")]\n";
}));
read_loop(pipe);
ioc.run();
}

Is is possible to use process output while process is running?

Boost.process allows the usage of Boost.asio in order to perform asynchronous read.
From what I understand, this is useful to read output while process is running without having to wait for the process to terminate.
But to access this output, is it necessary to wait for the process to terminate, or is it possible to access it while process is running, and how?
Actually my need is to access the beginning of a process output (to check that it started as expected) while while keeping it running.
To detail the context, I run a process which I want to keep until the end of the execution:
boost::asio::io_service ios;
std::vector<char> buf;
bp::child c("process_that_needs_to_keep_running", args,
bp::std_out > boost::asio::buffer(buf), ios);
ios.run();
// I DON'T WANT WAIT FOR c TO TERMINATE
// but I want to check that buf contains some text that ensures me it started correctly
// the issue I have here is that I don't know how to read from buf, since its size and content might not be consistent
// is it possible to take a snapshot for instance?
check_started_correctly(buf);
Here the issue is that the producer creates output which I don't control, I just issues output.
If you use bp::std_out > some_kind_of_buffer_or_future you will usually get the result only at exit.
However, you can use an async_pipe:
bp::async_pipe pipe(io);
bp::child c( //
"/bin/bash",
std::vector<std::string>{
"-c",
"for a in {1..20}; do sleep 1; echo message $a; done",
}, //
bp::std_out > pipe, //
bp::on_exit(on_exit), //
io);
Now, you have to explicitly do the IO on that pipe:
boost::asio::streambuf sb;
async_read_until( //
pipe, sb, "message 5\n", //
[&](error_code ec, size_t) { //
std::cout << "Got message 5 (" << ec.message() << ")" << std::endl;
});
This works:
Live On Coliru
#include <boost/process.hpp>
#include <boost/process/async.hpp>
#include <boost/asio.hpp>
#include <iostream>
namespace bp = boost::process;
using boost::system::error_code;
namespace /*file-static*/ {
using namespace std::chrono_literals;
static auto now = std::chrono::steady_clock::now;
static const auto t0 = now();
static auto timestamp() {
return std::to_string((now() - t0) / 1.s) + "s ";
}
} // namespace
int main() {
boost::asio::io_context io;
bp::async_pipe pipe(io);
auto on_exit = [](int code, std::error_code ec) {
std::cout << timestamp() << "on_exit: " << ec.message() << " code "
<< code << std::endl;
};
bp::child c( //
"/bin/bash",
std::vector<std::string>{
"-c",
"for a in {1..20}; do sleep 1; echo message $a; done",
}, //
bp::std_out > pipe, //
bp::on_exit(on_exit), //
io);
boost::asio::streambuf sb;
async_read_until( //
pipe, sb, "message 5\n", //
[&](error_code ec, size_t) { //
std::cout << timestamp() << "Got message 5 (" << ec.message() << ")"
<< std::endl;
});
io.run();
}
Prints
5.025400s Got message 5 (Success)
20.100547s on_exit: Success code 0
So you can respond to content you're looking for when it happens. Keep in mind OS and shells do stream buffering on pipes, but the default is line-buffering so, you can expect to receive input as soon as a newline is printed.
Large Buffers?
The above kinda assumes that you can buffer the entire output up to the interesting message. What if that is gigabytes? As long as your pattern isn't gigabytes, you can keep reading until the criteria is matched.
Let's morph our example into an async grep that looks for the regex class\s*\w+_heap in all of the boost headers. Of course, this is many megabytes of data, but we use only a 10Kb buffer:
std::string text;
auto buf = boost::asio::dynamic_buffer(text, 10 * 1024); // max 10 kilobyte
size_t total_received =0;
boost::regex const re(R"(class\s*\w+_heap)");
Now we make a read loop that reads until match or when the buffer is full:
std::function<void()> wait_for_message;
wait_for_message = [&] {
async_read_until( //
pipe, buf, re, //
[&](error_code ec, size_t received) { //
std::cerr << '\x0d' << timestamp() << "Checking for message ("
<< ec.message() << ", total " << total_received
<< ") ";
if (received || ec != boost::asio::error::not_found) {
total_received += received;
buf.consume(received);
boost::smatch m;
if (regex_search(text, m, re)) {
std::cout << "\n" << timestamp()
<< "Found: " << std::quoted(m.str()) << " at "
<< (total_received - m.size()) << " bytes"
<< std::endl;
}
} else {
// discard 90% of buffer capacity
auto discard =
std::min(buf.max_size() / 10 * 9, buf.size());
total_received += discard;
buf.consume(discard);
}
if (!ec | (ec == boost::asio::error::not_found))
wait_for_message();
else
std::cout << "\n" << timestamp() << ec.message() << std::endl;
});
};
Of course, this system might miss matches if the match exceeds 10% of the buffer size (because we only keep 10% of the previous buffer contents to allow for matches overlapping read boundaries).
Again, see it Live On Coliru
#include <boost/process.hpp>
#include <boost/process/async.hpp>
#include <boost/asio.hpp>
#include <boost/regex.hpp>
#include <iostream>
#include <iomanip>
namespace bp = boost::process;
using boost::system::error_code;
namespace /*file-static*/ {
using namespace std::chrono_literals;
static auto now = std::chrono::steady_clock::now;
static const auto t0 = now();
static auto timestamp() {
return std::to_string((now() - t0) / 1.s) + "s ";
}
} // namespace
int main() {
boost::asio::io_context io;
bp::async_pipe pipe(io);
auto on_exit = [](int code, std::error_code ec) {
std::cout << timestamp() << "on_exit: " << ec.message() << " code "
<< code << std::endl;
};
bp::child c( //
"/usr/bin/find",
std::vector<std::string>{"/usr/local/include/boost", "-name",
"*.hpp", "-exec", "cat", "{}", "+"},
bp::std_out > pipe, //
bp::on_exit(on_exit), //
io);
std::string text;
auto buf = boost::asio::dynamic_buffer(text, 10 * 1024); // max 10 kilobyte
size_t total_received =0;
boost::regex const re(R"(class\s*\w+_heap)");
std::function<void()> wait_for_message;
wait_for_message = [&] {
async_read_until( //
pipe, buf, re, //
[&](error_code ec, size_t received) { //
std::cerr << '\x0d' << timestamp() << "Checking for message ("
<< ec.message() << ", total " << total_received
<< ") ";
if (received || ec != boost::asio::error::not_found) {
total_received += received;
buf.consume(received);
boost::smatch m;
if (regex_search(text, m, re)) {
std::cout << "\n" << timestamp()
<< "Found: " << std::quoted(m.str()) << " at "
<< (total_received - m.size()) << " bytes"
<< std::endl;
}
} else {
// discard 90% of buffer capacity
auto discard =
std::min(buf.max_size() / 10 * 9, buf.size());
total_received += discard;
buf.consume(discard);
}
if (!ec | (ec == boost::asio::error::not_found))
wait_for_message();
else
std::cout << "\n" << timestamp() << ec.message() << std::endl;
});
};
wait_for_message();
io.run();
std::cout << timestamp() << " - Done, total_received: " << total_received << "\n";
}
Which prints
2.033324s Found: "class d_ary_heap" at 6747512 bytes
2.065290s Found: "class pairing_heap" at 6831390 bytes
2.071888s Found: "class binomial_heap" at 6860833 bytes
2.072715s Found: "class skew_heap" at 6895677 bytes
2.073348s Found: "class fibonacci_heap" at 6921559 bytes
34.729355s End of file
34.730515s on_exit: Success code 0
34.730593s - Done, total_received: 154746011
Or live from my machine:

Use Futures with Boost Thread Pool

I'm implementing a TCP client which read and send files and strings and I'm using Boost as my main library. I'd like to continue reading or sending files while I keep sending strings, which in these case are the commands to send to the server. For this purpose I thought about using a Thread Pool in order to not overload the client. My question is, can I use futures to use callbacks when on of the thread in the pool ends? In case I can't, is there any other solution?
I was doing something like this, where pool_ is a boost:asio:thread_pool
void send_file(std::string const& file_path){
boost::asio::post(pool_, [this, &file_path] {
handle_send_file(file_path);
});
// DO SOMETHING WHEN handle_send_file ENDS
}
void handle_send_file(std::string const& file_path) {
boost::array<char, 1024> buf{};
boost::system::error_code error;
std::ifstream source_file(file_path, std::ios_base::binary | std::ios_base::ate);
if(!source_file) {
std::cout << "[ERROR] Failed to open " << file_path << std::endl;
//TODO gestire errore
}
size_t file_size = source_file.tellg();
source_file.seekg(0);
std::string file_size_readable = file_size_to_readable(file_size);
// First send file name and file size in bytes to server
boost::asio::streambuf request;
std::ostream request_stream(&request);
request_stream << file_path << "\n"
<< file_size << "\n\n"; // Consider sending readable version, does it change anything?
// Send the request
boost::asio::write(*socket_, request, error);
if(error){
std::cout << "[ERROR] Send request error:" << error << std::endl;
//TODO lanciare un'eccezione? Qua dovrò controllare se il server funziona o no
}
if(DEBUG) {
std::cout << "[DEBUG] " << file_path << " size is: " << file_size_readable << std::endl;
std::cout << "[DEBUG] Start sending file content" << std::endl;
}
long bytes_sent = 0;
float percent = 0;
print_percentage(percent);
while(!source_file.eof()) {
source_file.read(buf.c_array(), (std::streamsize)buf.size());
int bytes_read_from_file = source_file.gcount(); //int is fine because i read at most buf's size, 1024 in this case
if(bytes_read_from_file<=0) {
std::cout << "[ERROR] Read file error" << std::endl;
break;
//TODO gestire questo errore
}
percent = std::ceil((100.0 * bytes_sent) / file_size);
print_percentage(percent);
boost::asio::write(*socket_, boost::asio::buffer(buf.c_array(), source_file.gcount()),
boost::asio::transfer_all(), error);
if(error) {
std::cout << "[ERROR] Send file error:" << error << std::endl;
//TODO lanciare un'eccezione?
}
bytes_sent += bytes_read_from_file;
}
std::cout << "\n" << "[INFO] File " << file_path << " sent successfully!" << std::endl;
}
The operations posted to the pool end without the threads ending. That's the whole purpose of pooling the threads.
void send_file(std::string const& file_path){
post(pool_, [this, &file_path] {
handle_send_file(file_path);
});
// DO SOMETHING WHEN handle_send_file ENDS
}
This has several issues. The largest one is that you should not capture file_path by reference, as the argument is soon out of scope, and the handle_send_file call will run at an unspecified time in another thread. That's a race condition and dangling reference. Undefined Behaviour results.
Then the
// DO SOMETHING WHEN handle_send_file ENDS
is on a line which has no sequence relation with handle_send_file. In fact, it will probably run before that operation ever has a chance to start.
Simplifying
Here's a simplified version:
#include <boost/array.hpp>
#include <boost/asio.hpp>
#include <fstream>
#include <iostream>
namespace asio = boost::asio;
using asio::ip::tcp;
static asio::thread_pool pool_;
struct X {
std::unique_ptr<tcp::socket> socket_;
explicit X(unsigned short port) : socket_(new tcp::socket{ pool_ }) {
socket_->connect({ {}, port });
}
asio::thread_pool pool_;
std::unique_ptr<tcp::socket> socket_{ new tcp::socket{ pool_ } };
void send_file(std::string file_path) {
post(pool_, [=, this] {
send_file_implementation(file_path);
// DO SOMETHING WHEN send_file_implementation ENDS
});
}
// throws system_error exception
void send_file_implementation(std::string file_path) {
std::ifstream source_file(file_path,
std::ios_base::binary | std::ios_base::ate);
size_t file_size = source_file.tellg();
source_file.seekg(0);
write(*socket_,
asio::buffer(file_path + "\n" + std::to_string(file_size) + "\n\n"));
boost::array<char, 1024> buf{};
while (source_file.read(buf.c_array(), buf.size()) ||
source_file.gcount() > 0)
{
int n = source_file.gcount();
if (n <= 0) {
using namespace boost::system;
throw system_error(errc::io_error, system_category());
}
write(*socket_, asio::buffer(buf), asio::transfer_exactly(n));
}
}
};
Now, you can indeed run several of these operations in parallel (assuming several instances of X, so you have separate socket_ connections).
To do something at the end, just put code where I moved the comment:
// DO SOMETHING WHEN send_file_implementation ENDS
If you don't know what to do there and you wish to make a future ready at that point, you can:
std::future<void> send_file(std::string file_path) {
std::packaged_task<void()> task([=, this] {
send_file_implementation(file_path);
});
return post(pool_, std::move(task));
}
This overload of post magically¹ returns the future from the packaged task. That packaged task will set the internal promise with either the (void) return value or the exception thrown.
See it in action: Live On Coliru
int main() {
// send two files simultaneously to different connections
X clientA(6868);
X clientB(6969);
std::future<void> futures[] = {
clientA.send_file("main.cpp"),
clientB.send_file("main.cpp"),
};
for (auto& fut : futures) try {
fut.get();
std::cout << "Everything completed without error\n";
} catch(std::exception const& e) {
std::cout << "Error occurred: " << e.what() << "\n";
};
pool_.join();
}
I tested this while running two netcats to listen on 6868/6969:
nc -l -p 6868 | head& nc -l -p 6969 | md5sum&
./a.out
wait
The server prints:
Everything completed without error
Everything completed without error
The netcats print their filtered output:
main.cpp
1907
#include <boost/array.hpp>
#include <boost/asio.hpp>
#include <fstream>
#include <iostream>
#include <future>
namespace asio = boost::asio;
using asio::ip::tcp;
7ecb71992bcbc22bda44d78ad3e2a5ef -
¹ not magic: see https://www.boost.org/doc/libs/1_66_0/doc/html/boost_asio/reference/async_result.html

How to ensure that TCP file transfer is done (c++)

I am using boost to implement a TCP client and server. On the client side, I have to send multiple files one after the other. I use a separate mechanism to notify the server of a file transfer. If the server is ready to receive the file it will respond to the client and the transfer is initiated.
I defined asynchronous handlers to write the data and then let the OS take care of it by calling io_service.run(). To my knowledge io_service.run() blocks until there are no more handlers to be dispatched, however that does not mean that the data was actually received on the remote side correct? The issue is that after io_service.run() returns I initiate the next transfer, but the server is not done receiving the first one.
Do I need to implement some kind of external mechanism on the remote side to notify the client that the data was received or am I doing something wrong?
Client implementation:
#include "StdAfx.h"
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include <boost/enable_shared_from_this.hpp>
#include <boost/thread.hpp>
#include "AsyncTCPClient.h"
AsyncTCPClient::AsyncTCPClient(boost::asio::io_service& iIoService, const std::string& iServerIP, const std::string& iPath)
: mResolver(iIoService), mSocket(iIoService)
{
size_t wPos = iServerIP.find(':');
if(wPos==std::string::npos)
{
return;
}
std::string wPortStr = iServerIP.substr(wPos + 1);
std::string wServerIP = iServerIP.substr(0, wPos);
mSourceFile.open(iPath, std::ios_base::binary | std::ios_base::ate);
if(!mSourceFile)
{
LOG(LOGERROR) << "Failed to open file: " << iPath;
return;
}
size_t wFileSize = mSourceFile.tellg();
mSourceFile.seekg(0);
std::ostream wRequestStream(&mRequest);
wRequestStream << iPath << "\n" << wFileSize << "\n\n";
LOG(LOGINFO) << "File to transfer: " << iPath;
LOG(LOGINFO) << "Filesize: " << wFileSize << " bytes";
tcp::resolver::query wQuery(wServerIP, wPortStr);
mResolver.async_resolve(wQuery, boost::bind(&AsyncTCPClient::HandleResolve, this, boost::asio::placeholders::error, boost::asio::placeholders::iterator));
}
AsyncTCPClient::~AsyncTCPClient()
{
}
void AsyncTCPClient::HandleResolve(const boost::system::error_code & iErr, tcp::resolver::iterator iEndpointIterator)
{
if(!iErr)
{
tcp::endpoint wEndpoint = *iEndpointIterator;
mSocket.async_connect(wEndpoint, boost::bind(&AsyncTCPClient::HandleConnect, this, boost::asio::placeholders::error, ++iEndpointIterator));
}
else
{
LOG(LOGERROR) << "Error: " << iErr.message();
}
}
void AsyncTCPClient::HandleConnect(const boost::system::error_code &iErr, tcp::resolver::iterator iEndpointIterator)
{
if(!iErr)
{
boost::asio::async_write(mSocket, mRequest, boost::bind(&AsyncTCPClient::HandleWriteFile, this, boost::asio::placeholders::error));
}
else if(iEndpointIterator != tcp::resolver::iterator())
{
mSocket.close();
tcp::endpoint wEndpoint = *iEndpointIterator;
mSocket.async_connect(wEndpoint, boost::bind(&AsyncTCPClient::HandleConnect, this, boost::asio::placeholders::error, ++iEndpointIterator));
}
else
{
LOG(LOGERROR) << "Error: " << iErr.message();
}
}
void AsyncTCPClient::HandleWriteFile(const boost::system::error_code& iErr)
{
if(!iErr)
{
if(mSourceFile)
{
mSourceFile.read(mBuffer.c_array(), (std::streamsize)mBuffer.size());
// EOF reached
if(mSourceFile.gcount() <= 0)
{
LOG(LOGINFO) << "File transfer done";
return;
}
//LOG(LOGTRACE) << "Send " << mSourceFile.gcount() << "bytes, total: " << mSourceFile.tellg() << " bytes.\n";
boost::asio::async_write(mSocket, boost::asio::buffer(mBuffer.c_array(), mSourceFile.gcount()), boost::bind(&AsyncTCPClient::HandleWriteFile, this, boost::asio::placeholders::error));
}
else
{
LOG(LOGINFO) << "File transfer done";
return;
}
}
else
{
LOG(LOGERROR) << "Error value: " << iErr.value();
LOG(LOGERROR) << "Error message: " << iErr.message();
throw std::exception();
}
}
Server implementation:
#include "StdAfx.h"
#include <boost/array.hpp>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include <iostream>
#include <fstream>
#include <boost/enable_shared_from_this.hpp>
#include "AsyncTCPClient.h"
#include "AsyncTCPServer.h"
#include "Debug.h"
AsyncTCPServer::AsyncTCPServer(unsigned short iPort, const std::string iFilePath)
:mAcceptor(mIoService, boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), iPort), true)
{
mAsyncTCPConnectionPtr wNewConnection(new AsyncTCPConnection(mIoService, iFilePath));
mAcceptor.async_accept(wNewConnection->Socket(), boost::bind(&AsyncTCPServer::HandleAccept, this, wNewConnection, boost::asio::placeholders::error));
mIoService.run();
}
AsyncTCPServer::~AsyncTCPServer()
{
mIoService.stop();
}
void AsyncTCPServer::HandleAccept(mAsyncTCPConnectionPtr iCurConnection, const boost::system::error_code& iErr)
{
if (!iErr)
{
iCurConnection->Start();
}
else
{
BIOLOG(BioSans::LOGERROR) << " " << iErr << ", " << iErr.message();
}
}
Connection implementation:
#include "StdAfx.h"
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include <iostream>
#include <fstream>
#include "Debug.h"
#include "AsyncTCPConnection.h"
AsyncTCPConnection::AsyncTCPConnection(boost::asio::io_service& iIoService, const std::string iFilePath)
: mSocket(iIoService), mFileSize(0), mFilePath(iFilePath)
{
}
AsyncTCPConnection::~AsyncTCPConnection()
{
}
void AsyncTCPConnection::Start()
{
LOG(LOGINFO) << "Start";
async_read_until(mSocket, mRequestBuffer, "\n\n", boost::bind(&AsyncTCPConnection::HandleReadRequest, shared_from_this(), boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
}
void AsyncTCPConnection::HandleReadRequest(const boost::system::error_code& iErr, std::size_t iBytesTransferred)
{
if(iErr)
{
return HandleError(__FUNCTION__, iErr);
}
LOG(LOGTRACE) << "(" << iBytesTransferred << ")" << ", in_avail = " << mRequestBuffer.in_avail() << ", size = " << mRequestBuffer.size() << ", max_size = " << mRequestBuffer.max_size();
std::istream wRequestStream(&mRequestBuffer);
std::string wFilePath;
wRequestStream >> wFilePath;
wRequestStream >> mFileSize;
wRequestStream.read(mBuffer.c_array(), 2);
mOutputFile.open(mFilePath, std::ios_base::binary);
if(!mOutputFile)
{
LOG(LOGERROR) << "Failed to open: " << wFilePath;
return;
}
do
{
wRequestStream.read(mBuffer.c_array(), (std::streamsize)mBuffer.size());
LOG(LOGTRACE) << "Write " << wRequestStream.gcount() << " bytes";
mOutputFile.write(mBuffer.c_array(), wRequestStream.gcount());
}
while(wRequestStream.gcount() > 0);
async_read(mSocket, boost::asio::buffer(mBuffer.c_array(), mBuffer.size()),boost::bind(&AsyncTCPConnection::HandleReadFileContent, shared_from_this(), boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
}
void AsyncTCPConnection::HandleReadFileContent(const boost::system::error_code& iErr, std::size_t iBytesTransferred)
{
if(iBytesTransferred>0)
{
mOutputFile.write(mBuffer.c_array(), (std::streamsize)iBytesTransferred);
LOG(LOGTRACE) << "Received " << mOutputFile.tellp() << " bytes";
if (mOutputFile.tellp()>=(std::streamsize)mFileSize)
{
return;
}
}
if(iErr)
{
return HandleError(__FUNCTION__, iErr);
}
async_read(mSocket, boost::asio::buffer(mBuffer.c_array(), mBuffer.size()), boost::bind(&AsyncTCPConnection::HandleReadFileContent, shared_from_this(), boost::asio::placeholders::error, boost::asio::placeholders::bytes_transferred));
}
void AsyncTCPConnection::HandleError(const std::string& function_name, const boost::system::error_code& err)
{
LOG(LOGERROR) << " in " << function_name <<" due to " << err <<" " << err.message();
}
Code to send file:
void SendFile(std::string iFilePath, std::string iServerIP)
{
static int wRetries = 0;
try
{
boost::asio::io_service wIoService;
LOG(LOGINFO) << "Sending data to: " << iServerIP;
LOG(LOGINFO) << "Filename is: " << iFilePath;
AsyncTCPClient client(wIoService, iServerIP, iFilePath);
wIoService.run();
// here I want to make sure that the data got to the remote host
// it looks like wIoService.run() returns once bytes are written to the socket
}
catch(std::exception)
{
// retry 3 times in case something goes wrong
if(wRetries < 3)
{
wRetries++;
LOG(LOGWARNING) << "Problem sending file : " << iFilePath << " to address: " << iServerIP;
LOG(LOGWARNING) << "Retry #" << wRetries;
SendFile(iFilePath, iServerIP);
}
else
{
LOG(LOGERROR) << "Unable to send file: " << iFilePath << " to address: " << iServerIP;
wRetries = 0;
return;
}
}
wRetries = 0;
}
You could use 'boost::asio::io_service::work' to keep your IO Service thread alive until you want to shutdown your process. Otherwise, io_service::run will return when all posted tasks are complete.
http://www.boost.org/doc/libs/1_65_1/doc/html/boost_asio/reference/io_service__work.html
I wouldn't think you'd want to exit and recreate threads over and over for every transfer.
You can use a condition variable to signal when you want to shutdown the io_service thread and then destroy the work object, or just simply destroy the work object.
As to knowing when the server has received everything you've sent. You could design something in your protocol or just rely on the guaranteed aspects of TCP. I suggest reading up on TCP and IO-Completion in general.