I am using the POCO reactor pattern to handle incoming TCP connections. Connections might take anywhere from a couple of seconds to minutes, depending on the request type, as follows:
try{
ServerSocket serverSocket(port);
reactor = new SocketReactor();
ParallelSocketAcceptor<BFSTcpServiceHandler,SocketReactor> acceptor(serverSocket, *reactor);
//Start Reactor
reactor->run();
} catch(Exception& e) {
LOG(ERROR)<<"ERROR in initializing TCPServer:"<<e.message();
return;
}
And here is the Handler:
BFSTcpServiceHandler::BFSTcpServiceHandler(StreamSocket& _socket,
SocketReactor& _reactor): socket(_socket),reactor(_reactor) {
//Disable TCP keep-alive for this socket
socket.setKeepAlive(false);
//Register Callbacks
reactor.addEventHandler(socket, NObserver<BFSTcpServiceHandler,
ReadableNotification>(*this, &BFSTcpServiceHandler::onReadable));
/*reactor.addEventHandler(socket, NObserver<BFSTcpServiceHandler,
WritableNotification>(*this, &BFSTcpServiceHandler::onWriteable));*/
reactor.addEventHandler(socket, NObserver<BFSTcpServiceHandler,
ShutdownNotification>(*this, &BFSTcpServiceHandler::onShutdown));
reactor.addEventHandler(socket, NObserver<BFSTcpServiceHandler,
ErrorNotification>(*this, &BFSTcpServiceHandler::onError));
reactor.addEventHandler(socket, NObserver<BFSTcpServiceHandler,
TimeoutNotification>(*this, &BFSTcpServiceHandler::onTimeout));
/*reactor.addEventHandler(socket, NObserver<BFSTcpServiceHandler,
IdleNotification>(*this, &BFSTcpServiceHandler::onIdle));*/
}
BFSTcpServiceHandler::~BFSTcpServiceHandler() {
//Unregister Callbacks
reactor.removeEventHandler(socket, NObserver<BFSTcpServiceHandler,
ReadableNotification>(*this, &BFSTcpServiceHandler::onReadable));
...
//Close socket
try {
socket.close();
}catch(...){}
}
void BFSTcpServiceHandler::onReadable(
const Poco::AutoPtr<Poco::Net::ReadableNotification>& pNf) {
//LOG(ERROR)<<"onReadable:"<<socket.peerAddress().toString();
try{
//Read and process request
} catch(Exception &e){
LOG(ERROR)<<"Error in reading request:"<<e.message();
delete this;
return; //avoid the second delete below
}
//So after a connection is served, just close it!
delete this;
}
void BFSTcpServiceHandler::onShutdown(
const Poco::AutoPtr<Poco::Net::ShutdownNotification>& pNf) {
LOG(ERROR)<<"onShutdown:"<<socket.peerAddress().toString();
//Call destructor of this class
delete this;
}
void BFSTcpServiceHandler::onWriteable(
const Poco::AutoPtr<Poco::Net::WritableNotification>& pNf) {
static bool once = true;
if(once) {
LOG(ERROR)<<"onWritable:"<<socket.peerAddress().toString()<<" keepAlive?"<<socket.getKeepAlive()<<" isBlocking?"<<socket.getBlocking()<<" noDeley?"<<socket.getNoDelay();
once = false;
}
}
void BFSTcpServiceHandler::onTimeout(
const Poco::AutoPtr<Poco::Net::TimeoutNotification>& pNf) {
LOG(ERROR)<<"\nTIMEOUT! onTimeout:"<<socket.peerAddress().toString();
}
void BFSTcpServiceHandler::onError(
const Poco::AutoPtr<Poco::Net::ErrorNotification>& pNf) {
LOG(ERROR)<<"\nERROR! onError:"<<socket.peerAddress().toString();
}
void BFSTcpServiceHandler::onIdle(
const Poco::AutoPtr<Poco::Net::IdleNotification>& pNf) {
LOG(ERROR)<<"\nIDLE! onIdle:"<<socket.peerAddress().toString();
}
The code works fine; however, after a while it gets stuck, meaning that the server still accepts connections but onReadable is not called anymore. For example, after it gets stuck I can telnet to the server, but when I send data onReadable is not fired. Using netstat I noticed that some data is sitting in the receive queue (Recv-Q) and the reactor never fires the onReadable event.
I thought it was due to hitting the system's connection/file descriptor limits, but there are actually not many connections open when the system gets stuck.
Any comment or help is appreciated.
Thanks,
The problem was a faulty NIC/driver. I changed the code to regular POSIX sockets and had the same issue; switching the NIC solved it. I am not sure whether it was a driver or a hardware issue.
I have been using this example from Boost for a single-threaded server: https://www.boost.org/doc/libs/1_81_0/doc/html/boost_asio/example/cpp03/http/server/
I modified the server class so that I could start the server, end the server and restart it.
server_impl::server_impl(
const std::string& address,
const std::string& port,
const std::shared_ptr<boost::asio::io_context>& io_context,
const std::shared_ptr<connection_manager>& connection_manager,
const std::shared_ptr<request_handler>& request_handler) :
io_context_(io_context),
acceptor_(*io_context_),
connection_manager_(connection_manager),
new_connection_(),
request_handler_(request_handler),
address_(address),
port_(port)
{
}
void server_impl::start_server()
{
// Open the acceptor with the option to reuse the address (i.e. SO_REUSEADDR).
boost::asio::ip::tcp::resolver resolver(*io_context_);
boost::asio::ip::tcp::endpoint endpoint =
*resolver.resolve(address_, port_).begin();
acceptor_.open(endpoint.protocol());
acceptor_.set_option(boost::asio::ip::tcp::acceptor::reuse_address(true));
acceptor_.bind(endpoint);
acceptor_.listen();
start_accept();
}
void server_impl::end_server()
{
/* The server is stopped by cancelling all outstanding asynchronous
operations */
acceptor_.close();
connection_manager_->stop_all();
}
void server_impl::start_accept()
{
new_connection_.reset(new connection_impl(
io_context_,
connection_manager_,
request_handler_));
acceptor_.async_accept(
new_connection_->socket(),
boost::bind(
&server_impl::handle_accept,
this,
boost::asio::placeholders::error));
}
void server_impl::handle_accept(const boost::system::error_code& e)
{
/* Check whether the server was stopped before this completion handler had a
chance to run. */
if (!acceptor_.is_open())
{
return;
}
if (!e)
{
connection_manager_->start(new_connection_);
}
start_accept();
}
void server_impl::serve_static(const std::string& path)
{
request_handler_->serve_from_directory(path);
}
You can see I moved the resolving and listening from the constructor to a start_server function. I then moved the contents of handle_stop to end_server.
This works if I just start the server and end it once, like this (using a thread to end it after 30 seconds, just to test):
int main()
{
server->serve_static("/path/to/html");
server->start_server();
std::thread t([&server](){sleep(30); server->end_server();});
io_context->run();
t.join();
}
But if I try to restart the server after it has been stopped, io_context->run(); returns immediately. I thought it should block, since async_accept had been called on the acceptor again:
int main()
{
server->serve_static("/path/to/html");
server->start_server();
std::thread t([&server](){sleep(30); server->end_server();});
io_context->run();
t.join();
server->start_server();
io_context->run();
}
The server works for the first 30 seconds and then stops as expected, but when it is started again, run() returns immediately and the server doesn't start.
Per the Boost documentation:
Subsequent calls to run(), run_one(), poll() or poll_one() will return immediately unless there is a prior call to restart().
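Based on that, the io_context has to be restarted before the second run(). A minimal sketch of the adjusted main (server construction omitted, as in the original):
int main()
{
    server->serve_static("/path/to/html");
    server->start_server();
    std::thread t([&server](){ sleep(30); server->end_server(); });
    io_context->run();
    t.join();

    // Clear the io_context's "stopped" state before reusing it;
    // without this the second run() returns immediately.
    io_context->restart();

    server->start_server();   // re-opens the acceptor and queues a new async_accept
    io_context->run();        // blocks again until end_server() is called
}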
I'm trying to set up a gRPC async server which receives a stream from the client (ServerAsyncReader usage). I'm a bit confused by the process and I'm stuck on an assertion error.
Here is my GreeterAsyncServer.
The assertion that fails is GPR_ASSERT(ok);: ok is false when the client sends colors (strings).
void GreeterAsyncServer::HandleRpcs() {
// Spawn a new CallData instance to serve new clients.
new CallDataSendColors(&service_, cq_.get(), this);
void* tag; // uniquely identifies a request.
bool ok;
while (true) {
// Block waiting to read the next event from the completion queue. The
// event is uniquely identified by its tag, which in this case is the
// memory address of a CallData instance.
// The return value of Next should always be checked. This return value
// tells us whether there is any kind of event or cq_ is shutting down.
GPR_ASSERT(cq_->Next(&tag, &ok));
GPR_ASSERT(ok);
static_cast<CallData*>(tag)->Proceed();
}
}
void GreeterAsyncServer::Run(std::string server_address) {
ServerBuilder builder;
// Listen on the given address without any authentication mechanism.
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
// Register "service" as the instance through which we'll communicate with
// clients. In this case it corresponds to an *asynchronous* service.
builder.RegisterService(&service_);
// Get hold of the completion queue used for the asynchronous communication
// with the gRPC runtime.
cq_ = builder.AddCompletionQueue();
// Finally assemble the server.
server_ = builder.BuildAndStart();
std::cout << "Server listening on " << server_address << std::endl;
// Proceed to the server's main loop.
HandleRpcs();
}
Here is my .proto file:
// .proto file
// The greeting service definition.
service Greeter {
// Sends several colors to the server and receives the replies
rpc SendColors (stream ColorRequest) returns (HelloReply) {}
}
// The response message containing the greetings
message HelloReply {
string message = 1;
}
// The request message containing the color
message ColorRequest {
string color = 1;
}
Here is my CallData class:
#pragma once
#include "CallDataT.h"
using grpc::ServerAsyncReader;
using helloworld::HelloReply;
using helloworld::ColorRequest;
class CallDataSendColors {
protected:
enum CallStatus { CREATE, PROCESS, FINISH };
CallStatus status_;
Greeter::AsyncService* service_;
ServerCompletionQueue* completionQueue_;
ServerContext serverContext_;
// What we get from the client.
ColorRequest request_;
// What we send back to the client.
HelloReply reply_;
// The means to get back to the client.
ServerAsyncReader<HelloReply, ColorRequest> reader_;
virtual void AddNextToCompletionQueue() override {
new CallDataSendColors(service_, completionQueue_, observer_);
}
virtual void WaitForRequest() override {
service_->RequestSendColors(&serverContext_, &reader_, completionQueue_, completionQueue_, this);
}
virtual void HandleRequest() override {
reader_.Read(&request_, this);
}
virtual void Proceed() override {
if (status_ == CREATE) {
status_ = PROCESS;
WaitForRequest();
}
else if (status_ == PROCESS) {
AddNextToCompletionQueue();
HandleRequest();
status_ = FINISH;
reply_.set_message(std::string("Color ") + std::string("received"));
reader_.Finish(reply_, grpc::Status::OK, this);
}
else {
// We're done! Self-destruct!
if (status_ != FINISH) {
// Log some error message
}
delete this;
}
}
public:
CallDataSendColors(Greeter::AsyncService* service, ServerCompletionQueue* completionQueue, IObserver* observer = nullptr) :
status_(CREATE),
service_(service),
completionQueue_(completionQueue),
reader_(&serverContext_) {
Proceed();
}
};
As you can see in the HandleRequest() method, reader_.Read(&request_, this) is called only once. I don't know how to keep calling it as long as messages are coming in.
I would appreciate it if someone could help me.
Thank you in advance.
I've found the solution. The method GreeterAsyncServer::HandleRpcs() must be edited like this:
while (true) {
bool gotEvent = cq_->Next(&tag, &ok);
if (false == gotEvent || true == isShuttingDown_) {
break;
}
//GPR_ASSERT(ok);
if (true == ok) {
static_cast<CallData*>(tag)->Proceed();
}
}
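For the second part of the question (keeping Read() going while the client streams), the key is that each Read() completion with ok == true delivers one message and should immediately queue the next Read(); ok == false on a Read() means the client has finished its side of the stream, and only then should Finish() be called. Here is a rough sketch of a Proceed() restructured that way; it is not the exact code from my server, and the ok parameter and firstEvent_ flag are assumptions for illustration:
// Sketch only: chained reads. Assumes HandleRpcs() forwards the ok flag
// from cq_->Next() into Proceed(ok), and that the constructor calls
// Proceed(true) to kick things off. firstEvent_ is a hypothetical bool
// member initialized to true.
void Proceed(bool ok) {
    if (status_ == CREATE) {
        // Ask gRPC for the next SendColors call; 'this' is the tag.
        status_ = PROCESS;
        service_->RequestSendColors(&serverContext_, &reader_,
                                    completionQueue_, completionQueue_, this);
    } else if (status_ == PROCESS) {
        if (firstEvent_) {
            // The RPC just started: spawn a handler for the next client
            // and issue the first Read().
            firstEvent_ = false;
            new CallDataSendColors(service_, completionQueue_, observer_);
            reader_.Read(&request_, this);
        } else if (ok) {
            // One ColorRequest arrived in request_; keep reading.
            reader_.Read(&request_, this);
        } else {
            // ok == false on a Read(): the client closed its side of the
            // stream, so send the single reply and finish the call.
            status_ = FINISH;
            reply_.set_message("Colors received");
            reader_.Finish(reply_, grpc::Status::OK, this);
        }
    } else {
        // FINISH completion: we're done, self-destruct.
        delete this;
    }
}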
I am following ASIO's async_tcp_echo_server.cpp example to write a server.
My server logic looks like this (.cpp part):
1. Server startup:
bool Server::Start()
{
mServerThread = std::thread(&Server::ServerThreadFunc, this, std::ref(ios));
//ios is asio::io_service
}
2. Init the acceptor and listen for incoming connections:
void Server::ServerThreadFunc(io_service& service)
{
tcp::endpoint endp{ address::from_string(LOCAL_HOST),MY_PORT };
mAcceptor = acceptor_ptr(new tcp::acceptor{ service,endp });
// Add a job to start accepting connections.
StartAccept(*mAcceptor);
// Process the event loop. Blocks here until the service is terminated.
service.run();
std::cout << "Server thread exiting." << std::endl;
}
3. Accept a connection and start reading from the client:
void Server::StartAccept(tcp::acceptor& acceptor)
{
acceptor.async_accept([&](std::error_code err, tcp::socket socket)
{
if (!err)
{
std::make_shared<Connection>(std::move(socket))->StartRead(mCounter);
StartAccept(acceptor);
}
else
{
std::cerr << "Error:" << "Failed to accept new connection" << err.message() << std::endl;
return;
}
});
}
void Connection::StartRead(uint32_t frameIndex)
{
asio::async_read(mSocket, asio::buffer(&mHeader, sizeof(XHeader)), std::bind(&Connection::ReadHandler, shared_from_this(), std::placeholders::_1, std::placeholders::_2, frameIndex));
}
So the Connection instance finally triggers the ReadHandler callback, where I perform the actual read and write:
void Connection::ReadHandler(const asio::error_code& error, size_t bytes_transfered, uint32_t frameIndex)
{
if (bytes_transfered == sizeof(XHeader))
{
uint32_t reply;
if (mHeader.code == 12345)
{
reply = (uint32_t)12121;
size_t len = asio::write(mSocket, asio::buffer(&reply, sizeof(uint32_t)));
}
else
{
reply = (uint32_t)0;
size_t len = asio::write(mSocket, asio::buffer(&reply, sizeof(uint32_t)));
this->mSocket.shutdown(tcp::socket::shutdown_both);
return;
}
}
while (mSocket.is_open())
{
XPacket packet;
packet.dataSize = rt->buff.size();
packet.data = rt->buff.data();
std::vector<asio::const_buffer> buffers;
buffers.push_back(asio::buffer(&packet.dataSize,sizeof(uint64_t)));
buffers.push_back(asio::buffer(packet.data, packet.dataSize));
auto self(shared_from_this());
asio::async_write(mSocket, buffers,
[this, self](const asio::error_code error, size_t bytes_transfered)
{
if (error)
{
ERROR(200, "Error sending packet");
ERROR(200, error.message().c_str());
}
}
);
}
}
Now, here is the problem. The server receives data from the client and replies using synchronous asio::write just fine. But when it comes to asio::async_read or asio::async_write inside the while loop, the lambda callback never gets triggered unless I put io_context().run_one(); immediately after it. I don't understand why I see this behaviour. I do call io_service.run() right after the acceptor init, so it blocks there until the server exits. The only difference between my code and the asio example, as far as I can tell, is that I run my logic from a custom thread.
Your callback isn't returning, preventing the event loop from executing other handlers.
In general, if you want an asynchronous flow, you would chain callbacks: e.g. the callback checks is_open(), and if true, calls async_write() with itself as the completion handler.
In either case, the callback returns.
This allows the event loop to run, calling your callback, and so on.
In short, you should make sure your asynchronous callbacks always return in a reasonable time frame.
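As a sketch of that chaining applied to the code above (DoWriteNextPacket and mPacketQueue are made-up names, not part of the original Connection class), the while (mSocket.is_open()) loop would be replaced by something like this:
// Sketch: send queued packets one at a time. Each async_write's completion
// handler starts the next write and then returns, so io_service::run() gets
// control back between writes and can dispatch reads and accepts.
// Assumes: std::deque<XPacket> mPacketQueue; owned by Connection, and that
// each XPacket keeps its payload alive until its write completes.
void Connection::DoWriteNextPacket()
{
    if (!mSocket.is_open() || mPacketQueue.empty())
        return;

    XPacket& packet = mPacketQueue.front();
    std::vector<asio::const_buffer> buffers;
    buffers.push_back(asio::buffer(&packet.dataSize, sizeof(uint64_t)));
    buffers.push_back(asio::buffer(packet.data, packet.dataSize));

    auto self(shared_from_this());
    asio::async_write(mSocket, buffers,
        [this, self](const asio::error_code& error, size_t /*bytes_transfered*/)
        {
            if (error)
            {
                ERROR(200, "Error sending packet");
                ERROR(200, error.message().c_str());
                return;
            }
            mPacketQueue.pop_front();   // this packet is fully sent
            DoWriteNextPacket();        // chain the next write, then return
        });
    // No loop and no blocking here: we return straight away.
}
ReadHandler would then just push packets onto mPacketQueue and call DoWriteNextPacket() once, instead of looping.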
I did my best to follow the instructions in the ZMQ termination whitepaper, but so far I'm failing miserably.
I have a parent class, which spawns a listener thread (using win32-pthreads).
According to the whitepaper, when terminating I should set the _stopped flag and delete the context, which in turn calls zmq_term() and releases the blocking recv(). Instead, what I get is either:
calling delete _zmqContext crashes the application (probably with a segmentation fault)
replacing the delete with zmq_term(_zmqContext) does not release the blocking recv()
I'm adding a partial code sample, which is long because I'm not sure which part may be important.
AsyncZmqListener.hpp:
class AsyncZmqListener
{
public:
AsyncZmqListener(const std::string uri);
~AsyncZmqListener();
void Start();
void Stop();
private:
static void* _threadEntryFunc(void* _this);
void _messageLoop();
private:
bool _stopped;
pthread_t _thread;
std::string _uri; // URI the listener binds to
zmq::context_t* _zmqContext;
};
AsyncZmqListener.cpp:
AsyncZmqListener::AsyncZmqListener(const std::string uri) : _uri(uri)
{
_zmqContext = new zmq::context_t(1);
_stopped = false;
}
void AsyncZmqListener::Start()
{
int status = pthread_create(&_thread, NULL, _threadEntryFunc, this);
}
void AsyncZmqListener::Stop()
{
_stopped = true;
delete _zmqContext; // <-- Crashes the application. Changing to 'zmq_term(_zmqContext)' does not terminate recv()
pthread_join(_thread, NULL); // <-- This waits forever
}
void AsyncZmqListener::_messageLoop()
{
zmq::socket_t listener(*_zmqContext, ZMQ_PULL);
listener.bind(_uri.c_str());
zmq::message_t message;
while(!_stopped)
{
listener.recv(&message); // <-- blocks forever
process(message);
}
}
P.S.
I'm aware of this related question, but none of the answers quite matches the clean-exit flow described in the whitepaper. I will resort to polling if I have to...
ZMQ recv() does unblock after its related context is terminated.
I was not aware that recv() throws an ETERM exception when that happens.
Revised code that works:
void AsyncZmqListener::_messageLoop()
{
zmq::socket_t listener(*_zmqContext, ZMQ_PULL);
listener.bind(_uri.c_str());
zmq::message_t message;
while(!_stopped)
{
try
{
listener.recv(&message);
process(message);
}
catch(const zmq::error_t& ex)
{
// recv() throws ETERM when the zmq context is destroyed,
// as when AsyncZmqListener::Stop() is called
if(ex.num() != ETERM)
throw;
}
}
}
So I'm starting to do some research on alternatives for implementing a high volume client/server system, and I'm currently looking at Poco's Reactor framework since I'm using Poco for so much of my application frameworks now.
The incoming packet sizes are going to be pretty small, so I think it will work fine from the perspective of reading the data from the clients. But the operations that will be performed based on the client input will be relatively expensive and may need to be offloaded to another process or even another server. And the responses sent back to the client will sometimes be fairly large. So obviously I can't block the reactor thread while that is taking place.
So I'm thinking if I just read the data in the reactor event handler and then pass it to another thread(pool) that processes the data, it would work out better.
What I'm not too sure about is the process for sending the responses back to the client when the operations are complete.
I can't find too much information about the best ways to use the framework. But I've done some testing and it looks like the reactor will fire the WritableNotification event repeatedly while the socket is writable. So would the optimal process be to queue up the data that needs to be sent in the object that receives the WritableNotification events and send small chunks each time the event is received?
Update: When I started testing this I was horrified to discover that server CPU usage went up to 100% on the core the server app was running on, with a single connection. But after some digging I found what I was doing wrong. I discovered that I don't need to register for WritableNotification events when the service handler is created; I only need to register when I have data to send. Then, once all of the data is sent, I unregister the event handler. This way the reactor doesn't have to keep calling the event handlers over and over when there is nothing to send. Now my CPU usage stays close to 0 even with 100 connections. Whew!
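In code, the pattern looks roughly like this (only a sketch; MyServiceHandler, _sendBuffer and the method names are hypothetical, not my actual handler):
// Queue the response and register for WritableNotification only now,
// when there is actually something to send.
void MyServiceHandler::send(const std::string& data)
{
    _sendBuffer.append(data);
    _reactor.addEventHandler(_socket, NObserver<MyServiceHandler,
        WritableNotification>(*this, &MyServiceHandler::onWritable));
}

void MyServiceHandler::onWritable(
    const Poco::AutoPtr<Poco::Net::WritableNotification>& pNf)
{
    // Send what the socket will take without blocking; keep the rest queued.
    int n = _socket.sendBytes(_sendBuffer.data(), (int)_sendBuffer.size());
    if (n > 0)
        _sendBuffer.erase(0, n);

    if (_sendBuffer.empty())
    {
        // Nothing left to send: unregister so the reactor stops firing
        // WritableNotification and the CPU stays idle.
        _reactor.removeEventHandler(_socket, NObserver<MyServiceHandler,
            WritableNotification>(*this, &MyServiceHandler::onWritable));
    }
}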
I have written a ServerConnector class copied from SocketConnector, except that it does not call connect() on the socket, because the socket is already connected. If a reactor is started with a ServiceHandler for notifications inside the run() function of a TCPServerConnection, the TCPServer class starts a new thread for each connection. So I get a multithreaded reactor pattern, but I do not know whether this is the best way to do it.
The ServerConnector class:
template <class ServiceHandler>
class ServerConnector
{
public:
explicit ServerConnector(StreamSocket& ss):
_pReactor(0),
_socket(ss)
/// Creates a ServerConnector, using the given Socket.
{
}
ServerConnector(StreamSocket& ss, SocketReactor& reactor):
_pReactor(0),
_socket(ss)
/// Creates a ServerConnector, using the given StreamSocket.
/// The ServerConnector registers itself with the given SocketReactor.
{
registerConnector(reactor);
onConnect();
}
virtual ~ServerConnector()
/// Destroys the ServerConnector.
{
unregisterConnector();
}
//
// this part is same with SocketConnector
//
private:
ServerConnector();
ServerConnector(const ServerConnector&);
ServerConnector& operator = (const ServerConnector&);
StreamSocket& _socket;
SocketReactor* _pReactor;
};
The EchoServiceHandler is an ordinary service handler:
class EchoServiceHandler
{
public:
EchoServiceHandler(StreamSocket& socket, SocketReactor& reactor):
_socket(socket),
_reactor(reactor)
{
_reactor.addEventHandler(_socket, Observer<EchoServiceHandler, ReadableNotification>(*this, &EchoServiceHandler::onReadable));
_reactor.addEventHandler(_socket, Observer<EchoServiceHandler, ErrorNotification>(*this, &EchoServiceHandler::onError));
}
~EchoServiceHandler()
{
_reactor.removeEventHandler(_socket, Observer<EchoServiceHandler, ErrorNotification>(*this, &EchoServiceHandler::onError));
_reactor.removeEventHandler(_socket, Observer<EchoServiceHandler, ReadableNotification>(*this, &EchoServiceHandler::onReadable));
}
void onReadable(ReadableNotification* pNf)
{
pNf->release();
char buffer[4096];
try {
int n = _socket.receiveBytes(buffer, sizeof(buffer));
if (n > 0)
{
_socket.sendBytes(buffer, n);
} else
onError();
} catch( ... ) {
onError();
}
}
void onError(ErrorNotification* pNf)
{
pNf->release();
onError();
}
void onError()
{
_socket.shutdown();
_socket.close();
_reactor.stop();
delete this;
}
private:
StreamSocket _socket;
SocketReactor& _reactor;
};
The EchoReactorConnection class works with TCPServer to run a reactor in each connection thread:
class EchoReactorConnection: public TCPServerConnection
{
public:
EchoReactorConnection(const StreamSocket& s): TCPServerConnection(s)
{
}
void run()
{
StreamSocket& ss = socket();
SocketReactor reactor;
ServerConnector<EchoServiceHandler> sc(ss, reactor);
reactor.run();
std::cout << "exit EchoReactorConnection thread" << std::endl;
}
};
The CppUnit test case is the same as TCPServerTest::testMultiConnections, but uses EchoReactorConnection for the multithreaded reactor:
void TCPServerTest::testMultithreadReactor()
{
ServerSocket svs(0);
TCPServerParams* pParams = new TCPServerParams;
pParams->setMaxThreads(4);
pParams->setMaxQueued(4);
pParams->setThreadIdleTime(100);
TCPServer srv(new TCPServerConnectionFactoryImpl<EchoReactorConnection>(), svs, pParams);
srv.start();
assert (srv.currentConnections() == 0);
assert (srv.currentThreads() == 0);
assert (srv.queuedConnections() == 0);
assert (srv.totalConnections() == 0);
//
// same as TCPServerTest::testMultiConnections()
//
// ....
///
}