macOS: DNSSD crash after calling fd_set on its socket - c++

I have the following code, which crashes my program, upon calling FD_SET.
void handleEvents(DNSServiceRef service, const int32_t timeout)
{
if (!service)
return;
const int fd = DNSServiceRefSockFD( service );
const int nfds = fd + 1;
if (fd < 0)
return;
int32_t result = servus::Result::PENDING;
while(result == servus::Result::PENDING)
{
fd_set fdSet;
FD_ZERO( &fdSet );
FD_SET( fd, &fdSet ); /// < The crash occurs here
const int result = ::select( nfds, &fdSet, 0, 0, 0);
switch (result)
{
case 0: // timeout
return;
case -1: // error
std::cerr << "Select error: " << strerror( errno ) << " (" << errno
<< ")" << std::endl;
if( errno != EINTR )
{
withdraw();
return;
}
break;
default:
if(FD_ISSET( fd, &fdSet ))
{
const auto error = DNSServiceProcessResult(service);
if(error != kDNSServiceErr_NoError)
{
std::cerr << "DNSServiceProcessResult error: " << error << std::endl;
withdraw();
return;
}
}
break;
}
}
}
The crash log is :
Exception Type: EXC_GUARD
Exception Codes: 0x6000000000000012, 0x0000000000000002
Exception Note: EXC_CORPSE_NOTIFY
Termination Reason: LIBSYSTEM, [0x2]
External Modification Warnings:
Debugger attached to process.
Thread 0 Crashed:
0 libsystem_kernel.dylib 0x00007fff6d62d96e os_fault_with_payload + 10
1 libsystem_kernel.dylib 0x00007fff6d62e451 __darwin_check_fd_set_overflow.cold.2 + 31
2 libsystem_kernel.dylib 0x00007fff6d61967c __darwin_check_fd_set_overflow + 68
3 score 0x00000001004f097e handleEvents(_DNSServiceRef_t*, int) + 302
I really don't understand where are things going wrong - the "critical chain" that leads to the crash seems to be
const int fd = DNSServiceRefSockFD( service );
const int nfds = fd + 1;
int result = 0;
fd_set fdSet;
FD_ZERO( &fdSet );
FD_SET( fd, &fdSet ); /// < The crash occurs here
result = ::select( nfds, &fdSet, 0, 0, 0);
fd_set fdSet;
FD_ZERO( &fdSet );
FD_SET( fd, &fdSet ); /// < or here

select() now returns with errno set to EINVAL when nfds is greater than FD_SETSIZE. Use a smaller value for nfds or compile with -D_DARWIN_UNLIMITED_SELECT.

The actual issue in this case was that I was having too many file descriptors open, which can be solved by increasing the rlimit with the following code:
void setup_min_fd(int min_fds)
{
struct rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim) != 0)
return;
if (rlim.rlim_cur > rlim_t(min_fds))
return;
rlim.rlim_cur = rlim.min_fds;
setrlimit(RLIMIT_NOFILE, &rlim);
}

Related

iocp socket close_wait status, how to fix it?

It is not easy to write a iocp console server,socket pool and thread pool works well,but after some times leater, the server can not connect again,though nothing wrong happens, why? I use procexp_16.05.1446001339.exe to check the process properties, I found lots of close_wait status, after some times again, close_wait status disappears, but the server still can not connect.Why is that? And how to fix it ?
#include "stdafx.h"
#include "Winsock2.h"
#include "Windows.h"
#include "Winbase.h"
#include "tlhelp32.h"
#include "tchar.h"
#include "Psapi.h"
#include "Winternl.h"
#include "Shlwapi.h"
#include "mstcpip.h"
#include
#include "ws2tcpip.h"
#include "time.h"
#pragma comment( lib, "Kernel32.lib" )
#pragma comment( lib, "Shlwapi.lib" )
#pragma comment( lib, "Psapi.lib" )
#pragma comment( lib, "Winmm.lib" )
#pragma comment( lib, "Ws2_32.lib" )
#define DATA_BUFSIZE 10240
#define OP_ACCEPT 1
#define OP_RECV 2
#define OP_SEND 3
#define OP_DIS 4
#define OP_ONACCEPT 5
//iocp struct
struct iocp_overlapped{
OVERLAPPED m_ol; //
int m_iOpType; //do type
SOCKET m_skServer; //server socket
SOCKET m_skClient; //client
DWORD m_recvBytes; //recv msg bytes
char m_pBuf[DATA_BUFSIZE]; //recv buf
WSABUF m_DataBuf; //recv data buf
int m_recv_timeout; //recv timeout
int m_send_timeout;
SOCKADDR_IN m_addrClient; //client address
SOCKADDR_IN m_addrServer; //server address
int m_isUsed; //client is active 1 yes 0 not
time_t m_active; //the last active time
int m_isCrashed; //is crashed? 0 not 1 yes
int m_online; //is online 1 yes 0 not
int m_usenum; //
//void (*handler)(int,struct tag_socket_data*); data->handler(res, data);
};
static SOCKET m_sock_listen = INVALID_SOCKET; //the server listen socket
class WingIOCP{
private:
char* m_listen_ip; //listen ip
int m_port; //listen port
int m_max_connect; //max connection
int m_recv_timeout; //recv timeout
int m_send_timeout; //send timeout
unsigned long* m_povs; //clients
//iocp worker
static VOID CALLBACK worker(
DWORD dwErrorCode,
DWORD dwBytesTrans,
LPOVERLAPPED lpOverlapped
);
//accept ex
static BOOL accept(
SOCKET sAcceptSocket,
PVOID lpOutputBuffer,
DWORD dwReceiveDataLength,
DWORD dwLocalAddressLength,
DWORD dwRemoteAddressLength,
LPDWORD lpdwBytesReceived,
LPOVERLAPPED lpOverlapped
);
//disconnect a client socket and reuse it
static BOOL disconnect( SOCKET client_socket , LPOVERLAPPED lpOverlapped , DWORD dwFlags = TF_REUSE_SOCKET , DWORD reserved = 0);
//event callbacks
static void onconnect( iocp_overlapped *&povl );
static void ondisconnect( iocp_overlapped *&povl );
static void onclose( iocp_overlapped *&povl );
static void onrecv( iocp_overlapped *&povl );
static void onsend( iocp_overlapped *&povl );
static void onrun( iocp_overlapped *&povl, DWORD errorcode, int last_error );
static void onaccept(iocp_overlapped *&pOL);
public:
WingIOCP(
const char* listen = "0.0.0.0",
const int port = 6998,
const int max_connect = 10,
const int recv_timeout = 3000,
const int send_timeout = 3000
);
~WingIOCP();
BOOL start();
void wait();
};
/**
* # construct
*/
WingIOCP::WingIOCP(
const char* listen, //listen ip
const int port, //listen port
const int max_connect, //max connect
const int recv_timeout,//recv timeout in milliseconds
const int send_timeout //send timeout in milliseconds
)
{
this->m_listen_ip = _strdup(listen); //listen ip
this->m_port = port; //listen port
this->m_max_connect = max_connect; //max connect
this->m_recv_timeout = recv_timeout; //recv timeout
this->m_send_timeout = send_timeout; //send timeout
this->m_povs = new unsigned long[max_connect];//clients
}
/**
* # destruct
*/
WingIOCP::~WingIOCP(){
if( this->m_listen_ip )
{
free(this->m_listen_ip );
this->m_listen_ip = NULL;
}
if( this->m_povs )
{
delete[] this->m_povs;
this->m_povs = NULL;
}
if( m_sock_listen != INVALID_SOCKET )
{
closesocket( m_sock_listen );
m_sock_listen = INVALID_SOCKET;
}
WSACleanup();
}
/**
*#wait
*/
void WingIOCP::wait(){
while( true ){
Sleep(10);
}
}
//event callbacks
void WingIOCP::onconnect( iocp_overlapped *&pOL ){
printf("%ld onconnect\r\n",pOL->m_skClient);
pOL->m_online = 1;
pOL->m_active = time(NULL);
if( setsockopt( pOL->m_skClient, SOL_SOCKET,SO_UPDATE_ACCEPT_CONTEXT,(const char *)&pOL->m_skServer,sizeof(pOL->m_skServer) ) != 0 )
{
//setsockopt fail
//printf("1=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
WSASetLastError(0);
return;
}
// set send timeout
if( pOL->m_send_timeout > 0 )
{
if( setsockopt( pOL->m_skClient, SOL_SOCKET,SO_SNDTIMEO, (const char*)&pOL->m_send_timeout,sizeof(pOL->m_send_timeout)) !=0 )
{
//setsockopt fail
// printf("2=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
}
}
if( pOL->m_recv_timeout > 0 )
{
if( setsockopt( pOL->m_skClient, SOL_SOCKET,SO_RCVTIMEO, (const char*)&pOL->m_recv_timeout,sizeof(pOL->m_recv_timeout)) != 0 )
{
//setsockopt fail
// printf("3=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
}
}
linger so_linger;
so_linger.l_onoff = TRUE;
so_linger.l_linger = 0; // without close wait status
if( setsockopt( pOL->m_skClient,SOL_SOCKET,SO_LINGER,(const char*)&so_linger,sizeof(so_linger) ) != 0 ){
// printf("31=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
}
//get client ip and port
int client_size = sizeof(pOL->m_addrClient);
ZeroMemory( &pOL->m_addrClient , sizeof(pOL->m_addrClient) );
if( getpeername( pOL->m_skClient , (SOCKADDR *)&pOL->m_addrClient , &client_size ) != 0 )
{
//getpeername fail
// printf("4=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
}
// printf("%s %d connect\r\n",inet_ntoa(pOL->m_addrClient.sin_addr), pOL->m_addrClient.sin_port);
//keepalive open
int dt = 1;
DWORD dw = 0;
tcp_keepalive live ;
live.keepaliveinterval = 5000; //连接之后 多长时间发现无活动 开始发送心跳吧 单位为毫秒
live.keepalivetime = 1000; //多长时间发送一次心跳包 1分钟是 60000 以此类推
live.onoff = TRUE; //是否开启 keepalive
if( setsockopt( pOL->m_skClient, SOL_SOCKET, SO_KEEPALIVE, (char *)&dt, sizeof(dt) ) != 0 )
{
//setsockopt fail
// printf("5=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
}
if( WSAIoctl( pOL->m_skClient, SIO_KEEPALIVE_VALS, &live, sizeof(live), NULL, 0, &dw, &pOL->m_ol , NULL ) != 0 )
{
//WSAIoctl error
// printf("6=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
}
memset(pOL->m_pBuf,0,DATA_BUFSIZE);
//post recv
pOL->m_DataBuf.buf = pOL->m_pBuf;
pOL->m_DataBuf.len = DATA_BUFSIZE;
pOL->m_iOpType = OP_RECV;
DWORD RecvBytes = 0;
DWORD Flags = 0;
int code = WSARecv(pOL->m_skClient,&(pOL->m_DataBuf),1,&RecvBytes,&Flags,&(pOL->m_ol),NULL);
int error_code = WSAGetLastError();
if( 0 != code )
{
if( WSA_IO_PENDING != error_code )
{
// printf("7=>onconnect some error happened , error code %d \r\n", WSAGetLastError());
return;
}
}
else
{
//recv complete
onrecv( pOL );
}
}
void WingIOCP::ondisconnect( iocp_overlapped *&pOL ){
// printf("ondisconnect error %d\r\n",WSAGetLastError());
WSASetLastError(0);
pOL->m_online = 0; //set offline
pOL->m_active = time(NULL); //the last active time
pOL->m_iOpType = OP_ONACCEPT; //reset status
pOL->m_isUsed = 0; //
ZeroMemory(pOL->m_pBuf,sizeof(char)*DATA_BUFSIZE); //clear buf
if( !BindIoCompletionCallback( (HANDLE)pOL->m_skClient ,worker,0) ){
// printf("BindIoCompletionCallback error %ld\r\n",WSAGetLastError());
}
//post acceptex
int error_code = accept( pOL->m_skClient,pOL->m_pBuf,0,sizeof(SOCKADDR_IN)+16,sizeof(SOCKADDR_IN)+16,NULL, (LPOVERLAPPED)pOL );
//printf("accept error %d\r\n",WSAGetLastError());
int last_error = WSAGetLastError() ;
if( !error_code && ERROR_IO_PENDING != last_error ){
}
//printf("2=>ondisconnect some error happened , error code %d \r\n================================================\r\n\r\n", WSAGetLastError());
//printf("21=>ondisconnect some error happened , error code %d \r\n================================================\r\n\r\n", WSAGetLastError());
WSASetLastError(0);
}
void WingIOCP::onaccept(iocp_overlapped *&pOL){
pOL->m_active = time(NULL); //the last active time
pOL->m_iOpType = OP_ACCEPT; //reset status
printf("%ld reuse socket real complete , error code %d \r\n", pOL->m_skClient,WSAGetLastError());
WSASetLastError(0);
}
void WingIOCP::onclose( iocp_overlapped *&pOL ){
// printf("%ld close\r\n", pOL->m_skClient);
SOCKET m_sockListen = pOL->m_skServer;
SOCKET m_client = pOL->m_skClient;
int send_timeout = pOL->m_send_timeout;
int recv_timeout = pOL->m_recv_timeout;
pOL->m_iOpType = OP_DIS;
shutdown( pOL->m_skClient, SD_BOTH );
//socket reuse
if( !disconnect( pOL->m_skClient , &pOL->m_ol ) && WSA_IO_PENDING != WSAGetLastError()) {
// printf("1=>onclose some error happened , error code %d \r\n", WSAGetLastError());
}
//printf("onclose complete %d \r\n", WSAGetLastError());
}
void WingIOCP::onrecv( iocp_overlapped *&pOL ){
pOL->m_active = time(NULL);
// printf("recv:\r\n%s\r\n\r\n",pOL->m_pBuf);
ZeroMemory(pOL->m_pBuf,DATA_BUFSIZE);
}
void WingIOCP::onsend( iocp_overlapped *&povl ){
}
void WingIOCP::onrun( iocp_overlapped *&povl, DWORD errorcode, int last_error ){}
/**
* # acceptex
*/
BOOL WingIOCP::accept(
SOCKET sAcceptSocket,
PVOID lpOutputBuffer,
DWORD dwReceiveDataLength,
DWORD dwLocalAddressLength,
DWORD dwRemoteAddressLength,
LPDWORD lpdwBytesReceived,
LPOVERLAPPED lpOverlapped
)
{
WSASetLastError(0);
if( m_sock_listen == INVALID_SOCKET || !lpOverlapped )
{
return 0;
}
GUID guidAcceptEx = WSAID_ACCEPTEX;
DWORD dwBytes = 0;
LPFN_ACCEPTEX lpfnAcceptEx;
int res= WSAIoctl( m_sock_listen, SIO_GET_EXTENSION_FUNCTION_POINTER, &guidAcceptEx,
sizeof(guidAcceptEx), &lpfnAcceptEx, sizeof(lpfnAcceptEx), &dwBytes, NULL, NULL );
if( 0 != res )
{
return 0;
}
return lpfnAcceptEx( m_sock_listen, sAcceptSocket, lpOutputBuffer, dwReceiveDataLength,
dwLocalAddressLength, dwRemoteAddressLength, lpdwBytesReceived, lpOverlapped );
}
/**
* # disconnect socket and reuse the socket
*/
BOOL WingIOCP::disconnect( SOCKET client_socket , LPOVERLAPPED lpOverlapped , DWORD dwFlags , DWORD reserved )
{
WSASetLastError(0);
if( client_socket == INVALID_SOCKET || !lpOverlapped )
{
return 0;
}
GUID GuidDisconnectEx = WSAID_DISCONNECTEX;
DWORD dwBytes = 0;
LPFN_DISCONNECTEX lpfnDisconnectEx;
if( 0 != WSAIoctl( client_socket,SIO_GET_EXTENSION_FUNCTION_POINTER,&GuidDisconnectEx,
sizeof(GuidDisconnectEx),&lpfnDisconnectEx,sizeof(lpfnDisconnectEx),&dwBytes,NULL,NULL))
{
return 0;
}
return lpfnDisconnectEx(client_socket,lpOverlapped,/*TF_REUSE_SOCKET*/dwFlags,reserved);
}
/**
* # iocp worker thread
*/
VOID CALLBACK WingIOCP::worker( DWORD dwErrorCode,DWORD dwBytesTrans,LPOVERLAPPED lpOverlapped )
{
//why here get the error code 87 ?
//printf("worker error %d\r\n",WSAGetLastError());
if( NULL == lpOverlapped )
{
//not real complete
SleepEx(20,TRUE);//set warn status
WSASetLastError(0);
return;
}
//get overlapped data
iocp_overlapped* pOL = CONTAINING_RECORD(lpOverlapped, iocp_overlapped, m_ol);
//just a test
onrun( pOL, dwErrorCode, WSAGetLastError() );
switch( pOL->m_iOpType )
{
case OP_DIS:
ondisconnect(pOL);
break;
case OP_ONACCEPT:
onaccept(pOL);
break;
case OP_ACCEPT:
{
//new client connect
onconnect( pOL );
}
break;
case OP_RECV:
{
pOL->m_recvBytes = dwBytesTrans;
//check client offline
if( 0 == dwBytesTrans || WSAECONNRESET == WSAGetLastError() || ERROR_NETNAME_DELETED == WSAGetLastError()){
onclose( pOL );
}
else
{ //recv msg from client
pOL->m_recvBytes = dwBytesTrans;
onrecv( pOL );
}
}
break;
case OP_SEND:
{
}
break;
}
WSASetLastError(0);
}
BOOL WingIOCP::start(){
do{
WSADATA wsaData;
if( WSAStartup(MAKEWORD(2,2), &wsaData) != 0 )
{
return FALSE;
}
if(LOBYTE(wsaData.wVersion) != 2 || HIBYTE(wsaData.wVersion) != 2)
{
break;
}
m_sock_listen = WSASocket(AF_INET, SOCK_STREAM, 0, NULL, 0, WSA_FLAG_OVERLAPPED);
if( INVALID_SOCKET == m_sock_listen )
{
break;
}
//bind the worker thread
BOOL bReuse = TRUE;
BOOL bind_status = ::BindIoCompletionCallback((HANDLE)( m_sock_listen ), worker, 0 );
if( !bind_status )
{
break;
}
//set option SO_REUSEADDR
if( 0 != ::setsockopt( m_sock_listen, SOL_SOCKET, SO_REUSEADDR,(LPCSTR)&bReuse, sizeof(BOOL) ) )
{
//some error happened
break;
}
struct sockaddr_in ServerAddress;
ZeroMemory(&ServerAddress, sizeof(ServerAddress));
ServerAddress.sin_family = AF_INET;
ServerAddress.sin_addr.s_addr = inet_addr( this->m_listen_ip );
ServerAddress.sin_port = htons( this->m_port );
if ( SOCKET_ERROR == bind( m_sock_listen, (struct sockaddr *) &ServerAddress, sizeof( ServerAddress ) ) )
{
break;
}
if( 0 != listen( m_sock_listen , SOMAXCONN ) )
{
break;
}
//printf("1=>start get error %d\r\n",WSAGetLastError());
WSASetLastError(0);
//socket pool
for( int i = 0 ; i m_max_connect ; i++ )
{
SOCKET client = WSASocket(AF_INET,SOCK_STREAM,IPPROTO_TCP,0,0,WSA_FLAG_OVERLAPPED);
if( INVALID_SOCKET == client )
{
continue;
}
if( !BindIoCompletionCallback( (HANDLE)client ,worker,0) )
{
closesocket(client);
continue;
}
iocp_overlapped *povl = new iocp_overlapped();
if( NULL == povl )
{
closesocket(client);
continue;
}
DWORD dwBytes = 0;
ZeroMemory(povl,sizeof(iocp_overlapped));
povl->m_iOpType = OP_ACCEPT;
povl->m_skServer = m_sock_listen;
povl->m_skClient = client;
povl->m_recv_timeout = m_recv_timeout;
povl->m_isUsed = 0;
povl->m_active = 0;
povl->m_isCrashed = 0;
povl->m_online = 0;
povl->m_usenum = 1;
int server_size = sizeof(povl->m_addrServer);
ZeroMemory(&povl->m_addrServer,server_size);
getpeername(povl->m_skServer,(SOCKADDR *)&povl->m_addrServer,&server_size);
int error_code = accept( povl->m_skClient, povl->m_pBuf, 0, sizeof(SOCKADDR_IN)+16, sizeof(SOCKADDR_IN)+16, NULL, (LPOVERLAPPED)povl );
int last_error = WSAGetLastError() ;
if( !error_code && ERROR_IO_PENDING != last_error )
{
closesocket( client );
client = povl->m_skClient = INVALID_SOCKET;
delete povl;
povl = NULL;
//printf("client=>crate error %d\r\n",WSAGetLastError());
}else{
this->m_povs[i] = (unsigned long)povl;
}
//here all the last error is 997 , means nothing error happened
//printf("client=>start get error %d\r\n",WSAGetLastError());
WSASetLastError(0);
}
//printf("last start get error %d\r\n",WSAGetLastError());
WSASetLastError(0);
return TRUE;
} while( 0 );
if( m_sock_listen != INVALID_SOCKET )
{
closesocket( m_sock_listen );
m_sock_listen = INVALID_SOCKET;
}
WSACleanup();
return FALSE;
}
int _tmain(int argc, _TCHAR* argv[])
{
WingIOCP *iocp = new WingIOCP();
iocp->start();
iocp->wait();
delete iocp;
return 0;
}
The solution to any CLOSE_WAIT issue is to close the socket. Evidently you are leaking sockets at end of stream or on an error.
Correct setsockopt usage like MS sample.
[EDIT] After some search I find this issue about AcceptEx.
Good lock.

epoll multi process

I continue learn network programming using c/c++, and after that I have created multi process tcp server, I want to create simple http server, which return static resources, I use epoll so let me show my code
first of all I use fd passing for handle request in workers
so, my main function and head process
struct Descriptors{
int sv[2];
};
class Parent{
public:
static Parent& getInstance(){
static Parent instance;
return instance;
}
Parent(Parent const&) = delete;
void operator=(Parent const&) = delete;
void addFd(int fd){
m_fd.push_back(fd);
};
void run() {
startServer();
size_t index = 0;
while(true){
struct epoll_event Events[MAX_EVENTS];
int N = epoll_wait(m_epoll, Events, MAX_EVENTS, -1);
for (size_t i =0; i < N; ++i){
if (Events[i].events & EPOLLHUP){
epoll_ctl(m_epoll, EPOLL_CTL_DEL, Events[i].data.fd, &(Events[i]));
shutdown(Events[i].data.fd,SHUT_RDWR);
close(Events[i].data.fd);
continue;
}else {
if (Events[i].data.fd == m_masterSocket) {
handleConnection();
}else {
char * arg = "1";
ssize_t size = sock_fd_write(m_fd[index], arg, 1,Events[i].data.fd);
index = (1+index) % m_fd.size();
}
}
}
}
}
private:
Parent(){
m_numCpu = sysconf(_SC_NPROCESSORS_ONLN);
}
void startServer(){
m_masterSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
struct sockaddr_in SockAddr;
SockAddr.sin_family = AF_INET;
SockAddr.sin_port = htons(11141);
SockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
bind(m_masterSocket, (struct sockaddr *)(&SockAddr), sizeof(SockAddr));
set_nonblock(m_masterSocket);
listen(m_masterSocket, SOMAXCONN);
m_epoll = epoll_create1(0);
struct epoll_event Event;
Event.data.fd = m_masterSocket;
Event.events = EPOLLIN | EPOLLRDHUP;
epoll_ctl(m_epoll, EPOLL_CTL_ADD, m_masterSocket, &Event);
}
void handleConnection(){
int SlaveSocket = accept(m_masterSocket, 0, 0);
set_nonblock(SlaveSocket);
struct epoll_event Event;
Event.data.fd = SlaveSocket;
Event.events = EPOLLIN | EPOLLRDHUP;
epoll_ctl(m_epoll, EPOLL_CTL_ADD, SlaveSocket, &Event);
}
int m_epoll;
int m_masterSocket;
int m_numCpu;
std::vector<int> m_fd;
};
void parent(int sock){
Parent::getInstance().addFd(sock);
}
int main(int argc, char **argv){
int numCpu = sysconf(_SC_NPROCESSORS_ONLN);
std::vector<Descriptors> desc;
desc.resize(numCpu);
bool isParent = true;
for (int i = 0; i < numCpu && isParent; ++i){
std::cout << "pid my is = " << getpid() <<std::endl;
int sv[2];
if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
perror("socketpair");
exit(1);
}
pid_t forkId = fork();
switch (forkId){
case 0:{
isParent = false;
close(sv[0]);
child(sv[1]);
break;
}
case -1:
perror("fork");
exit(1);
default:
close(sv[1]);
parent(sv[0]);
break;
}
}
if (isParent){
Parent::getInstance().run();
int status;
waitpid(-1, &status, 0);
}
}
And my worker process is
void respond(int fd)
{
char mesg[99999], *reqline[3], data_to_send[BYTES], path[99999];
int rcvd, fileDesc, bytes_read;
memset( (void*)mesg, (int)'\0', 99999 );
const char *ROOT = "/home/web_server/";
int RecvResult = recv(fd,mesg, 99999, MSG_NOSIGNAL);
if (RecvResult == 0 && errno != EAGAIN){
shutdown(fd,SHUT_RDWR);
close(fd);
}else if (RecvResult >0){
printf("%s", mesg);
reqline[0] = strtok (mesg, " \t\n"); // split on lexemes
if ( strncmp(reqline[0], "GET\0", 4)==0 ) // if first 4 character equal
{
reqline[1] = strtok (NULL, " \t");
reqline[2] = strtok (NULL, " \t\n");
std::cout << "reqline 1 " << reqline[1] << std::endl;
std::cout << "reqline 2 " << reqline[2] << std::endl;
if ( strncmp( reqline[2], "HTTP/1.0", 8)!=0
&& strncmp(reqline[2], "HTTP/1.1", 8 ) !=0 )
{
write(fd, "HTTP/1.0 400 Bad Request\n", 25);
}
else
{
if ( strncmp(reqline[1], "/\0", 2)==0 )
reqline[1] = "/index.html";
strcpy(path, ROOT);
strcpy(&path[strlen(ROOT)], reqline[1]);
printf("file: %s\n", path);
if ( (fileDesc=open(path, O_RDONLY))!=-1 )
{
send(fd, "HTTP/1.0 200 OK\n\n", 17, 0);
while ( (bytes_read=read(fileDesc, data_to_send, BYTES))>0 )
write (fd, data_to_send, bytes_read);
}
else write(fd, "HTTP/1.0 404 Not Found\n", 23);
}
}
}
shutdown(fd,SHUT_RDWR);
close(fd);
}
void child(int sock)
{
int fd;
char buf[16];
ssize_t size;
sleep(1);
for (;;) {
size = sock_fd_read(sock, buf, sizeof(buf), &fd);
if (size <= 0)
break;
if (fd != -1) {
respond(fd);
}
}
printf("child processes is end\n");
}
And when I go in browser http://127.0.0.1:11141/ it is ok, and I get index.html, but when I run in apache benchmark, as
ab -n 10 -c 10 http://127.0.0.1:11141/
I get answer as
This is ApacheBench, Version 2.3
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/
Benchmarking 127.0.0.1 (be patient)...apr_socket_recv: Connection reset by peer (104)
Total of 2 requests completed
I don't understand where is my error, because I I think that my server in theory(because using epoll ) have to resolved C10K problem. but on the practice, my server can not resolved 10 connection. Could you help me please?
Thank you for useful links and any advices!
UPDATE
When I run as
strace -f ./server 2> error.txt
in end of error.txt
[pid 6552] write(6, 0x7ffdbff00390, 757) = -1 EPIPE (Broken pipe)
[pid 6552] --- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=6552, si_uid=1000} ---
[pid 6552] +++ killed by SIGPIPE +++
write(1, 0x7fc5ffbe3000, 83) = 83
write(1, 0x7fc5ffbe3000, 12) = 12
write(1, 0x7fc5ffbe3000, 20) = 20
write(1, 0x7fc5ffbe3000, 41) = 41
open(0x7ffdbff18e30, O_RDONLY) = 11
sendto(10, 0x403df9, 17, 0, NULL, 0) = 17
read(11, 0x7ffdbff00390, 1024) = 757
write(10, 0x7ffdbff00390, 757) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=6554, si_uid=1000} ---
+++ killed by SIGPIPE +++
So I think that problem in EPipe error, But I don't understand why...
Update
So I think that problem in close descriptor, but I don't understand how to fix it. Thank you for useful advices.
UPDATE
I Get error on function send in worker process
Seems I found my error, right version function void :
void respond(int fd)
{
char mesg[99999], *reqline[3], data_to_send[BYTES], path[99999];
int rcvd, fileDesc, bytes_read;
memset( (void*)mesg, (int)'\0', 99999 );
const char *ROOT = "/home/web_server/";
int RecvResult = recv(fd,mesg, 99999, MSG_NOSIGNAL);
//EAGAIN - "there is no data available right now, try again later
if (RecvResult == 0 && errno != EAGAIN){
shutdown(fd,SHUT_RDWR);
close(fd);
std::cout << "error recv" << std::endl;
return;
}else if (RecvResult >0){
printf("%s", mesg);
reqline[0] = strtok (mesg, " \t\n"); // split on lexemes
if ( strncmp(reqline[0], "GET\0", 4)==0 ) // if first 4 character equal
{
reqline[1] = strtok (NULL, " \t");
reqline[2] = strtok (NULL, " \t\n");
std::cout << "reqline 1 " << reqline[1] << std::endl;
std::cout << "reqline 2 " << reqline[2] << std::endl;
if ( strncmp( reqline[2], "HTTP/1.0", 8)!=0
&& strncmp(reqline[2], "HTTP/1.1", 8 ) !=0 )
{
send(fd, "HTTP/1.0 400 Bad Request\n", 25 , MSG_NOSIGNAL);
}
else
{
if ( strncmp(reqline[1], "/\0", 2)==0 )
reqline[1] = "/index.html";
strcpy(path, ROOT);
strcpy(&path[strlen(ROOT)], reqline[1]);
printf("file: %s\n", path);
if ( (fileDesc=open(path, O_RDONLY))!=-1 )
{
send(fd, "HTTP/1.0 200 OK\n\n", 17, MSG_NOSIGNAL);
while ( (bytes_read=read(fileDesc, data_to_send, BYTES))>0 )
{
if (bytes_read != -1)
send (fd, data_to_send, bytes_read, MSG_NOSIGNAL);
}
}
else send(fd, "HTTP/1.0 404 Not Found\n", 23, MSG_NOSIGNAL);
}
shutdown(fd,SHUT_RDWR);
close(fd);
}
}else {
std::cout << "Client disconnected unexpect" << std::endl;
}
}
Problem was that , that I close socket , and after that I try to read from this socket.

epoll loops on disconnection of a client

I am trying to implement a socket server by using epoll. I have 2 threads doing 2 tasks:
listening to incoming connection
writing on screen the data the client is sending.
For my test I have the client and the server on the same machine with 3 or 4 clients running.
The server works fine until I don't kill one of the client by issuing a CTRL-C: as soon I do that the server starts looping and printing at a very fast rate data from other client. The strange thing is that
the client sends data each 2 seconds but the rate of the server is higher
epoll_wait is also supposed to print something when one of the client disconnects as it is checking also for EPOLLHUP or EPOLLERR
epoll_wait should wait a bit before printing since I gave him a timeout of 3000 milliseconds.
Can you help? Could it be that I am passing in a wrong way the epoll descriptor to the other thread? I cannot understand since the code looks similar to many examples around.
Thanks a lot
Mn
// server.cpp
#include <iostream>
#include <cstdio>
#include <cstring>
extern "C" {
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netdb.h>
#include <pthread.h>
}
#define MAX_BACKLOG 10
void* readerthread(void* args){
int epfd = *((int*)args);
epoll_event outwait[10];
while(true){
int retpw = epoll_wait( epfd, outwait,20, 3000 );
if( retpw == -1 ){
printf("epoll error %m\n");
}else if( retpw == 0 ){
printf("nothing is ready yet\n");
continue;
}else{
for( int i=0;i<retpw;i++){
if( outwait[i].events & EPOLLIN ){
int fd = outwait[i].data.fd;
char buf[64];
if( -1 == read(fd,buf,64) ){
printf("error reading %m\n");
}
printf("%s\n",buf);
}else{
std::cout << "other event" << std::endl;
}
}
}
}
}
int main(){
int epfd = epoll_create(10);
if( -1 == epfd ){
std::cerr << "error creating EPOLL server" << std::endl;
return -1;
}
pthread_t reader;
int rt = pthread_create( &reader, NULL, readerthread, (void*)&epfd );
if( -1 == rt ){
printf("thread creation %m\n");
return -1;
}
struct addrinfo addr;
memset(&addr,0,sizeof(addrinfo));
addr.ai_family = AF_INET;
addr.ai_socktype = SOCK_STREAM;
addr.ai_protocol = 0;
addr.ai_flags = AI_PASSIVE;
struct addrinfo * rp,* result;
getaddrinfo( "localhost","59000",&addr,&result );
for( rp = result; rp != NULL; rp = rp->ai_next ){
// we want to take the first ( it could be IP_V4
// or IP_V6 )
break;
}
int sd = socket( AF_INET, SOCK_STREAM, 0 );
if(-1==sd ){
std::cerr << "error creating the socket" << std::endl;
return -1;
}
// to avoid error 'Address already in Use'
int optval = 1;
setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
if( -1==bind( sd, result->ai_addr, result->ai_addrlen ) ){
printf("%m\n");
std::cerr << "error binding" << std::endl;
return -1;
}
while(true){
std::cout << "listen" << std::endl;
if( -1== listen(sd, MAX_BACKLOG ) ){
std::cerr << "listen didn't work" << std::endl;
return -1;
}
std::cout << "accept" << std::endl;
sockaddr peer;
socklen_t addr_size;
int pfd = accept( sd, &peer ,&addr_size );
if( pfd == -1 ){
std::cerr << "error calling accept()" << std::endl;
return -1;
}
epoll_event ev;
ev.data.fd = pfd;
ev.events = EPOLLIN;
std::cout << "adding to epoll list" << std::endl;
if( -1 == epoll_ctl( epfd, EPOLL_CTL_ADD, pfd, &ev ) ){
printf("epoll_ctl error %m\n");
return -1;
}
}
}
// end of server.cpp
// client.cpp
#include <iostream>
#include <cstring>
#include <cstdio>
extern "C"{
#include <sys/socket.h>
#include <sys/types.h>
#include <netdb.h>
}
int main(){
const char* servername = "localhost";
const char* serverport = "59000";
struct addrinfo server_address;
memset( &server_address, 0, sizeof(struct addrinfo) );
server_address.ai_family = AF_INET;
server_address.ai_socktype = SOCK_STREAM;
server_address.ai_protocol = 0; // any protocol
server_address.ai_flags = 0;
struct addrinfo * result, * rp;
int res = getaddrinfo( servername, serverport, &server_address, &result );
if( -1 == res ){
std::cout << "I cannot getaddress " << servername << std::endl;
return -1;
}
int fd = socket( server_address.ai_family
, server_address.ai_socktype
, server_address.ai_protocol );
if( -1 == fd ){
printf("I cannot open a socket %m\n");
return -1;
}
for( rp = result; rp != NULL; rp = rp->ai_next ){
std::cout << "************" << std::endl;
if( -1 == connect( fd, rp->ai_addr, rp->ai_addrlen ) ){
close(fd);
}else{
std::cout << "connected" << std::endl;
break;
}
}
if( rp == NULL ){
std::cerr << "I couldn't connect server " << servername << std::endl;
}
while(true){
sleep(2);
pid_t me = getpid();
char buf[64];
bzero( buf,sizeof(buf));
sprintf( buf,"%ld",me );
write(fd,buf,sizeof(buf));
printf("%s\n",buf);
}
}
// end of client.cpp
A client disconnection is signalled by an EOF condition on the file descriptor. The system considers EOF to be a state in which the file descriptor is 'readable'. But, of course, the EOF condition cannot be read. This is the source of your looping. epoll is acting like the file descriptor for the disconnected client is always readable. You can detect that you have an EOF condition by checking when read returns 0 bytes read.
The only way to deal with an EOF condition is to close the file descriptor in some way. Depending on exactly how the flow of things go, this could be with shutdown(sockfd, SHUT_RD), shutdown(sockfd, SHUT_RDWR) or close(sockfd);.
Unless you know that you need the shutdown(2) call for whatever reason, I would recommend you use close. Of course, you should remember to tell epoll that the file descriptor is no longer of interest before you close. I'm not sure what will happen if you don't, but one possibility is that epoll will error. Another is that epoll will mysteriously begin reporting events for a new file descriptor that has the same numeric value before you add it to the list epoll should care about.
Socket cleanly closed by the other side will become readable and read(2) will return 0, you have to check for that. As coded now - level-triggered poll - epoll_wait(2) returns every time without waiting telling that you still haven't read that end-of-stream.
Alternatively, you can switch to edge-triggered poll (EPOLLET) and react to EPOLLRDHUP too.

Setting IO On A Socket

I have a C++ program where I connect to my server with a socket and I need to set the overlapped for the socket. Doing the following does not work:
Function
int set_wsa_proxy_client ( proxy_client *node ) {
WSABUF wbuf;
DWORD bytes, flags;
int BufLen = 1024;
wbuf.buf = node->buf;
wbuf.len = node->len;
flags = 0;
int rr = WSARecv ( node->s , &wbuf , 1 , &bytes , &flags , &node->ov , NULL );
if (rr == FALSE) {
if (WSAGetLastError() != WSA_IO_PENDING) {
printf("PostRecv: WSARecv* failed: %d\n", WSAGetLastError());
if ( WSAGetLastError() == ERROR_SUCCESS ) { // this means it completed right away ...
//cout << endl << "ERROR_SUCCESS - set_wsa_lobby_client completed" << endl;
//cout << endl << "BYTES: " << node->len << endl;
return 0;
}
return WSAGetLastError();
}
}
return 0;
}
Extension
typedef struct OverlappedEx : OVERLAPPED {
int id;
} OverlappedEx;
proxy_client struct
struct proxy_client {
// ... blah blah blah
SOCKET s;
OverlappedEx ov;
// ... blah blah blah
}
Main
HANDLE ServerCompletionPort = CreateIoCompletionPort ( INVALID_HANDLE_VALUE , NULL , (ULONG_PTR)NULL , 0 );
if ( ServerCompletionPort == NULL ) { fprintf( stderr , "CreateIoCompletionPort failed: %d\n" , GetLastError() ); return -1; }
proxy_client *new_c = new proxy_client;
memset(&new_c->ov , 0 , sizeof(new_c->ov));
new_c->ov.hEvent = ServerCompletionPort;
new_c->s = (make socket)
// ... Connect and other stuff ...
HANDLE hrc = CreateIoCompletionPort( (HANDLE)new_c->s, new_c->ov.hEvent, (ULONG_PTR)pc, 0 ); // pc is the global struct of proxy_client
if (hrc == NULL)
fprintf(stderr, "CompletionThread: CreateIoCompletionPort failed: %d\n", GetLastError());
int r = 0;
if ( ( r = set_wsa_proxy_client ( new_c ) ) != 0 ) {
//
} else {
//
}
This does not seem to trigger the socket when I GetQueuedCompletionStatus for ServerCompletionPort, after sending the socket data (from the server). I was wondering how I can set an IO for a socket! Thank for the help! :-)

C++ CreateIoCompletionPort on new socket

EDIT: I am guessing the problem is I have to associate the OVERLAPPED or WSAOVERLAPPED in the container with my completion port. Is that correct?
I can get IO completions when someone connects to my server. I then use CreateIoCompletionPort on the new socket, with the completionport that original was used. But when they send me data, it does not get set off. Although, it still gets set off if someone else connects. My question is, why would this happen? I also make sure CreateIoCompletionPort returns the same handle as was the original. What gives?
EDIT:
DWORD WINAPI worker_thread(LPVOID lpParam) {
client_information_class *cicc = NULL;
HANDLE CompletionPort = (HANDLE)lpParam;
ULONG_PTR Key;
DWORD BytesTransfered;
OVERLAPPED *lpOverlapped = NULL;
DWORD error = NULL;
while(1) {
error = GetQueuedCompletionStatus(CompletionPort, &BytesTransfered, (PULONG_PTR)&Key, &lpOverlapped, 0);
cicc = CONTAINING_RECORD ( lpOverlapped, client_information_class, ol );
if ( error == TRUE ) {
cout << endl << "IO TRIGGERED" << endl;
switch ( cicc->operation ) {
/*#define OP_ACCEPT 0
#define OP_READ 1
#define OP_WRITE 2*/
case 0:{
if ( check_auth_progress ( cicc->client_socket , cicc->client_buff , BytesTransfered ) ) {
cout << "Client " << cicc->client_socket << " connected." << endl;
client_information_class *k = NULL;
SOCKADDR_STORAGE *LocalSockaddr=NULL, *RemoteSockaddr=NULL;
int LocalSockaddrLen,RemoteSockaddrLen;
k = (client_information_class *)Key;
k->lpfnGetAcceptExSockaddrs(
cicc->client_buff,
cicc->client_len - ((sizeof(SOCKADDR_STORAGE) + 16) * 2),
sizeof(SOCKADDR_STORAGE) + 16,
sizeof(SOCKADDR_STORAGE) + 16,
(SOCKADDR **)&cicc->LocalSockaddr,
&cicc->LocalSockaddrLen,
(SOCKADDR **)&cicc->RemoteSockaddr,
&cicc->RemoteSockaddrLen
);
client_information_class *cicc2 = NULL;
cicc2 = ( client_information_class *)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(client_information_class) + (sizeof(BYTE) * 4096));
if (cicc2 == NULL) {
fprintf(stderr, "Out of memory!\n");
}
cicc2->client_socket = cicc->client_socket;
cicc2->client_socketaddr_in = cicc->client_socketaddr_in;
cicc2->LocalSockaddr = cicc->LocalSockaddr;
cicc2->LocalSockaddrLen = cicc->LocalSockaddrLen;
cicc2->RemoteSockaddr = cicc->RemoteSockaddr;
cicc2->RemoteSockaddrLen = cicc->RemoteSockaddrLen;
HANDLE hrc = CreateIoCompletionPort( (HANDLE)cicc2->client_socket, CompletionPort, (ULONG_PTR)cic, 0 );
if (hrc == NULL) {
fprintf(stderr, "CompletionThread: CreateIoCompletionPort failed: %d\n", GetLastError());
return 0;
} else {
fprintf(stderr, "CompletionThread: CreateIoCompletionPort: %d\n", hrc);
}
cic->deleteNode ( cicc->client_socket , cic );
cic->addNode ( cicc2 );
} else {
cout << endl << "Something Happened ... " << endl;
}
}break;
case 1:{
if ( ParsePacket ( cicc->client_socket , data ) ) {
cout << "Client " << cicc->client_socket << " connected." << endl;
} else {
cout << endl << "Something Happened ... " << endl;
}
}break;
default:{
cout << endl << "Didnt catch that operation ... " << cicc->operation << endl;
}break;
}
} else if ( error == FALSE && &lpOverlapped == NULL ) {
// no packet was dequed...
fprintf(stderr, "[error == FALSE && &lpOverlapped == NULL] CompletionThread: GetQueuedCompletionStatus failed: %d [0x%x]\n", GetLastError(), &lpOverlapped->Internal);
} else if ( error == FALSE && &lpOverlapped != NULL ) {
if((DWORD)&lpOverlapped->Internal == 0x0) { // a timeout...
} else {
fprintf(stderr, "[error == FALSE && &lpOverlapped != NULL] CompletionThread: GetQueuedCompletionStatus failed: %d [0x%x]\n", GetLastError(), &lpOverlapped->Internal);
}
}
}
ExitThread(0);
return 0;
}
Id hate to do this again, but I was correct, you have to place the socket into a new mode (much like acceptex) using WSARECV: I did not know this, and its not very clear on the MSDN, and one of the sources I was looking at to learn IOCP, doesn't talk about it. Hopefully this helps someone :/
WSABUF wbuf;
DWORD bytes, flags;
wbuf.buf = cicc2->client_buff;
wbuf.len = cicc2->client_len;
flags = 0;
int rr = WSARecv ( cicc2->client_socket , &wbuf , 1 , &bytes , &flags , &cicc2->ol , NULL );
if (rr == FALSE) {
if (WSAGetLastError() != WSA_IO_PENDING) {
printf("PostRecv: WSARecv* failed: %d\n", WSAGetLastError());
closesocket(cicc2->client_socket);
cic->deleteNode ( cicc2->client_socket , cic );
}
fprintf(stderr, "PostRecv: WSARecv* failed: %d\n", GetLastError());
}