Segmentation fault with massive socket operations - C++

This issue has bothered me for weeks and I could not find any solution on the web, so I have to put a new question to you gurus.
I was trying to read/write on a massive number of sockets; please see the test code below. It behaves normally when the number of sockets is below 1500. When the number of sockets goes beyond 1500, the program crashes unexpectedly. I know that I should use the command ulimit -n 32768 to increase the open-files limit, but the program still does not behave correctly.
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <netdb.h>
#include <errno.h>
#include <malloc.h>
#include <string.h>
int main(int argc, char* argv[])
{
if (argc!=2)
{
printf("usage: test <number of sockets>\n");
return -1;
}
int socketsNum=atoi(argv[1]);
if (socketsNum<=0)
{
printf("error: invalid sockets number\n");
return -1;
}
int *socketHandles=(int*)malloc(sizeof(int)*socketsNum);
if (socketHandles==NULL)
{
printf("error: failed to alloc socket handle memory\n");
return -1;
}
for (int i=0;i<socketsNum;i++)
{
socketHandles[i]=-1;
}
printf("creating %d sockets ...\n",socketsNum);
int createdSocketsNum=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP);
if (socketHandle==-1)
{
int lastError=errno;
printf("warning: socket() failed: index: %d, error: %d\n",i+1,lastError);
continue;
}
sockaddr_in sockAddr; // 0.0.0.0:0
memset(&sockAddr,0,sizeof(sockAddr));
sockAddr.sin_family = AF_INET;
sockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
sockAddr.sin_port = htons(0);
if (bind( socketHandle, (sockaddr*) &sockAddr, sizeof(sockAddr)) == -1)
{
int lastError=errno;
printf("warning: bind() failed: index: %d, error: %d\n",i+1,lastError);
close(socketHandle);
continue;
}
socketHandles[i]=socketHandle;
createdSocketsNum++;
}
printf("created %d sockets.\n",createdSocketsNum);
//test reading;
printf("testing reading ...\n");
int readableNumber=0;
int unreadableNumber=0;
int readingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
readingSkippedNumber++;
continue;
}
fd_set rset;
FD_ZERO(&rset);
FD_SET(socketHandle, &rset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, &rset, NULL, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %d\n",i+1,lastError);
}
else if (retCode==0)
{
unreadableNumber++;
}
else
{
readableNumber++;
}
}
printf("readable: %d, unreadable: %d, skipped: %d, total: %d\n",readableNumber,unreadableNumber,readingSkippedNumber,socketsNum);
//test writing
printf("testing writing ...\n");
int writableNumber=0;
int unwritableNumber=0;
int writingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
writingSkippedNumber++;
continue;
}
fd_set wset;
FD_ZERO(&wset);
FD_SET(socketHandle, &wset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, NULL, &wset, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %d\n",i+1,lastError);
}
else if (retCode==0)
{
unwritableNumber++;
}
else
{
writableNumber++;
}
}
printf("writable: %d, unwritable: %d, skipped: %d, total: %d\n",writableNumber,unwritableNumber,writingSkippedNumber,socketsNum);
printf("closing ...\n");
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
continue;
}
close(socketHandle);
}
free(socketHandles);
printf("completed!\n");
return 0;
}
Compile:
g++ TestSockets.cpp -ldl -g -ggdb -o TestSockets
Config:
ulimit -n 32768
Some typical results:
Good result of ./TestSockets 1500:
creating 1500 sockets ...
created 1500 sockets.
testing reading ...
readable: 0, unreadable: 1500, skipped: 0, total: 1500
testing writing ...
writable: 1372, unwritable: 128, skipped: 0, total: 1500
closing ...
completed!
Bad result of ./TestSockets 1900:
creating 1900 sockets ...
created 1900 sockets.
testing reading ...
warning: select() failed: index: 1797, error: 9
...(more lines trimmed)
warning: select() failed: index: 1820, error: 9
warning: select() failed: index: 1821, error: 22
readable: 0, unreadable: 1878, skipped: 0, total: 1900
testing writing ...
warning: select() failed: index: 1641, error: 9
...(more lines trimmed)
warning: select() failed: index: 1660, error: 9
warning: select() failed: index: 1661, error: 22
writable: 1751, unwritable: 128, skipped: 0, total: 1900
closing ...
completed!
Comment: because 1751 + 128 = 1879 while nothing was skipped out of 1900, the counters no longer add up, so it seems that the stack was damaged.
Bad result of ./TestSockets 2000:
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Segmentation fault
More investigation:
According to the gdb output, it seems that the stack memory was damaged while the program was running:
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Program received signal SIGSEGV, Segmentation fault.
0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78
78 int socketHandle=socketHandles[i];
(gdb) print socketHandles
$1 = (int *) 0x0
(gdb) info local
socketHandle = 0
rset = {fds_bits = {0 <repeats 32 times>}}
timeout = {tv_sec = 0, tv_usec = 0}
retCode = 0
i = 1601
socketsNum = 2000
unreadableNumber = 1601
unwritableNumber = 134514249
socketHandles = 0x0
createdSocketsNum = 2000
readableNumber = 0
readingSkippedNumber = 0
writableNumber = -136436764
writingSkippedNumber = 0
(gdb) info stack
#0 0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78

An fd_set is limited by the maximum value of the file descriptor (not by the number of file descriptors set at the same time). Usually that limit, FD_SETSIZE, is 1024.
Thus, if your socket's value is greater than 1023, you cannot use select on it at all.
Redefining FD_SETSIZE is not supported on any operating system I know of. You might be able to redefine fd_set in your program, but select will still only work up to FD_SETSIZE.
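For descriptor values above that limit, poll() is the usual alternative, since it takes an array of struct pollfd and has no FD_SETSIZE ceiling. A minimal sketch (not from the original answer) of the same zero-timeout probe the test program performs per socket, rewritten with poll():
#include <poll.h>

// Returns the POLLIN/POLLOUT bits that are ready, 0 if none, -1 on error.
static int probe_socket(int socketHandle)
{
    struct pollfd pfd;
    pfd.fd = socketHandle;
    pfd.events = POLLIN | POLLOUT;  // check readability and writability at once
    pfd.revents = 0;

    int retCode = poll(&pfd, 1, 0); // timeout of 0 means do not block
    if (retCode == -1)
        return -1;                  // inspect errno, as with select()
    return pfd.revents;
}
This would replace the select() call in both loops above; poll() is bounded only by the process's open-file limit (the same ulimit -n), not by the descriptor's numeric value.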

I have solved this headache of a problem. The fd_set definitions on Windows and Linux are totally different. On Linux, if the socket handle VALUE is bigger than FD_SETSIZE, the Linux version of the FD_SET macro overruns the buffer. My workaround is to allocate a large enough buffer for the fd_set on Linux, such as:
char rsetBuffer[10240];
memset(rsetBuffer,0,10240);
fd_set& rset=(fd_set&)rsetBuffer;
FD_ZERO(&rset);
FD_SET(socketHandle, &rset);
P.S. Definitions of the fd_set struct and the FD_SET macro on Windows and Linux:
On Windows:
typedef struct fd_set {
u_int fd_count; /* how many are SET? */
SOCKET fd_array[FD_SETSIZE]; /* an array of SOCKETs */
} fd_set;
#define FD_SET(fd, set) do { \
u_int __i; \
for (__i = 0; __i < ((fd_set FAR *)(set))->fd_count; __i++) { \
if (((fd_set FAR *)(set))->fd_array[__i] == (fd)) { \
break; \
} \
} \
if (__i == ((fd_set FAR *)(set))->fd_count) { \
if (((fd_set FAR *)(set))->fd_count < FD_SETSIZE) { \
((fd_set FAR *)(set))->fd_array[__i] = (fd); \
((fd_set FAR *)(set))->fd_count++; \
} \
} \
} while(0)
On Linux:
/* fd_set for select and pselect. */
typedef struct
{
/* XPG4.2 requires this member name. Otherwise avoid the name
from the global namespace. */
#ifdef __USE_XOPEN
__fd_mask fds_bits[__FD_SETSIZE / __NFDBITS];
# define __FDS_BITS(set) ((set)->fds_bits)
#else
__fd_mask __fds_bits[__FD_SETSIZE / __NFDBITS];
# define __FDS_BITS(set) ((set)->__fds_bits)
#endif
} fd_set;
#define __FD_SET(d, set) \
((void) (__FDS_BITS (set)[__FD_ELT (d)] |= __FD_MASK (d)))
#define __FD_CLR(d, set) \
((void) (__FDS_BITS (set)[__FD_ELT (d)] &= ~__FD_MASK (d)))
#define __FD_ISSET(d, set) \
((__FDS_BITS (set)[__FD_ELT (d)] & __FD_MASK (d)) != 0)
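To see how far the Linux FD_SET writes past the bitmap for the descriptor values in this test, here is a small illustration (an addition, based only on the glibc layout quoted above, where fds_bits holds FD_SETSIZE / __NFDBITS words). Note too that, if I recall correctly, fortified glibc builds (_FORTIFY_SOURCE) range-check the descriptor inside FD_SET and abort, so oversizing the buffer is not guaranteed to keep working everywhere:
#include <stdio.h>
#include <sys/select.h>

int main(void)
{
    int fd = 1500;                          /* example descriptor value from the test */
    size_t bitsPerWord = 8 * sizeof(long);  /* __NFDBITS */
    /* On the 32-bit build from the gdb dump the bitmap has 1024/32 = 32 words,
       so fd 1500 lands in word 46 -- 14 words past the end of the fd_set,
       i.e. on the neighbouring stack variables shown corrupted above. */
    printf("fd_set words: %zu, word index for fd %d: %zu\n",
           (size_t)FD_SETSIZE / bitsPerWord, fd, (size_t)fd / bitsPerWord);
    return 0;
}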

Related

poll() method not working on Linux but working on Mac

I am using a C++ code snippet for port forwarding. The requirement is to do the handshake between two ports with two-way communication: forward whatever is coming in on the source port to the destination port, and then forward the response from the destination port back to the source port.
This piece of code works as expected on my Mac system, but when I run it on a Linux system I am facing one issue.
Issue:
The C++ code that I am using has 3 parts:
establish_connection_to_source();
open_connection_to_destination();
processconnetion();
On Linux, establish_connection_to_source() and open_connection_to_destination() work perfectly fine, but processconnetion() has one issue.
Following is the process connection method:
void processconnetion()
{
buffer *todest = new buffer(socket_list[e_source].fd,socket_list[e_dest].fd);
buffer *tosrc = new buffer(socket_list[e_dest].fd,socket_list[e_source].fd);
if (todest == NULL || tosrc == NULL){
fprintf(stderr,"out of mememory\n");
exit(-1);
}
unsigned int loopcnt;
profilecommuncation srcprofile(COMM_BUFSIZE);
profilecommuncation destprofile(COMM_BUFSIZE);
while (true) {
int withevent = poll(socket_list, 2, -1);
loopcnt++;
fprintf(stderr,"loopcnt %d socketswith events = %d source:0x%x dest:0x%x\n", loopcnt, withevent, socket_list[e_source].revents, socket_list[e_dest].revents);
if ((socket_list[e_source].revents | socket_list[e_dest].revents) & (POLLHUP | POLLERR)) {
// one of the connections has a problem or has Hungup
fprintf(stderr,"socket_list[e_source].revents= 0x%X\n", socket_list[e_source].revents);
fprintf(stderr,"socket_list[e_dest].revents= 0x%X\n", socket_list[e_dest].revents);
fprintf(stderr,"POLLHUP= 0x%X\n", POLLHUP);
fprintf(stderr,"POLLERR= 0x%X\n", POLLERR);
int result;
socklen_t result_len = sizeof(result);
getsockopt(socket_list[e_dest].fd, SOL_SOCKET, SO_ERROR, &result, &result_len);
fprintf(stderr, "result = %d\n", result);
fprintf(stderr,"exiting as one connection had an issue\n");
break;
}
if (socket_list[e_source].revents & POLLIN) {
srcprofile.increment_size(todest->copydata());
}
if (socket_list[e_dest].revents & POLLIN) {
destprofile.increment_size(tosrc->copydata());
}
}
delete todest;
delete tosrc;
close(socket_list[e_source].fd);
close(socket_list[e_dest].fd);
srcprofile.dumpseensizes("source");
destprofile.dumpseensizes("destination");
}
Here it gives the error "exiting as one connection had an issue", which means that if ((socket_list[e_source].revents | socket_list[e_dest].revents) & (POLLHUP | POLLERR)) is returning true. The issue is with the destination port, not with the source.
Note:
Variables used in the processconnetion() method:
socket_list is an array of struct pollfd. Following is the description:
struct pollfd {
int fd;
short events;
short revents;
};
pollfd socket_list[3];
#define e_source 0
#define e_dest 1
#define e_listen 2
Following is the output at the time of exit:
connecting to destination: destination IP / 32001.
connected...
loopcnt 1 socketswith events = 1 source:0x0 dest:0x10
socket_list[e_source].revents= 0x0
socket_list[e_dest].revents= 0x10
POLLHUP= 0x10
POLLERR= 0x8
result = 0
exiting as one connection had an issue
In the call int withevent = poll(socket_list, 2, -1); the withevent value returned is 1.
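One thing worth noting about the loop above (an observation, not part of the original question): revents 0x10 is POLLHUP, and on Linux POLLHUP can be reported while data is still queued (even together with POLLIN), yet the loop breaks on (POLLHUP | POLLERR) before ever looking at POLLIN, so anything the destination sent before closing is dropped. A hedged sketch of a more forgiving event loop, reusing the question's socket_list names:
for (;;) {
    int withevent = poll(socket_list, 2, -1);
    if (withevent < 0)
        break;                                    // poll() itself failed

    for (int i = 0; i < 2; ++i) {
        if (socket_list[i].revents & POLLIN) {
            // forward data first (copydata() in the question);
            // a read() return of 0 is the real end-of-stream signal
        }
        if (socket_list[i].revents & (POLLERR | POLLNVAL)) {
            // hard error on this descriptor: give up on the connection
        }
        if (socket_list[i].revents & POLLHUP) {
            // peer hung up: stop only after POLLIN has been drained
        }
    }
}
This does not explain why the destination reports POLLHUP immediately after connecting, but it avoids exiting while readable data is still pending.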
Socket List Initialisation:
guard( (socket_list[e_listen].fd = socket( PF_INET, SOCK_STREAM, IPPROTO_TCP )), "Failed to create socket listen, error: %s\n", "created listen socket");
void guard(int n, char *msg, char *success)
{
if (n < 0) {
fprintf(stderr, msg, strerror(errno) );
exit(-1);
}
fprintf(stderr,"n = %d %s\n",n, success);
}
I am not able to figure out the issue, as it works fine on Mac. Any leads on why this behaviour occurs on Linux are highly appreciated. Thanks in advance.

Unable to set a key as 'redisClusterCommand()' is returning a null pointer

I'm trying to run a simple program which will insert a key-value pair into my Redis cluster of 6 instances (3 masters, 3 replicas). I'm using hiredis-vip.
Here's the program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <hircluster.h>
int main(int argc, char **argv)
{
struct timeval timeout = { 1, 500000 }; // 1.5 seconds
redisClusterContext *cc = redisClusterContextInit();
redisClusterSetOptionAddNodes(cc, "172.16.129.68:6379");
redisClusterSetOptionConnectTimeout(cc, timeout);
redisClusterConnect2(cc);
if (cc != NULL && cc->err) {
printf("Error: %s\n", cc->errstr);
// handle error
exit(-1);
}
redisReply *reply;
reply = (redisReply*)(redisClusterCommand(cc,"SET %s %s", "foo", "hello vishal"));
printf("SET: %s\n", reply->str);
freeReplyObject(reply);
redisClusterFree(cc);
return 0;
}
On running the program, I'm getting a segmentation fault:
Program received signal SIGSEGV, Segmentation fault.
0x00000000004009ed in main (argc=1, argv=0x7fffffffe508) at cluster-example.c:30
30 printf("SET: %s\n", reply->str);
Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.209.el6_9.2.x86_64 libgcc-4.4.7-18.el6_9.2.x86_64 libstdc++-4.4.7-18.el6_9.2.x86_64
(gdb) bt f
#0 0x00000000004009ed in main (argc=1, argv=0x7fffffffe508) at cluster-example.c:30
timeout = {tv_sec = 1, tv_usec = 500000}
cc = 0x7ffff7fdb010
reply = 0x0
The redisReply * has a NULL value, which results in the segmentation fault when I dereference the pointer in printf().
What's wrong with the program?
Edit 1
I've updated a portion of my program after the suggestion from @Stamatis Liatsos:
reply = (redisReply*)(redisClusterCommand(cc,"SET %s %s", "foo", "hello vishal"));
if(cc->err)
printf("\n[%s::%d]Error: %s\n", __FILE__,__LINE__,cc->errstr);
else
printf("SET: %s\n", reply->str);
Here's the output which I'm getting:
[cluster-example.c::31]Error: ctx get by node is null
I found the solution to this exact issue over here.
We have to set up the context to use slots before connecting.
struct timeval timeout = { 1, 500000 }; // 1.5 seconds
redisClusterContext *cc = redisClusterContextInit();
redisClusterSetOptionAddNodes(cc, "172.16.129.68:6379");
redisClusterSetOptionConnectTimeout(cc, timeout);
redisClusterSetOptionRouteUseSlots(cc); //The function that has to be called.
redisClusterConnect2(cc);
if (cc != NULL && cc->err) {
printf("Error: %s\n", cc->errstr);
// handle error
exit(-1);
}
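Independent of the routing fix, it is also worth guarding against a NULL reply before dereferencing it, since that is what turned the error into a crash. A minimal sketch using only the calls already shown above:
/* redisClusterCommand() returns NULL on failure and the error text
   lives in cc->errstr, so check the reply before touching reply->str. */
redisReply *reply = (redisReply *)redisClusterCommand(cc, "SET %s %s", "foo", "hello vishal");
if (reply == NULL) {
    printf("Error: %s\n", cc->errstr);   /* e.g. "ctx get by node is null" */
} else {
    printf("SET: %s\n", reply->str);
    freeReplyObject(reply);
}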

Communication between client and server is erratic

I modified thegeekinthecorner's examples to be able to continuously send data.
I am using g++ 4.9.2.
I tried uninstalling the official latest OFED from here: http://downloads.openfabrics.org/OFED/
OFED Distribution Software Installation Menu
1) View OFED Installation Guide
2) Install OFED Software
3) Show Installed Software
4) Configure IPoIB
5) Uninstall OFED Software
Q) Exit
Select Option [1-5]:5
Uninstalling the previous version of OFED
Running rpm -e --allmatches libibverbs libibverbs-devel libibverbs-utils libmthca libmlx4 libcxgb3 libnes libipathverbs libibcm libibumad libibumad-devel libibmad ibacm librdmacm librdmacm-utils librdmacm-devel opensm opensm-libs dapl perftest mstflint ibutils infiniband-diags qperf infinipath-psm opensm opensm-libs libipathverbs dapl libibcm libibmad libibumad libibumad-devel libibverbs libibverbs-devel libibverbs-utils libipathverbs libmthca libmlx4 librdmacm librdmacm-devel librdmacm-utils ibacm ibutils ibutils-libs libnes infinipath-psm
Failed to uninstall the previous installation
See /tmp/OFED.22320.logs/ofed_uninstall.log
[idf#node1 OFED-1.5.4-20110726-0732]$
[idf#node1 OFED-1.5.4-20110726-0732]$
If instead I just try to install it, I get this:
OFED Distribution Software Installation Menu
1) Basic (OFED modules and basic user level libraries)
2) HPC (OFED modules and libraries, MPI and diagnostic tools)
3) All packages (all of Basic, HPC)
4) Customize
Q) Exit
Select Option [1-4]:3
Please choose an implementation of MVAPICH2:
1) OFA (IB and iWARP)
2) uDAPL
Implementation [1]: 1
Enable ROMIO support [Y/n]:
Enable shared library support [Y/n]:
Enable Checkpoint-Restart support [y/N]:
Kernel 3.10.0-229.7.2.el7.x86_64 is not supported.
For the list of Supported Platforms and Operating Systems see
/mnt/gluster/Downloads/OFED-1.5.4-20110726-0732/docs/OFED_release_notes.txt
[idf#node1 OFED-1.5.4-20110726-0732]$
[idf#node2 Release]$ lspci | grep -i mel
02:00.0 InfiniBand: Mellanox Technologies MT26428 [ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE] (rev b0)
[idf#node2 Release]$
[idf#node1 Release]$ ibv_devinfo
hca_id: mlx4_0
transport: InfiniBand (0)
fw_ver: 2.7.200
node_guid: 0025:90ff:ff1a:081c
sys_image_guid: 0025:90ff:ff1a:081f
vendor_id: 0x02c9
vendor_part_id: 26428
hw_ver: 0xB0
board_id: SM_2092000001000
phys_port_cnt: 1
port: 1
state: PORT_ACTIVE (4)
max_mtu: 4096 (5)
active_mtu: 4096 (5)
sm_lid: 1
port_lid: 2
port_lmc: 0x00
link_layer: InfiniBand
[idf#node1 Release]$ ifconfig -a
ib0: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 2044
inet 192.168.0.1 netmask 255.255.255.0 broadcast 192.168.0.255
inet6 fe80::225:90ff:ff1a:71 prefixlen 64 scopeid 0x20<link>
Infiniband hardware address can be incorrect! Please read BUGS section in ifconfig(8).
infiniband 80:00:00:48:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00 txqueuelen 256 (InfiniBand)
RX packets 5 bytes 280 (280.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 27 overruns 0 carrier 0 collisions 0
Below are the client and server. When I run these programs, the client sends messages, but the number of messages it sends is erratic and error messages are frequent.
Client:
#include <iostream>
#include <thread>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <rdma/rdma_cma.h>
#define TEST_NZ(x) do { if ( (x)) die("error: " #x " failed (returned non-zero)." ); } while (0)
#define TEST_Z(x) do { if (!(x)) die("error: " #x " failed (returned zero/null)."); } while (0)
const int BUFFER_SIZE = 2048;
const int TIMEOUT_IN_MS = 500; /* ms */
struct context
{
struct ibv_context *ctx;
struct ibv_pd *pd;
struct ibv_cq *cq;
struct ibv_comp_channel *comp_channel;
pthread_t cq_poller_thread;
};
struct connection
{
struct rdma_cm_id *id;
struct ibv_qp *qp;
struct ibv_mr *recv_mr;
struct ibv_mr *send_mr;
char *recv_region;
char *send_region;
int num_completions;
};
static pthread_t msgThread;
static void die(const char *reason);
static void build_context(struct ibv_context *verbs);
static void build_qp_attr(struct ibv_qp_init_attr *qp_attr);
static void * poll_cq(void *);
static void post_receives(struct connection *conn);
static void register_memory(struct connection *conn);
static int on_addr_resolved(struct rdma_cm_id *id);
static void on_completion(struct ibv_wc *wc);
static int on_connection(void *context);
static int on_disconnect(struct rdma_cm_id *id);
static int on_event(struct rdma_cm_event *event);
static int on_route_resolved(struct rdma_cm_id *id);
static struct context *s_ctx = NULL;
#include <mutex> // std::mutex, std::unique_lock
#include <condition_variable> // std::condition_variable
std::mutex mtx;
std::condition_variable cv;
bool ok_to_send_next_message = 1;
bool message_available()
{
return 0 != ok_to_send_next_message;
}
int main(int argc, char **argv)
{
struct addrinfo *addr;
struct rdma_cm_event *event = NULL;
struct rdma_cm_id *conn= NULL;
struct rdma_event_channel *ec = NULL;
if (argc != 3)
die("usage: client <server-address> <server-port>");
TEST_NZ(getaddrinfo(argv[1], argv[2], NULL, &addr));
TEST_Z(ec = rdma_create_event_channel());
TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP));
TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS));
freeaddrinfo(addr);
while (0 == rdma_get_cm_event(ec, &event))
//while (rdma_get_cm_event(ec, &event))
{
std::cout << "rdma_get_cm_event\n";
struct rdma_cm_event event_copy;
memcpy(&event_copy, event, sizeof(*event));
rdma_ack_cm_event(event);
if (on_event(&event_copy))
break;
}
rdma_destroy_event_channel(ec);
return 0;
}
void die(const char *reason)
{
fprintf(stderr, "%s\n", reason);
exit(EXIT_FAILURE);
}
void build_context(struct ibv_context *verbs)
{
if (s_ctx)
{
if (s_ctx->ctx != verbs)
die("cannot handle events in more than one context.");
return;
}
s_ctx = (struct context *)malloc(sizeof(struct context));
s_ctx->ctx = verbs;
TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx));
TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx));
TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 100, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */
TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0));
TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL));
}
void *SendMessages(void *context)
{
static int loopcount = 0;
while(1)
{
std::unique_lock<std::mutex> lck(mtx);
cv.wait(lck, message_available);
//std::this_thread::sleep_for(std::chrono::microseconds(50));
ok_to_send_next_message = 0;
struct connection *conn = (struct connection *)context;
struct ibv_send_wr wr, *bad_wr = NULL;
struct ibv_sge sge;
std::cout << "looping send..." << loopcount << '\n' << std::flush;
memset(&wr, 0, sizeof(wr));
wr.wr_id = (uintptr_t)conn;
wr.opcode = IBV_WR_SEND;
wr.sg_list = &sge;
wr.num_sge = 1;
wr.send_flags = IBV_SEND_SIGNALED;
sge.addr = (uintptr_t)conn->send_region;
sge.length = BUFFER_SIZE;
sge.lkey = conn->send_mr->lkey;
snprintf(conn->send_region, BUFFER_SIZE, "message from active/client side with count %d", loopcount++);
TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
}
}
void build_qp_attr(struct ibv_qp_init_attr *qp_attr)
{
std::cout << "build_qp_attr\n";
memset(qp_attr, 0, sizeof(*qp_attr));
qp_attr->send_cq = s_ctx->cq;
qp_attr->recv_cq = s_ctx->cq;
qp_attr->qp_type = IBV_QPT_RC;
qp_attr->cap.max_send_wr = 100;
qp_attr->cap.max_recv_wr = 100;
qp_attr->cap.max_send_sge = 1;
qp_attr->cap.max_recv_sge = 1;
}
void * poll_cq(void *ctx)
{
struct ibv_cq *cq;
struct ibv_wc wc;
while (1)
{
TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx));
ibv_ack_cq_events(cq, 1);
TEST_NZ(ibv_req_notify_cq(cq, 0));
int ne;
struct ibv_wc wc;
do
{
std::cout << "polling\n";
ne = ibv_poll_cq(cq, 1, &wc);
}
while(ne == 0);
on_completion(&wc);
//if (wc.opcode == IBV_WC_SEND)
if (wc.status == IBV_WC_SUCCESS)
{
{
ok_to_send_next_message = 1;
//while (message_available()) std::this_thread::yield();
//std::cout << "past yield\n";
std::unique_lock<std::mutex> lck(mtx);
cv.notify_one();
}
}
}
return NULL;
}
void post_receives(struct connection *conn)
{
std::cout << "post_receives\n";
struct ibv_recv_wr wr, *bad_wr = NULL;
struct ibv_sge sge;
wr.wr_id = (uintptr_t)conn;
wr.next = NULL;
wr.sg_list = &sge;
wr.num_sge = 1;
sge.addr = (uintptr_t)conn->recv_region;
sge.length = BUFFER_SIZE;
sge.lkey = conn->recv_mr->lkey;
TEST_NZ(ibv_post_recv(conn->qp, &wr, &bad_wr));
}
void register_memory(struct connection *conn)
{
std::cout << "register_memory\n";
conn->send_region = (char *)malloc(BUFFER_SIZE);
conn->recv_region = (char *)malloc(BUFFER_SIZE);
TEST_Z(conn->send_mr = ibv_reg_mr(
s_ctx->pd,
conn->send_region,
BUFFER_SIZE,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
TEST_Z(conn->recv_mr = ibv_reg_mr(
s_ctx->pd,
conn->recv_region,
BUFFER_SIZE,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
}
int on_addr_resolved(struct rdma_cm_id *id)
{
std::cout << "on_addr_resolved\n";
struct ibv_qp_init_attr qp_attr;
struct connection *conn;
build_context(id->verbs);
build_qp_attr(&qp_attr);
TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));
id->context = conn = (struct connection *)malloc(sizeof(struct connection));
conn->id = id;
conn->qp = id->qp;
conn->num_completions = 0;
register_memory(conn);
post_receives(conn);
TEST_NZ(rdma_resolve_route(id, TIMEOUT_IN_MS));
return 0;
}
void on_completion(struct ibv_wc *wc)
{
std::cout << "on_completion\n";
struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id;
if (wc->status != IBV_WC_SUCCESS)
{
//die("\ton_completion: status is not IBV_WC_SUCCESS.");
printf("\ton_completion: status is not IBV_WC_SUCCESS.");
printf("\t it is %d ", wc->status);
}
printf("\n");
if (wc->opcode & IBV_WC_RECV)
printf("\treceived message: %s\n", conn->recv_region);
else if (wc->opcode == IBV_WC_SEND)
printf("\tsend completed successfully.\n");
else
die("\ton_completion: completion isn't a send or a receive.");
if (5 == ++conn->num_completions)
rdma_disconnect(conn->id);
}
int on_connection(void *context)
{
std::cout << "on_connection\n";
TEST_NZ(pthread_create(&msgThread, NULL, SendMessages, context));
return 0;
}
int on_disconnect(struct rdma_cm_id *id)
{
struct connection *conn = (struct connection *)id->context;
printf("disconnected.\n");
rdma_destroy_qp(id);
ibv_dereg_mr(conn->send_mr);
ibv_dereg_mr(conn->recv_mr);
free(conn->send_region);
free(conn->recv_region);
free(conn);
rdma_destroy_id(id);
return 1; /* exit event loop */
}
int on_route_resolved(struct rdma_cm_id *id)
{
struct rdma_conn_param cm_params;
printf("route resolved.\n");
memset(&cm_params, 0, sizeof(cm_params));
TEST_NZ(rdma_connect(id, &cm_params));
return 0;
}
int on_event(struct rdma_cm_event *event)
{
int r = 0;
if (event->event == RDMA_CM_EVENT_ADDR_RESOLVED)
r = on_addr_resolved(event->id);
else if (event->event == RDMA_CM_EVENT_ROUTE_RESOLVED)
r = on_route_resolved(event->id);
else if (event->event == RDMA_CM_EVENT_ESTABLISHED)
r = on_connection(event->id->context);
else if (event->event == RDMA_CM_EVENT_DISCONNECTED)
r = on_disconnect(event->id);
else
die("on_event: unknown event.");
return r;
}
Server:
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <rdma/rdma_cma.h>
#define TEST_NZ(x) do { if ( (x)) die("error: " #x " failed (returned non-zero)." ); } while (0)
#define TEST_Z(x) do { if (!(x)) die("error: " #x " failed (returned zero/null)."); } while (0)
const int BUFFER_SIZE = 2048;
struct context
{
struct ibv_context *ctx;
struct ibv_pd *pd;
struct ibv_cq *cq;
struct ibv_comp_channel *comp_channel;
pthread_t cq_poller_thread;
};
struct connection
{
struct ibv_qp *qp;
struct ibv_mr *recv_mr;
struct ibv_mr *send_mr;
char *recv_region;
char *send_region;
};
static void die(const char *reason);
static void build_context(struct ibv_context *verbs);
static void build_qp_attr(struct ibv_qp_init_attr *qp_attr);
static void * poll_cq(void *);
static void post_receives(struct connection *conn);
static void register_memory(struct connection *conn);
static void on_completion(struct ibv_wc *wc);
static int on_connect_request(struct rdma_cm_id *id);
static int on_connection(void *context);
static int on_disconnect(struct rdma_cm_id *id);
static int on_event(struct rdma_cm_event *event);
static struct context *s_ctx = NULL;
int main(int argc, char **argv)
{
struct sockaddr_in6 addr;
struct rdma_cm_event *event = NULL;
struct rdma_cm_id *listener = NULL;
struct rdma_event_channel *ec = NULL;
uint16_t port = 0;
memset(&addr, 0, sizeof(addr));
addr.sin6_family = AF_INET6;
TEST_Z(ec = rdma_create_event_channel());
TEST_NZ(rdma_create_id(ec, &listener, NULL, RDMA_PS_TCP));
TEST_NZ(rdma_bind_addr(listener, (struct sockaddr *)&addr));
TEST_NZ(rdma_listen(listener, 100)); /* backlog=10 is arbitrary */
//printf("[ %"PRIu32" ]\n", *addr.sin6_addr.s6_addr32);
port = ntohs(rdma_get_src_port(listener));
printf("listening on port %d.\n", port);
while (rdma_get_cm_event(ec, &event) == 0)
{
struct rdma_cm_event event_copy;
memcpy(&event_copy, event, sizeof(*event));
rdma_ack_cm_event(event);
if (on_event(&event_copy))
break;
}
rdma_destroy_id(listener);
rdma_destroy_event_channel(ec);
return 0;
}
void die(const char *reason)
{
fprintf(stderr, "%s\n", reason);
exit(EXIT_FAILURE);
}
void build_context(struct ibv_context *verbs)
{
if (s_ctx)
{
if (s_ctx->ctx != verbs)
die("cannot handle events in more than one context.");
return;
}
s_ctx = (struct context *)malloc(sizeof(struct context));
s_ctx->ctx = verbs;
TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx));
TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx));
TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 100, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */
TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0));
TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL));
}
void build_qp_attr(struct ibv_qp_init_attr *qp_attr)
{
memset(qp_attr, 0, sizeof(*qp_attr));
qp_attr->send_cq = s_ctx->cq;
qp_attr->recv_cq = s_ctx->cq;
qp_attr->qp_type = IBV_QPT_RC;
qp_attr->cap.max_send_wr = 100;
qp_attr->cap.max_recv_wr = 100;
qp_attr->cap.max_send_sge = 1;
qp_attr->cap.max_recv_sge = 1;
}
void * poll_cq(void *ctx)
{
struct ibv_cq *cq;
struct ibv_wc wc;
while (1)
{
TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx));
ibv_ack_cq_events(cq, 1);
TEST_NZ(ibv_req_notify_cq(cq, 0));
while (ibv_poll_cq(cq, 1, &wc))
{
std::cout << "polling\n";
on_completion(&wc);
}
}
return NULL;
}
void post_receives(struct connection *conn)
{
std::cout << "post_receives\n";
struct ibv_recv_wr wr, *bad_wr = NULL;
struct ibv_sge sge;
wr.wr_id = (uintptr_t)conn;
wr.next = NULL;
wr.sg_list = &sge;
wr.num_sge = 1;
sge.addr = (uintptr_t)conn->recv_region;
sge.length = BUFFER_SIZE;
sge.lkey = conn->recv_mr->lkey;
TEST_NZ(ibv_post_recv(conn->qp, &wr, &bad_wr));
}
void register_memory(struct connection *conn)
{
conn->send_region = (char *)malloc(BUFFER_SIZE);
conn->recv_region = (char *)malloc(BUFFER_SIZE);
TEST_Z(conn->send_mr = ibv_reg_mr(
s_ctx->pd,
conn->send_region,
BUFFER_SIZE,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
TEST_Z(conn->recv_mr = ibv_reg_mr(
s_ctx->pd,
conn->recv_region,
BUFFER_SIZE,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
}
void on_completion(struct ibv_wc *wc)
{
if (wc->status != IBV_WC_SUCCESS)
die("on_completion: status is not IBV_WC_SUCCESS.");
if (wc->opcode & IBV_WC_RECV)
{
struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id;
post_receives(conn);
printf("received message: %s\n", conn->recv_region);
}
else if (wc->opcode == IBV_WC_SEND)
{
printf("send completed successfully.\n");
}
}
int on_connect_request(struct rdma_cm_id *id)
{
struct ibv_qp_init_attr qp_attr;
struct rdma_conn_param cm_params;
struct connection *conn;
printf("received connection request.\n");
build_context(id->verbs);
build_qp_attr(&qp_attr);
TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr));
id->context = conn = (struct connection *)malloc(sizeof(struct connection));
conn->qp = id->qp;
register_memory(conn);
post_receives(conn);
memset(&cm_params, 0, sizeof(cm_params));
TEST_NZ(rdma_accept(id, &cm_params));
return 0;
}
int on_connection(void *context)
{
struct connection *conn = (struct connection *)context;
struct ibv_send_wr wr, *bad_wr = NULL;
struct ibv_sge sge;
snprintf(conn->send_region, BUFFER_SIZE, "message from passive/server side with pid %d", getpid());
printf("connected. posting send...\n");
memset(&wr, 0, sizeof(wr));
wr.opcode = IBV_WR_SEND;
wr.sg_list = &sge;
wr.num_sge = 1;
wr.send_flags = IBV_SEND_SIGNALED;
sge.addr = (uintptr_t)conn->send_region;
sge.length = BUFFER_SIZE;
sge.lkey = conn->send_mr->lkey;
TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
return 0;
}
int on_disconnect(struct rdma_cm_id *id)
{
struct connection *conn = (struct connection *)id->context;
printf("peer disconnected.\n");
rdma_destroy_qp(id);
ibv_dereg_mr(conn->send_mr);
ibv_dereg_mr(conn->recv_mr);
free(conn->send_region);
free(conn->recv_region);
free(conn);
rdma_destroy_id(id);
return 0;
}
int on_event(struct rdma_cm_event *event)
{
std::cout << "on_event\n";
int r = 0;
if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
r = on_connect_request(event->id);
else if (event->event == RDMA_CM_EVENT_ESTABLISHED)
r = on_connection(event->id->context);
else if (event->event == RDMA_CM_EVENT_DISCONNECTED)
r = on_disconnect(event->id);
else
die("on_event: unknown event.");
return r;
}
Here are a couple of runs. The number of messages sent is totally random:
[idf#node1 Release]$ ./TGKITCClient 192.168.0.1 47819
rdma_get_cm_event
on_addr_resolved
build_qp_attr
register_memory
post_receives
rdma_get_cm_event
route resolved.
rdma_get_cm_event
on_connection
looping send...0
polling
on_completion
received message: message from passive/server side with pid 4188
polling
on_completion
send completed successfully.
looping send...1
polling
on_completion
send completed successfully.
^C
[idf#node1 Release]$
And then
[idf#node1 Release]$ ./TGKITCClient 192.168.0.1 55148
rdma_get_cm_event
on_addr_resolved
build_qp_attr
register_memory
post_receives
rdma_get_cm_event
route resolved.
rdma_get_cm_event
on_connection
looping send...0
polling
on_completion
received message: message from passive/server side with pid 4279
polling
on_completion
send completed successfully.
looping send...1
polling
on_completion
send completed successfully.
looping send...2
polling
on_completion
send completed successfully.
looping send...3
polling
on_completion
send completed successfully.
looping send...4
polling
on_completion
send completed successfully.
looping send...5
polling
on_completion
send completed successfully.
looping send...6
polling
on_completion
send completed successfully.
looping send...7
polling
on_completion
send completed successfully.
looping send...8
rdma_get_cm_event
disconnected.
polling
on_completion
send completed successfully.
on_completion: status is not IBV_WC_SUCCESS. it is 5 [idf#node1 Release]$
Here is the server side:
on_event
peer disconnected.
on_event
received connection request.
post_receives
on_event
connected. posting send...
polling
send completed successfully.
polling
post_receives
received message: message from active/client side with count 0
polling
post_receives
received message: message from active/client side with count 1
polling
post_receives
received message: message from active/client side with count 2
polling
post_receives
received message: message from active/client side with count 3
polling
post_receives
received message: message from active/client side with count 4
polling
post_receives
received message: message from active/client side with count 5
polling
post_receives
received message: message from active/client side with count 6
polling
post_receives
received message: message from active/client side with count 7
on_event
peer disconnected.
Make sure that the most recent drivers and firmware are installed on the cards. Beyond that, relying on the RDMA packages included with most OS distributions when trying to run IB is a dangerous game to play.
It is strongly recommended that for applications like these the OpenFabrics Enterprise Distribution be used to provide openib, opensm and a variety of other useful InfiniBand-related packages for analysis, diagnostics and tuning of the network. The official OFED packages can be found on the OpenFabrics website.
Based on the question it looks like IPoIB is being used, but the specific configuration is not mentioned. IPoIB is not necessarily the best way to take advantage of the hardware resources available in the IB cards.
In addition to those considerations, make sure that the subnet manager is set up and configured correctly. Some switches have built-in subnet managers that can be accessed and configured through a management interface; in other cases it might make more sense to run and configure the subnet manager on one of the nodes that you are using. OpenSM is a common subnet manager included with OFED distributions, and there are many online guides for setting up and configuring a subnet manager based on the type of network being set up.
OFED also includes a variety of IB testing and profiling tools. ibdiagnet is a useful tool for debugging IB network issues, and there are many guides available online that show different ways to use it as well as the other tools included in OFED.
Depending on the type of IB switch used, there may additionally be network management and diagnostic tools that allow further analysis of the network. The configuration of the IB hardware and the low-level software that manages it is sometimes more critical to overall performance than the actual code being run. With that said, recompiling and linking against the relevant libraries from the correct version of OFED may be advisable if significant software or hardware configuration changes are made.

ioctl using SCSI pass-through

On Windows I can easily communicate with my USB device using the following simplified code:
DWORD dwJunk; // discard results from DeviceIOControl()
int iReply;
char cBuffer[100];
// cBuffer is initialized here.
HANDLE hDevice; // handle to the drive to be examined
CString sDrive = _T("\\\\.\\H:"); // drive H: for this test
hDevice = CreateFile(sDrive, // drive to open
GENERIC_READ | GENERIC_WRITE, // read and write access to the drive
FILE_SHARE_READ | FILE_SHARE_WRITE, // share mode
NULL, // default security attributes
OPEN_EXISTING, // disposition
0, // file attributes
NULL); // do not copy file attributes
iReply = DeviceIoControl(hDevice, IOCTL_SCSI_PASS_THROUGH_DIRECT, &cBuffer, sizeof(cBuffer), &cBuffer, sizeof(cBuffer), &dwJunk, (LPOVERLAPPED)NULL);
I'm trying to do the same on Linux but have not been able to figure out the ioctl() parameters, or rather the structure to pass. A code snippet would be very much appreciated. Thanks.
Unfortunately the code I modified using your link didn't return any results. Here's the stripped-down code I used; ioctl() returned without errors:
#define DEF_TIMEOUT 5000 // 5 seconds
char cDiskName[] = "/dev/sg3";
int fd = open(cDiskName, O_RDWR);
if (fd < 0)
{
printf("Open error: %s, errno=%d (%s)\n", cDiskName, errno, strerror(errno));
return 1;
}
unsigned char turCmbBlk[] = {0x00, 0, 0, 0, 0, 0};
struct sg_io_hdr io_hdr;
unsigned char cIOBuffer[100];
// buffer initialization code omitted
memset(&io_hdr, 0, sizeof(struct sg_io_hdr));
io_hdr.interface_id = 'S';
io_hdr.cmd_len = sizeof(turCmbBlk);
io_hdr.mx_sb_len = sizeof(cIOBuffer);
io_hdr.dxfer_direction = SG_DXFER_NONE;
io_hdr.cmdp = turCmbBlk;
io_hdr.sbp = cIOBuffer;
io_hdr.timeout = DEF_TIMEOUT;
if (ioctl(fd, SG_IO, &io_hdr) < 0)
{
printf("ioctl error: errno=%d (%s)\n", errno, strerror(errno));
}
// Code returned here without any errors but cIOBuffer remains unchanged.
Maybe the call needs a different request code?
Here's some more documentation:
Notes on Linux's SG driver version 2.1.36
SCSI-Programming, page 8 (handle_SCSI_cmd function), page 9, page 11 (example) and some more
Generic SCSI Target Subsystem for Linux
See here:
#include <sys/ioctl.h>
int ioctl(int d, int request, ...);
Parameters:
File descriptor (must be open!)
Request code number (depends on device)
Untyped pointer to memory (going to / coming from driver)
Example
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/usbdevice_fs.h>
#include <unistd.h> // for close()
int main(int argc, char **argv)
{
const char *filename;
int fd;
filename = argv[1];
fd = open(filename, O_WRONLY);
ioctl(fd, USBDEVFS_RESET, 0);
close(fd);
return 0;
}
Documentation:
ioctl(2) - Linux man page
IOCTL(2)
Generic I/O Control operations (GNU libc)
The ioctl() Requests
usb.c (Example that might help you)
Linux / Unix Command: ioctl
How to Reset USB Device in Linux (Example)
An example Program with IOCTL
Edit
#define BUFF_SIZE 100 // - Buffersize
#define DEF_TIMEOUT 5000 // 5 seconds
char cDiskName[] = "/dev/sg3";
int fd = open(cDiskName, O_RDWR);
if (fd < 0)
{
printf("Open error: %s, errno=%d (%s)\n", cDiskName, errno, strerror(errno));
return 1;
}
unsigned char turCmbBlk[] = {0x00, 0, 0, 0, 0, 0};
struct sg_io_hdr *p = (struct sg_io_hdr *) malloc(sizeof(struct sg_io_hdr)); // - dynamic memory allocation - free() required somewhere
unsigned char cIOBuffer[BUFF_SIZE];
unsigned char replyBuffer[BUFF_SIZE]; // - dxfer buffer
// buffer initialization code omitted
memset(p, 0, sizeof(struct sg_io_hdr));
p->interface_id = 'S';
p->cmd_len = sizeof(turCmbBlk);
p->mx_sb_len = BUFF_SIZE;
p->dxfer_direction = SG_DXFER_NONE;
p->cmdp = turCmbBlk;
p->sbp = cIOBuffer;
p->timeout = DEF_TIMEOUT;
p->flags = SG_FLAG_DIRECT_IO; // - Does this help?
p->dxferp = replyBuffer; // - Set dxferp buffer - (A)
p->dxfer_len = BUFF_SIZE; // - buffersize
if (ioctl(fd, SG_IO, p) < 0)
{
printf("ioctl error: errno=%d (%s)\n", errno, strerror(errno));
}
// Code returned here without any errors but cIOBuffer remains unchanged.
Note (A): please try setting the input/output buffer you work with here.
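One possible reason cIOBuffer stays unchanged (an observation, not from the original answer): sbp points at the sense buffer, which the driver only fills when a command fails, and TEST UNIT READY (opcode 0x00) has no data-in phase, so with SG_DXFER_NONE there is nothing to copy back on success. A command that does return data, such as a standard INQUIRY, should show activity in the data buffer. A rough, untested sketch:
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int inquiry_example(const char *devname)                 /* e.g. "/dev/sg3" */
{
    unsigned char inqCmdBlk[6] = {0x12, 0, 0, 0, 96, 0}; /* INQUIRY, 96 bytes */
    unsigned char inqBuffer[96];
    unsigned char senseBuffer[32];
    struct sg_io_hdr io_hdr;

    int fd = open(devname, O_RDWR);
    if (fd < 0)
        return -1;

    memset(&io_hdr, 0, sizeof(io_hdr));
    io_hdr.interface_id    = 'S';
    io_hdr.cmd_len         = sizeof(inqCmdBlk);
    io_hdr.cmdp            = inqCmdBlk;
    io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;          /* data flows from the device */
    io_hdr.dxferp          = inqBuffer;
    io_hdr.dxfer_len       = sizeof(inqBuffer);
    io_hdr.sbp             = senseBuffer;                /* filled only on failure */
    io_hdr.mx_sb_len       = sizeof(senseBuffer);
    io_hdr.timeout         = 5000;                       /* milliseconds */

    if (ioctl(fd, SG_IO, &io_hdr) < 0)
        perror("SG_IO");
    else
        printf("status=%d, vendor/product: %.24s\n",
               io_hdr.status, (const char *)(inqBuffer + 8));

    close(fd);
    return 0;
}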
Documentation:
SCSI Generic HOWTO, SG_IO_HDR_T
Tour the Linux generic SCSI driver

Socket class Error

ListenSocket.h
// class does not contain WSAStartup() and WSACleanup()
#ifndef LISTENTHREAD_H
#define LISTENTHREAD_H
#include "includes.h"
#include "LOGMSGs.h"
// 1. CListenSocket: class used to create the listener thread's local variable.
// This class can be reused: after Close() has closed it, calling Open() again opens the new listening port. But the system does not use this feature.
class CListenSocket
{
public:
// main method:
// Each object binds to only one port.
CListenSocket(u_short nPort, int nSndSize = 0);
// to release SOCKET
~CListenSocket(){};
// Create the server listening SOCKET; see the code for the specific options. Returns false on failure.
bool Open(); // call can be repeated
// error return INVALID_SOCKET
SOCKET Accept(u_long & nClientIP);
// Can be called repeatedly. Usually not needed, but can be used to close the SOCKET proactively.
// After Close(), call Open() again to reuse the object.
void Close(); // call can be repeated
bool IsOpen() { return m_bState; }
bool Rebuild();
public:
SOCKET Socket() { return m_sockListen; }
protected:
// main member variables:
const u_short m_nPort;
const int m_nSndBuf;
SOCKET m_sockListen;
// Flag indicating whether the network state is normal.
// When the value is false the object is not available: Open() may not have been called, or there may have been a network error.
bool m_bState;
time_t m_tCloseTime; // time the SOCKET was last closed, used to delay the automatic rebuild of the SOCKET
};
#endif // LISTENTHREAD_H
ListenSocket.cpp
#include "ListenSocket.h"
long s_nSocketCount = 0;
int REBUILDLISTENDELAYSEC;
CListenSocket::CListenSocket(u_short nPort, int nSndBuf /*= 0*/) // 0: Default
: m_nPort(nPort), m_nSndBuf(nSndBuf)
{
m_sockListen = INVALID_SOCKET;
m_bState = false;
// m_nPort = nPort;
m_tCloseTime = 0;
}
// Returns INVALID_SOCKET on error
SOCKET CListenSocket::Accept(u_long & nClientIP)
{
/*
// Reconstruction SOCKET
if(!m_bState)
{
if(clock() < m_tCloseTime + REBUILDLISTENDELAYSEC*CLOCKS_PER_SEC)
return INVALID_SOCKET;
else
{
LOGMSG("Anti-crash system start listening SOCKET [%d] re under construction...", m_nPort);
if(Open())
{
LOGMSG("... listen SOCKET reconstruction success.");
PrintText("Listen SOCKET [%d] failed to rebuild SOCKET success. Server continues to run in the ...", m_nPort);
}
else
{
Error("... listen SOCKET reconstruction has failed. Server will not accept new connections");
PrintText("Listen SOCKET [%d] error, [%d] seconds after the re-SOCKET. Server continues to run in the ...", m_nPort, REBUILDLISTENDELAYSEC); // nDelaySec);
}
m_tCloseTime = clock();
}
}
//*/
if(!m_bState)
{
Error("ACCEPT inner exception a1");
return INVALID_SOCKET;
}
// ACCEPT
struct sockaddr_in addr;
memset(&addr, 0, sizeof(addr));
int len = sizeof(addr);
SOCKET newsock = accept(m_sockListen, (sockaddr*)&addr, (int*)&len); // the peer's address is filled in here and can be used
#ifdef PROFILE_X
// Analysis Accept speed (cycle speed)
const int nTimes2 = ACCEPTPROFILESEC; // measure the ACCEPT loop rate once every 30 seconds
static clock_t tNextTime2 = clock() + nTimes2 * CLOCKS_PER_SEC; //? Only one monitor thread, no sharing violation
static long nCount2 = 0; //? Only one monitor thread, no sharing violation
if(clock() >= tNextTime2)
{
PrintText("Each [%d] seconds to execute a [%d] times Accept ()", nTimes2, InterlockedExchange(&nCount2, 0));
tNextTime2 = clock() + nTimes2 * CLOCKS_PER_SEC;
}
else
{
InterlockedIncrement(&nCount2);
}
#endif // PROFILE
if(newsock == INVALID_SOCKET)
{
// Network Error
int err = WSAGetLastError();
if(err != WSAEWOULDBLOCK)
{
PrintText("Listen SOCKET %d failed, %s seconds after the re-SOCKET.", m_nPort, REBUILDLISTENDELAYSEC);
Error("Listen SOCKET [%d] failed [%d], [%s] seconds after the re-SOCKET.", m_nPort, err, REBUILDLISTENDELAYSEC);
Close();
}
else
Error("ACCEPT inner exception a2");
return INVALID_SOCKET;
}
else
{
nClientIP = addr.sin_addr.S_un.S_addr;
InterlockedIncrement(&s_nSocketCount);
}
// Check whether the SOCKET closed
fd_set readmask;
FD_ZERO(&readmask);
FD_SET(newsock, &readmask);
struct timeval timeout = {0, 0};
/*
char nTemp;
if(select(FD_SETSIZE, &readmask, (fd_set *) 0, (fd_set *) 0, &timeout)
&& recv(newsock, &nTemp, 1, MSG_PEEK) == 0)
{
#ifdef ALPHA_X
LOGMSG("ACCEPT a new SOCKET is invalid .");
#endif
closesocket(newsock);
InterlockedDecrement(&s_nSocketCount);
return INVALID_SOCKET;
}
//else*/
//*
fd_set exceptmask;
FD_ZERO(&exceptmask);
FD_SET(newsock, &exceptmask);
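// Note (added): on Windows the first argument to select() is ignored, but on
// POSIX it must be the highest descriptor + 1, and FD_SET itself is only safe
// while newsock < FD_SETSIZE -- the same limit discussed in the first question above.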
int ret = select(FD_SETSIZE, &readmask, (fd_set *) 0, (fd_set *) &exceptmask, &timeout);
if(ret < 0)
{
Error("ACCEPT a new SOCKET is invalid . can't read"); // Not trigger
closesocket(newsock);
InterlockedDecrement(&s_nSocketCount);
return INVALID_SOCKET;
}
else if(ret > 0)
{
if(FD_ISSET(newsock, &exceptmask))
{
LOGMSG("ACCEPT a new SOCKET is invalid.except"); // Not trigger
closesocket(newsock);
InterlockedDecrement(&s_nSocketCount);
return INVALID_SOCKET;
}
else if(FD_ISSET(newsock, &readmask))
{
char nTemp;
if(recv(newsock, &nTemp, 1, MSG_PEEK) == 0)
{
#ifdef ALPHA_X
LOGMSG("ACCEPT a new SOCKET is invalid. recv==0"); // Not trigger
#endif
closesocket(newsock);
InterlockedDecrement(&s_nSocketCount);
return INVALID_SOCKET;
}
}
}
//*/
#ifdef PROFILE_X
// analysis Accept speed (received valid SOCKET)
const int nTimes = ACCEPTPROFILESEC; // measure the successful-ACCEPT rate once every 10 seconds
static clock_t tNextTime = clock() + nTimes * CLOCKS_PER_SEC; //? Only one monitor thread, no sharing violation
static long nCount = 0; //? Only one monitor thread, no sharing violation
if(clock() >= tNextTime)
{
LOGPROFILE("Port [%d] for every [%d] seconds, the successful implementation of the [%d] times Accept()",
m_nPort, nTimes, InterlockedExchange(&nCount, 0));
tNextTime = clock() + nTimes * CLOCKS_PER_SEC;
}
else
{
InterlockedIncrement(&nCount);
}
#endif // PROFILE
return newsock;
}
Main.cpp
#include "includes.h"
#include "IniFile.h"
#include "LOGMSGs.h"
#include "ListenSocket.h"
CListenSocket Sock(9985);
int main()
{
Sock.Open();
if(!Sock.Open())
{
Sock.Rebuild();
}
if(Sock.IsOpen())
PrintText("okey");
Sock.Socket();
u_long ip;
Sock.Accept(ip);
}
But I always get this error: "ACCEPT inner exception a2", while it should work. Any clue why?
CListenSocket Sock(9985);
int main()
{
Sock.Open();
if(!Sock.Open())
/* I think you meant 'IsOpen()' */
{
Sock.Rebuild();
}
if(Sock.IsOpen())
PrintText("okey");
Sock.Socket();
u_long ip;
Sock.Accept(ip);
}
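A minimal sketch of what main() probably intended, assuming the 'IsOpen()' suggestion above is right (Open(), IsOpen(), Rebuild() and Accept() are the question's own methods; the WSAStartup() remark comes from the header comment saying the class does not call it):
CListenSocket Sock(9985);

int main()
{
    // WSAStartup() must be called somewhere before any socket call,
    // since CListenSocket deliberately does not do it.
    if (!Sock.Open())            // open once and check the result
        Sock.Rebuild();

    if (Sock.IsOpen())
        PrintText("okey");

    u_long ip;
    SOCKET s = Sock.Accept(ip);  // if Open() made the listen socket
                                 // non-blocking, a single Accept() with no
                                 // client queued fails with WSAEWOULDBLOCK,
                                 // which is exactly the "a2" message
    return 0;
}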
Incidentally, this code sure reads funny. It feels like a generic toolkit programmed without a specific goal in mind. Maybe I'm missing it, but I have to think you'd have better results if you just wrote the network code you actually needed, and then abstracted out the common bits into some helper routines later. There's no point in trying too hard to make the be-all and end-all network helper library, but there is a huge point in making tools that collapse common cases.
Feel free to ignore that last paragraph if you know what you're doing :) but if you're just starting out, I'd suggest writing a few smaller clients and servers first, and then trying to write your abstraction layer.