Related
Trying to write a server for DTLS that will currently just output the text that it receives. The working client is taken from https://github.com/stepheny/openssl-dtls-custom-bio and it sends and receives to its own server just fine.
However, when it sends to this server something strange is happening. Firstly the connection happens only sometimes, there seems to be no way to determine if the connection will start or not. Secondly, and that is even stranger the data is "delayed". One needs to send 6 messages for 1 message to arrive.
So this is the situation:
Start the server.
Start the client.
Hope for connection.
If connected type 5 messages in client to send to server, they are sent, but the server keeps having an error decoding them.
Once you send the 6th message you can note that the 1st message arrives on server.
Once you send the 7th, you will get the 2nd. Etc.
It should be noted that we are not talking about a time delay, there is no way to simply read 5 empty messages at the start of the server, the queue is empty. Only once the 6th message is sent is the queue populated with the 1st real message.
Code:
//server.cpp
#include "DTLSConnection.hpp"
#include <iostream>
#include <chrono>
#include <thread>
int main(int argc, char *argv[])
{
try
{
DTLSConnection con("192.168.31.177:1235");
std::cout << "Connection created" << std::endl;
ssize_t ret;
for(;;)
{
ret = con.recv([](Client* c) {
try{
std::cout << c->SSL_read_alt() << std::endl;
std::cout << "I am in onmessage" << std::endl;
}
catch(std::string &e)
{
std::cerr << "EXCEPTION: " << e << std::endl;
}
});
std::cout << "Returned value is " << ret << std::endl;
std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
catch(std::string &e)
{
std::cerr << "EXCEPTION: " << e << std::endl;
}
return 0;
}
// CustomBIO.hpp
#include <memory>
#include <deque>
#include <vector>
#include <unordered_map>
#include <cstdio> // temporary
#include <cstring>
#include <cassert>
#include <openssl/ssl.h>
#include <signal.h>
const char *sdump_addr(const struct sockaddr *sa)
{
static char buf[1024];
switch (sa->sa_family)
{
case AF_INET:
memmove(buf, "INET: ", 6);
inet_ntop(AF_INET, &((struct sockaddr_in *)sa)->sin_addr, buf+6, sizeof(buf)-6);
sprintf(buf+strlen(buf), ":%d", ntohs(((struct sockaddr_in *)sa)->sin_port));
break;
case AF_INET6:
memmove(buf, "INET6: [", 8);
inet_ntop(AF_INET6, &((struct sockaddr_in6 *)sa)->sin6_addr, buf+8, sizeof(buf)-8);
sprintf(buf+strlen(buf), "]:%d", ntohs(((struct sockaddr_in6 *)sa)->sin6_port));
break;
default:
memmove(buf, "unknown", 8);
break;
}
return buf;
}
struct Packet
{
size_t capacity = 0;
size_t len = 0;
char * data = nullptr;
Packet() = default;
Packet(size_t cap)
{
init(cap);
}
Packet(char *b, size_t cap)
{
capacity=cap;
len=capacity;
data = new char[capacity];
memcpy(data, b, capacity);
}
Packet(char* b, char* e)
{
capacity=e-b;
len=capacity;
data = new char[capacity];
memcpy(data, b, capacity);
}
char* begin()
{
return data;
}
char* end()
{
return data ? data+len : nullptr;
}
void swap(Packet& that)
{
std::swap(this->capacity, that.capacity);
std::swap(this->len, that.len);
std::swap(this->data, that.data);
}
void init(size_t cap)
{
data = new char[cap];
len = 0;
capacity = cap;
}
void free()
{
if(!data) return;
delete data;
len = 0;
capacity = 0;
data = nullptr;
}
};
// used for both reading and writing
struct CustomBIO
{
using dataBuffer = Packet;
int sockfd;
sockaddr_storage thisAddr{};
socklen_t thisAddr_len{sizeof(sockaddr_storage)};
sockaddr_storage thatAddr{};
socklen_t thatAddr_len{sizeof(sockaddr_storage)};
template<typename T>
T* getThat()
{
return reinterpret_cast<T*>(&thatAddr);
}
std::deque<dataBuffer> receivingQueue{};
bool peekmode{false};
};
inline CustomBIO* BIO_get_CBIO(BIO* b)
{
return reinterpret_cast<CustomBIO *>(BIO_get_data(b));
}
extern "C"
{
int BIO_s_custom_write_ex(BIO *b, const char *data, size_t dlen, size_t *written);
int BIO_s_custom_write(BIO *b, const char *data, int dlen);
int BIO_s_custom_read_ex(BIO *b, char *data, size_t dlen, size_t *readbytes);
int BIO_s_custom_read(BIO *b, char *data, int dlen);
int BIO_s_custom_gets(BIO *b, char *data, int size);
int BIO_s_custom_puts(BIO *b, const char *data);
long BIO_s_custom_ctrl(BIO *b, int cmd, long larg, void *pargs);
int BIO_s_custom_create(BIO *b);
int BIO_s_custom_destroy(BIO *b);
// long BIO_s_custom_callback_ctrl(BIO *, int, BIO_info_cb *);
BIO_METHOD *BIO_s_custom();
void BIO_s_custom_meth_free();
int BIO_s_custom_write_ex(BIO *b, const char *data, size_t dlen, size_t *written)
{
fprintf(stderr, "BIO_s_custom_write_ex(BIO[0x%016lX], data[0x%016lX], dlen[%ld], *written[%ld])\n", (long unsigned int)b, (long unsigned int)data, dlen, *written);
fflush(stderr);
return -1;
}
int BIO_s_custom_write(BIO *b, const char *data, int dlen)
{
int ret;
CustomBIO *cbio;
ret = -1;
fprintf(stderr, "BIO_s_custom_write(BIO[0x%016lX], data[0x%016lX], dlen[%ld])\n", (unsigned long)b, (unsigned long)data, (long)dlen);
fflush(stderr);
cbio = BIO_get_CBIO(b);
// dump_addr((struct sockaddr *)&cbio->txaddr, ">> ");
// dump_hex((unsigned const char *)data, dlen, " ");
ret = sendto(cbio->sockfd, data, dlen, 0, cbio->getThat<const sockaddr>(), cbio->thatAddr_len);
if (ret >= 0)
{
fprintf(stderr, " %d bytes sent\n", ret);
}
else
{
fprintf(stderr, " ret: %d errno: [%d] %s\n", ret, errno, strerror(errno));
fprintf(stderr, " socket: %d\n", cbio->sockfd);
fprintf(stderr, " thatAddrLen: %d\n", cbio->thatAddr_len);
fprintf(stderr, " thatAddr: %s\n", sdump_addr(cbio->getThat<sockaddr>()));
}
return ret;
}
int BIO_s_custom_read_ex(BIO *b, char *data, size_t dlen, size_t *readbytes)
{
fprintf(stderr, "BIO_s_custom_read_ex(BIO[0x%016lX], data[0x%016lX], dlen[%ld], *readbytes[%ld])\n", (long unsigned int)b, (long unsigned int)data, (long int)dlen, *readbytes);
fflush(stderr);
return -1;
}
int BIO_s_custom_read(BIO *b, char *data, int dlen)
{
int ret;
CustomBIO *cbio;
ret = -1;
fprintf(stderr, "BIO_s_custom_read(BIO[0x%016lX], data[0x%016lX], dlen[%ld])\n", (long unsigned int)b, (long unsigned int)data, (long int)dlen);
fprintf(stderr, " probe peekmode %d\n", ((CustomBIO *)BIO_get_data(b))->peekmode);
fflush(stderr);
cbio = BIO_get_CBIO(b);
if(!cbio->receivingQueue.empty())
{
if(cbio->receivingQueue.front().len > (size_t)dlen)
{
fprintf(stderr, "if(cbio->receivingQueue.front().len > (size_t)dlen)");
memmove(data, cbio->receivingQueue.front().data, dlen);
ret = dlen;
if(!cbio->peekmode)
{
CustomBIO::dataBuffer rest{cbio->receivingQueue.front().begin()+ret, cbio->receivingQueue.front().end()};
cbio->receivingQueue.front().swap(rest);
}
}
else
{
Packet &pac = cbio->receivingQueue.front();
ret = pac.len;
memmove(data, pac.data, ret);
if(!cbio->peekmode)
{
pac.free();
cbio->receivingQueue.pop_front();
}
}
fprintf(stderr, " %d bytes read from queue\n", ret);
fflush(stderr);
}
else
{
fprintf(stderr, " The queue is empty\n");
/*ret = recvfrom(cbio->sockfd, data, dlen, 0, cbio->getThat<sockaddr>(), &cbio->thatAddr_len); // not right
if(ret>0 && cbio->peekmode)
{
// todo
}*/
}
return ret;
}
int BIO_s_custom_gets(BIO *b, char *data, int size)
{
fprintf(stderr, "BIO_s_custom_gets(BIO[0x%016lX], data[0x%016lX], size[%d]\n", (long unsigned int)b, (long unsigned int)data, size);
if(size <= 1)
{
return 0;
}
else
{
size = BIO_s_custom_read(b, data, size-1);
data[size] = '\0';
return size;
}
}
int BIO_s_custom_puts(BIO *b, const char *buf)
{
fprintf(stderr, "BIO_s_custom_puts(BIO[0x%016lX], buf[0x%016lX]\n", (long unsigned int)b, (long unsigned int)buf);
size_t size = std::strlen(buf);
return size > 0 ? BIO_s_custom_write(b, buf, size) : 0;
}
long BIO_s_custom_ctrl(BIO *b, int cmd, long larg, void *pargs)
{
long ret = 0;
fprintf(stderr, "BIO_s_custom_ctrl(BIO[0x%016lX], cmd[%d], larg[%ld], pargs[0x%016lX])\n", (long unsigned int)b, cmd, larg, (long unsigned int)pargs);
if(pargs)
{
for(int i=0; ; ++i)
{
fprintf(stderr, "[%d]=%X ", i, (int)((unsigned char*)pargs)[i]);
if(((unsigned char*)pargs)[i] == 0) break;
}
}
fprintf(stderr, "\n");
fflush(stderr);
switch(cmd)
{
case BIO_CTRL_FLUSH: // 11
case BIO_CTRL_DGRAM_SET_CONNECTED: // 32
case BIO_CTRL_DGRAM_SET_PEER: // 44
case BIO_CTRL_DGRAM_GET_PEER: // 46
ret = 1;
break;
case BIO_CTRL_WPENDING: // 13
ret = 0;
break;
case BIO_CTRL_DGRAM_QUERY_MTU: // 40
case BIO_CTRL_DGRAM_GET_FALLBACK_MTU: // 47
ret = 1500;
// ret = 9000; // jumbo?
break;
case BIO_CTRL_DGRAM_GET_MTU_OVERHEAD: // 49
ret = 96; // random guess
break;
case BIO_CTRL_DGRAM_SET_PEEK_MODE: // 71
BIO_get_CBIO(b)->peekmode = (larg != 0);
ret = 1;
break;
case BIO_CTRL_PUSH: // 6
case BIO_CTRL_POP: // 7
case BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT: // 45
ret = 0;
break;
default:
fprintf(stderr, "BIO_s_custom_ctrl(BIO[0x%016lX], cmd[%d], larg[%ld], pargs[0x%016lX])\n", (long unsigned int)b, cmd, larg, (long unsigned int)pargs);
fprintf(stderr, " unknown cmd: %d\n", cmd);
fflush(stderr);
ret = 0;
break;
}
return ret;
}
int BIO_s_custom_create(BIO *b)
{
fprintf(stderr, "BIO_s_custom_create(BIO[0x%016lX])\n", (long unsigned int)b);
fflush(stderr);
return 1;
}
int BIO_s_custom_destroy(BIO *b)
{
fprintf(stderr, "BIO_s_custom_destroy(BIO[0x%016lX])\n", (long unsigned int)b);
fflush(stderr);
return 1;
}
BIO_METHOD *_BIO_s_custom = nullptr;
BIO_METHOD *BIO_s_custom()
{
if (!_BIO_s_custom)
{
_BIO_s_custom = BIO_meth_new(BIO_get_new_index()|BIO_TYPE_SOURCE_SINK, "BIO_s_custom");
//BIO_meth_set_callback_ctrl(_BIO_s_custom, BIO_s_custom_callback_ctrl);
BIO_meth_set_create(_BIO_s_custom, BIO_s_custom_create);
BIO_meth_set_ctrl(_BIO_s_custom, BIO_s_custom_ctrl);
BIO_meth_set_destroy(_BIO_s_custom, BIO_s_custom_destroy);
BIO_meth_set_gets(_BIO_s_custom, BIO_s_custom_gets);
BIO_meth_set_puts(_BIO_s_custom, BIO_s_custom_puts);
BIO_meth_set_read_ex(_BIO_s_custom, BIO_s_custom_read_ex);
BIO_meth_set_read(_BIO_s_custom, BIO_s_custom_read);
BIO_meth_set_write_ex(_BIO_s_custom, BIO_s_custom_write_ex);
BIO_meth_set_write(_BIO_s_custom, BIO_s_custom_write);
}
return _BIO_s_custom;
}
void BIO_s_custom_meth_free()
{
if (_BIO_s_custom)
BIO_meth_free(_BIO_s_custom);
_BIO_s_custom = NULL;
}
}
// DTLSConnection.hpp
#include <string>
#include <list>
#include <functional>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#include <sys/epoll.h>
#include <cerrno>
#include <iostream> // temp
#include "CustomBIO.hpp"
constexpr int TIME_OUT = 10000; // ms
char cookie_str[] = "BISCUIT!"; // how to change this
//нужен способ, чтобы клиент был извествен (knownclients) но еще не подключен (не прошел ssl_accept())
// (void) BIO_dgram_get_peer(SSL_get_rbio(ssl), &peer);
// see https://github.com/nplab/DTLS-Examples/blob/master/src/dtls_udp_echo.c
int generate_cookie([[maybe_unused]] SSL *ssl, unsigned char *cookie, unsigned int *cookie_len)
{
memmove(cookie, cookie_str, sizeof(cookie_str)-1);
*cookie_len = sizeof(cookie_str)-1;
return 1;
}
int verify_cookie([[maybe_unused]] SSL *ssl, const unsigned char *cookie, unsigned int cookie_len)
{
return sizeof(cookie_str)-1==cookie_len && memcmp(cookie, cookie_str, sizeof(cookie_str)-1)==0;
}
void throw_SSL_error_if_error(SSL* ssl, int ret, std::string str)
{
if(ret>0) return; // SSL_ERROR_NONE
str += " ret="+std::to_string(ret)+' ';
auto sslError = SSL_get_error(ssl, ret);
if(sslError == SSL_ERROR_SYSCALL){
throw std::string{str+"SSL_ERROR_SYSCALL + error "}+std::to_string(errno);
}
}
namespace std
{
template<> struct hash<sockaddr>
{
size_t operator()(sockaddr const& val) const noexcept
{
size_t res = 0;
for(unsigned long h : val.sa_data)
{
res = (res << 1) ^ h;
}
return res;
}
};
}
bool operator==(const sockaddr& l, const sockaddr& r)
{
if(l.sa_family != r.sa_family) return false;
for(int i=0; i<14; ++i)
{
if(l.sa_data[i] != r.sa_data[i]) return false;
}
return true;
}
class DTLSConnection;
class SSLSetterUpper
{
friend DTLSConnection;
SSL_CTX *ctx;
SSLSetterUpper()
{
int ret; // because it is C;
SSL_load_error_strings();
SSL_library_init();
const SSL_METHOD *mtd = DTLS_server_method();
ctx = SSL_CTX_new(mtd);
SSL_CTX_set_min_proto_version(ctx, DTLS1_2_VERSION);
SSL_CTX_use_certificate_chain_file(ctx, "server-cert.pem");
SSL_CTX_use_PrivateKey_file(ctx, "server-key.pem", SSL_FILETYPE_PEM);
ret = SSL_CTX_load_verify_locations(ctx, "root-ca.pem", nullptr);
if(ret != 1)
{
throw std::string{"SSL_CTX_load_verify_locations failed"};
}
ret = SSL_CTX_set_default_verify_file(ctx);
if(ret != 1)
{
throw std::string{"SSL_CTX_set_default_verify_file failed"};
}
SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, nullptr);
SSL_CTX_set_cookie_generate_cb(ctx, generate_cookie);
SSL_CTX_set_cookie_verify_cb(ctx, verify_cookie);
}
SSL* generateSSL() const
{
return SSL_new(ctx);
}
};
struct Client
{
CustomBIO cbio;
SSL *ssl;
explicit Client(SSL *ssl)
:cbio{}, ssl{ssl}
{
cbio.peekmode = false;
BIO *bio = BIO_new(BIO_s_custom());
BIO_set_data(bio, (void *)&cbio);
BIO_set_init(bio, 1);
SSL_set_bio(ssl, bio, bio);
}
bool on_init() const
{
int ret = DTLSv1_listen(this->ssl, nullptr);
//throw_SSL_error_if_error(ssl, ret, "DTLSv1_listen failed");
std::cout << "DTLSv1_listen " << ret << std::endl;
return (ret==1);
}
bool on_connect() const
{
int ret = SSL_accept(ssl);
if(ret!=1) return false;
std::cout << "ssl = " << SSL_state_string_long(ssl) << std::endl;
std::cout << "SSL_accept successful!" << std::endl;
return true;
}
std::string SSL_read_alt() const
{
Packet p(2000);
std::cout << "sizeA = " << cbio.receivingQueue.size() << std::endl;
int ret = ::SSL_read(ssl,p.data,p.capacity);
std::cout << "ret = " << ret << std::endl;
std::cout << "sizeB = " << cbio.receivingQueue.size() << std::endl;
std::cout << "pdata0 " << p.data[0] << std::endl;
//std::cout << SSL_get_error(ssl, ret) << std::endl;
throw_SSL_error_if_error(ssl, ret, "sslread");
p.len = ret;
std::cerr << "plen" << std::endl;
std::cerr << p.len << std::endl;
std::string result(p.data,p.len);
if(result[0] == '\0') std::cout << "res = " << result << std::endl;
p.free();
return result;
}
};
class DTLSConnection
{
static const SSLSetterUpper sslSetup;
int epoll_fd;
std::unordered_map<sockaddr, std::shared_ptr<Client>> knownClients;
std::unordered_map<sockaddr, std::shared_ptr<Client>> connectedClients;
std::shared_ptr<Client> incomingClient;
public:
// 127.0.0.1:1234 or [::1]:1234
explicit DTLSConnection(std::string thisAddress)
{
std::list<sockaddr_storage> addresses;
if (thisAddress[0]=='[')
{
auto pos = thisAddress.find(']', 1);
if (pos == std::string::npos)
{
throw std::string{"invalid target"};
}
int port = std::stoi(thisAddress.substr(pos+2));
if (port<1||port>65535)
{
throw std::string{"invalid port"};
}
addresses.emplace_back();
auto* thisAddr = (sockaddr_in6 *)&(addresses.back());
thisAddr->sin6_family = AF_INET6;
if ( ! inet_pton(AF_INET6, thisAddress.substr(1, pos).c_str(), &thisAddr->sin6_addr) )
{
throw std::string{"invalid ipv6 address"};
}
thisAddr->sin6_port = htons(port);
}
else
{
auto pos = thisAddress.find(':');
if (pos == std::string::npos)
{
throw std::string{"invalid target"};
}
int port = std::stoi(thisAddress.substr(pos+1));
if (port<1||port>65535)
{
throw std::string{"invalid port"};
}
addresses.emplace_back();
auto * thisAddr = (sockaddr_in *)&(addresses.back());
thisAddr->sin_family = AF_INET;
if ( ! inet_pton(AF_INET, thisAddress.substr(0, pos).c_str(), &thisAddr->sin_addr) )
{
throw std::string{"invalid ipv4 address"};
}
thisAddr->sin_port = htons(port);
}
epoll_fd = epoll_create1(EPOLL_CLOEXEC);
for(auto &address : addresses)
{
epoll_event epe {};
epe.data.fd = socket(address.ss_family, SOCK_DGRAM/*|SOCK_NONBLOCK*/|SOCK_CLOEXEC, 0);
if(bind(epe.data.fd, (const sockaddr*)&address, sizeof(address)) != 0)
{
throw std::string{"failed to bind"};
}
epe.events = EPOLLIN|EPOLLET;
epoll_ctl(epoll_fd, EPOLL_CTL_ADD, epe.data.fd, &epe);
}
//signal(SIGINT, signal_handler); // do i need this?
incomingClient = std::make_shared<Client>(sslSetup.generateSSL());
}
ssize_t recv(const std::function<void(Client *)>& onmessage)
{
std::cout << "recv(onmessage)" << std::endl;
epoll_event epe{};
int ret;
ret = epoll_wait(epoll_fd, &epe, 1, TIME_OUT);
if (ret==-1)
{
throw std::string{"epoll_wait failed"};
}
if(ret==0)
{
return ret; // wait longer
}
Packet packet{2000};
ret = recvfrom(epe.data.fd, packet.data, packet.capacity, 0, incomingClient->cbio.getThat<sockaddr>(), &incomingClient->cbio.thatAddr_len);
packet.len = ret;
if(ret==0) return ret;
if(ret<0)
{
switch(errno)
{
case EAGAIN:
//case EWOULDBLOCK:
return ret;
case EBADF:
throw std::string{"EBADF"};
case ECONNREFUSED:
throw std::string{"ECONNREFUSED"};
case EFAULT:
throw std::string{"EFAULT"};
case EINTR:
throw std::string{"EINTR"};
case EINVAL:
throw std::string{"EINVAL"};
case ENOMEM:
throw std::string{"ENOMEM"};
case ENOTCONN:
throw std::string{"ENOTCONN"};
case ENOTSOCK:
throw std::string{"ENOTSOCK"};
default:
throw std::string{"Unknwon errno with negative return from recvfrom: "}+std::to_string(errno);
}
}
auto known = knownClients.find(*incomingClient->cbio.getThat<sockaddr>());
auto connected = connectedClients.find(*incomingClient->cbio.getThat<sockaddr>());
std::cout << "START" << std::endl;
for(auto &pair : knownClients)
{
std::cout << sdump_addr(&pair.first) << std::endl;
}
std::cout << "END" << std::endl;
if(known == knownClients.end())
{
std::cout << "inetaddr = " << sdump_addr(incomingClient->cbio.getThat<sockaddr>()) << std::endl;
ret = 0;
incomingClient->cbio.receivingQueue.push_back(std::move(packet));
incomingClient->cbio.sockfd = epe.data.fd;
if( incomingClient->on_init() )
{
std::cout << "inc = " << incomingClient->on_connect() << std::endl; //скорее всего, всегда будет ложь
std::cout << "on_init if" << std::endl;
knownClients[*incomingClient->cbio.getThat<sockaddr>()] = incomingClient;
incomingClient = std::make_shared<Client>(sslSetup.generateSSL());
}
}
else if(connected == connectedClients.end())
{
std::cout << "elseif" << std::endl;
ret = 0;
auto cli = known->second;
cli->cbio.receivingQueue.push_back(std::move(packet));
if( cli->on_connect() )
{
std::cout << "cli->cbio.receivingQueue.size()" << cli->cbio.receivingQueue.size() << std::endl;
connectedClients[*cli->cbio.getThat<sockaddr>()] = cli;
SSL_write(cli->ssl, "hello", 6);
}
}
else
{
std::cout << "else" << std::endl;
std::cout << sdump_addr(incomingClient->cbio.getThat<sockaddr>()) << " has been found as connected" << std::endl;
connected->second->cbio.receivingQueue.push_back(std::move(packet));
onmessage(connected->second.get());
}
return ret;
}
};
const SSLSetterUpper DTLSConnection::sslSetup{};
The output from running the server (CustomBIO output truncated) is:
Connection created
recv(onmessage)
START
END
inetaddr = INET: 192.168.31.177:58897
probe peekmode 0
211 bytes read from queue
36 bytes sent
probe peekmode 0
The queue is empty
DTLSv1_listen -1
Returned value is 0
recv(onmessage)
START
END
inetaddr = INET: 192.168.31.177:58897
probe peekmode 0
219 bytes read from queue
DTLSv1_listen 1
1180 bytes sent
probe peekmode 0
The queue is empty
inc = 0
on_init if
Returned value is 0
recv(onmessage)
START
INET: 192.168.31.177:58897
END
elseif
86 bytes sent
823 bytes sent
167 bytes sent
79 bytes sent
25 bytes sent
probe peekmode 0
219 bytes read from queue
probe peekmode 0
The queue is empty
Returned value is 0
recv(onmessage)
START
INET: 192.168.31.177:58897
END
elseif
probe peekmode 0
1088 bytes read from queue
618 bytes sent
ssl = SSL negotiation finished successfully
SSL_accept successful!
cli->cbio.receivingQueue.size()0
43 bytes sent
Returned value is 0
recv(onmessage)
START
INET: 192.168.31.177:58897
END
else
INET: 192.168.31.177:58897 has been found as connected
sizeA = 1
probe peekmode 0
824 bytes read from queue
probe peekmode 0
The queue is empty
ret = -1
sizeB = 0
pdata0 �
EXCEPTION: sslread ret=-1 SSL_ERROR_SYSCALL + error 0
Returned value is 824
recv(onmessage)
START
INET: 192.168.31.177:58897
END
else
INET: 192.168.31.177:58897 has been found as connected
sizeA = 1
probe peekmode 0
58 bytes read from queue
probe peekmode 0
The queue is empty
ret = -1
sizeB = 0
pdata0 `
EXCEPTION: sslread ret=-1 SSL_ERROR_SYSCALL + error 0
Returned value is 58
recv(onmessage)
Returned value is 0
recv(onmessage)
START
INET: 192.168.31.177:58897
END
else
INET: 192.168.31.177:58897 has been found as connected
sizeA = 1
probe peekmode 0
131 bytes read from queue
probe peekmode 0
The queue is empty
ret = -1
sizeB = 0
pdata0
EXCEPTION: sslread ret=-1 SSL_ERROR_SYSCALL + error 0
Returned value is 131
recv(onmessage)
START
INET: 192.168.31.177:58897
END
else
INET: 192.168.31.177:58897 has been found as connected
sizeA = 1
probe peekmode 0
14 bytes read from queue
probe peekmode 0
The queue is empty
ret = -1
sizeB = 0
pdata0 �
EXCEPTION: sslread ret=-1 SSL_ERROR_SYSCALL + error 0
Returned value is 14
recv(onmessage)
START
INET: 192.168.31.177:58897
END
else
INET: 192.168.31.177:58897 has been found as connected
sizeA = 1
The queue is empty
ret = -1
sizeB = 0
pdata0
EXCEPTION: sslread ret=-1 SSL_ERROR_SYSCALL + error 0
Returned value is 61
recv(onmessage)
START
INET: 192.168.31.177:58897
END
else
INET: 192.168.31.177:58897 has been found as connected
sizeA = 1
probe peekmode 0
55 bytes read from queue
ret = 18
sizeB = 0
pdata0 d
plen
18
dfksaiopfjiaosjfio
I am in onmessage
Returned value is 55
recv(onmessage)
Returned value is 0
recv(onmessage)
START
INET: 192.168.31.177:58897
END
else
INET: 192.168.31.177:58897 has been found as connected
sizeA = 1
probe peekmode 0
46 bytes read from queue
ret = 9
sizeB = 0
pdata0 s
plen
9
sdasdasda
I am in onmessage
Returned value is 46
recv(onmessage)
Returned value is 0
recv(onmessage)
Where you see EXCEPTION: sslread ret=-1 SSL_ERROR_SYSCALL + error 0 that means that SSL_read has returned -1. Sorry for somewhat dirty code.
In case somebody else will have a similar issue. The problem was that the wait between calling server's recv function was 1 second. In that time client thought that server has not responded and began doing weird things. Lowering the delay solved the problem.
client readline write to shared memory.and send a msg to server.
server get msg and read from shared memrory.
But the server cannot output correctly,
The server did not output anything,I do not know why.
the man pages says that:
If no message of the requested type is available and IPC_NOWAIT isn't specified in msgflg, the calling process is blocked until one of the following conditions occurs
but the server is always blocked.
I use gdb to debug it,find out that std::cout does not work
debug context
Breakpoint 1, main () at shared_mem_server.cpp:35
35 sem_init(reinterpret_cast<sem_t*>(shm),0,1);
(gdb) p shm
$1 = 0x7ffff7ff6000 ""
(gdb) x/10w 0x7ffff7ff6000
0x7ffff7ff6000: 0 0 0 0
0x7ffff7ff6010: 0 0 0 0
0x7ffff7ff6020: 0 0
(gdb) s
__new_sem_init (sem=0x7ffff7ff6000, pshared=0, value=1) at sem_init.c:31
31 sem_init.c: No such file or directory.
(gdb) return
Make __new_sem_init return now? (y or n) n
Not confirmed
(gdb) finish
Run till exit from #0 __new_sem_init (sem=0x7ffff7ff6000, pshared=0, value=1)
at sem_init.c:31
main () at shared_mem_server.cpp:37
37 msgrcv(msgid,&msg,256,ret_type,0);
Value returned is $2 = 0
(gdb) x/10w 0x7ffff7ff6000
0x7ffff7ff6000: 1 0 0 0
0x7ffff7ff6010: 0 0 0 0
0x7ffff7ff6020: 0 0
(gdb) p sem_sz
$3 = 32
(gdb) n
38 sem_p(reinterpret_cast<sem_t*>(shm));
(gdb) n
39 if(shm + sem_sz == "q")
(gdb) x/10w 0x7ffff7ff6000
0x7ffff7ff6000: 0 0 0 0
0x7ffff7ff6010: 0 0 0 0
0x7ffff7ff6020: 3355185 0
(gdb) x/12w 0x7ffff7ff6000
0x7ffff7ff6000: 0 0 0 0
0x7ffff7ff6010: 0 0 0 0
0x7ffff7ff6020: 3355185 0 0 0
(gdb) n
41 std::cout << "shared memory " << shm + sem_sz;
(gdb) n
42 sem_v(reinterpret_cast<sem_t*>(shm));
(gdb) q
Below is the code
server code:
#include <iostream>
#include <sys/shm.h>
#include <sys/msg.h>
#include "error.h"
#include "sempv.h"
const int SHM_SIZE=1024;
struct msg_form{
long msg_type;
char msg_text[256];
};
int main(){
key_t key;
int shmid,msgid,ret_type = 888,sem_sz = sizeof(sem_t);
char *shm;
msg_form msg;
if((key = ftok(".",'v')) < 0)
unix_error("ftok error");
if((shmid = shmget(key,SHM_SIZE,IPC_CREAT|0666)) == -1)
unix_error("create shared memory error");
if((shm = (char*)shmat(shmid,0,0)) == (void*)-1){
unix_error("attach shared memeory error");
}
if((msgid = msgget(key,IPC_CREAT|07777)) == -1)
unix_error("msgget error");
sem_init(reinterpret_cast<sem_t*>(shm),0,1);
while(true){
msgrcv(msgid,&msg,256,ret_type,0);
sem_p(reinterpret_cast<sem_t*>(shm));
if(shm + sem_sz == "q")
break;
std::cout << "shared memory " << shm + sem_sz;
sem_v(reinterpret_cast<sem_t*>(shm));
}
shmdt(shm);
shmctl(shmid,IPC_RMID,0);
shmctl(msgid,IPC_RMID,0);
return 0;
}
client code
#include <iostream>
#include <sys/shm.h>
#include <sys/msg.h>
#include "error.h"
#include "sempv.h"
#include <string>
using std::string;
const int SHM_SIZE=1024;
struct msg_form{
long msg_type;
char msg_text[256];
};
int main(){
key_t key;
int shmid,msgid,sem_sz = sizeof(sem_t);
char *shm;
int err;
msg_form msg;
string s;
if((key = ftok(".",'v')) < 0)
unix_error("ftok error");
if((shmid = shmget(key,SHM_SIZE,0)) == -1)
unix_error("shmget error");
if((shm = (char*)shmat(shmid,0,0)) == (void*)-1){
unix_error("attach shared memeory error");
}
if((msgid = msgget(key,0777)) == -1)
unix_error("msgget error");
std::cout << "key is " << key << std::endl;
while(getline(std::cin,s)){
sem_p(reinterpret_cast<sem_t*>(shm));
memset(shm+sem_sz,0,SHM_SIZE-sem_sz);
memcpy(shm+sem_sz,s.c_str(),s.size());
msg.msg_type = 888;
sprintf(msg.msg_text,"shared memory write signal");
if((err = msgsnd(msgid,&msg,sizeof(msg.msg_text),0)) == -1)
unix_error("msgsnd error");
sem_v(reinterpret_cast<sem_t*>(shm));
//std::cout << "message send\n";
}
return 0;
}
General remarks:
System V IPC are deprecated, for any new project, it is advised to use the POSIX counterparts (man 7 shm_overview and man 7 mq_overview)
The client/server synchronization is weak: the client may overwrite the shared memory segment while the server is reading it. You should use a mutex to read/write into the shared memory segment (when one is reading/writing the other is blocked): cf. man 7 sem_overview
As you don't make any cleanup in the server, make sure to remove the queue and shared memory identifiers with ipcs/ipcrm under the shell between each tries of your application
In the server:
The error checking is wrong: if shmget()/msgget return -1 and errno is equal to EEXIST, you continue but you didn't get any shm/msg identifier as it is -1!
In the client:
For the sake of robustness, use preferably snprintf() instead of sprintf() to force the check of the bounds of the buffer
The client should not use IPC_CREAT for msgget(). Generally, it is the role of server to create the resources in a server/client application
Here is the C version of your modified code:
Server:
#include <stdio.h>
#include <sys/shm.h>
#include <sys/msg.h>
#include <stdlib.h>
#include <semaphore.h>
const int SHM_SIZE=1024;
struct msg_form{
long msg_type;
char msg_text[256];
};
static void unix_error(const char *str)
{
fprintf(stderr, "%s\n", str);
exit(1);
}
int main(){
key_t key;
int shmid,msgid,ret_type = 888,sem_sz = sizeof(sem_t);
char *shm;
struct msg_form msg;
if((key = ftok(".",'v')) < 0)
unix_error("ftok error");
if((shmid = shmget(key,SHM_SIZE,IPC_CREAT|0666)) == -1)
unix_error("create shared memory error");
if((shm = (char *)shmat(shmid,0,0)) == (void*)-1){
unix_error("attach shared memory error");
}
if((msgid = msgget(key,IPC_CREAT|07777)) == -1)
unix_error("msgget error");
sem_init((sem_t *)(shm),1,1);
while(1){
msgrcv(msgid,&msg,256,ret_type,0);
sem_wait((sem_t *)(shm));
if (*(shm + sem_sz) == 'q' && *(shm + sem_sz + 1) == '\n') {
sem_post((sem_t *)(shm));
break;
}
printf("shared memory: %s", shm + sem_sz);
sem_post((sem_t *)(shm));
}
shmdt(shm);
shmctl(shmid,IPC_RMID,0);
msgctl(msgid,IPC_RMID,0);
return 0;
}
Client:
#include <stdio.h>
#include <sys/shm.h>
#include <sys/msg.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
const int SHM_SIZE=1024;
struct msg_form{
long msg_type;
char msg_text[256];
};
static void unix_error(const char *str)
{
fprintf(stderr, "%s\n", str);
exit(1);
}
int main(){
key_t key;
int shmid,msgid,sem_sz = sizeof(sem_t);
char *shm;
int err;
struct msg_form msg;
char s[256];
if((key = ftok(".",'v')) < 0)
unix_error("ftok error");
if((shmid = shmget(key,SHM_SIZE,0)) == -1)
unix_error("shmget error");
if((shm = (char*)shmat(shmid,0,0)) == (void*)-1){
unix_error("attach shared memeory error");
}
if((msgid = msgget(key,0777)) == -1)
unix_error("msgget error");
printf("key is 0x%x\n", (int)key);
while(fgets(s, sizeof(s) - 1, stdin)) {
sem_wait((sem_t *)(shm));
memset(shm+sem_sz,0,SHM_SIZE-sem_sz);
memcpy(shm+sem_sz,s, strlen(s));
msg.msg_type = 888;
snprintf(msg.msg_text, 256, "shared memory write signal");
if((err = msgsnd(msgid,&msg,sizeof(msg.msg_text),0)) == -1) {
sem_post((sem_t *)(shm));
unix_error("msgsnd error");
}
sem_post((sem_t *)(shm));
}
return 0;
}
By gcc docs: x86-transactional-memory-intrinsics.html, when transaction failed/abort, _xbegin() should return a abort status . However, I find it return 0 sometimes. And the frequency is very high. What kind of situation that **_xbegin()**will return 0?
After checking manual, I find many situations may cause this result. For example, CPUID, SYSTEMCALL, CFLUSH.etc. However, I don't think my code has triggered any of them.
Here is my code: Simulating a small bank, a random account transfer 1$ to another account.
#include "immintrin.h"
#include <thread>
#include <unistd.h>
#include <iostream>
using namespace std;
#define n_threads 1
#define OPSIZE 1000000000
typedef struct Account{
long balance;
long number;
} __attribute__((aligned(64))) account_t;
typedef struct Bank{
account_t* accounts;
long size;
} bank_t;
bool done = 0;
long *tx, *_abort, *capacity, *debug, *failed, *conflict, *zero;
void* f1(bank_t* bank, int id){
for(int i=0; i<OPSIZE; i++){
int src = rand()%bank->size;
int dst = rand()%bank->size;
while(src == dst){
dst = rand()%bank->size;
}
while(true){
unsigned stat = _xbegin();
if(stat == _XBEGIN_STARTED){
bank->accounts[src].balance++;
bank->accounts[dst].balance--;
_xend();
asm volatile("":::"memory");
tx[id]++;
break;
}else{
_abort[id]++;
if (stat == 0){
zero[id]++;
}
if (stat & _XABORT_CONFLICT){
conflict[id]++;
}
if (stat & _XABORT_CAPACITY){
capacity[id]++;
}
if (stat & _XABORT_DEBUG){
debug[id]++;
}
if ((stat & _XABORT_RETRY) == 0){
failed[id]++;
break;
}
if (stat & _XABORT_NESTED){
printf("[ PANIC ] _XABORT_NESTED\n");
exit(-1);
}
if (stat & _XABORT_EXPLICIT){
printf("[ panic ] _XBEGIN_EXPLICIT\n");
exit(-1);
}
}
}
}
return NULL;
}
void* f2(bank_t* bank){
printf("_heartbeat function\n");
long last_txs=0, last_aborts=0, last_capacities=0, last_debugs=0, last_faileds=0, last_conflicts=0, last_zeros = 0;
long txs=0, aborts=0, capacities=0, debugs=0, faileds=0, conflicts=0, zeros = 0;
while(1){
last_txs = txs;
last_aborts = aborts;
last_capacities = capacities;
last_debugs = debugs;
last_conflicts = conflicts;
last_faileds = faileds;
last_zeros = zeros;
txs=aborts=capacities=debugs=faileds=conflicts=zeros = 0;
for(int i=0; i<n_threads; i++){
txs += tx[i];
aborts += _abort[i];
faileds += failed[i];
capacities += capacity[i];
debugs += debug[i];
conflicts += conflict[i];
zeros += zero[i];
}
printf("txs\t%ld\taborts\t\t%ld\tfaileds\t%ld\tcapacities\t%ld\tdebugs\t%ld\tconflit\t%ld\tzero\t%ld\n",
txs - last_txs, aborts - last_aborts , faileds - last_faileds,
capacities- last_capacities, debugs - last_debugs, conflicts - last_conflicts,
zeros- last_zeros);
sleep(1);
}
}
int main(int argc, char** argv){
int accounts = 10240;
bank_t* bank = new bank_t;
bank->accounts = new account_t[accounts];
bank->size = accounts;
for(int i=0; i<accounts; i++){
bank->accounts[i].number = i;
bank->accounts[i].balance = 0;
}
thread* pid[n_threads];
tx = new long[n_threads];
_abort = new long[n_threads];
capacity = new long[n_threads];
debug = new long[n_threads];
failed = new long[n_threads];
conflict = new long[n_threads];
zero = new long[n_threads];
thread* _heartbeat = new thread(f2, bank);
for(int i=0; i<n_threads; i++){
tx[i] = _abort[i] = capacity[i] = debug[i] = failed[i] = conflict[i] = zero[i] = 0;
pid[i] = new thread(f1, bank, i);
}
// sleep(5);
for(int i=0; i<n_threads;i++){
pid[i]->join();
}
return 0;
}
Supplements:
All accounts is 64bit aligned. I printed bank->accounts[0], bank->accounts1 address. 0xf41080,0xf410c0。
Using -O0 and asm volatile("":::"memory");therefore there is no instruction reordering problems.
Abort rate increases at time. Here is the result
txs 84 aborts 0 faileds 0 capacities 0 debugs 0 conflit 0 zero 0
txs 17070804 aborts 71 faileds 68 capacities 9 debugs 0 conflit 3 zero 59
txs 58838 aborts 9516662 faileds 9516661 capacities 0 debugs 0 conflit 1 zero 9516661
txs 0 aborts 9550428 faileds 9550428 capacities 0 debugs 0 conflit 0 zero 9550428
txs 0 aborts 9549254 faileds 9549254 capacities 0 debugs 0 conflit 0 zero 9549254
Even through n_threads is 1, the result is same.
If I add coarse lock after fallback as follow, the result seems be correct.
int fallback_lock;
bool
rtm_begin(int id)
{
while(true) {
unsigned stat;
stat = _xbegin ();
if(stat == _XBEGIN_STARTED) {
return true;
} else {
_abort[id]++;
if (stat == 0){
zero[id]++;
}
//call some fallback function
if (stat& _XABORT_CONFLICT){
conflict[id]++;
}
//will not succeed on a retry
if ((stat & _XABORT_RETRY) == 0) {
failed[id]++;
//grab a fallback lock
while (!__sync_bool_compare_and_swap(&fallback_lock,0,1)) {
}
return false;
}
}
}
}
....
in_rtm = rtm_begin(id);
y = fallback_lock;
accounts[src].balance--;
accounts[dst].balance++;
if (in_rtm){
_xend();
}else{
while(!__sync_bool_compare_and_swap(&fallback_lock, 1, 0)){
}
}
The hardware documentation on RTM suggests the following:
The value of EAX can be '0' following an RTM abort. For example, a CPUID instruction when used inside an RTM region causes a transactional abort and may not satisfy the requirements for setting any of the EAX bits. This may result in an EAX value of '0'.
(Where, EAX is the hardware register used to communicate status, that GCC will in turn return to you as the return value of )
I have the following source compiled by g++ 4.8.2 :
g++ --std=c++11 testx.cpp -pthread -lrt -O2 -o testx.exe
g++ --std=c++11 testx.cpp -pthread -lrt -o testy.exe
layout.h :
#pragma once
typedef struct DBInfo_
{
volatile int seqno ;
char groupname[32] ;
char action[32] ;
int booklayer ;
} DBInfo ;
#define DBARRAYSIZE 100
static DBInfo *conf;
#define STATE_FILE "/strategy2/test.shared"
testx.cpp :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/file.h>
#include <ctype.h>
#include <pthread.h>
#include <sys/stat.h>
#include <assert.h>
#include "layout.h"
int g_DBSharedMemIdx = 0 ;
volatile int iGlbErrorSeqLock = 0 ;
void create_shmem(void)
{
int shm_fd;
if((shm_fd = shm_open(STATE_FILE, (O_CREAT | O_EXCL | O_RDWR),
(S_IREAD | S_IWRITE))) > 0 ) {
printf("O_CREAT | O_EXCL | O_RDWR \n"); /* We are the first instance */
}
else if((shm_fd = shm_open(STATE_FILE, (O_CREAT | O_RDWR),
(S_IREAD | S_IWRITE))) < 0)
{
printf("Could not create shm object. \n");
exit( 0 ) ;
}
int iTotalByte = sizeof(DBInfo)*DBARRAYSIZE ;
ftruncate(shm_fd, iTotalByte );
conf = (DBInfo*) mmap(0, iTotalByte , (PROT_READ | PROT_WRITE), MAP_SHARED, shm_fd, 0) ;
if(conf == MAP_FAILED)
{
printf(" mmap error ....\n") ;
exit( 0 ) ;
}
g_DBSharedMemIdx = 0 ;
(conf+g_DBSharedMemIdx)->seqno = 0 ;
strcpy( (conf+g_DBSharedMemIdx)->groupname,"" ) ;
strcpy( (conf+g_DBSharedMemIdx)->action,"" ) ;
(conf+g_DBSharedMemIdx)->booklayer = 0 ;
}//create_shmem
int getDBInfo(DBInfo& pDBInfo)
{
if( pDBInfo.seqno == (conf+g_DBSharedMemIdx)->seqno)
return 0 ;
volatile int ilocalseqno1 = 0 , ilocalseqno2 = 0 ;
volatile int icnt=0;
while( 1 ){
icnt++ ;
if( icnt >= 10000 ){
iGlbErrorSeqLock = 1 ;
break ;
}
ilocalseqno1 = (conf+g_DBSharedMemIdx)->seqno ;
if( ilocalseqno1 % 2 ){
printf("***************************************************************************** \n");
continue;
}
strcpy( pDBInfo.action,(conf+g_DBSharedMemIdx)->action ) ;
pDBInfo.booklayer = (conf+g_DBSharedMemIdx)->booklayer ;
ilocalseqno2 = (conf+g_DBSharedMemIdx)->seqno ;
if( ilocalseqno1 != ilocalseqno2 ){
printf("***************************************************************************** \n");
continue;
}
pDBInfo.seqno = ilocalseqno2 ;
break ;
} //while
if( iGlbErrorSeqLock == 1 )
return -1 ;
return 1 ;
}//getDBInfo
void *ThreadONE(void *param)
{
printf("TestThread...(%p)\n",param) ;
pthread_detach(pthread_self());
while( 1 ){
(conf+g_DBSharedMemIdx)->seqno++ ;
strcpy( (conf+g_DBSharedMemIdx)->groupname,"group1" ) ;
strcpy( (conf+g_DBSharedMemIdx)->action,"RUN" ) ;
(conf+g_DBSharedMemIdx)->booklayer = 3 ;
if( (conf+g_DBSharedMemIdx)->seqno % 2 == 1 )
(conf+g_DBSharedMemIdx)->seqno++ ;
usleep( 1 ) ;
}//while
}//ThreadONE
void *ThreadTWO(void *param)
{
DBInfo pDBInfo ;
printf("TestThread...(%p)\n",param) ;
pthread_detach(pthread_self());
int icnt = 0 ;
while( 1 ){
int iret = getDBInfo(pDBInfo);
if( iret < 0 ){
printf("iGlbErrorSeqLock happened \n") ;
assert( iGlbErrorSeqLock == 100 );
}else if(iret == 1){
icnt++ ;
//printf("icnt=(%d)\n",icnt);
}
usleep( 1 ) ;
}//while
}//ThreadTWO
int main(int argc, char** argv)
{
create_shmem();
sleep( 1 ) ;
pthread_t tid ;
pthread_create(&tid , NULL, ThreadONE, (void*)(long)3);
pthread_create(&tid , NULL, ThreadTWO, (void*)(long)3);
while( 1 )
sleep( 5 ) ;
} //main
testy.exe(compiled without -O2) run will see "**************" 2 times per second ,
testx.exe(compiled with -O2) run will see "***********" not quite easy .
I have this test because in my original source , sometimes call function
like getDBInfo will be trapped in endless loop if compiled with -O2,
and it will be fine without -O2 , so I am curious what compiler do in
getDBInfo function and what should I do according to it .
I've been looking at this for a few hours. I've tried everything I can think of, and frankly It doesn't make sense. I actively send and receive with the socket with no problems, but as soon as I change the data to a different message, same style, it stops recieving. I'm using TCP. I have a manager process send up to N router messages with table data. I later send a packet, same style, it receive it, and then stops receiving.... The code gets back to the top of the loop, but just doesn't get any more data.
Oh the networking code I'm using is a copy and paste of beejs TCP server client code. http://beej.us/guide/bgnet/output/html/multipage/clientserver.html
Manager thread, this part works
for(vector< vector<int> >::iterator it = table.begin(); it!=table.end(); ++it ){
vector< int > d = *it;
for(vector<int>::iterator itA = d.begin(); itA!=d.end(); ++itA ){
cout << "Sending... "<< *itA << endl;
s <<*itA<<" ";
}
if (send(new_fd, s.str().c_str(), 13, 0) == -1)
perror("Serv:send");
sleep(2);
logs << "Sent to router " << i <<":\n" << s.str();
writeLog(logs.str().c_str());
s.str("");
logs.str("");
}
s<<"done";
if (send(new_fd, s.str().c_str(), 13, 0) == -1)
perror("Serv:send");
writeLog(s.str().c_str());
manage 2, where only the first message gets through
for(vector <vector <int > >::iterator it = toSendPackets.begin(); it != toSendPackets.end(); ++it){
sleep(3);
vector<int> tsp = *it;
int a,b,c = 0;
for(vector<int>::iterator itr = tsp.begin(); itr != tsp.end(); ++itr){
if(c==0){
a = *itr;
}
if(c==1){
b = *itr;
}
c++;
}
ss.str("");
ss << a << " " << b;
for(int i = 0; i < numN; i++){
int curSoc = socketList[i];
stringstream sl;
sl<<"sent:"<< ss.str().c_str();
cout << "sending.. " << ss.str() << " to " << i << endl;
if (send(curSoc, "HOP", strlen("HOP")+1, 0) == -1)
perror("Serv:send");
sleep(2);
if (send(curSoc, ss.str().c_str(), strlen(ss.str().c_str())+1, 0) == -1)
perror("Serv:send");
writeLog(sl.str().c_str());
sleep(1);
}
}
Router code.
The manager code above and manager code 2 both send to this part of the code.
It gets the first send, in this case "HOP" and then nothing? I removed the HOP packet parsing, so it litterally should only state that something was read.
if(tid == 0){// TCP
stringstream s;
bool proc = true;
while(!doneFlag){
proc = true;
cout << "TCP RECEIVING... " << endl;
int numbytes = 0;
while(numbytes==0){
if ((numbytes = recv(sockfd, buf, MAXDATASIZE, 0)) == -1) {
perror("recvROUTERThread0");
exit(1);
}
}
buf[numbytes] = '\0';
numbytes = 0;
if(strcmp("Quit",buf)==0){
writeLog("Quit read",outName);
doneFlag = true;
close(net.sockfd);
floodUDP("Quit");
pthread_exit(NULL);
}
else if(strcmp("HOP",buf)==0){
cout << "HOP READ" << endl;
numbytes = 0;
while(numbytes==0){
if ((numbytes = recv(sockfd, buf, MAXDATASIZE, 0)) == -1) {
perror("recvROUTERThread0");
exit(1);
}
}
s << id << "R: Receiving a routing command! " << buf;
cout << s.str().c_str() << endl;
writeLog(s.str().c_str(),outName);
HOPpacket hpo = genHopOrig(s.str().c_str());
if(hpo.s == atoi(id)){
printHOP(hpo);
// cout << "PACKET " << pr << endl;
stringstream sl;
char* hop = generateHopPacket(hpo);
sl << "Generating HOP packet and sending.. " << hop;
writeLog(sl.str().c_str(),outName);
sendHOP(hop);
}
}
else{
cout << "Table row data from manager" << endl;
s.str("");
s << id << "R: MANAGER MESSAGE: " << buf << endl;
cout << s.str() << endl;
writeLog(s.str().c_str(),outName);
int intID = atoi(id);
vector <int> tr = processTR(buf,intID,basePN);
table.push_back(tr);
}
}
}
My output. In this case there are 10 routers running. Note I didn't change my prints to state that it was sending HOP then 0 5 ..
sending.. 0 5 to 0
HOP READ
WRITTING Manager log:12-11-23::4:6:26:
sent:0 5
sending.. 0 5 to 1
HOP READ
WRITTING Manager log:12-11-23::4:6:29:
sent:0 5
sending.. 0 5 to 2
HOP READ
WRITTING Manager log:12-11-23::4:6:32:
sent:0 5
sending.. 0 5 to 3
HOP READ
WRITTING Manager log:12-11-23::4:6:35:
sent:0 5
sending.. 0 5 to 4
HOP READ
WRITTING Manager log:12-11-23::4:6:38:
sent:0 5
sending.. 0 5 to 5
HOP READ
WRITTING Manager log:12-11-23::4:6:41:
sent:0 5
sending.. 0 5 to 6
HOP READ
WRITTING Manager log:12-11-23::4:6:44:
sent:0 5
sending.. 0 5 to 7
HOP READ
WRITTING Manager log:12-11-23::4:6:47:
sent:0 5
sending.. 0 5 to 8
HOP READ
WRITTING Manager log:12-11-23::4:6:50:
sent:0 5
sending.. 0 5 to 9
HOP READ
WRITTING Manager log:12-11-23::4:6:53:
sent:0 5
sending.. 3 9 to 0
WRITTING Manager log:12-11-23::4:6:59:
sent:3 9
sending.. 3 9 to 1
WRITTING Manager log:12-11-23::4:7:2:
sent:3 9
sending.. 3 9 to 2
WRITTING Manager log:12-11-23::4:7:5:
sent:3 9
sending.. 3 9 to 3
WRITTING Manager log:12-11-23::4:7:8:
sent:3 9
sending.. 3 9 to 4
WRITTING Manager log:12-11-23::4:7:11:
sent:3 9
sending.. 3 9 to 5
WRITTING Manager log:12-11-23::4:7:14:
sent:3 9
sending.. 3 9 to 6
WRITTING Manager log:12-11-23::4:7:17:
sent:3 9
sending.. 3 9 to 7
WRITTING Manager log:12-11-23::4:7:20:
sent:3 9
sending.. 3 9 to 8
WRITTING Manager log:12-11-23::4:7:23:
sent:3 9
sending.. 3 9 to 9
WRITTING Manager log:12-11-23::4:7:26:
sent:3 9
There is a problem when you recv data, TCP is a stream based socket not a message based one, so if you use:
send( sock, buf1, len1, 0 ); // Send HOP, since it is small, you OS merge this
send( sock, buf2, len2, 0 ); // with next send!
and then try to receive data using recv it is not guaranteed that you receive data in 2 separate calls to recv, so you may receive both sent buffers in one call to recv:
recv( sock, buf, len, 0 ); // This may receive both buffers in one call
so your next call to recv will be blocked for data that already received in first call! Also they may be another problem for when you send large buffer, then recv may receive less data than a single message passed using send.
You must define a protocol that define end of message in the received stream and then receive your data according to that protocol. for example, you may first send length of message or define something that indicate end of it(for example \0 or \r\n).
Sorry for my incomplete description of the error. In your comment you say that you have increased the HOP message size! But it certainly isn't a good practice, also increased size is so small that never force OS to send it immediately( actually there is no certain size that force OS do that ). If you want OS to send your data immediately, you should disable Nagle algorithm using TCP_NO_DELAY option, but before doing that take a look at How do I use TCP_NODELAY?. Doing this is not a good practice either and beside that while doing this cause your packet sent immediately as you call send but it never force OS on receiver side to receive messages separately!! so what is the correct way of doing this?
I explain the problem in detail:
// I don't know exact value of MAXDATASIZE but I will assume it is 128
char buf[ MAXDATASIZE ];
int numbytes = recv( sock, buf, MAXDATASIZE, 0 );
if( numbyte == -1 ) {
// Handle error
}
// I assume HOP_MSG is a defined constant that contain value of HOP message
if( strcmp(buf, HOP_MSG) == 0 ) { // <-- (1)
while( (numbytes = recv(sock, buf, MAXDATASIZE, 0)) != -1 ) { // <-- (2)
if( numbytes == 0 ) break;
}
if( numbytes == -1 ) {
// Handle error
}
}
But wait! in line that marked with (1) I assumed recv read HOP_MSG completely and only HOP_MSG, but why?? As I said before TCP is a stream protocol and there is no message boundary in it, so it may read only 2 bytes!! or it read 1KB( that is certainly more than HOP_MSG, so what should I do??
The working answer is something like follow:
int receive_till_zero( SOCKET sock, char* tmpbuf, int& numbytes ) {
int i = 0;
do {
// Check if we have a complete message
for( ; i < numbytes; i++ ) {
if( buf[i] == '\0' ) {
// \0 indicate end of message! so we are done
return i + 1; // return length of message
}
}
int n = recv( sock, buf + numbytes, MAXDATASIZE - numbytes, 0 );
if( n == -1 ) {
return -1; // operation failed!
}
numbytes += n;
} while( true );
}
void remove_message_from_buffer( char* buf, int& numbytes, int msglen ) {
// remove complete message from the buffer.
memmove( buf, buf + msglen, numbytes - msglen );
numbytes -= msglen;
}
void main() {
SOCKET s;
char buf[ MAXDATASIZE ];
int numbytes = 0, msglen;
// Initialize socket and connect to server, you already do that
while( true ) {
msglen = receive_till_zero( s, buf, numbytes );
if( msglen == -1 ) {/* Handle error */}
if( !strcmp(buf, HOP_MSG) ) {
remove_message_from_buffer( buf, numbytes, msglen );
msglen = receive_till_zero( s, buf, numbytes );
if( msglen == -1 ) {/* Handle error */}
std::cout << "Message received from server: " << buf << std::endl;
remove_message_from_buffer( buf, numbytes, msglen );
}
}
}
By debugging this code you will certainly understand its purpose, receive_till_zero assume there is already some pending data in the buffer from previous call to recv, so it will first check if there is a complete message in the buffer or not and also it never assume receiving data completed just by one call to recv so it will call recv in a loop until it see a \0 in the buffer. After we finished with data in the buffer we call remove_message_from_buffer to eat that data and only that data, and not just start receiving from the start of buffer, since they may already some data in the buffer.
As you see code is a little complicated, for a better programming model and a better C++ code you may use boost::asio that have a very good design and work perfectly with C++ and iostream