How to serialize a vector into an array of chars - c++

I have a vector defined as:
std::vector<message *>
where message is:
struct message{
static unsigned int last_id;
unsigned int id;
std::string msg;
std::string timestamp;
}
My objective is to send this information using Winsock (from server to client), but this only allows sending chars as it appears in WinSock2.h. Taking this into account, I want to serialize all the information (id, msg and timestamp) in an array of chars in order to send it all together, and in the client, have a function to deserialize so as to have the same vector I had in the server.
How could I implement it?
Any help is appreciated.

The following offers a simple approach for the serialization problem.
However, note that it is not portable. It assumes same environment conditions on both sides (client/server), i.e. endianness and sizeof int and size_t. This assumption is probably unsatisfactory when writing server/client programs, and your code should handle this aspect as well.
For example, if you can say that 32 bits is a sufficient size for the id value and the length of your strings, you can use htonl when serializing, and ntohl when deserializing.
Serializer:
class MessageSerializer
{
public:
MessageSerializer(const message& messageStruct)
: m_msgRef(messageStruct)
, m_msgLength(m_msgRef.msg.length())
, m_timeLength(m_msgRef.timestamp.length())
{}
size_t RequiredBufferSize() const
{
return sizeof(int) + sizeof(size_t)*2 + m_msgLength + m_timeLength;
}
void Serialize(void* buffer) const
{
PushNum (buffer, m_msgRef.id);
PushString (buffer, m_msgRef.msg.c_str(), m_msgLength);
PushString (buffer, m_msgRef.timestamp.c_str(), m_timeLength);
}
private:
const message& m_msgRef;
const size_t m_msgLength;
const size_t m_timeLength;
template<typename INTEGER>
void PushNum(void*& buffer, INTEGER num) const
{
INTEGER* ptr = static_cast<INTEGER*>(buffer);
//copying content
*ptr = num;
//updating the buffer pointer to point the next position to copy
buffer = ++ptr;
}
void PushString(void*& buffer, const char* cstr, size_t length) const
{
PushNum(buffer, length);
//copying string content
memcpy(buffer, cstr, length);
//updating the buffer pointer to point the next position to copy
char* ptr = static_cast<char*>(buffer);
ptr += length;
buffer = ptr;
}
};
Deserializer:
class MessageDeserializer
{
public:
MessageDeserializer(const char* messageBuffer)
: m_msgBuffer(messageBuffer)
{}
void Deserialize(message& messageOut)
{
messageOut.id = PopNum<int>(m_msgBuffer);
messageOut.msg = PopString(m_msgBuffer);
messageOut.timestamp = PopString(m_msgBuffer);
}
private:
const void* m_msgBuffer;
template<typename INTEGER>
INTEGER PopNum(const void*& buffer) const
{
const INTEGER* ptr = static_cast<const INTEGER*>(buffer);
//copying content
INTEGER retVal = *ptr;
//updating the buffer pointer to point the next position to copy
buffer = ++ptr;
return retVal;
}
std::string PopString(const void*& buffer) const
{
size_t length = PopNum<size_t>(buffer);
const char* ptr = static_cast<const char*>(buffer);
//copying content
std::string retVal(ptr, length);
//updating the buffer pointer to point the next position to copy
ptr += length;
buffer = ptr;
return retVal;
}
};
Then your using code could be something like:
//...
MessageSerializer serializer(*myVector[i]);
char* buffer = new char[serializer.RequiredBufferSize()];
serializer.Serialize(buffer);
and:
//...
message myMsg;
MessageDeserializer(input).Deserialize(myMsg);

You can use the Boost serialization library to save/load your structure to an array of char. The boost library is widely used in C++, and if you're not familiar with it, I'd recommend taking a look at it.
Instead of using winsock, you could learn to use Boost sockets and make your C++ code work on almost any platform, instead of just Windows, but that's another topic.
Here's an example of how to serialize your vector, and recover it from the other side of the socket:
#include <vector>
#include <boost/serialization/nvp.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
struct message {
static unsigned int last_id;
unsigned int id;
std::string msg;
std::string timestamp;
template <class ArchiveT>
void serialize(ArchiveT& ar, const unsigned int /*version*/) // function used to serialize (save/load) data from the boost serialization library
{
ar & boost::serialization::make_nvp("LastId", last_id);
ar & boost::serialization::make_nvp("Id", id);
ar & boost::serialization::make_nvp("Msg", msg);
ar & boost::serialization::make_nvp("Timestamp", timestamp);
}
};
unsigned int message::last_id;
template <class T>
void serialize_save(const T& obj, std::string& outString)
{
std::stringstream binaryOut;
boost::archive::binary_oarchive outArchive(binaryOut);
outArchive << obj;
outString = binaryOut.str();
}
template <class T>
void serialize_load(T& dataOut, const void* data, const size_t dataSize)
{
const char* dataPtr = reinterpret_cast<const char*>(data);
std::string dataString(dataPtr, dataPtr + dataSize);
std::stringstream dataStream(dataString);
boost::archive::binary_iarchive binArchive(dataStream);
binArchive >> dataOut;
}
void init_vector(std::vector<message*>& vect) {
const size_t vectorSize = 2;
vect.resize(vectorSize);
for (size_t i = 0; i < vectorSize; i++) {
vect[i] = new message();
vect[i]->last_id = 0;
vect[i]->id = 1;
vect[i]->msg = "This is a message";
vect[i]->timestamp = "12:02pm";
}
}
int main() {
std::vector<message*> messages;
init_vector(messages); // initialize the vector. set it to any data
std::string outputBuffer;
serialize_save(messages, outputBuffer); // save the vector to a string (array of char)
socket_write(outputBuffer.c_str(), outputBuffer.size()); // write the serialized data to the socket
// on the reception side
std::string receiveBuffer;
socket_read(receiveBuffer); // receive socket data
std::vector<message*> receivedMessages;
serialize_load(receivedMessages, receiveBuffer.c_str(), receiveBuffer.size()); // from the array of character recover the vector
// here the vector receivedMessages contains the same values saved in init_vector()
}
You can change the export format if you'd like by changing the boost::archive::binary_iarchive object. For instance, replace it to boost::archive::xml_iarchive for serializing objects to XML. There are other formats provided by the library. Another advantage is that it supports versioning.

Related

C++: Populate a struct with data from a buffer

I was wondering if I could have some recommendations on how to take data from a buffer and load them into a struct. For example, I have dealing with a DNS response buffer. I need to populate a DNS answer struct so that I can interpret the data. So far, I have the following:
int DugHelp::getPacket() {
memset(buf, 0, 2000); // clearing the buffer to make sure no "garbage" is there
if (( n = read(sock, buf, 2000)) < 0 {
exit(-1);
}
// trying to populate the DNS_Answers struct
dnsAnswer = (struct DNS_Answer *) & buf;
. . .
}
This is the struct that I have defined:
struct DNS_Answer{
unsigned char name [255];
struct {
unsigned short type;
unsigned short _class;
unsigned int ttl;
unsigned in len;
} types;
unsigned char data [2000];
};
It depends on the data format of buf. If the format is same with DNS_Answer. You can use memcpy. If their formats are same, you should align the bytes first.
#pragma pack (1)
struct DNS_Answer{
unsigned char name [255];
struct {
unsigned short type;
unsigned short _class;
unsigned int ttl;
unsigned in len;
} types;
unsigned char data [2000];
};
#pragma pop(1)
Then,
memcpy(dnsAnswer, buf, sizeof(DNS_Answer));
If their data formats aren't same, you have to parse them by yourself, or you can use DFDL (A data format descripation language.)
I do something a bit like this (rather untested) code:
Library Code:
namespace net {
using byte = unsigned char;
enum class endian
{
#ifdef _WIN32
little = 0,
big = 1,
native = little
#else
little = __ORDER_LITTLE_ENDIAN__,
big = __ORDER_BIG_ENDIAN__,
native = __BYTE_ORDER__,
#endif
};
constexpr bool is_little_endian()
{
return endian::native == endian::little;
}
template<typename POD>
byte* write_to_buffer(POD const& pod, byte* pos)
{
if(is_little_endian())
std::reverse_copy((byte*)&pod, (byte*)& pod + sizeof(pod), pos);
else
std::copy((byte*)&pod, (byte*)& pod + sizeof(pod), pos);
return pos + sizeof(pod);
}
template<typename POD>
byte const* read_from_buffer(byte const* pos, POD& pod)
{
if(is_little_endian())
std::copy(pos, pos + sizeof(pod), (byte*)&pod);
else
std::reverse_copy(pos, pos + sizeof(pod), (byte*)&pod);
return pos + sizeof(pod);
}
} // namespace net
Application Code:
struct DNS_Answer{
unsigned char name [255];
struct {
unsigned short type;
unsigned short _class;
unsigned int ttl;
unsigned int len;
} types;
unsigned char data [2000];
};
net::byte* write_to_buffer(DNS_Answer const& ans, net::byte* buf)
{
auto pos = buf;
pos = net::write_to_buffer(ans.name, pos);
pos = net::write_to_buffer(ans.types.type, pos);
pos = net::write_to_buffer(ans.types._class, pos);
pos = net::write_to_buffer(ans.types.ttl, pos);
pos = net::write_to_buffer(ans.types.len, pos);
pos = net::write_to_buffer(ans.data, pos);
return pos;
}
net::byte const* read_from_buffer(net::byte const* buf, DNS_Answer& ans)
{
auto pos = buf;
pos = net::read_from_buffer(pos, ans.name);
pos = net::read_from_buffer(pos, ans.types.type);
pos = net::read_from_buffer(pos, ans.types._class);
pos = net::read_from_buffer(pos, ans.types.ttl);
pos = net::read_from_buffer(pos, ans.types.len);
pos = net::read_from_buffer(pos, ans.data);
return pos;
}
This should be pretty portable, deals with different byte orders and avoids potential alignment problems. You can also transfer non-pod types by breaking them down into several POD pieces and sending those separately. For example std::string can be sent as a std::size_t for the length and the rest as a char array.

How to assign a std::string back to a std::vector<char>.data() passed as a void* pointer?

Consider the below code, which passes an input string as a source void* to function filter() along with a vector<char> as a destination void*.
How can it assign the processed string back to the destination void* so that it can be passed back to main()?
size_t filter(void* destination, const void* source, size_t source_size)
{
std::string source_string(static_cast<const char*>(source));
std::string destination_string;
// Do some processing on destination_string
// HAS NO EFFECT ????????????????????
destination = (&destination_string);
return destination_string.size();
}
int main()
{
std::vector<char> buffer;
buffer.reserve(100);
auto return_size = filter(buffer.data(), "Appollo", 7);
std::string str(buffer.begin(), buffer.begin()+return_size);
std::cout << str.c_str();
}
P.S. This is a problem from an online coding challenge.
The statement destination = (&destination_string) simply assigns the memory address of a local std::string variable to a pointer that is also local to the function. It is not copying the character data into the caller's buffer at all.
Try something more like this instead:
size_t filter(void* destination, size_t destination_size, const void* source, size_t source_size)
{
std::string source_string(static_cast<const char*>(source), source_size);
std::string destination_string;
// Do some processing on destination_string
auto size = std::min(destination_string.size(), destination_size);
std::copy_n(destination_string.begin(), size, static_cast<char*>(destination));
return size;
}
int main()
{
std::vector<char> buffer;
buffer.resize(100);
auto return_size = filter(buffer.data(), buffer.size(), "Appollo", 7);
std::string str(buffer.data(), return_size);
std::cout << str;
return 0;
}

C++ serialize multiple objects to one file and deserialize limited number of them

Before I start, consider this code:
One data transfer object ObjectDTO
class ObjectDTO {
public:
int id;
string string1;
string string2;
string string3;
int code1;
vector<string> stringList1;
private:
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive &archive, const unsigned int version) {
archive & id;
archive & string1;
archive & string2;
archive & string3;
archive & code1;
archive & stringList1;
}
Serialization
void OutputStreamService::writeReportsToFile(vector<ObjectDTO> objects, int filename){
ofstream outputFileStream(to_string(filename));
boost::archive::binary_oarchive outputArchive(outputFileStream);
outputArchive << objects;
}
Deserialization
vector<ObjectDTO> InputStreamService::readObjects() {
ifstream inputFileStream(to_string(fileNumber++));
boost::archive::binary_iarchive inputArchive(inputFileStream);
vector<ObjectDTO> objects;
inputArchive >> objects;
return objects;
}
I am using Boost Serialization C++ librarys to serialize a vector of ObjectDTOs and read it back later.
Supose i generated 30GB of random ObjectDTOs and saved it to the same file
How can i read only some of them to avoid reaching memory limit?
I am using Boost Serialization because it was the simples way i found to solve the first problem but i can change to any other approach if necessary!
Use Google Protocol buffers instead, there are CodedOutputStream class for serialization and CodedInputStream for deserialization.
One of CodedOutputStream methods is WriteVarint32, which allows to write a number which could be used as an index in the stream.
In CodeInputStream there is corresponding ReadVarint32 method, eg.
Serialization:
char text[[]] = "Hello world!";
coded_output->WriteVarint32(strlen(text));
coded_output->WriteRaw(text, strlen(text));
Deserialization:
uint32 size;
coded_input->ReadVarint32(&size);
char* text = new char[size + 1];
coded_input->ReadRaw(buffer, size);
The last line allows you to read the content of serialized stream starting from given index.
Here are my two methods to serialize/deserialize streams with given length at the start.
template < class T>
void TProtoBufSerializer::SerializeImplementation(const T& protoBuf, std::vector<char>& buffer )
{
int bufLength = protoBuf.ByteSize() + google::protobuf::io::CodedOutputStream::VarintSize32(protoBuf.ByteSize());
buffer.resize(bufLength);
google::protobuf::io::ArrayOutputStream arrayOutput(&buffer[0], bufLength);
google::protobuf::io::CodedOutputStream codedOutput(&arrayOutput);
codedOutput.WriteVarint32(protoBuf.ByteSize());
protoBuf.SerializeToCodedStream(&codedOutput);
}
template < class T>
bool TProtoBufSerializer::DeSerializeImplementation(std::vector<char>& buffer, T& protoBuf )
{
bool deserialized = false;
google::protobuf::io::ArrayInputStream arrayInput(&buffer[0],buffer.size());
google::protobuf::io::CodedInputStream codedInput(&arrayInput);
unsigned int object_size;
bool header_readed = codedInput.ReadVarint32(&object_size);
if(header_readed && object_size > 0)
{
if( buffer.size() >= codedInput.CurrentPosition() + object_size )
{
google::protobuf::io::CodedInputStream::Limit limit = codedInput.PushLimit(object_size);
if(protoBuf.ParseFromCodedStream(&codedInput))
{
std::vector<char>::iterator it = buffer.begin();
std::advance(it,codedInput.CurrentPosition());
std::move(it,buffer.end(),buffer.begin() );
buffer.resize(buffer.size() - codedInput.CurrentPosition());
deserialized = true;
}
else
{
throw TProtoBufSerializerPayloadException();
}
codedInput.PopLimit(limit);
}
}
else
{
//varint32 which is used in header is at the most 5 bytes long,
//if given buffer is 5 bytes or more long and header still cannot be decoded - raise exception
if(buffer.size() >= 5)
{
throw TProtoBufSerializerHeaderException();
}
}
return deserialized;
}
I solved the problem discarding Boost Serialization and vectors in favor of arrays with plain old C++ write and read on ofstream and ifstream respectively.
My OutputStreamService writeObjectsToFile ended like this:
void OutputStreamService::writeObjectssToFile(ObjectDTO * objects, int filename){
ofstream outputFileStream(to_string(filename), std::ios::binary);
outputFileStream.write((char *)&objects, sizeof(objects));
}
And InputStreamService with readObjects:
ObjectDTO * InputStreamService::readObjects() {
ifstream inputFileStream(to_string(fileNumber++), std::ios::binary);
ObjectDTO objects[10];
inputFileStream.read((char *)&objects, sizeof(objects));
return objects;
}
This way i can define 10 or any other integer as the number of objects i want to read in.
To solve the mais problem, i can now calculate the aprox number of objects my memory can handle and then limit the number of reads!
Ty!

Serialization of struct

Suppose i have a struct whose member values i want to send over the network to another system using winsock 2. I'm using C++ language.
How do i convert it to char * keeping in mind that the struct has to be serialized before sending and also how do i deserialize the char * into struct at the other end? I found boost serialization as a suggestion to similar question but can anyone illustrate with a small code snippet for both serialization and deserialization ?
This question might seem very basic but the other answers to the related posts did not help much.
Following example shows a simplest way to serialize struct into char array and de-serialize it.
#include <iostream>
#include <cstring>
#define BUFSIZE 512
#define PACKETSIZE sizeof(MSG)
using namespace std;
typedef struct MSG
{
int type;
int priority;
int sender;
char message[BUFSIZE];
}MSG;
void serialize(MSG* msgPacket, char *data);
void deserialize(char *data, MSG* msgPacket);
void printMsg(MSG* msgPacket);
int main()
{
MSG* newMsg = new MSG;
newMsg->type = 1;
newMsg->priority = 9;
newMsg->sender = 2;
strcpy(newMsg->message, "hello from server\0");
printMsg(newMsg);
char data[PACKETSIZE];
serialize(newMsg, data);
MSG* temp = new MSG;
deserialize(data, temp);
printMsg(temp);
return 0;
}
void serialize(MSG* msgPacket, char *data)
{
int *q = (int*)data;
*q = msgPacket->type; q++;
*q = msgPacket->priority; q++;
*q = msgPacket->sender; q++;
char *p = (char*)q;
int i = 0;
while (i < BUFSIZE)
{
*p = msgPacket->message[i];
p++;
i++;
}
}
void deserialize(char *data, MSG* msgPacket)
{
int *q = (int*)data;
msgPacket->type = *q; q++;
msgPacket->priority = *q; q++;
msgPacket->sender = *q; q++;
char *p = (char*)q;
int i = 0;
while (i < BUFSIZE)
{
msgPacket->message[i] = *p;
p++;
i++;
}
}
void printMsg(MSG* msgPacket)
{
cout << msgPacket->type << endl;
cout << msgPacket->priority << endl;
cout << msgPacket->sender << endl;
cout << msgPacket->message << endl;
}
You can also have a look at Protocol Buffers from Google which is a platform/language independent library for sending data between hosts.
However, the paradigm is shifted towards writing the protocol first and then fitting your data structures into it. The advantage of this though is that it forces your software architecture to fit well with simple data types.
You can just do
struct MyStruct {
int data;
char* someNullTerminatedName; // Assuming not larger than 1023 chars
std::ostream& serialize(std::ostream& os) const {
char null = '\0';
os.write((char*)&data, sizeof(data));
os.write(someNullTerminatedName, strlen(someNullTerminatedName));
os.write(&null, 1);
return os;
}
std::istream& deserialize(std::istream& is) {
char buffer[1024];
int i = 0;
is.read((char*)&data, sizeof(data));
do { buffer[i] = is.get(); ++i; } while(buffer[i] != '\0');
if (someNullTerminatedName != NULL) free(someNullTerminatedName);
someNullTerminatedName = (char*)malloc(i);
for (i = 0; buffer[i] != '\0'; ++i) {
someNullTerminatedName[i] = buffer[i];
}
return is;
}
};
It's up to you to take care of endianness and differences in size of ints and whatnot.
Example:
MyStruct foo, bar;
std::stringstream stream;
foo.serialize(stream);
// ... Now stream.str().c_str() contains a char* buffer representation of foo.
// For example it might contain [ 1f 3a 4d 10 h e l l o w o r l d \0 ]
bar.deserialize(stream);
// ... Now bar is a copy, via a serial stream of data, of foo.
If you have a socket library that exposes its interface via C++ iostreams then you don't even need the stringstream.
If your struct is POD you can use memcpy:
::memcpy(data, &your_struct, sizeof(YourStruct)));
and vice versa at reception:
::memcpy(&your_struct, data, sizeof(YourStruct)));
Where data is a char*. Don't forget you have to Allocate it, making sure it's big enought and delete it at the end.
Ok I will take the example from the boost web site as I don't understand what you can not understand from it.
I added some comments and changes to it how you can transfer via network. The network code itself is not in here. For this you can take a look at boost::asio.
int main() {
// create and open a character archive for output
// we simply use std::strinstream here
std::stringstream ofs;
// create class instance
const gps_position g(35, 59, 24.567f);
// save data to archive
{
boost::archive::text_oarchive oa(ofs);
// write class instance to archive
oa << g;
// archive and stream closed when destructors are called
}
// now we have const char* ofs.str().c_str()
// transfer those bytes via network
// read them on the other machine
gps_position newg;
{
// create and open an archive for input
std::stringstream ifs(the_string_we_read_from_the_network);
boost::archive::text_iarchive ia(ifs);
// read class state from archive
ia >> newg;
// archive and stream closed when destructors are called
}
return 0;
}

Store read memory process in a buffer then search it

I want to read the memory from a process for 16 MB (FFFFFF) and store it in a array, in a way that when I search inside the array like: array[i], i will be the real memory address.
Lets say I want to search from 000000 to FFFFFF, I want to make that jump sizeof(value), get the address from that address and store it in a var.
then if(var==value) return address.
i have this:
ReadProcessMemory(phandle,(void*)address,buffer,0xFFFFFF,0);
EDIT:
i have this (by BlueWanderer answer):
class offset_buffer{
private:
char *buf;
int offset;
public:
offset_buffer(char *in_buf, int in_offset)
: buf(in_buf), offset(in_offset){
}
char & operator[](int in_index){
return buf[in_index - offset];
}
void setOffset(int off){
offset=off;
}
void ReadMemory(){
LPBYTE point;
DWORD primeiroAddress = 0x000000;
DWORD finalAddress = 0xFFFFFF;
//LPBYTE buffer = new BYTE[finalAddress-primeiroAddress];
HANDLE phandle = OpenProcess(PROCESS_VM_READ,0,TargetPID);
ReadProcessMemory(phandle,(void*)primeiroAddress, buf, sizeof(buf), 0);
CloseHandle(phandle);
}
};
main(){
char *buffer = new char[0xFFFFFFF-0x0000000];
int address = 0x0000000;
offset_buffer b(buffer,address);
std::ostringstream ss;
int i=0;
TListItem *ListIt;
b.ReadMemory();
for(address=0x0000000;address<0xFFFFFFF;address+=sizeof(int)){
if(b[address]==StrToInt(Edit1->Text.c_str())){
ss << std::hex << address;
showValue();
ss.str(std::string());
}
}
what is wrong?? can someone help me? why it doesn't work
You want something like this?
class offset_buffer
{
private:
char *buf;
int offset;
public:
offset_buffer(char *in_buf, int in_offset)
: buf(in_buf), offset(in_offset)
{
}
char & operator[](int in_index)
{
return buf[in_index - offset];
}
};
It will map your real address to the index in the array
offset_buffer b(buffer, address);
if (b[0x0C2F8E3] == 123) return 0x0C2F8E3;