Problems using Protocol Buffers to read messages from file - c++

I'm trying to use Google Protocol Buffers to read multiple messages from a file. The documentation suggests using CodedInputStream.
But if I try and read more than a very small message I get a failure from MergeFromCodedStream
For example, if I have a message defined as:
message Chunk {
repeated int64 values = 1 [packed=true];
}
And try to write the message to file and then read it back:
int main() {
GOOGLE_PROTOBUF_VERIFY_VERSION;
{
Chunk chunk;
for (int i = 0; i != 26; ++i)
chunk.add_values(i);
std::ofstream output("D:\\temp.bin");
OstreamOutputStream raw_output(&output);
if (!writeDelimitedTo(chunk, &raw_output)){
std::cout << "Unable to write chunk\n";
return 1;
}
}
{
std::ifstream input("D:\\temp.bin");
IstreamInputStream raw_input(&input);
Chunk in_chunk;
if (!readDelimitedFrom(&raw_input, &in_chunk)) { // <--- Fails here
std::cout << "Unable to read chunk\n";
return 1;
}
std::cout << "Num values in chunk " << in_chunk.values_size() << "\n";
}
google::protobuf::ShutdownProtobufLibrary();
}
where writeDelimitedTo and readDelimitedFrom come from this answer by the author of the C++ protobuf libraries:
bool writeDelimitedTo(
const google::protobuf::MessageLite& message,
google::protobuf::io::ZeroCopyOutputStream* rawOutput) {
google::protobuf::io::CodedOutputStream output(rawOutput);
const int size = message.ByteSize();
output.WriteVarint32(size);
uint8_t* buffer = output.GetDirectBufferForNBytesAndAdvance(size);
if (buffer != NULL) {
message.SerializeWithCachedSizesToArray(buffer);
} else {
message.SerializeWithCachedSizes(&output);
if (output.HadError()) return false;
}
return true;
}
bool readDelimitedFrom(
google::protobuf::io::ZeroCopyInputStream* rawInput,
google::protobuf::MessageLite* message) {
google::protobuf::io::CodedInputStream input(rawInput);
uint32_t size;
if (!input.ReadVarint32(&size)) return false;
google::protobuf::io::CodedInputStream::Limit limit =
input.PushLimit(size);
if (!message->MergeFromCodedStream(&input)) return false; // <-- Fails here
if (!input.ConsumedEntireMessage()) return false;
input.PopLimit(limit);
return true;
}
if i only write 25 values to my message it works, 26 and it fails. I've shown where it is failing in the code.
I've tried debugging into the protobuf library and it seems to be failing to read new data into the buffer but I don't know why.
I'm using Visual Studio 2013 and protobuf 2.6.1.

As #rashimoto correctly pointed out I was failing to open my files in binary mode!
With that fixed I can successfully write multiple messages to file:
int main() {
GOOGLE_PROTOBUF_VERIFY_VERSION;
{
std::vector<Chunk> chunks = createChunks(NUM_CHUNKS, CHUNK_SIZE);
std::ofstream output("D:\\temp.bin", std::ios::binary);
OstreamOutputStream raw_output(&output);
for (Chunk& chunk : chunks) {
if (!writeDelimitedTo(chunk, &raw_output)){
std::cout << "Unable to write chunk\n";
return 1;
}
}
}
{
std::ifstream input("D:\\temp.bin", std::ios::binary);
IstreamInputStream raw_input(&input);
std::vector<Chunk> chunks(NUM_CHUNKS);
for (auto& chunk : chunks) {
if (!readDelimitedFrom(&raw_input, &chunk)) {
std::cout << "Unable to read chunk\n";
return 1;
}
}
std::cout << "Num values in first chunk " << chunks[0].values_size() << "\n";
}
google::protobuf::ShutdownProtobufLibrary();
}

Related

Get rid of noise while using recv in C++

I am receiving data following a certain format over a TCP server by serializing them.
the class of the object:
class Command {
private:
char letter;
int x;
int y;
std::string button;
public:
Command(char _letter, int _x, int _y, std::string _button) {
letter = _letter;
x = _x;
y = _y;
button = _button;
}
Command(std::string serializedCmd)
{
size_t delimPos = 0;
std::vector<std::string> parts;
while ((delimPos = serializedCmd.find(SERIALIZE_DELIM)) != std::string::npos)
{
parts.push_back(serializedCmd.substr(0, delimPos));
serializedCmd.erase(0, delimPos + 1);
}
if (parts.empty()) {
this->letter = '$';
this->x = -1;
this->y = -1;
this->button = "nan";
return;
}
this->letter = (char)atoi(parts.at(0).data());
this->x = atoi(parts.at(1).data());
this->y = atoi(parts.at(2).data());
this->button = parts.at(3);
}
Command() {}
~Command() {}
std::string serialize()
{
return std::to_string(letter) + SERIALIZE_DELIM + std::to_string(x) + SERIALIZE_DELIM + std::to_string(y) + SERIALIZE_DELIM + button + SERIALIZE_DELIM;
}
char getLetter() { return letter; }
int getX() { return x; }
int getY() { return y; }
std::string getButton() { return button; }
bool isEmpty() {
return((this->letter == '$') && (this->x == -1) && (this->y == -1) && (this->button == "nan"));
}
void printCommand() {
std::cout << "letter: " << letter << std::endl;
std::cout << "x : " << x << std::endl;
std::cout << "y : " << y << std::endl;
std::cout << "button: " << button << std::endl;
std::cout << "================" << std::endl;
}
};
The data after being DeSerialized at the clients end follows this format:
||{key}|{x}|{y}|{button}||
Example: ||$|20|40|nan||
The problem is that when using recv to get the data, it seems that I'm picking up some noise around the command.
Example:
Sending:
||$|301|386|nan||
Reciving:
(¿ⁿ8T√|301|386|╠╠↕▼
The command is there although it's crowded with noise for some reason.
The code I'm using to receive the data:
char buf[4096];
Command c;
std::string commandTemp = "";
while (true) {
memset(buf, '\0', 4096);
const int size = recv(sock, buf, sizeof(buf), 0);
std::string s(buf,size);
std::cout << s << std::endl;
buf[size] = 0;
commandTemp = buf;
if (!commandTemp.empty()) {
try {
c = Command(commandTemp);
exe(c); //executes command (unrelated)
}
catch (const std::exception& err) {
std::cout << "Couldn't execute!!!!!!!!" << std::endl;
}
}
else {
std::cout << "Error empty command!\n";
}
}
If I am missing any information I will happily provide it.
Can someone maybe tell what the problem is?
You have to loop on the recv till you get the entire message
This may not be the immediate cause of you problem but you will hit it eventually.
TCP is a stream protocol, not a message protocol. All that TCP guarantees is that the bytes you send are received once and in order. But you might send one 100 byte message and receive 20 5 byte messages. You will say "but it works now", true if on the same machine or the messages are small but not true with larger message over a real netwrok so you must do this
char buf[4096];
Command c;
std::string commandTemp = "";
while (true) {
memset(buf, '\0', 4096);
int offset = 0;
int len = ??;
while(len > 0){
const int size = recv(sock, buf + offset, sizeof(buf) - offset, 0);
if (size == 0)
break; // record that we got incomplete message
offset += size;
len -= size;
}
....
Note that you need to know the length in advance too. So either send fixed length message or prepend a fixed size length to each message and read that first

Reading stdin in c ++ without using getline

I'm trying to convert a program (it's a bridge between vscode and a debug)
This program is written in C#.
It was based on the o vscode-mono-debug
(https://github.com/Microsoft/vscode-mono-debug/blob/master/src/Protocol.cs)
Well,
In C # I can read the standard input as a stream:
byte[] buffer = new byte[BUFFER_SIZE];
Stream inputStream = Console.OpenStandardInput();
_rawData = new ByteBuffer();
while (!_stopRequested) {
var read = await inputStream.ReadAsync(buffer, 0, buffer.Length);
if (read == 0) {
// end of stream
break;
}
if (read > 0) {
_rawData.Append(buffer, read);
ProcessData();
}
}
I try this :
#define _WIN32_WINNT 0x05017
#define BUFFER_SIZE 4096
#include<iostream>
#include<thread>
#include <sstream>
using namespace std;
class ProtocolServer
{
private:
bool _stopRequested;
ostringstream _rawData;
public:
void Start()
{
char buffer[BUFFER_SIZE];
while (!cin.eof())
{
cin.getline(buffer,BUFFER_SIZE);
if (cin.fail())
{
//error
break;
}
else
{
_rawData << buffer;
}
}
}
};
int main()
{
ProtocolServer *server = new ProtocolServer();
server->Start();
return 0;
}
Input:
Content-Length: 261\r\n\r\n{\"command\":\"initialize\",\"arguments\":{\"clientID\":\"vscode\",\"adapterID\":\"advpl\",\"pathFormat\":\"path\",\"linesStartAt1\":true,\"columnsStartAt1\":true,\"supportsVariableType\":true,\"supportsVariablePaging\":true,\"supportsRunInTerminalRequest\":true},\"type\":\"request\",\"seq\":1}
This reads the first 2 lines correctly. Since the protocol does not put \n at the end, it gets stuck in cin.getline in the 3 interaction.
Switching to read() causes it to stay stopped at cin.read (), and does not read anything at all.
I found some similar questions:
StackOverFlow Question
And examples:
Posix_chat_client
But I do not need it to be necessarily asynchronous, but it works on windows and linux.
I'm sorry for my English
Thanks!
What you want is known as unformatted input operations.
Here's a 1:1 translation using just std::iostream. The only "trick" is using and honouring gcount():
std::vector<char> buffer(BUFFER_SIZE);
auto& inputStream = std::cin;
_rawData = std::string {}; // or _rawData.clear(), e.g.
while (!_stopRequested) {
inputStream.read(buffer.data(), buffer.size());
auto read = inputStream.gcount();
if (read == 0) {
// end of stream
break;
}
if (read > 0) {
_rawData.append(buffer.begin(), buffer.begin() + read);
ProcessData();
}
}
I'd personally suggest dropping that read == 0 check in favour of the more accurate:
if (inputStream.eof()) { break; } // end of stream
if (!inputStream.good()) { break; } // failure
Note that !good() also catches eof(), so you can
if (!inputStream.good()) { break; } // failure or end of stream
Live Demo
Live On Coliru
#include <iostream>
#include <vector>
#include <atomic>
struct Foo {
void bar() {
std::vector<char> buffer(BUFFER_SIZE);
auto& inputStream = std::cin;
_rawData = std::string {};
while (!_stopRequested) {
inputStream.read(buffer.data(), buffer.size());
auto read = inputStream.gcount();
if (read > 0) {
_rawData.append(buffer.begin(), buffer.begin() + read);
ProcessData();
}
if (!inputStream.good()) { break; } // failure or end of stream
}
}
protected:
void ProcessData() {
//std::cout << "got " << _rawData.size() << " bytes: \n-----\n" << _rawData << "\n-----\n";
std::cout << "got " << _rawData.size() << " bytes\n";
_rawData.clear();
}
static constexpr size_t BUFFER_SIZE = 128;
std::atomic_bool _stopRequested { false };
std::string _rawData;
};
int main() {
Foo foo;
foo.bar();
}
Prints (e.g. when reading its own source file):
got 128 bytes
got 128 bytes
got 128 bytes
got 128 bytes
got 128 bytes
got 128 bytes
got 128 bytes
got 92 bytes

zlib inflate decompression operation

I have a data buffer which contains multiple compressed members, it could be deflate or zlib compressed member.
I found that zlib inflate call returns Z_STREAM_END after processing the first compressed block, Here multiple compressed member can be in any number(here in my example Its 3). But this data comes from other sides which doesn't communicated detail about number of compressed member in a data.
So how could I implement the use of zlib inflate functionality so that it could work over multiple compressed member ?
Following is a sample quick & dirty example in which I try to elaborate my problem.
This referred the case with zlib 1.2.5 library.
/* example.c -- understanding zlib inflate/decompression operation
*/
#define CHECK_ERR(err, msg) { \
if (err != Z_OK) { \
std::cerr << msg << " error: " << err << std::endl; \
exit(1); \
} \
}
/* ===========================================================================
* deflate() to create compressed data
*/
void test_deflate(std::vector<uint8_t> & input_data, std::vector<uint8_t>& compr)
{
z_stream c_stream; /* compression stream */
int err;
compr.clear();
c_stream.zalloc = (alloc_func)0;
c_stream.zfree = (free_func)0;
c_stream.opaque = (voidpf)0;
err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION);
CHECK_ERR(err, "deflateInit");
c_stream.next_in = &input_data[0];
c_stream.avail_in = input_data.size();
for (;;) {
uint8_t c_buffer[10] = {};
c_stream.next_out = &c_buffer[0];
c_stream.avail_out = 10;
err = deflate(&c_stream, Z_FINISH);
if (err == Z_STREAM_END)
{
for (int i = 0; i < (10 - c_stream.avail_out); i++)
compr.push_back(c_buffer[i]);
break;
}
CHECK_ERR(err, "deflate");
for (int i = 0; i < (10 - c_stream.avail_out); i++)
compr.push_back(c_buffer[i]);
}
std::cout << "Compressed data (size = " << std::dec << compr.size() << ") = ";
for (int i = 0; i < compr.size(); i++)
std::cout << (uint32_t) compr[i];
std::cout << std::endl;
err = deflateEnd(&c_stream);
CHECK_ERR(err, "deflateEnd");
}
/* ===========================================================================
* Test inflate()
*/
void test_inflate(std::vector<uint8_t> &compr,
std::vector<uint8_t> &uncompr)
{
int err;
z_stream d_stream; /* decompression stream */
uncompr.clear();
d_stream.zalloc = Z_NULL;
d_stream.zfree = Z_NULL;
d_stream.opaque = Z_NULL;
d_stream.avail_in = 0;
d_stream.next_in = Z_NULL;
err = inflateInit(&d_stream);
CHECK_ERR(err, "inflateInit");
d_stream.avail_in = compr.size();
d_stream.next_in = &compr[0];
for(;;) {
uint8_t d_buffer[10] = {};
d_stream.next_out = &d_buffer[0];
d_stream.avail_out = 10;
err = inflate(&d_stream, Z_NO_FLUSH);
if (err == Z_STREAM_END) {
for (int i = 0; i < (10 - d_stream.avail_out); i++)
uncompr.push_back(d_buffer[i]);
if (d_stream.avail_in == 0)
break;
}
CHECK_ERR(err, "inflate");
for (int i = 0; i < (10 - d_stream.avail_out); i++)
uncompr.push_back(d_buffer[i]);
}
err = inflateEnd(&d_stream);
CHECK_ERR(err, "inflateEnd");
std::cout << "Uncompressed data (size = " << std::dec << uncompr.size() << ") = ";
for (int i = 0; i < uncompr.size(); i++)
std::cout << (uint32_t) uncompr[i];
std::cout << std::endl;
}
/* ===========================================================================
* Usage: example
*/
int main(int argc, char **argv)
{
std::vector<uint8_t> input_data;
std::vector<uint8_t> compr, multiple_compr;
std::vector<uint8_t> uncompr;
std::cout << "Input Data (in hex) = ";
for (int i=0; i<32; i++) {
input_data.push_back((uint8_t)i);
if( i && (i % 2 == 0))
std::cout << " ";
std::cout << std::hex << (uint32_t)input_data[i];
}
std::cout << std::endl;
// create compressed buffer-1 from input data
test_deflate(input_data, compr);
// copy compressed buffer-1 data into multiple compressed member buffer
multiple_compr = compr;
compr.clear();
// create compressed buffer-2 from input data
test_deflate(input_data, compr);
// append data of compressed buffer-2 into multiple compressed member buffer
for(int i=0; i< compr.size(); i++)
{
multiple_compr.push_back(compr[i]);
}
// create decompressed output
test_inflate(multiple_compr, uncompr);
// compare decompressed data with input data
std::vector<uint8_t> final_data;
final_data.push_back(input_data);
final_data.push_back(input_data);
if (final_data == uncompr)
std::cout << "Matched" << std::endl;
else
std::cout << "Not Matched" << std::endl;
return 0;
}
1) Here second time inflate call returns error, But I wants it proceed successfully why it work like this ?
2) When I use Z_FINISH in the inflate call argument it returns with error, why can't I use Z_FINISH here ?
Kindly correct my example and suggest some optimized approach to do the same.
Simply repeat the inflate operation on the remaining data.
You can save some unnecessary free's and malloc's by using inflateReset() instead of inflateEnd() and inflateInit(). You may have some leftover data from the last inflate in next_in and avail_in, so use that first, and then reload.

How to send / receive binary data serialized with Protocol Buffers using ZMQ

I need to send an object (serialized with GPB) on a ZMQ socket. Currently the code have an extra copy. How do I directly write serialized array into message_ts data?
ABT_CommunicationProtocol introPacket;
// Fill the packet
message_t introMessage;
size_t dataLenght = introPacket.ByteSize();
char* temp = new char[dataLenght];
introPacket.SerializeToArray(temp, dataLenght); // write data to temp
memcpy(introMessage.data(), temp, dataLenght); // copy data to message
this->serverRquest.send(introMessage);
Don't use zmq_send but zmq_sendmsg
int cgi_msg_cnx_pool::PbToZmq(::google::protobuf::Message *src, zmq_msg_t *dest)
{
int size = src->ByteSize();
int rc = zmq_msg_init_size(dest, size);
if (rc==0)
{
try
{
rc = src->SerializeToArray(zmq_msg_data(dest), size)?0:-1;
}
catch (google::protobuf::FatalException fe)
{
std::cout << "PbToZmq " << fe.message() << std::endl;
}
}
return rc;
}
int cgi_msg_cnx_pool::ZmqToPb(zmq_msg_t *src, ::google::protobuf::Message *dest)
{
int rc = 0;
try
{
rc = dest->ParseFromArray(zmq_msg_data(src), zmq_msg_size(src))?0:-1;
}
catch (google::protobuf::FatalException fe)
{
std::cout << "ZmqToPb " << fe.message() << std::endl;
}
return rc;
}

extracting file inside of directory with miniz

Is there someone that can explain me how i can get files from directories inside a zipfile.
I use c++ and miniz(code.google.com/p/miniz/). thank you in advance.
here my code i have that i use right now:
size_t uncomp_size;
mz_bool status;
mz_zip_archive zip_archive;
memset(&zip_archive, 0, sizeof(zip_archive));
status = mz_zip_reader_init_file(&zip_archive, "data.zip", 0);
if (!status){
puts("failed to open zip file\n");
return 0;
}
try{
void* p = NULL;
std::string file_to_extract = "data//test.txt";
int file_index = mz_zip_reader_locate_file(&zip_archive, file_to_extract.c_str(), NULL, MZ_ZIP_FLAG_IGNORE_PATH);
if (file_index < 0)
{
mz_bool is_dir = mz_zip_reader_is_file_a_directory(&zip_archive,file_index);
if(is_dir){
throw std::exception("file_index = folder");
}else{
throw std::exception("cannot find file in zip(0)");
}
}
p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, &uncomp_size, NULL);
if(!p){
throw std::exception("cannot find file in zip(1)");
}
std::fstream fp1("test.txt",ios::binary|ios::out);
fp1.write(reinterpret_cast<char*>(p),uncomp_size);
fp1.close();
delete p;
}catch(std::exception ex){
cout << ex.what() << endl;
}
mz_zip_reader_end(&zip_archive);
This code is working for me using miniz from here.
string str_zip; // The zip archive in string form.
string str_unzip; // The uncompressed contents of the first file in the zip archive.
// Read in or assign zip contents to the string.
// In my case I receive the zip file via a web service.
// The processing all takes place in memory.
// But you can easily read a file's contents into the zipfile string, as well.
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint;
mz_zip_archive zip_archive;
mz_bool status;
// Now try to open the archive.
memset(&zip_archive, 0, sizeof(zip_archive));
// You can provide the zip data in memory as I did...
status = mz_zip_reader_init_mem(&zip_archive, str_zip.c_str(), str_zip.size(), 0);
// Or you can just give a filename...
// status = mz_zip_reader_init_file(&zip_archive, "myfile.zip", 0);
if (!status)
{
cout << "zip file appears invalid..." << endl;
return;
}
// Get the first file in the archive.
if (mz_zip_reader_get_num_files(&zip_archive) != 1)
{
cout << "zip file does not contain our 1 file..." << endl;
return;
}
mz_zip_archive_file_stat file_stat;
if (!mz_zip_reader_file_stat(&zip_archive, 0, &file_stat))
{
cout << "zip file read error..." << endl;
mz_zip_reader_end(&zip_archive);
return;
}
// Unzip the file to heap.
size_t uncompressed_size = file_stat.m_uncomp_size;
void* p = mz_zip_reader_extract_file_to_heap(&zip_archive, file_stat.m_filename, &uncompressed_size, 0);
if (!p)
{
cout << "mz_zip_reader_extract_file_to_heap() failed..." << endl;
mz_zip_reader_end(&zip_archive);
return;
}
str_unzip.assign((const char*)p,uncompressed_size);
// Close the archive, freeing any resources it was using
mz_free(p);
mz_zip_reader_end(&zip_archive);
I came up with a solution for this.
Lets we have;
In your C directory:
+-- ZipFile /
+-- folder1 /
+-- file1
+-- folder2 /
+--file2.1
+--file2.2
+--file2.3
bool decompress_folders_inside_zip()
{
const std::string archive_name = "ZipFile.zip";
const std::string decompress_path = "C:\\";
mz_zip_archive archive {};
boost::filesystem::path dec_path { decompress_path + archive_name };
if (!mz_zip_reader_init_file(&archive, dec_path.string().c_str(), 0))
{
return false;
}
const int file_cnt = (int)mz_zip_reader_get_num_files(&archive);
if (0 == file_cnt)
{
return false;
}
mz_zip_archive_file_stat file_stat;
if (!mz_zip_reader_file_stat(&archive, 0, &file_stat))
{
mz_zip_reader_end(&archive);
return false;
}
for (int i = 0; i < file_cnt; ++i)
{
mz_zip_reader_file_stat(&archive, i, &file_stat);
if (mz_zip_reader_is_file_a_directory(&archive, i))
{
boost::filesystem::path dir(decompress_path + file_stat.m_filename);
boost::filesystem::create_directories(dir.parent_path());
continue;
}
boost::filesystem::path file_out(decompress_path + file_stat.m_filename);
boost::filesystem::path out_file(file_out.parent_path().generic_string() + "/" + file_out.filename().string());
if (!mz_zip_reader_extract_to_file(&archive, i, out_file.string().c_str(), 0))
{
mz_zip_reader_end(&archive);
return false;
}
}
if (!mz_zip_reader_end(&archive))
{
return false;
}
std::cout << "Completed" << std::endl;
return true;
}