Increase FlatBuffers performance in C++ - c++

We're developing a high-frequency trading platform in C++. We first tried gRPC with Protobuf, but we saw that a single network call took approximately 200-300 microseconds, which is too long for us. What we expect for serializing/deserializing data and sending it through a network socket is approximately 50-60 microseconds.
Then we tried Protobuf over native C++ sockets (using non-blocking I/O) and the time dropped to approximately 150-200 microseconds, which was still not enough for us. Then we found FlatBuffers and implemented it as described below. However, during our tests we saw that serializing alone (and likewise deserializing) took approximately 50 microseconds, and transferring the data took another 30-40 microseconds, so in total it took approximately 100-150 microseconds. So I wonder whether we are doing something wrong in our implementation of FlatBuffers.
In the example below, the differences I calculated between the timestamp logs are:
Timestamp 1 -> Timestamp 2 = 16 microseconds
Timestamp 2 -> Timestamp 3 = 24 microseconds
Total serialization = 40 microseconds
Do you know any other way to increase the performance?
Example code for serializing data with flatbuffers in C++:
const char* MAHelper::getRequest(BaseRequest *request, int& size) {
    const char *result;
    flatbuffers::FlatBufferBuilder builder(10240);
    if (request->orderType == OrderTypes::TYPE_LoginRequest) {
        std::cout << "Timestamp 1: " << getCurrentTimestamp() << std::endl;
        LoginRequest *loginRequest = (LoginRequest*) request;
        std::cout << "Converting Login Request 1: " << getCurrentTimestamp() << std::endl;
        auto username = builder.CreateString(loginRequest->userName);
        auto password = builder.CreateString(loginRequest->password);
        auto application = getApplication(loginRequest->applicationType);
        std::cout << "Timestamp 2: " << getCurrentTimestamp() << std::endl;
        auto loginReq = piramit::orders::fb::CreateLoginRequest(builder, username, password, application);
        auto loginOrderBase = piramit::orders::fb::CreateRequestHolder(builder, piramit::orders::fb::BaseRequest_LoginRequest, loginReq.Union());
        builder.Finish(loginOrderBase);
        std::cout << "Timestamp 3: " << getCurrentTimestamp() << std::endl;
    } else if (request->orderType == OrderTypes::TYPE_EnterOrderRequest) {
        EnterOrderRequest *enterOrderRequest = (EnterOrderRequest*) request;
        auto strategyIdentifier = builder.CreateString(enterOrderRequest->strategyIdentifier);
        auto passThrough = builder.CreateString(enterOrderRequest->passThrough);
        auto account = builder.CreateString(enterOrderRequest->account);
        auto authToken = builder.CreateString(enterOrderRequest->baseRequest.authToken);
        auto enterOrderReq = piramit::orders::fb::CreateEnterOrder(builder, enterOrderRequest->orderbookId, enterOrderRequest->quantity, enterOrderRequest->price, account,
            getStrategyType(enterOrderRequest->strategyType), strategyIdentifier, getSide(enterOrderRequest->side), getTimeInForce(enterOrderRequest->timeInForce), passThrough, getOrderType(enterOrderRequest->orderType));
        auto enterOrderBase = piramit::orders::fb::CreateRequestHolder(builder, piramit::orders::fb::BaseRequest_EnterOrder, enterOrderReq.Union(), authToken);
        builder.Finish(enterOrderBase);
    } else if (request->orderType == OrderTypes::TYPE_ReplaceOrderRequest) {
        ReplaceOrderRequest *replaceOrderRequest = (ReplaceOrderRequest*) request;
        auto orderToken = builder.CreateString(replaceOrderRequest->orderToken);
        auto authToken = builder.CreateString(replaceOrderRequest->baseRequest.authToken);
        auto replaceOrderReq = piramit::orders::fb::CreateReplaceOrder(builder, orderToken, replaceOrderRequest->quantity, replaceOrderRequest->price);
        auto replaceOrderBase = piramit::orders::fb::CreateRequestHolder(builder, piramit::orders::fb::BaseRequest_ReplaceOrder, replaceOrderReq.Union(), authToken);
        builder.Finish(replaceOrderBase);
    } else if (request->orderType == OrderTypes::TYPE_CancelOrderRequest) {
        CancelOrderRequest *cancelOrderRequest = (CancelOrderRequest*) request;
        auto orderToken = builder.CreateString(cancelOrderRequest->orderToken);
        auto authToken = builder.CreateString(cancelOrderRequest->baseRequest.authToken);
        auto cancelOrderReq = piramit::orders::fb::CreateCancelOrder(builder, orderToken);
        auto cancelOrderBase = piramit::orders::fb::CreateRequestHolder(builder, piramit::orders::fb::BaseRequest_CancelOrder, cancelOrderReq.Union(), authToken);
        builder.Finish(cancelOrderBase);
    } else if (request->orderType == OrderTypes::TYPE_BasicOrderRequest) {
        BasicOrderRequest *basicOrderRequest = (BasicOrderRequest*) request;
        auto authToken = builder.CreateString(basicOrderRequest->baseRequest.authToken);
        auto basicOrderReq = piramit::orders::fb::CreateOrderRequest(builder, getOperationType(basicOrderRequest->operation), basicOrderRequest->orderId, getOrderType(basicOrderRequest->orderTypes));
        auto basicOrderBase = piramit::orders::fb::CreateRequestHolder(builder, piramit::orders::fb::BaseRequest_OrderRequest, basicOrderReq.Union(), authToken);
        builder.Finish(basicOrderBase);
    } else if (request->orderType == OrderTypes::TYPE_AccountStrategyRequest) {
        AccountStrategyRequest *accountStrategyRequest = (AccountStrategyRequest*) request;
        flatbuffers::Offset<flatbuffers::String> account = 0;
        flatbuffers::Offset<flatbuffers::String> strategyIdentifier = 0;
        auto authToken = builder.CreateString(accountStrategyRequest->baseRequest.authToken);
        if (accountStrategyRequest->operation == OPERATION_SET) {
            account = builder.CreateString(accountStrategyRequest->accountStrategy.account);
            strategyIdentifier = builder.CreateString(accountStrategyRequest->accountStrategy.strategyIdentifier);
        }
        flatbuffers::Offset<piramit::orders::fb::AccountStrategy> accountStrategy = piramit::orders::fb::CreateAccountStrategy(builder, accountStrategyRequest->accountStrategy.orderBookId, account, getStrategyType(accountStrategyRequest->accountStrategy.strategyType), strategyIdentifier);
        auto accountStrategyReq = piramit::orders::fb::CreateAccountStrategyRequest(builder, getOperationType(accountStrategyRequest->operation), accountStrategy);
        auto accountStrategyBase = piramit::orders::fb::CreateRequestHolder(builder, piramit::orders::fb::BaseRequest_AccountStrategyRequest, accountStrategyReq.Union(), authToken);
        builder.Finish(accountStrategyBase);
    } else if (request->orderType == OrderTypes::TYPE_OrderBookStateRequest) {
        OrderBookStateRequest *orderBookStateRequest = (OrderBookStateRequest*) request;
        auto stateName = builder.CreateString(orderBookStateRequest->stateName);
        auto orderBookStateReq = piramit::orders::fb::CreateOrderBookStateRequest(builder, stateName, orderBookStateRequest->orderBookId, orderBookStateRequest->timestamp);
        auto orderBookStateBase = piramit::orders::fb::CreateRequestHolder(builder, piramit::orders::fb::BaseRequest_OrderBookStateRequest, orderBookStateReq.Union());
        builder.Finish(orderBookStateBase);
    }
    uint8_t *requestBuffer = builder.GetBufferPointer();
    result = (const char*) requestBuffer;
    size = builder.GetSize();
    return result;
}
And this is the relevant part of our FlatBuffers schema:
union BaseRequest { LoginRequest, EnterOrder, CancelOrder, ReplaceOrder, OrderRequest, AccountStrategyRequest, OrderBookStateRequest }

table RequestHolder {
    request:BaseRequest;
    authToken:string;
}

table LoginRequest {
    username:string;
    password:string;
    application:Application = APP_UNKNOWN;
}

table EnterOrder {
    order_book_id:uint;
    quantity:ulong;
    price:int;
    account:string;
    strategy:StrategyType;
    strategy_identifier:string;
    side:Side;
    time_in_force:TimeInForce;
    pass_through:string;
    order_type:OrderType;
}

root_type RequestHolder;

For serializing:
You can save yourself some time by reusing the FlatBufferBuilder across calls; just call Reset() on it to clear it.
You are doing HFT in C++, yet a lot of your data consists of strings? FlatBuffers has all sorts of really efficient ways of representing data, with scalars, structs and enums. Try to find better representations of your data if speed really matters.
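As a rough illustration of the first point, a minimal sketch of builder reuse; the thread_local choice below is an assumption, not something your code requires:
const char* MAHelper::getRequest(BaseRequest *request, int& size) {
    // Reuse one builder instead of constructing a fresh 10 KB builder per call.
    thread_local flatbuffers::FlatBufferBuilder builder(10240);
    builder.Reset();  // clears state for reuse (depending on the FlatBuffers
                      // version, Clear() keeps the allocation while Reset() frees it)

    // ... build and Finish() the request exactly as in the original code ...

    size = builder.GetSize();
    // Note: the returned pointer is only valid until the next call resets the builder.
    return reinterpret_cast<const char*>(builder.GetBufferPointer());
}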
For deserializing:
Deserializing in FlatBuffers costs 0 ms, since there is nothing to do: you can access the data in place. If what you're doing is copying all incoming FlatBuffers data into your own data structures, you are throwing away one of FlatBuffers' biggest advantages. Instead, make the code that acts on the incoming data work directly with the incoming FlatBuffer.
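For example, a sketch of in-place access on the receiving side; the accessor names follow the flatc-generated C++ API for the schema above, and handleIncoming/doLogin are placeholder names:
void handleIncoming(const uint8_t* data, size_t size) {
    // Optional verification step; it costs time, so you may skip it on a trusted link.
    flatbuffers::Verifier verifier(data, size);
    if (!piramit::orders::fb::VerifyRequestHolderBuffer(verifier)) return;

    // Access the buffer in place, no copy and no separate deserialization step.
    auto holder = piramit::orders::fb::GetRequestHolder(data);
    if (holder->request_type() == piramit::orders::fb::BaseRequest_LoginRequest) {
        auto login = holder->request_as_LoginRequest();
        doLogin(login->username()->c_str(), login->password()->c_str());
    }
}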

Related

How to use redis pipeline to get all updates to db at once?

I use the redis++ library and have a Redis db which contains keys with a TTL set. I want to be informed of all updates to my db. I subscribe to the __keyevent@0__:set channel and set a subscriber callback like this:
subscriber.on_message([&keys = updated_redis_keys](std::string, std::string key) {
    keys.push_back(std::move(key));
});
and use a while loop to consume events:
while (true)
{
    try
    {
        subscriber.consume();
        for (const auto &key : keys)
            pipeline.get(key).ttl(key);
        auto replies = pipeline.exec();
        for (std::size_t i = 0; i < keys.size(); ++i)
        {
            static constexpr std::size_t ValueIndex = 0;
            static constexpr std::size_t TtlIndex = 1;
            const auto value = replies.get<std::optional<std::string>>(i * 2 + ValueIndex);
            const auto ttl = replies.get<long long>(i * 2 + TtlIndex);
            const auto entry = Entry(std::move(updated_redis_keys[i]), *value, static_cast<std::size_t>(ttl));
            do_something_with(entry);
        }
        keys.clear();
    }
    catch (const sw::redis::Error &err)
    {
    }
}
I tried to accumulate keys in the keys vector and use pipeline.exec() to get the values and TTLs of all updated keys at once. But I think subscriber.consume() just consumes a single event each time, so keys.size() always equals 1.
How can I get better performance by stacking more keys before running exec()?
You can collect a batch of keys by running multiple consume()s before running the pipeline. Even better, you can add a time-window threshold and run the pipeline when either threshold is reached (even if you have not collected enough keys):
const int batch_size = 10;
const std::chrono::seconds time_threshold(30);

while (true) {
    auto cnt = 0;
    auto begin = std::chrono::steady_clock::now();
    std::vector<std::string> keys;
    while (cnt < batch_size && std::chrono::steady_clock::now() - begin < time_threshold) {
        // Not enough keys yet, and we still have time: keep consuming.
        try {
            subscriber.consume();
        } catch (const Error &e) {
            // handle errors.
        }
        ++cnt;
    }
    // Now we've got a batch of keys or reached the time threshold: do the pipeline job.
    // Your original code here.
}
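Putting the two halves together, a rough sketch; subscriber, pipeline, Entry and do_something_with are the ones from your question, batch_size and time_threshold are defined as above, and the error handling is deliberately minimal:
std::vector<std::string> keys;
subscriber.on_message([&keys](std::string /*channel*/, std::string key) {
    keys.push_back(std::move(key));
});

while (true) {
    keys.clear();
    const auto begin = std::chrono::steady_clock::now();
    // Collect up to batch_size keys, or stop when the time window expires.
    while (keys.size() < static_cast<std::size_t>(batch_size) &&
           std::chrono::steady_clock::now() - begin < time_threshold) {
        try {
            subscriber.consume();   // each consume() may add one key via the callback
        } catch (const sw::redis::Error &) {
            // handle/log errors as in the original loop
        }
    }
    if (keys.empty()) continue;

    // One pipeline round trip for the whole batch: value + TTL per key.
    for (const auto &key : keys)
        pipeline.get(key).ttl(key);
    auto replies = pipeline.exec();

    for (std::size_t i = 0; i < keys.size(); ++i) {
        const auto value = replies.get<std::optional<std::string>>(i * 2);
        const auto ttl = replies.get<long long>(i * 2 + 1);
        if (value)
            do_something_with(Entry(std::move(keys[i]), *value, static_cast<std::size_t>(ttl)));
    }
}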

Fastest way to process http request

I am currently working on creating a network of multisensors (measuring temperature, humidity, etc.). There will be tens or, in some buildings, even hundreds of sensors measuring at the same time. All these sensors send their data via an HTTP GET request to a local ESP32 server that processes the data and converts it into whatever the building's control system can work with (KNX, BACnet, Modbus). Now I stress-tested this server and found out that it can process around 1400 requests per minute before the sender gets no response anymore. This seems like a high number, but if a sensor sends its data every 2 seconds it means there is a limit of around 45 sensors. I need to find a way to process such a request more quickly; this is the code I currently use:
server.on("/get-data", HTTP_GET, [](AsyncWebServerRequest *request)
          { handle_get_data(request); request->send(200); });

void handle_get_data(AsyncWebServerRequest *request)
{
    packetval++;
    sensorData.humidity = request->arg("humidity").toFloat();
    sensorData.temperature = request->arg("temperature").toFloat();
    sensorData.isMovement = request->arg("isMovement");
    sensorData.isSound = request->arg("isSound");
    sensorData.luxValue = request->arg("luxValue").toDouble();
    sensorData.RSSI = request->arg("signalValue").toInt();
    sensorData.deviceID = request->arg("deviceID");
    sensorData.btList = request->arg("btList");
    if (deviceList.indexOf(sensorData.deviceID) == -1)
    {
        deviceList += sensorData.deviceID;
        activeSensors++;
    }
    if (sensorData.isMovement || sensorData.isSound)
    {
        sendDataFlag = true;
    }
}
I use the AsyncTCP library.
Now I measured the execution time of the function handle_get_data() and it turns out it is only ~175 µs, which is very quick. However, the time between two calls of handle_get_data() is around 6 ms, which is really slow, but that still doesn't explain why I can only process 1400 per minute, i.e. 24 per second (6 ms = 155 Hz, so why is my limit 24 Hz?). Other than that I do not run any other code during the processing of a request. Is it perhaps a limitation of the library? Is there another way to process such a request?
A request looks like this: http://192.168.6.51:80/get-data?humidity=32.0&temperature=32.0&isMovement=1&isSound=1&luxValue=123&RSSI=32&deviceID=XX:XX:XX:XX:XX:XX&btList=d1d2d3d4d5d6d7
If there is really nothing I can do, I can always switch to a Raspberry Pi to process everything, but I would rather stick with the ESP32 since I want to easily create my own PCB.
Thanks for all the help!
Creating a WebSocket instead of using HTTP requests solved the issue for me:
AsyncWebSocket ws("/ws");

void setup()
{
    ws.onEvent(onWsEvent);
    server.addHandler(&ws);
}

AsyncWebSocketClient *wsClient;

void onWsEvent(AsyncWebSocket *server, AsyncWebSocketClient *client, AwsEventType type, void *arg, uint8_t *data, size_t len)
{
    if (type == WS_EVT_DATA)
    {
        AwsFrameInfo *info = (AwsFrameInfo *)arg;
        String msg = "";
        packetval++;
        if (info->final && info->index == 0 && info->len == len)
        {
            if (info->opcode == WS_TEXT)
            {
                for (size_t i = 0; i < info->len; i++)
                {
                    msg += (char)data[i];
                }
            }
        }
        sensorData.humidity = msg.substring(msg.indexOf("<hum>") + 5, msg.indexOf("</hum>")).toFloat();
        sensorData.temperature = msg.substring(msg.indexOf("<tem>") + 5, msg.indexOf("</tem>")).toFloat();
        sensorData.isMovement = (msg.substring(msg.indexOf("<isMov>") + 7, msg.indexOf("</isMov>")) == "1");
        sensorData.isSound = (msg.substring(msg.indexOf("<isSnd>") + 7, msg.indexOf("</isSnd>")) == "1");
        sensorData.luxValue = msg.substring(msg.indexOf("<lux>") + 5, msg.indexOf("</lux>")).toDouble();
        sensorData.RSSI = msg.substring(msg.indexOf("<RSSI>") + 6, msg.indexOf("</RSSI>")).toInt();
        sensorData.deviceID = msg.substring(msg.indexOf("<dID>") + 5, msg.indexOf("</dID>"));
        sensorData.btList = msg.substring(msg.indexOf("<bt>") + 4, msg.indexOf("</bt>"));
        if (deviceList.indexOf(sensorData.deviceID) == -1)
        {
            deviceList += sensorData.deviceID;
            activeSensors++;
        }
        if (sensorData.isMovement || sensorData.isSound)
        {
            sendDataFlag = true;
        }
    }
}
This will process more than 11000 packets per minute (200 kB/s). The execution time of onWsEvent(AsyncWebSocket *server, AsyncWebSocketClient *client, AwsEventType type, void *arg, uint8_t *data, size_t len) is ~500 µs now, which means there is definitely optimising left to do in this function, but the time between two calls is reduced all the way down to 1 ms.
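One such optimisation, as an untested sketch: build msg in one step instead of appending character by character (String::reserve() and concat() with an explicit length are standard Arduino String methods):
if (info->final && info->index == 0 && info->len == len && info->opcode == WS_TEXT)
{
    msg.reserve(info->len);                     // single allocation instead of many
    msg.concat((const char *)data, info->len);  // copy the whole payload at once
}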

Using a thread pool to parallelize a function makes it slower: why?

I am working on a database that runs on top of RocksDB. I have a find function that takes a query as a parameter, iterates over all documents in the database, and returns the documents that match the query. I want to parallelize this function so the work is spread over multiple threads.
To achieve that, I tried to use a thread pool: I moved the body of the loop into a lambda and added a task to the thread pool for each document. After the loop, each result is processed by the main thread.
Current version (single thread):
void
EmbeDB::find(const bson_t& query,
             DocumentPtrCallback callback,
             int32_t limit,
             const bson_t* projection)
{
    int32_t count = 0;
    bson_error_t error;
    uint32_t num_query_keys = bson_count_keys(&query);
    mongoc_matcher_t* matcher = num_query_keys != 0
        ? mongoc_matcher_new(&query, &error)
        : nullptr;
    if (num_query_keys != 0 && matcher == nullptr)
    {
        callback(&error, nullptr);
        return;
    }
    bson_t document;
    rocksdb::Iterator* it = _db->NewIterator(rocksdb::ReadOptions());
    for (it->SeekToFirst(); it->Valid(); it->Next())
    {
        const char* bson_data = (const char*)it->value().data();
        int bson_length = it->value().size();
        std::vector<char> decrypted_data;
        if (encryptionEnabled())
        {
            decrypted_data.resize(bson_length);
            bson_length = decrypt_data(bson_data, bson_length, decrypted_data.data(), _encryption_method, _encryption_key, _encryption_iv);
            bson_data = decrypted_data.data();
        }
        bson_init_static(&document, (const uint8_t*)bson_data, bson_length);
        if (num_query_keys == 0 || mongoc_matcher_match(matcher, &document))
        {
            ++count;
            if (projection != nullptr)
            {
                bson_error_t error;
                bson_t projected;
                bson_init(&projected);
                mongoc_matcher_projection_execute_noop(
                    &document,
                    projection,
                    &projected,
                    &error,
                    NULL
                );
                callback(nullptr, &projected);
            }
            else
            {
                callback(nullptr, &document);
            }
            if (limit >= 0 && count >= limit)
            {
                break;
            }
        }
    }
    delete it;
    if (matcher)
    {
        mongoc_matcher_destroy(matcher);
    }
}
New version (multi-thread):
void
EmbeDB::find(const bson_t& query,
             DocumentPtrCallback callback,
             int32_t limit,
             const bson_t* projection)
{
    int32_t count = 0;
    bool limit_reached = limit == 0;
    bson_error_t error;
    uint32_t num_query_keys = bson_count_keys(&query);
    mongoc_matcher_t* matcher = num_query_keys != 0
        ? mongoc_matcher_new(&query, &error)
        : nullptr;
    if (num_query_keys != 0 && matcher == nullptr)
    {
        callback(&error, nullptr);
        return;
    }
    auto process_document = [this, projection, num_query_keys, matcher](const char* bson_data, int bson_length) -> bson_t*
    {
        std::vector<char> decrypted_data;
        if (encryptionEnabled())
        {
            decrypted_data.resize(bson_length);
            bson_length = decrypt_data(bson_data, bson_length, decrypted_data.data(), _encryption_method, _encryption_key, _encryption_iv);
            bson_data = decrypted_data.data();
        }
        bson_t* document = new bson_t();
        bson_init_static(document, (const uint8_t*)bson_data, bson_length);
        if (num_query_keys == 0 || mongoc_matcher_match(matcher, document))
        {
            if (projection != nullptr)
            {
                bson_error_t error;
                bson_t* projected = new bson_t();
                bson_init(projected);
                mongoc_matcher_projection_execute_noop(
                    document,
                    projection,
                    projected,
                    &error,
                    NULL
                );
                delete document;
                return projected;
            }
            else
            {
                return document;
            }
        }
        else
        {
            delete document;
            return nullptr;
        }
    };
    const int WORKER_COUNT = std::max(1u, std::thread::hardware_concurrency());
    ThreadPool pool(WORKER_COUNT);
    std::vector<std::future<bson_t*>> futures;
    bson_t document;
    rocksdb::Iterator* db_it = _db->NewIterator(rocksdb::ReadOptions());
    for (db_it->SeekToFirst(); db_it->Valid(); db_it->Next())
    {
        const char* bson_data = (const char*)db_it->value().data();
        int bson_length = db_it->value().size();
        futures.push_back(pool.enqueue(process_document, bson_data, bson_length));
    }
    delete db_it;
    for (auto it = futures.begin(); it != futures.end(); ++it)
    {
        bson_t* result = it->get();
        if (result)
        {
            count += 1;
            if (limit < 0 || count < limit)
            {
                callback(nullptr, result);
            }
            delete result;
        }
    }
    if (matcher)
    {
        mongoc_matcher_destroy(matcher);
    }
}
With simple documents and query, the single-thread version processes 1 million documents in 0.5 second on my machine.
With the same documents and query, the multi-thread version processes 1 million documents in 3.3 seconds.
Surprisingly, the multi-thread version is way slower. Moreover, I measured the execution time and 75% of the time is spent in the for loop. So basically the line futures.push_back(pool.enqueue(process_document, bson_data, bson_length)); takes 75% of the time.
I did the following:
I checked the value of WORKER_COUNT, it is 6 on my machine.
I tried to add futures.reserve(1000000), thinking that maybe the vector re-allocation was at fault, but it didn't change anything.
I tried to remove the dynamic memory allocations (bson_t* document = new bson_t();), it didn't change the result significantly.
So my question is: is there something I did wrong for the multi-thread version to be that much slower than the single-thread version?
My current understanding is that the synchronization operations of the thread pool (when tasks are enqueued and dequeued) are simply consuming the majority of the time, and the solution would be to change the data-structure. Thoughts?
Parallelization has overhead.
It takes around 500 nanoseconds to process each document in the single-threaded version. There's a lot of bookkeeping that has to be done to delegate work to a thread-pool (both to delegate the work, and to synchronize it afterwards), and all that bookkeeping could very well require more than 500 nanoseconds per job.
Assuming your code is correct, then the bookkeeping takes around 2800 nanoseconds per job. To get a significant speedup from parallelization, you're going to want to break the work into bigger chunks.
I recommend trying to process documents in batches of 1000 at a time. Each future, instead of corresponding to just 1 document, will correspond to 1000 documents.
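As a rough sketch of the batching idea, under your assumptions: pool, db_it, matcher, num_query_keys, callback and count are the names from your question; Batch, match_batch and BATCH_SIZE are illustrative; decryption, projection and limit handling are omitted for brevity. The bytes are copied into the batch because the slice from db_it->value() is only valid until the iterator moves on.
// One task per batch of documents instead of one task per document.
constexpr std::size_t BATCH_SIZE = 1000;
using Batch = std::vector<std::string>;

auto match_batch = [matcher, num_query_keys](Batch docs) -> Batch
{
    Batch matched;
    bson_t document;
    for (auto& doc : docs)
    {
        bson_init_static(&document, (const uint8_t*)doc.data(), doc.size());
        if (num_query_keys == 0 || mongoc_matcher_match(matcher, &document))
            matched.push_back(std::move(doc));   // keep only the raw bytes of matches
    }
    return matched;
};

std::vector<std::future<Batch>> futures;
Batch batch;
for (db_it->SeekToFirst(); db_it->Valid(); db_it->Next())
{
    batch.emplace_back(db_it->value().data(), db_it->value().size());
    if (batch.size() == BATCH_SIZE)
    {
        futures.push_back(pool.enqueue(match_batch, std::move(batch)));
        batch.clear();                           // reset the moved-from vector
    }
}
if (!batch.empty())
    futures.push_back(pool.enqueue(match_batch, std::move(batch)));

// Main thread: run the callback on the matches, as in the original loop.
for (auto& f : futures)
{
    for (auto& doc : f.get())
    {
        bson_t document;
        bson_init_static(&document, (const uint8_t*)doc.data(), doc.size());
        callback(nullptr, &document);
        ++count;
    }
}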
Other optimizations
If possible, avoid unnecessary copying. If something gets copied a bunch, see if you can capture it by reference instead of by value.
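A tiny, self-contained illustration of the difference (the names are made up):
#include <string>

void example()
{
    const std::string query(1024, 'x');   // stand-in for some large object

    // Copies `query` into the closure: every task holding this lambda gets its own copy.
    auto by_value = [query](const std::string& doc) {
        return doc.find(query) != std::string::npos;
    };

    // Shares the single instance: cheaper, but `query` must outlive all tasks using it.
    auto by_reference = [&query](const std::string& doc) {
        return doc.find(query) != std::string::npos;
    };

    (void)by_value;
    (void)by_reference;
}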

MRPT Graph Slam Minimal Example

I am trying to come up with a "minimal" way of running a graph SLAM application using MRPT. The sensor data (laser scans / odometry) will be provided by a custom middleware similar to ROS. After reading the docs and source code (both for MRPT and the ROS bridge) extensively, I came up with the following snippet:
std::string config_file = "../../../laser_odometry.ini";
std::string rawlog_fname = "";
std::string fname_GT = "";

auto node_reg = mrpt::graphslam::deciders::CICPCriteriaNRD<mrpt::graphs::CNetworkOfPoses2DInf>{};
auto edge_reg = mrpt::graphslam::deciders::CICPCriteriaERD<mrpt::graphs::CNetworkOfPoses2DInf>{};
auto optimizer = mrpt::graphslam::optimizers::CLevMarqGSO<mrpt::graphs::CNetworkOfPoses2DInf>{};

auto win3d = mrpt::gui::CDisplayWindow3D{"Slam", 800, 600};
auto win_observer = mrpt::graphslam::CWindowObserver{};
auto win_manager = mrpt::graphslam::CWindowManager{&win3d, &win_observer};

auto engine = mrpt::graphslam::CGraphSlamEngine<mrpt::graphs::CNetworkOfPoses2DInf>{
    config_file, rawlog_fname, fname_GT, &win_manager, &node_reg, &edge_reg, &optimizer};

for (size_t measurement_count = 0;;) {
    // grab laser scan from the network, then fill it (hardcoded values for now), e.g:
    auto scan_ptr = mrpt::obs::CObservation2DRangeScan::Create();
    scan_ptr->timestamp = std::chrono::system_clock::now().time_since_epoch().count();
    scan_ptr->rightToLeft = true;
    scan_ptr->sensorLabel = "";
    scan_ptr->aperture = 3.14; // rad (max-min)
    scan_ptr->maxRange = 3.0;  // m
    scan_ptr->sensorPose = mrpt::poses::CPose3D{};
    scan_ptr->resizeScan(30);
    for (int i = 0; i < 30; ++i) {
        scan_ptr->setScanRange(i, 0.5);
        scan_ptr->setScanRangeValidity(i, true);
    }
    { // Send LaserScan measurement to the slam engine
        auto obs_ptr = std::dynamic_pointer_cast<mrpt::obs::CObservation>(scan_ptr);
        engine.execGraphSlamStep(obs_ptr, measurement_count);
        ++measurement_count;
    }

    // grab odometry from the network, then fill it (hardcoded values for now), e.g:
    auto odometry_ptr = mrpt::obs::CObservationOdometry::Create();
    odometry_ptr->timestamp = std::chrono::system_clock::now().time_since_epoch().count();
    odometry_ptr->hasVelocities = false;
    odometry_ptr->odometry.x(0);
    odometry_ptr->odometry.y(0);
    odometry_ptr->odometry.phi(0);
    { // Send Odometry measurement to the slam engine
        auto obs_ptr = std::dynamic_pointer_cast<mrpt::obs::CObservation>(odometry_ptr);
        engine.execGraphSlamStep(obs_ptr, measurement_count);
        ++measurement_count;
    }

    // Get pose estimation from the engine
    auto pose = engine.getCurrentRobotPosEstimation();
}
Am I going in the right direction here? Did I miss something?
Hmm, at first look the snippet seems fine; you are providing odometry and the laser scan in two separate steps and in Observation form.
Minor note
auto node_reg = mrpt::graphslam::deciders::CICPCriteriaNRD{};
If you want to run with Odometry + laser scans use CFixedIntervalsNRD instead. It's much better tested and actually makes use of those measurements.
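In code that is a one-line change relative to your snippet (same template argument as you already use):
// Swap the node registration decider for the fixed-intervals one.
auto node_reg = mrpt::graphslam::deciders::CFixedIntervalsNRD<mrpt::graphs::CNetworkOfPoses2DInf>{};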
There is no minimal graphslam-engine example in MRPT at present, but here's the main method for running graph SLAM with datasets:
https://github.com/MRPT/mrpt/blob/26ee0f2d3a9366c50faa5f78d0388476ae886808/libs/graphslam/include/mrpt/graphslam/apps_related/CGraphSlamHandler_impl.h#L395
template <class GRAPH_T>
void CGraphSlamHandler<GRAPH_T>::execute()
{
    using namespace mrpt::obs;
    ASSERTDEB_(m_engine);

    // Variables initialization
    mrpt::io::CFileGZInputStream rawlog_stream(m_rawlog_fname);
    CActionCollection::Ptr action;
    CSensoryFrame::Ptr observations;
    CObservation::Ptr observation;
    size_t curr_rawlog_entry;
    auto arch = mrpt::serialization::archiveFrom(rawlog_stream);

    // Read the dataset and pass the measurements to CGraphSlamEngine
    bool cont_exec = true;
    while (CRawlog::getActionObservationPairOrObservation(
               arch, action, observations, observation, curr_rawlog_entry) &&
           cont_exec)
    {
        // actual call to the graphSLAM execution method
        // Exit if user pressed C-c
        cont_exec = m_engine->_execGraphSlamStep(
            action, observations, observation, curr_rawlog_entry);
    }
    m_logger->logFmt(mrpt::system::LVL_WARN, "Finished graphslam execution.");
}
You basically grab the data and then continuously feed it to CGraphSlamEngine via either the execGraphSlamStep or _execGraphSlamStep methods.
Here's also the relevant snippet for processing measurements in the corresponding ROS wrapper that operates with measurements from ROS topics:
https://github.com/mrpt-ros-pkg/mrpt_slam/blob/8b32136e2a381b1759eb12458b4adba65e2335da/mrpt_graphslam_2d/include/mrpt_graphslam_2d/CGraphSlamHandler_ROS_impl.h#L719
template<class GRAPH_T>
void CGraphSlamHandler_ROS<GRAPH_T>::processObservation(
    mrpt::obs::CObservation::Ptr& observ) {
    this->_process(observ);
}

template<class GRAPH_T>
void CGraphSlamHandler_ROS<GRAPH_T>::_process(
    mrpt::obs::CObservation::Ptr& observ) {
    using namespace mrpt::utils;
    if (!this->m_engine->isPaused()) {
        this->m_engine->execGraphSlamStep(observ, m_measurement_cnt);
        m_measurement_cnt++;
    }
}

co_await HttpClient::PutAsync never completes (using c++, coroutine, UWP)

I make HTTP requests, and when they fail I repeat them. I simulate failure by unplugging my computer's internet cable and plugging it back in. Most of the time my code works, but sometimes
co_await HttpClient::PutAsync() never "returns" or completes.
My code looks something like this:
task<Aws::Result> Aws::S3Request::tryExecute()
{
    // content
    IHttpContent^ httpContent;
    httpContent = ref new HttpBufferContent(mBufferContent);
    httpContent->Headers->ContentType = ref new Headers::HttpMediaTypeHeaderValue(ref new String(mContentType.c_str()));
    std::wstring signature = getSignature();

    // client
    auto filter = ref new ::Filters::HttpBaseProtocolFilter;
    filter->AllowUI = false;
    filter->CacheControl->ReadBehavior = Filters::HttpCacheReadBehavior::NoCache;
    filter->CacheControl->WriteBehavior = Filters::HttpCacheWriteBehavior::NoCache;
    HttpClient^ httpClient = ref new HttpClient(filter);
    httpClient->DefaultRequestHeaders->Authorization = ref new Headers::HttpCredentialsHeaderValue(L"AWS", ref new String((mUser.mAccessKey + L":" + signature).c_str()));
    httpClient->DefaultRequestHeaders->Append(L"Date", ref new String(mDate.c_str()));
    httpClient->DefaultRequestHeaders->Append(L"x-amz-acl", L"public-read");
    httpClient->DefaultRequestHeaders->Append(L"x-amz-security-token", ref new String(mUser.mSessionToken.c_str()));

    // http req
    Uri^ uri = ref new Uri(ref new String(mUri.c_str()));
    HttpResponseMessage^ response;
    IAsyncOperationWithProgress<HttpResponseMessage^, HttpProgress>^ progress;
    try
    {
        progress = httpClient->PutAsync(uri, httpContent);
        progress->Progress = ref new AsyncOperationProgressHandler<HttpResponseMessage^, HttpProgress>(
            [this](IAsyncOperationWithProgress<HttpResponseMessage^, HttpProgress>^ pretask, HttpProgress progressInfo)
            {
                /*double got = progressInfo.BytesReceived + progressInfo.BytesSent;
                double total = 0;
                if (progressInfo.TotalBytesToReceive)
                    total += progressInfo.TotalBytesToReceive->Value;
                if (progressInfo.TotalBytesToSend)
                    total += progressInfo.TotalBytesToSend->Value;
                float progress = (total >= 1) ? float(got / total) : 0.0f;
                debugOut("--- part progress = ", progress);
                if (mProgress)
                    *mProgress = progress;*/
            }
        );
        response = co_await progress;
        progress = nullptr;
        if (!response->IsSuccessStatusCode)
            debugOut("--- http fail error");
    }
    catch (Platform::COMException^)
    {
        debugOut("--- http fail nointernet");
        progress = nullptr;
        mResult = NoInternet;
        mAnswer.clear();
        co_return mResult;
    }

    // answer
    mAnswer = response->Content->ToString()->Data();
    debugOut(L"--- Http answer=", mAnswer);
    mResult = response->IsSuccessStatusCode ? Success : Error;
    co_return mResult;
}
If it fails, tryExecute() is called again.
By the way, the IAsyncOperationWithProgress only gives me either 0% or 100%, never anything in between. I upload about 5 MB in each request.
This works, but is not a good solution:
//response = co_await progress;
for (int timeout = 5 * 60 * 100; true; timeout--)
{
    if (progress->Status != AsyncStatus::Started)
        break;
    co_await winrt::resume_after(10ms);
    if (timeout == 0)
    {
        debugOut("--- http fail timeout");
        mResult = NoInternet;
        mAnswer.clear();
        co_return mResult;
    }
}
response = progress->GetResults();