cereal + armadillo + json serialization - c++

Does anyone have an example of Cereal based armadillo matrix serialization to JSON? Binary serialization below seems to be working.
Inside mat_extra_meat.hpp
// Binary save for an Armadillo matrix: writes the row count, the column count,
// and then the element buffer as one raw blob. Only participates in overload
// resolution when Archive can output cereal::BinaryData<eT>.
template<class Archive, class eT>
typename std::enable_if<cereal::traits::is_output_serializable<cereal::BinaryData<eT>, Archive>::value, void>::type
save( Archive & ar, const Mat<eT>& m ) {
  uword rows = m.n_rows;
  uword cols = m.n_cols;
  ar( rows );
  ar( cols );

  // memptr() on a const matrix yields a pointer-to-const; the cast strips that
  // so the buffer can be handed to binary_data() as a plain void*.
  eT * const elements = const_cast< eT* >( m.memptr() );
  const std::size_t byte_count = static_cast< std::size_t >( rows * cols * sizeof( eT ) );
  ar( cereal::binary_data( reinterpret_cast< void * const >( elements ), byte_count ) );
}
// Binary load for an Armadillo matrix: reads the row and column counts,
// resizes the target to match, then reads the element buffer straight into
// the matrix storage. Only participates in overload resolution when Archive
// can input cereal::BinaryData<eT>.
template<class Archive, class eT>
typename std::enable_if<cereal::traits::is_input_serializable<cereal::BinaryData<eT>, Archive>::value, void>::type
load( Archive & ar, Mat<eT>& m ) {
  uword rows;
  uword cols;
  ar( rows );
  ar( cols );

  m.resize( rows, cols );

  // m is non-const here, so memptr() already yields a mutable eT*.
  ar( cereal::binary_data(
        reinterpret_cast< void * >( m.memptr() ),
        static_cast< std::size_t >( rows * cols * sizeof( eT ) ) ) );
}
Test with this:
int main( int argc, char** argv ) {
arma::mat xx1 = arma::randn( 10, 20 );
std::ofstream ofs( "test", std::ios::binary );
cereal::BinaryOutputArchive o( ofs );
o( xx1 );
ofs.close();
// Now load it.
arma::mat xx2;
std::ifstream ifs( "test", std::ios::binary );
cereal::BinaryInputArchive i( ifs );
i( xx2 );
}

You have two options for JSON serialization - you can take a quick and dirty approach that won't really be human readable, or you can make it human readable at the cost of increased serialization size and time.
For the quick version, you can modify your existing code to use saveBinaryValue and loadBinaryValue, which exist within the text archives of cereal (JSON and XML).
e.g.:
ar.saveBinaryValue( reinterpret_cast<void * const>( const_cast< eT* >( m.memptr() ) ),
static_cast<std::size_t>( n_rows * n_cols * sizeof( eT ) ) );
and similarly for the load.
This will base64 encode your data and write it as a string. You would of course need to specialize the function to only apply to text archives (or just JSON) within cereal.
The alternative is to individually serialize each element. You have two choices again here, the first is to serialize as a JSON array (e.g. myarray: [1, 2, 3, 4, 5, ...]) or as a bunch of individual name-value-pairs: "array1" : "1", "array2": "2", ...
The convention in cereal has been to use JSON arrays for dynamically re-sizable containers (e.g. vector), but because we're purely emphasizing readability with this example, I'll use arrays even though your armadillo matrix would not be something you would like users to be able to add or remove elements from using JSON:
namespace arma
{
// Wraps a particular column in a class with its own serialization function.
// This is necessary because cereal expects actual data to follow a size_tag, and can't
// serialize two size_tags back to back without creating a new node (entering a new serialization function).
//
// This wrapper serves the purpose of creating a new node in the JSON serializer and allows us to
// then serialize the size_tag, followed by the actual data
template <class T>
struct ColWrapper
{
ColWrapper(T && m, int c, int nc) : mat(std::forward<T>(m)), col(c), n_cols(nc) {}
T & mat;
int col;
int n_cols;
template <class Archive>
void save( Archive & ar ) const
{
ar( cereal::make_size_tag( mat.n_rows ) );
for( auto iter = mat.begin_col(col), end = mat.end_col(col); iter != end; ++iter )
ar( *iter );
}
template <class Archive>
void load( Archive & ar )
{
cereal::size_type n_rows;
// Test to see if we need to resize the data
ar( cereal::make_size_tag( n_rows ) );
if( mat.n_rows != n_rows )
mat.resize( n_rows, n_cols );
for( auto iter = mat.begin_col(col), end = mat.end_col(col); iter != end; ++iter )
ar( *iter );
}
};
// Convenience function to make a ColWrapper
template<class T> inline
ColWrapper<T> make_col_wrapper(T && t, int c, int nc)
{
return {std::forward<T>(t), c, nc};
}
template<class Archive, class eT, cereal::traits::EnableIf<cereal::traits::is_text_archive<Archive>::value> = cereal::traits::sfinae>
inline void save( Archive & ar, const Mat<eT>& m )
{
// armadillo stored in column major order
uword n_rows = m.n_rows;
uword n_cols = m.n_cols;
// First serialize a size_tag for the number of columns. This will make expect a dynamic
// sized container, which it will output as a JSON array. In reality our container is not dynamic,
// but we're going for readability here.
ar( cereal::make_size_tag( n_cols ) );
for( auto i = 0; i < n_cols; ++i )
// a size_tag must be followed up with actual serializations that create nodes within the JSON serializer
// so we cannot immediately make a size_tag for the number of rows. See ColWrapper for more details
ar( make_col_wrapper(m, i, n_cols) );
}
template<class Archive, class eT, cereal::traits::EnableIf<cereal::traits::is_text_archive<Archive>::value> = cereal::traits::sfinae>
inline void load( Archive & ar, Mat<eT>& m )
{
// We're doing essentially the same thing here, but loading the sizes and performing the resize for the matrix
// within ColWrapper
cereal::size_type n_rows;
cereal::size_type n_cols;
ar( cereal::make_size_tag( n_cols ) );
for( auto i = 0; i < n_cols; ++i )
ar( make_col_wrapper(m, i, n_cols) );
}
} // end namespace arma
Example program to run the above:
int main(int argc, char* argv[])
{
std::stringstream ss;
std::stringstream ss2;
{
arma::mat A = arma::randu<arma::mat>(4, 5);
cereal::JSONOutputArchive ar(ss);
ar( A );
}
std::cout << ss.str() << std::endl;
{
arma::mat A;
cereal::JSONInputArchive ar(ss);
ar( A );
{
cereal::JSONOutputArchive ar2(ss2);
ar2( A );
}
}
std::cout << ss2.str() << std::endl;
return 0;
}
and its output:
{
"value0": [
[
0.786820954867802,
0.2504803406880287,
0.7106712289786555,
0.9466678009609704
],
[
0.019271058195813773,
0.40490214481616768,
0.25131781792803756,
0.02271243862792676
],
[
0.5206431525734917,
0.34467030607918777,
0.27419560360286257,
0.561032100176393
],
[
0.14003945653337478,
0.5438560675050177,
0.5219157100717673,
0.8570772835528213
],
[
0.49977436000503835,
0.4193700240544483,
0.7442805199715539,
0.24916812957858262
]
]
}
{
"value0": [
[
0.786820954867802,
0.2504803406880287,
0.7106712289786555,
0.9466678009609704
],
[
0.019271058195813773,
0.40490214481616768,
0.25131781792803756,
0.02271243862792676
],
[
0.5206431525734917,
0.34467030607918777,
0.27419560360286257,
0.561032100176393
],
[
0.14003945653337478,
0.5438560675050177,
0.5219157100717673,
0.8570772835528213
],
[
0.49977436000503835,
0.4193700240544483,
0.7442805199715539,
0.24916812957858262
]
]
}

Related

Find an element in vector<vector<string>>

I store this file in vector<vector<string>>:
1 a aa # vector of string stored to `vector<vector<string>>`
2 b bb
3 c cc # c -> index == 2
4 d dd
C++ code:
vector<vector<string>> myvect =
{{"1","a","aa"},
{"2","b","bb"},
{"3","c","cc"},
{"4","d","dd"}};
How can I search for c in the second column and get its index (I know it is in the second vector) - the output should be 2.
I want to use find or find_if function.
If you specifically want to search the 2nd column of the inner vector you can use a transform_iterator and regular find.
transform_iterator is in Boost, and using it would look something like:
std::vector< std::vector< std::string > > v;
// Projects each row onto its second column (index 1).
auto lambda = [] ( std::vector< std::string > const& row ) { return row[1]; };
// The end iterator must wrap the same functor as the begin iterator so that
// both have the same type and can be compared by std::find.
auto transform_end = boost::make_transform_iterator( v.end(), lambda );
return std::find( boost::make_transform_iterator( v.begin(), lambda ),
                  transform_end, "c" ) != transform_end;
If your inner lambda is to find "c" in any position I wouldn't use transform iterator here as we want to return a true/false on each inner vector, not just some transformed value, and we would use find_if on the outer-vector and find on the inner one
std::string val = "c";
// Predicate: true for a row that contains val in any position.
// val is captured by reference, and the inner search result is compared
// against the end of the row that was actually searched.
auto lambda = [ &val ]( std::vector< std::string > const& vInner )
{ return std::find( vInner.begin(), vInner.end(), val ) != vInner.end(); } ;
// Yields an iterator to the first matching row, or v.end() if none matches.
return std::find_if( v.begin(), v.end(), lambda );
You can try something similar to the code below
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <iterator>
// Finds the first row containing "c" and prints the matching cell.
int main()
{
    std::vector<std::vector<std::string>> table =
    {
        { "1", "a", "aa" },
        { "2", "b", "bb" },
        { "3", "c", "cc" },
        { "4", "d", "dd" }
    };

    std::string needle = "c";

    // Position of the match inside the matching row; assigned by the predicate.
    std::vector<std::string>::iterator cell;

    auto row = std::find_if( table.begin(), table.end(),
        [&]( std::vector<std::string> &candidate )
        {
            cell = std::find( candidate.begin(), candidate.end(), needle );
            return cell != candidate.end();
        } );

    if ( row == table.end() )
    {
        return 0;
    }

    // cell points into *row, so dereferencing it yields the matched element.
    std::cout << *cell << std::endl;
    return 0;
}
The output is
c
You can do this:
int column = 1; // Set this to the column you need to search;
string target( "c" ); // Set this to the value you need to find
auto found = find_if( myvect.begin(), myvect.end(), [=]( vector< string > row ){ return row[column] == target; } );
cout << ( found == myvect.end() ? "not found" : ( *found2 )[column] ) << endl;
C++11 won't let you define column or target in the capture. If you want to avoid intermediate variables in C++11, though, you can do this; it's just ugly because of the static_cast. You'd just have to set the "c" and 1 to the target and column:
auto found = find_if( myvect.begin(), myvect.end(), bind( equal_to< string >(), "c", bind( static_cast< const string&( vector<string>::* )( size_t ) const >( &vector< string >::operator[] ), placeholders::_1, 1 ) ) );
I personally would suggest that if your row size is always the same that you put it in a single std::vector like this: vector<string> myvect = { "1", "a", "aa", "2", "b", "bb", "3", "c", "cc", "4", "d", "dd" }; if you do that you can write a template to search for you which will have significantly more flexibility:
// Searches a flat, row-major table stored in a single vector.
//
// T       - element type
// stride  - number of elements per row (must be positive)
// myvect  - the flattened table; a trailing partial row is ignored
// target  - value to look for
// column  - column index to inspect in every row
//
// Returns a pointer to the first element in the given column that compares
// equal to target, or nullptr when there is no match or column is outside
// [0, stride). The pointer refers into myvect's storage.
template< typename T, int stride >
T* templateFind( const std::vector< T >& myvect, const T& target, int column )
{
    static_assert( stride > 0, "stride must be positive" );

    if ( column < 0 || column >= stride )
        return nullptr;

    // Index arithmetic replaces reinterpreting the storage as array<T, stride>,
    // which is not a valid way to access the vector's elements and also copied
    // a whole row for every comparison.
    const std::size_t rowCount = myvect.size() / stride;
    for ( std::size_t row = 0; row < rowCount; ++row )
    {
        const T& cell = myvect[ row * stride + column ];
        if ( cell == target )
            return const_cast< T* >( &cell ); // the signature promises a mutable pointer
    }
    return nullptr;
}
And use it like this:
string* found = templateFind< string, 3 >( myvect, "c", 1 );
cout << ( found == nullptr ? "not found" : *found ) << endl;

Struct alignment/endianess check

I have this kind of C structure.
// Register write message: an address, a value, and a terminator byte that the
// constructor always sets to 0xFF.
struct uart_buff
{
    uint32_t addr;
    uint32_t data;
    uint8_t terminator;

    uart_buff(uint32_t reg_Addr, uint32_t uValue)
        : addr(reg_Addr), data(uValue), terminator(0xFF)
    {
    }
};
I would like to print each byte (9 if I'm right) of this struct (as a hexadecimal value) in order to check if the syntax is correct. Is there a simple way to do such a thing?
Yes, you can access the struct through an unsigned char pointer,
struct uart_buff buf = ...;
unsigned char *p = (unsigned char *)&buf;
size_t i;
for(i = 0; i < sizeof buf; i++) {
printf("%02X ", p[i]);
}
I've got a template class that I usually use for this sort of
thing:
// Stream manipulator that prints the object representation of a T as
// space-separated two-digit hex bytes. It stores only a pointer to the
// object, so the object must outlive the Dump.
template<typename T>
class Dump
{
    unsigned char const* myObj;
public:
    explicit Dump( T const& obj )
        : myObj( reinterpret_cast<unsigned char const*>( &obj ) )
    {
    }

    // Writes sizeof( T ) bytes starting at the wrapped object's address.
    // IOSave (described in the accompanying text) restores the stream's
    // formatting state on exit.
    friend std::ostream& operator<<( std::ostream& dest, Dump const& obj )
    {
        IOSave saver( dest ) ;
        dest.fill( '0' ) ;
        dest.setf( std::ios::hex, std::ios::basefield ) ;
        char const* baseStr = "" ;
        if ( (dest.flags() & std::ios::showbase) != 0 ) {
            baseStr = "0x" ;
            // The prefix is written by hand for every byte; leaving showbase
            // set would make the stream add a second one.
            dest.unsetf( std::ios::showbase ) ;
        }
        // A friend function has no implicit object, so the pointer has to be
        // reached through obj.
        unsigned char const* const
                            end = obj.myObj + sizeof( T ) ;
        for ( unsigned char const* p = obj.myObj ; p != end ; ++ p ) {
            if ( p != obj.myObj ) {
                dest << ' ' ;
            }
            dest << baseStr << std::setw( 2 ) << (unsigned int)( *p ) ;
        }
        return dest ;
    }
} ;
// Helper that deduces T so callers can write `stream << dump( x )` instead of
// spelling out Dump< T >.
template< typename T >
inline Dump< T > dump( T const& obj )
{
    Dump< T > wrapped( obj ) ;
    return wrapped ;
}
(IOSave is just the usual class to save and restore the
formatting parameters when they're going to be modified.)
This allows dumping a hex image of pretty much anything, just by
writing:
std::cout << dump( myObj ) << std::endl;

How to optimize out the space a class reference member takes?

// Raw storage for a T that is not constructed automatically, plus a reference
// X through which the storage is accessed as a T.
//
// NOTE(review): no T is ever constructed in DATA by this class; using X for a
// type with a non-trivial constructor needs the caller to construct one first.
// NOTE(review): a copy of this struct has its X bound to the source's DATA,
// not its own — confirm copies are never made.
template<typename T>
struct UninitializedField
{
    T& X;
    inline UninitializedField( ) : X( *reinterpret_cast< T* >( DATA ) )
    {
    }
protected:
    // alignas( T ) guarantees the buffer is suitably aligned to be accessed
    // as a T; a plain char array only has alignment 1 on its own.
    alignas( T ) char DATA[ sizeof( T ) ];
};
// Prints the size of the wrapper next to the size of the wrapped type.
int main( )
{
    UninitializedField<List<int>> LetsTest;
    // sizeof yields std::size_t, whose printf conversion is %zu; %u expects
    // unsigned int and is wrong wherever the two types differ in width.
    printf( "%zu, %zu\n", sizeof( LetsTest ), sizeof( List<int> ) );
}
I am trying to program a class that wraps an object without it being automatically initialized/constructed.
But when I execute my program the output is:
8, 4
Is there a way to optimize out the dereference to get into the object in X and the space it takes?
// Storage for a T that is only constructed on request. The object is reached
// through operator-> and operator*, so the wrapper needs no extra reference
// or pointer member and is exactly sizeof( T ) bytes.
//
// NOTE(review): both constructors assign into storage in which no T has been
// constructed yet; that is only sound for types whose assignment does not
// depend on prior construction — confirm for the types this is used with.
template<typename T>
struct UninitializedField {
    // Stores a copy of t in the buffer.
    inline UninitializedField( const T &t ) {
        *reinterpret_cast< T* >( DATA ) = t;
    }

    // Zero   - fill the buffer with zero bytes first (default: yes)
    // Construct - assign a value-initialized T into the buffer (default: no)
    inline UninitializedField( bool Construct = false, bool Zero = true ) {
        if ( Zero )
            memset( this, 0, sizeof( *this ) );
        if ( Construct )
            *reinterpret_cast< T* >( DATA ) = T( );
    }

    inline T *operator->( ) {
        return reinterpret_cast< T* >( DATA );
    }

    inline T &operator*( ) {
        return *reinterpret_cast< T* >( DATA );
    }
protected:
    // DATA is the only member, so without alignas( T ) the whole struct would
    // have alignment 1 and could be placed at an address unsuitable for a T.
    alignas( T ) char DATA[ sizeof( T ) ];
};
There isn't any space taken, and with compiler-optimization on there's no call to function.

c++ visualizing memory of variable

I would like to see the structure of the memory, allocated to two different variables.
The intention behind this is to understand how the memory is structured in order to store different datatypes.
How is it done in C++?
//how to show, whats in memory in &var1 &var2 ?
short var1 = 2;
string var2 = "bla";
If you are using Eclipse, you can use the Memory View in the debug perspective.
Either that, or simply create a pointer to your variables and inspect the contents of those:
short var1 = 2;
string var2 = "bla";
char* pVar1 = (char*)&var1; //point to memory storing var1
char* pVar2 = (char*)&var2; //point to memory storing var2
If you're using MSVS, you can open the Memory tab and write the address you wish to inspect.
You must be in debug - Debug -> Windows -> Memory.
I usually use something like the following:
// Stream manipulator that prints the bytes of a T in hex. Holds only a
// pointer to the object, so the object must outlive the Dump.
template< typename T >
class Dump
{
public:
    // Remembers the address of obj; nothing is copied.
    explicit Dump( T const& obj ) ;
    // Writes the hex image of the wrapped object to dest.
    void print( std::ostream& dest ) const ;
    friend std::ostream& operator<<( std::ostream& dest, Dump const& source )
    {
        source.print( dest );
        // Return the stream (not the Dump) so insertions can be chained.
        return dest;
    }
private:
    unsigned char const* myObj ;
} ;
// Helper that deduces T, so a hex dump can be requested as `dump( x )`
// without naming Dump< T > explicitly.
template< typename T >
inline Dump< T > dump( T const& obj )
{
    Dump< T > wrapped( obj ) ;
    return wrapped ;
}
// Stores the address of obj, viewed as a sequence of unsigned char.
template< typename T >
Dump< T >::Dump( T const& obj )
    : myObj( reinterpret_cast< unsigned char const* >( &obj ) )
{
}
// Writes sizeof( T ) bytes, starting at myObj, as two-digit hex values
// separated by single spaces. When the stream has showbase set, every byte is
// prefixed with "0x" by hand and showbase is cleared for the duration.
// The IOSave instance is constructed before any formatting state is modified.
template< typename T >
void
Dump< T >::print( std::ostream& dest ) const
{
    IOSave saver( dest ) ;
    dest.fill( '0' ) ;
    dest.setf( std::ios::hex, std::ios::basefield ) ;

    bool const wantsBase = (dest.flags() & std::ios::showbase) != 0 ;
    char const* const baseStr = wantsBase ? "0x" : "" ;
    if ( wantsBase ) {
        dest.unsetf( std::ios::showbase ) ;
    }

    // Empty before the first byte, a single space before every later one.
    char const* separator = "" ;
    unsigned char const* const stop = myObj + sizeof( T ) ;
    for ( unsigned char const* cur = myObj ; cur != stop ; ++ cur ) {
        dest << separator << baseStr << std::setw( 2 ) << (unsigned int)( *cur ) ;
        separator = " " ;
    }
}
IOSave is a simple class which saves the formatting state (flags,
fill and precision) in the constructor, and restores them in the
destructor.

Using global C++object from C crashes application

This is my first post, I'm new to this site, but I've been lurking around for a while now. I've got a good knowledge of C and very limited knowledge of C++. I guess. I'm on Windows (XPx64), VS2008.
I'm trying to wrap a C++ library, kdtree2, so that I can use it from C. The main issues relate to accessing the kdtree2 and kdtree2_result_vector classes. As the author's FTP server does not respond, I've uploaded a copy of the original distribution kdtree2 src.
Just some quick info on the kd-tree (a form of a binary tree), "'the data' are coordinates in n-dimensional Cartesian space and an index. What it is used for are nearest neighbour searches, so after constructing the tree (which will not be modified), one can query the tree for various types of nn-searches. The results in this case are returned in a vector object of structs (c-like structs).
// One hit of a neighbour search; the search routines return a (wrapped)
// vector of these.
struct kdtree2_result {
    float dis;  // its square Euclidean distance
    int idx;    // which neighbor was found
};
My imagined solution is to have an array of kdtree2 objects (one per thread). For the kdtree2_result_vector class I haven't got a solution yet as I'm not getting past first base. It is not necessary to access the kdtree2 class directly.
I only need to fill it with data and then use it (as the second function below is an example of). For this I've defined:
// Single process-wide tree, assigned by new_kdtree2().
kdtree2 *global_kdtree2;
// C-callable constructor: views the n x dim float table behind data as a
// 2-D array (starting at &data[0][0], C storage order) and builds the global
// tree from that view.
// NOTE(review): kdtree2_data is a local and is destroyed when this function
// returns. If kdtree2 keeps a reference to it instead of copying (as the
// answer at the end of this thread suspects), the tree is left dangling —
// confirm against kdtree2's constructor.
extern "C" void new_kdtree2 ( float **data, const int n, const int dim, bool arrange ) {
multi_array_ref<float,2> kdtree2_data ( ( float * ) &data [ 0 ][ 0 ], extents [ n ][ dim ], c_storage_order ( ) );
global_kdtree2 = new kdtree2 ( kdtree2_data, arrange );
}
For then using that tree, I've defined:
// C-callable query on the global tree: forwards idxin, correltime and nn to
// kdtree2::n_nearest_around_point.
// The hits are collected in a local vector that is discarded on return, so
// this version does not hand any result back to the caller.
extern "C" void n_nearest_around_point_kdtree2 ( int idxin, int correltime, int nn ) {
kdtree2_result_vector result;
global_kdtree2->n_nearest_around_point ( idxin, correltime, nn, result );
}
kdtree2_result_vector is derived from the vector class. This compiles without error, and the resulting library can be linked and its C functions accessed from C.
The problem is that the invocation of n_nearest_around_point_kdtree2 crashes the program. I suspect somehow between setting up the tree and using it in the second function call, the tree somehow gets freed/destroyed. The calling c-test-program is posted below:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "kdtree2.h"
#define MALLOC_2D(type,x,y) ((type**)malloc_2D_kdtree2((x),(y),sizeof(type)))
/*
 * Allocates an x-by-y table of type_size-byte elements in ONE block:
 * x row pointers, immediately followed by the x rows laid out back to back
 * (so &table[0][0] addresses one contiguous x*y array).
 *
 * Returns the row-pointer array, or NULL when a dimension is negative, the
 * total size does not fit in size_t, or malloc fails. Release the whole
 * table with a single free() on the returned pointer.
 */
void **malloc_2D_kdtree2 ( const int x, const int y, const int type_size ) {
    if ( x < 0 || y < 0 || type_size < 0 )
        return NULL;

    /* Do the size arithmetic in size_t so y * type_size cannot overflow int. */
    const size_t row_bytes = ( size_t ) y * ( size_t ) type_size;
    if ( type_size != 0 && row_bytes / ( size_t ) type_size != ( size_t ) y )
        return NULL;

    const size_t per_row = sizeof ( void * ) + row_bytes;
    if ( per_row < row_bytes )
        return NULL;

    const size_t total = ( size_t ) x * per_row;
    if ( x != 0 && total / ( size_t ) x != per_row )
        return NULL;

    void **x_idx = ( void ** ) malloc ( total );
    if ( x_idx == NULL )
        return NULL;

    /* Row data starts right after the x row pointers. */
    char *y_idx = ( char * ) ( x_idx + x );
    for ( int i = 0; i < x; i++ )
        x_idx [ i ] = y_idx + ( size_t ) i * row_bytes;
    return x_idx;
}
/* Test driver from the question: fills a 100 x 3 table, builds the tree and
 * runs one nearest-neighbour query. */
int main ( void ) {
    const int n_points = 100;
    const int n_dims = 3;

    float **data = MALLOC_2D ( float, n_points, n_dims );
    for ( int row = 0; row < n_points; row++ )
        for ( int col = 0; col < n_dims; col++ )
            data [ row ][ col ] = ( float ) ( n_dims * row + col );

    // this works fine
    tnrp ( data, n_points, n_dims, false );

    new_kdtree2 ( data, n_points, n_dims, false );

    // this crashes the program
    n_nearest_around_point_kdtree2 ( 9, 3, 6 );

    delete_kdtree2 ( );
    free ( data );
    return 0;
}
As far as I can see, searching the internet, it should work, but I'm obviously missing something vital in the brave (for me) new world of C++.
EDIT:
Resolution, thanks to larsmans. I've defined the following class (derived from what larsmans posted earlier):
// Bundles a kdtree2 with the multi_array_ref view it was built from, so the
// view lives exactly as long as the tree, and keeps the result vector of the
// most recent query in a public member.
//
// NOTE(review): the caller's float table is viewed, not copied, by data_ref;
// it presumably has to stay allocated while this object is in use — confirm.
// NOTE(review): if kdtree2 refers back to data_ref (the reason this wrapper
// exists), a copy of this object would refer to the source's view; avoid
// copying until that is confirmed safe.
class kdtree {
private:
    // Declaration order matters: data_ref must be initialized before tree,
    // because tree is constructed from it.
    multi_array_ref<float,2> data_ref;
    kdtree2 tree;
public:
    // Hits of the most recent *_nearest* query; overwritten by the next one.
    kdtree2_result_vector result;

    // data    - n x dim table, addressed through &data[0][0] in C storage order
    // arrange - forwarded unchanged to the kdtree2 constructor
    kdtree ( float **data, int n, int dim, bool arrange ) :
        data_ref ( ( float * ) &data [ 0 ][ 0 ], extents [ n ][ dim ], c_storage_order ( ) ),
        tree ( data_ref, arrange )
    {
    }

    // The members below forward to the kdtree2 member of the same name,
    // passing `result` as the output vector where the query produces hits.
    void n_nearest_brute_force ( std::vector<float>& qv ) {
        tree.n_nearest_brute_force ( qv, result ); }
    void n_nearest ( std::vector<float>& qv, int nn ) {
        tree.n_nearest ( qv, nn, result ); }
    void n_nearest_around_point ( int idxin, int correltime, int nn ) {
        tree.n_nearest_around_point ( idxin, correltime, nn, result ); }
    void r_nearest ( std::vector<float>& qv, float r2 ) {
        tree.r_nearest ( qv, r2, result ); }
    void r_nearest_around_point ( int idxin, int correltime, float r2 ) {
        tree.r_nearest_around_point ( idxin, correltime, r2, result ); }

    // The counting queries return kdtree2's count and leave `result` untouched.
    int r_count ( std::vector<float>& qv, float r2 ) {
        return tree.r_count ( qv, r2 ); }
    int r_count_around_point ( int idxin, int correltime, float r2 ) {
        return tree.r_count_around_point ( idxin, correltime, r2 ); }
};
The code to call these functions from C:
kdtree* global_kdtree2 [ 8 ];
extern "C" void new_kdtree2 ( const int thread_id, float **data, const int n, const int dim, bool arrange ) {
global_kdtree2 [ thread_id ] = new kdtree ( data, n, dim, arrange );
}
extern "C" void delete_kdtree2 ( const int thread_id ) {
delete global_kdtree2 [ thread_id ];
}
extern "C" void n_nearest_around_point_kdtree2 ( const int thread_id, int idxin, int correltime, int nn, struct kdtree2_result **result ) {
global_kdtree2 [ thread_id ]->n_nearest_around_point ( idxin, correltime, nn );
*result = &( global_kdtree2 [ thread_id ]->result.front ( ) );
}
and eventually the C-program to start using it all:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "kdtree2.h"
/* Driver for the working version: builds one tree in slot 0, runs two
 * queries and prints the hits of each. */
int main ( void ) {
    const int n_points = 100;
    const int n_dims = 3;

    float **data = MALLOC_2D ( float, n_points, n_dims );
    for ( int row = 0; row < n_points; row++ )
        for ( int col = 0; col < n_dims; col++ )
            data [ row ][ col ] = ( float ) ( n_dims * row + col );

    int thread_id = 0;
    new_kdtree2 ( thread_id, data, n_points, n_dims, false );

    /* Points at the hits of the most recent query. */
    struct kdtree2_result *result;

    const int first_count = 9;
    n_nearest_around_point_kdtree2 ( thread_id, 28, 3, first_count, &result );
    for ( int k = 0; k < first_count; k++ )
        printf ( "result[%d]= (%d,%f)\n", k , result [ k ].idx, result [ k ].dis );
    printf ( "\n" );

    const int second_count = 6;
    n_nearest_around_point_kdtree2 ( thread_id, 9, 3, second_count, &result );
    for ( int k = 0; k < second_count; k++ )
        printf ( "result[%d]= (%d,%f)\n", k , result [ k ].idx, result [ k ].dis );

    delete_kdtree2 ( thread_id );
    free ( data );
    return 0;
}
The API docs in the referenced paper are rather flaky and the author's FTP server doesn't respond, so I can't tell with certainty, but my hunch is that
multi_array_ref<float,2> kdtree2_data((float *)&data[0][0], extents[n][dim],
c_storage_order( ));
global_kdtree2 = new kdtree2(kdtree2_data, arrange);
construct the kdtree2 by storing a reference to kdtree2_data in the global_kdtree2 object, rather than making a full copy. Since kdtree2_data is a local variable, it is destroyed when new_kdtree2 returns. You'll have to keep it alive until n_nearest_around_point_kdtree2 is done.