How does CUB's TexRefInputIterator work? - c++

CUB provides an iterator for texture references, the implementation of which is readily accessible.
Since I couldn't figure out how to implement template-able texture references myself - they "can only be declared as a static global variable" - I am now trying to understand how it's done in CUB. But some of it is beyond my C++ knowledge, and I haven't been able to find the answers elsewhere (then again, I don't really know what to search for).
Specifically:
Is the unnamed namespace surrounding IteratorTexRef significant? I can only think that it is to limit IteratorTexRef::TexId::ref to file/translation unit scope.
What is the purpose of IteratorTexRef? It only wraps TexId, but removing it results in unintelligible (to me) compile-time errors.
This code, a stripped-down version of the linked-to implementation, compiles and runs:
#include <thrust/device_vector.h>
namespace {
template <typename T>
struct IteratorTexRef
{
template <int UNIQUE_ID>
struct TexId
{
// Assume T is a valid texture word size.
typedef texture<T> TexRef;
static TexRef ref;
static __device__ T fetch(ptrdiff_t offset)
{
return tex1Dfetch(ref, offset);
}
};
};
template <typename T>
template <int UNIQUE_ID>
typename IteratorTexRef<T>:: template TexId<UNIQUE_ID>::TexRef IteratorTexRef<T>:: template TexId<UNIQUE_ID>::ref;
} // Anonymous namespace
template <typename T, int UNIQUE_ID = 0>
class TextureRefIterator
{
private:
typedef typename IteratorTexRef<T>:: template TexId<UNIQUE_ID> TexId;
ptrdiff_t tex_offset;
public:
__device__ T operator[](int i) const
{
return TexId::fetch(this->tex_offset + i);
}
cudaError_t bind(
const T* const ptr,
size_t bytes = size_t(-1))
{
size_t offset;
cudaError_t state = cudaBindTexture(&offset, TexId::ref, ptr, bytes);
this->tex_offset = (ptrdiff_t) (offset / sizeof(T));
return state;
}
};
template <typename TexIter>
__global__ void kernel(TexIter iter)
{
int a = iter[threadIdx.x];
printf("tid %d, a %d\n", threadIdx.x, a);
}
template <typename T>
void launch_kernel(T* d_in)
{
TextureRefIterator<T> tex_iter;
tex_iter.bind(d_in);
kernel<<<1, 32>>>(tex_iter);
}
int main()
{
thrust::device_vector<float> d_in(32, 1);
launch_kernel(thrust::raw_pointer_cast(d_in.data()));
}
The closest I got was something similar to the below, based on how one would normally access a static template member. For clarity, the below simply eliminates IteratorTexRef from the above:
#include <thrust/device_vector.h>
namespace {
template <typename T, int UNIQUE_ID>
struct TexId
{
// Assume T is a valid texture word size.
typedef texture<T> TexRef;
static TexRef ref;
static __device__ T fetch(ptrdiff_t offset)
{
return tex1Dfetch(ref, offset);
}
};
template <typename T, int UNIQUE_ID>
typename TexId<T, UNIQUE_ID>::TexRef TexId<T, UNIQUE_ID>::ref;
} // Anonymous namespace
template <typename T, int UNIQUE_ID = 0>
class TextureRefIterator
{
private:
typedef TexId<T, UNIQUE_ID> TexId;
ptrdiff_t tex_offset;
public:
__device__ T operator[](int i) const
{
return TexId::fetch(this->tex_offset + i);
}
cudaError_t bind(
const T* const ptr,
size_t bytes = size_t(-1))
{
size_t offset;
cudaError_t state = cudaBindTexture(&offset, TexId::ref, ptr, bytes);
this->tex_offset = (ptrdiff_t) (offset / sizeof(T));
return state;
}
};
template <typename TexIter>
__global__ void kernel(TexIter iter)
{
int a = iter[0];
printf("tid %d, a %d\n", threadIdx.x, a);
}
template <typename T>
void launch_kernel(T* d_in)
{
TextureRefIterator<T> tex_iter;
tex_iter.bind(d_in);
kernel<<<1, 32>>>(tex_iter);
}
int main()
{
thrust::device_vector<float> d_in(32, 1);
launch_kernel(thrust::raw_pointer_cast(d_in.data()));
}
It gives these somewhat esoteric compile-time errors. (Compiled with nvcc iter.cu and CUDA 7.0):
In file included from tmpxft_000057d4_00000000-4_test2.cudafe1.stub.c:1:0:
/tmp/tmpxft_000057d4_00000000-4_test2.cudafe1.stub.c:30:3737: error: macro "__text_var" passed 3 arguments, but takes just 2
dIfLi0EE3refE,::_NV_ANON_NAMESPACE::TexId<float, (int)0> ::ref), 1, 0, 0);__cudaReg
^
/tmp/tmpxft_000057d4_00000000-4_test2.cudafe1.stub.c:30:1: error: macro "__device__text_var" passed 3 arguments, but takes just 2
static void __nv_cudaEntityRegisterCallback(void **__T2202){__nv_dummy_param_ref(__
^
/tmp/tmpxft_000057d4_00000000-4_test2.cudafe1.stub.c:30:1: error: macro "__name__text_var" passed 3 arguments, but takes just 2

That compile error is due to the generated code using macros whose arguments contain template types, so the commas inside the template argument lists make the preprocessor think they delimit additional macro arguments. I fixed this by patching the crt/host_runtime header and making the cpp parameter of those macros (__text_var, __device__text_var, and __name__text_var) variadic. In other words, replace cpp with cpp....

Related

C++ Lookup and retrieve a C++ type based on a run-time byte/char

I receive a large number of different messages. Each message contains a messageType byte, which I use to reinterpret_cast to a corresponding struct.
So there's an implicit, compile-time mapping between the message type and the struct representing the message.
Here's a basic implementation:
const char* bytes = "";
const MessageType messageType = bytes[0];
switch(messageType)
{
case MessageType::kMessage1:
{
const Message1& msg = reinterpret_cast<const Message1&>(bytes);
return msg;
}
case MessageType::kMessage2:
{
const Message2& msg = reinterpret_cast<const Message2&>(bytes);
.
.
.
.
}
However, there's a lot of messages/case statements to write, each one possibly leading to a mismatching bug.
In an ideal world I'd like to be able to simply retrieve the type from a container (array) using messageType:
const typeMap.at(messageType)& msg = reinterpret_cast<const typeMap.at(messageType)&>(bytes);
What are the available solutions?
C++ templates cannot generate type-based cases. Instead you can recursively call a static method in a template class. For example, reflect::cast:
#include <cinttypes>
#include <stdexcept>
#include <iostream>
template <char, typename>
struct p;
template <typename ...>
struct reflect;
template <>
struct reflect<> {
template <typename callback>
[[noreturn]] static auto cast(char const*, callback const&) {
throw std::runtime_error{"bad cast"};
}
};
template <char index, typename type, typename ... rest>
struct reflect<p<index, type>, rest...> : reflect <rest...> {
static_assert(std::is_trivial_v<type>);
template <typename callback>
static auto cast(char const* bytes, callback const& function) {
if (bytes[0] == index) {
return function(reinterpret_cast<type const*>(bytes + 1));
}
return reflect<rest...>::cast(bytes, function);
}
};
struct A {
char value;
};
struct B {
std::int16_t value;
};
using typeMap = reflect<
p<0, A>,
p<1, B>
>;
int main(){
typeMap::cast(
"\x01\x2A\x00",
[](auto undefined){
std::cout << undefined->value;
});
}
The generated typeMap::cast is equivalent to the following code, which is similar to the switch statement from the question:
if (bytes[0] == 0) {
return function(reinterpret_cast<A const*>(bytes + 1));
}
if (bytes[0] == 1) {
return function(reinterpret_cast<B const*>(bytes + 1));
}
throw std::runtime_error{"bad cast"};

C++ Get Name of Function from Template

For debugging purposes I'd like to extract the name of the function from a template argument. However I'm only getting the function's signature, not an actual name.
namespace internal
{
static const unsigned int FRONT_SIZE = sizeof("internal::GetTypeNameHelper<") - 1u;
static const unsigned int BACK_SIZE = sizeof(">::GetTypeName") - 1u;
template<typename T>
struct GetTypeNameHelper
{
static const char* GetTypeName(void)
{
#ifdef __GNUC__
static const size_t size = sizeof(__PRETTY_FUNCTION__);
static char typeName[size] = { };
memcpy(typeName, __PRETTY_FUNCTION__, size - 1u);
#else
static const size_t size = sizeof(__FUNCTION__) - FRONT_SIZE - BACK_SIZE;
static char typeName[size] =
{};
memcpy(typeName, __FUNCTION__ + FRONT_SIZE, size - 1u);
#endif //__GNUC__
return typeName;
}
};
} //namespace internal
template<typename T>
const char* GetTypeName(void)
{
return internal::GetTypeNameHelper<T>::GetTypeName();
}
Calling this from an own make function
template<typename Func_T, typename ... Args>
CExtended_Function<Args...> Make_Extended_Function(Func_T f)
{
std::function<void(Args...)> func(f);
const char* pFunc_Name = NCommonFunctions::GetTypeName<Func_T>();
CExtended_Function<Args...> res(f, func_name);
return res;
}
with
void Test_Func();
void foo()
{
Make_Extended_Function(Test_Func);
}
Gives me only the function signature.
... [with T = void (*)()]...
However I'd like to get the function name (in this case "Test_Func")
I thought about using macros but I'm not sure how to implement the Args... part in macros. Do you have an idea on how to solve this? I'd like to avoid using RTTI.
Functions aren't valid template arguments - your template argument here is the type of a pointer to the function, not the function itself - so this is completely impossible. There is also no portable way to get the name of a particular function at compile time either, at least at the moment (it's possible that this will be possible in the future through compile time reflection, but that's going to be C++2y (23?) at the earliest).
With Macro, you can do (I also use CTAD from C++17)
template<typename F>
auto Make_Extended_Function_Impl(F f, const std::string& name)
{
std::function func(f);
CExtended_Function res(f, name);
return res;
}
#define Make_Extended_Function(f) Make_Extended_Function_Impl(f, #f)

Change behavior based on template enum parameter

I've got a communication class which can handle 2 different protocols. The protocol is chosen by a enum template variable.
Now 1 of the 2 protocols only supports 2-byte values while the other supports 4-byte values.
Communicating is done via a template member function. How to change the static_assert so it takes 2 or 4 bytes depending on the class specialization (= chosen protocol)?
#include <iostream>
#include <math.h>
enum Protocol { P1, P2 };
template <Protocol P>
class Communicator {
public:
template <typename T>
void communicate(T arg) {
static_assert(sizeof(arg) <= sizeof(float), "argument size must be <= 4 bytes");
float value = (float)arg;
uint8_t length = sizeof(arg); //length in bytes
_communicate(value, length);
}
void _communicate(float f, uint8_t length) {
std::cout << f;
}
};
EDIT: I can just pick 1 answer as correct. And although I learned the most from Roy, I picked MM's answer because it keeps things as simple as possible. (upvoted both though)
There's several ways to approach this... here is one:
template<Protocol P>
size_t value_size();
template<> size_t value_size<P1>() { return 2; }
template<> size_t value_size<P2>() { return 4; }
// ... inside your other function
static_assert(sizeof(arg) <= value_size<P>(),
Here's a different approach
#include <iostream>
#include <math.h>
#include <cstdint>
// enum Protocol { P1, P2 }; // let's use types with traits instead.
struct P1
{
constexpr static const int protocol_id = 1;
//^^ or maybe use an enum
// type, this may need refactoring
// to fit your code and style.
using data_type = uint16_t; //< or whatever your 2-byte datatype is.
// can add more data traits and even static member functions here
// you could also add protocol specific non-static data if you add a
// P1 data member to your Communicator class.
// A struct with traits is a very good entry point for many compile-time
// polymorphism techniques.
};
struct P2
{
constexpr static const int protocol_id = 2;
using data_type = uint32_t; //< or whatever your 4-byte datatype is.
};
template <typename _Proto>
class Communicator {
public:
using data_type = typename _Proto::data_type;
constexpr static const int proto_id = _Proto::protocol_id;
public:
void communicate(data_type arg) // we can use the trait from either P1 or P2
{
float value = (float)arg;
uint8_t length = sizeof(data_type); //length in bytes
_communicate(value, length);
}
void _communicate(float f, uint8_t length)
{
std::cout << f;
}
};
Here's code to convert an enum (if that's what you already have) to a class.
enum protocol_t { p1, p2 };
template <protocol_t _p> struct Protocol {};
// simply derive the template specialization from the desired type
template <> struct Protocol<p1> : P1 {};
// simply derive the template specialization from the desired type
template <> struct Protocol<p2> : P2 {};
You can also derive from P1, P2 to help organize code.
struct P1
{
// ... + specialized functions:
void _communicate(value_type x) { ... } // that you'll call from Communicator
void _communicate(const value_type* begin, const value_type* end) { ... }
};
struct P2 { /* same thing */ };
template <typename _Proto>
class Communicator : _Proto // < can control visibility here.
{ ... };

Getting compile-time constant offsetof of base class in multiple-inheritance

Look at this example:
struct s77 {
char d[77];
};
struct s1 {
char d;
};
struct Foo: s77, s1 {
};
struct Off {
static const int v = std::size_t(static_cast<s1*>(static_cast<Foo*>(nullptr)+1)) - std::size_t(static_cast<Foo*>(nullptr)+1);
};
This code tries to put the offset of s1 in Foo into Off::v. This code compiles with GCC/clang (without any warnings), but fails to compile with VS2015/VS2017 (error C2131: expression did not evaluate to a constant)
Which compiler is correct?
Can I achieve this functionality in a standard conformant way? If it is not possible, is it possible to create a working solution which works with VS2015/VS2017? I'm willing to accept any working solution, even which has undefined behavior according to the standard (but happens to work with VS2015 and VS2017). Off::v must be a compile time constant.
My original problem is this: I have an own implementation of tuple, which is implemented with multiple inheritance (like clang's tuple). I'd like to create a compile-time constant "descriptor" for the tuple, which contains all of its members' offset in the tuple. This descriptor contains a function pointer for each tuple member too. If I'd create this descriptor by hand, it would look like this (for example):
struct Entry {
int offset;
void (*function)(void *member);
};
Entry descriptor[] = {
{ 0, &SomeType1::static_function },
{ 12, &SomeType2::static_function },
{ 20, &SomeType3::static_function }
};
The intention of this is that I could have a general function (which is not a template), which can use this descriptor to call a type-specific function on each tuple member:
void call(void *tuple, const Entry *entries, int n) {
for (int i=0; i<n; i++) {
entries[i].function(static_cast<char *>(tuple)+entries[i].offset);
}
}
(The reason of this solution instead of a templated call function is that call is actually a huge function in my real code, and entry[i].function calls cannot be factored out from it. I'd like to avoid massive code duplication.)
How about something like:
struct Entry {
void* (*data_member_getter)(void*);
void (*function)(void *member);
};
namespace details
{
template <std::size_t I, typename Tuple>
constexpr void* voidPGetter(void* tuple)
{
return &std::get<I>(*reinterpret_cast<Tuple*>(tuple));
}
template <std::size_t I, typename Tuple>
constexpr Entry MakeEntry()
{
using type = std::tuple_element_t<I, Tuple>;
return { &voidPGetter<I, Tuple>, &type::static_function };
}
template <typename Tuple, std::size_t ... Is>
constexpr std::array<Entry, sizeof...(Is)>
ComputeEntryHelper(std::index_sequence<Is...>)
{
return {{MakeEntry<Is, Tuple>()...}};
}
}
template <typename Tuple>
constexpr auto ComputeEntry()
{
constexpr auto size = std::tuple_size<Tuple>::value;
return details::ComputeEntryHelper<Tuple>(std::make_index_sequence<size>());
}
And then
void call(void* tuple, const Entry* entries, int n) {
for (int i = 0; i != n; ++i) {
entries[i].function(entries[i].data_member_getter(tuple));
}
}
So instead of offset, having a function to get the data.

Creating a semi-dynamically defined array

I have to create a class that uses an array. The array can have variable length that is defined (with an actual number) upon the object construction, meaning I know it's size at the compile time.
So, I tried to create a template where I could input the size and have the object be constructed with that size. This way I could still use arrays.
But this code doesn't compile.
use of class template requires template argument list
What am I doing wrong?
h:
#pragma once
class BinaryMemoryReader;
template <int bytesPerValue>
class ChunkBlockChannel
{
public:
std::map<unsigned int, unsigned char[bytesPerValue * 1024]> cbcLayer;
std::map<unsigned int, unsigned char[bytesPerValue]> jj;
void read(BinaryMemoryReader &reader);
ChunkBlockChannel();
~ChunkBlockChannel();
};
cpp:
#include "ChunkBlockChannel.h"
#include "BinaryMemoryReader.h"
template <int bytesPerValue>
void ChunkBlockChannel::read(BinaryMemoryReader &reader) {
for (int i = 0; i < 64; ++i) {
bool flag;
reader.read<bool>(flag);
std::vector<unsigned char> cbcLayerData;
std::vector<unsigned char> jjData;
if (!flag) {
unsigned char data[bytesPerValue * 1024];
reader.readBytes(data, bytesPerValue * 1024);
cbcLayer[i] = data;
} else {
unsigned char data[bytesPerValue];
reader.readBytes(data, bytesPerValue);
jj[i] = data;
}
}
}
template <int bytesPerValue>
ChunkBlockChannel::ChunkBlockChannel() {}
template <int bytesPerValue>
ChunkBlockChannel::~ChunkBlockChannel() {}
There may be other issues, but... you've forgotten the template argument in the method definitions.
I mean
template <int bytesPerValue>
void ChunkBlockChannel<bytesPerValue>::read(BinaryMemoryReader &reader) {
// remember this ^^^^^^^^^^^^^^^
and
template <int bytesPerValue>
ChunkBlockChannel<bytesPerValue>::ChunkBlockChannel() {}
// and this ^^^^^^^^^^^^^^^
and
template <int bytesPerValue>
ChunkBlockChannel<bytesPerValue>::~ChunkBlockChannel() {}
// and this ^^^^^^^^^^^^^^^