Memory error when capturing variable in CUDA extended lambda - c++

I am creating an extended (i.e. __device__) lambda in CUDA (see e.g. here) and it is supposed to capture a variable (here, a simple double value = 3;).
It compiles, but running it, I get an invalid memory access error and I don't understand why.
Changing the variable to static const double value = 3 fixes the problem, as it is no longer captured (though I don't understand how it is still available inside the lambda).
Question1: how can I correctly capture host variables in a CUDA extended lambda?
Question2: why is this code not working?
I tried this on Ubuntu 16, both with CUDA 8 and 10.
MWE Code
Compiled with nvcc mwe_lambda.cu -o mwe_lambda --std=c++11 -lineinfo -arch=sm_60 --expt-relaxed-constexpr --expt-extended-lambda
Note in particular the lambda, which should capture by copy.
The managed_allocator etc. are just in order to use managed memory and print the CUDA error.
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
#include <iostream>
#include <string>
// Prints a diagnostic line to std::cerr when a CUDA call did not succeed.
//   err      - status value to inspect
//   file     - source file of the call site
//   line     - source line of the call site
//   function - name of the enclosing function at the call site
// Does nothing when err equals cudaSuccess; never aborts or throws by itself.
static void CudaHandleError( cudaError_t err, const char *file, int line, const std::string & function)
{
    if (err == cudaSuccess)
    {
        return;
    }
    const std::string description(cudaGetErrorString( err ));
    std::cerr << description << " " << file << " " << line << " " << function << std::endl;
}
// Passes `err` to CudaHandleError together with the call site's file, line and function.
#define CU_HANDLE_ERROR( err ) (CudaHandleError( err, __FILE__, __LINE__, __func__ ))
// Same, for the status returned by cudaGetLastError().
#define CU_CHECK_ERROR( ) (CudaHandleError( cudaGetLastError(), __FILE__, __LINE__, __func__ ))
// Checks the last error, then synchronizes the device and checks that result too.
// Wrapped in do { } while (0) so the two checks stay together as one statement,
// e.g. when the macro is the body of an unbraced `if`.
#define CU_CHECK_AND_SYNC( ) do { CU_CHECK_ERROR(); CU_HANDLE_ERROR( cudaDeviceSynchronize() ); } while (0)
// Allocator derived from std::allocator<T> whose allocate/deallocate go through
// cudaMallocManaged and cudaFree. Failures are reported via CU_HANDLE_ERROR;
// allocate() then returns whatever pointer value was left in the local (nullptr
// unless cudaMallocManaged wrote to it).
template<class T>
class managed_allocator : public std::allocator<T>
{
public:
    typedef T value_type;

    // Lets containers obtain the same allocator family for another element type.
    template<typename Other>
    struct rebind
    {
        using other = managed_allocator<Other>;
    };

    managed_allocator() throw() : std::allocator<T>() { }
    managed_allocator(const managed_allocator &a) throw() : std::allocator<T>(a) { }
    template <class U>
    managed_allocator(const managed_allocator<U> &a) throw() : std::allocator<T>(a) { }
    ~managed_allocator() throw() { }

    // Requests room for n objects of value_type.
    value_type* allocate(size_t n)
    {
        value_type* storage = nullptr;
        const size_t bytes = n*sizeof(value_type);
        CU_HANDLE_ERROR( cudaMallocManaged(&storage, bytes) );
        return storage;
    }

    // Releases a pointer previously handed out by allocate(); the count is ignored.
    void deallocate(value_type* ptr, size_t)
    {
        CU_HANDLE_ERROR( cudaFree(ptr) );
    }
};
// Convenience alias: a std::vector whose storage is obtained through managed_allocator.
template<typename T>
using field = std::vector<T, managed_allocator<T>>;
// vf[i] = f()
// Kernel: every thread whose global index is below N stores the result of f() in vf[idx].
// NOTE(review): f is received by const reference; the answer that follows this listing
// changes the parameter to pass-by-value so that the closure object is copied to the kernel.
template<typename A, typename F>
__global__ void cu_set_lambda(A * vf, const F & f, int N)
{
// Flat global thread index built from the block and thread coordinates.
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Threads past the end of the array do nothing.
if(idx < N)
{
vf[idx] = f();
}
}
// Minimal reproduction: asks the device to fill a 10-element `field<double>` with the
// value returned by an extended __device__ lambda that captures `value` by copy.
int main()
{
std::cerr << "started" << std::endl;
{
field<double> vf(10, 0);
double value = 3;
// [=] copies `value` into the closure object.
auto lambda = [=] __device__ ()
{
return value;
};
auto n = vf.size();
// One thread per element, rounded up to whole blocks of 1024 threads.
cu_set_lambda<<<(n+1023)/1024, 1024>>>(vf.data(), lambda, n);
// Report any pending error, then synchronize and report that status as well.
CU_CHECK_AND_SYNC();
std::cerr << vf[0] << " " << vf[1] << std::endl;
}
std::cerr << "finished" << std::endl;
}

You need to pass the lambda by value, as the variables captured by value in the lambda will not be available in device when you pass the lambda by reference.
__global__ void cu_set_lambda(A * vf, const F f, int N)
^^^^^^^
If you pass the lambda by value, the object (and its internals) will be copied to the kernel.

Related

How to make template function work for char[32] and string type?

I want to design a function template that creates a shared-memory region of size sizeof(T) * n
and returns a pointer of the template type. I also pass a value that is used to initialise every element.
function def looks like:
#ifndef SHMHELP_HPP_
#define SHMHELP_HPP_
#include <bits/stdc++.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
namespace cppbox {
namespace shm {
// How func() opens the shared-memory object.
enum EmShmOpenMode:int {
MODE_CREATE,
MODE_RD,
};
// Opens the POSIX shared-memory object `filename`, maps n*sizeof(T) bytes of it
// read/write and returns the mapping as T*.
//   rdflag - compared against MODE_CREATE; in that mode the object is created/sized
//            and every element is set to v
//   v      - value written to each element in MODE_CREATE
// Returns nullptr (after printing to std::cerr) if shm_open, ftruncate or mmap fails.
template<typename T> //, T v>
T* func(const char* filename, size_t n, int rdflag, T v) { // mode: 0666
// MODE_CREATE: exclusive create; otherwise plain O_RDWR (set by the assignment in the else arm).
int offlag = rdflag == EmShmOpenMode::MODE_CREATE ? O_CREAT | O_EXCL | O_RDWR : offlag = O_RDWR;
int shm_fd = shm_open(filename, offlag, 0666);
if (-1 == shm_fd) {
if (rdflag != EmShmOpenMode::MODE_CREATE) {
std::cerr << "shm_open open failed: " << strerror(errno) << std::endl;
return nullptr;
}
// Exclusive create failed: retry by opening with O_RDWR | O_TRUNC instead.
offlag = O_RDWR| O_TRUNC;
if (-1 == (shm_fd = shm_open(filename, offlag, 0666))) {
std::cerr << "shm_open create failed: " << strerror(errno) << std::endl;
return nullptr;
}
}
if (rdflag == EmShmOpenMode::MODE_CREATE) {
// Size the object to hold n elements.
if (ftruncate(shm_fd, n*sizeof(T))) {
std::cerr << "ftruncate failed: " << strerror(errno) << std::endl;
close(shm_fd);
return nullptr;
}
}
T* ret = (T*)mmap(0, n*sizeof(T), PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
// The descriptor is closed right after mmap, whether or not the mapping succeeded.
close(shm_fd);
if (ret == MAP_FAILED) {
std::cerr << "mmap failed: " << strerror(errno) << std::endl;
return nullptr;
}
// NOTE(review): std::fill assigns to elements; the mapping holds no constructed T objects.
// The answer following this listing recommends std::uninitialized_fill for that reason.
if (rdflag == EmShmOpenMode::MODE_CREATE) std::fill((T*)ret, ((T*)ret) + n, v);
return ret;
}
}
};
#endif // SHMHELP_HPP_
it's ok when i call func<int>("a", 100, 0, 0) or func<double>("a", 100, 0, 0.)
but it crashed when i call func<std::string>("a", 100, 0, "")
// Caller from the question (the std::string case reported as crashing).
// NOTE(review): calls MapShm, while the header listed above names the function func — confirm.
int main() {
std::string*p = cppbox::shm::MapShm<std::string>("b", 100, cppbox::shm::MODE_CREATE, "huang");
for (int i = 0; i < 100; ++i) {
cout << (*p)[i] << " ";
}
}
and the compiler refuses to compile when I call func<char[32]>("a", 100, 0, "") like this:
// Caller from the question for the char[32] case, which the asker reports is rejected at compile time.
int main() {
char[32]*p = cppbox::shm::MapShm<char[32]>("b", 100, cppbox::shm::MODE_CREATE, "huang"); // compiler will reject in this line
for (int i = 0; i < 100; ++i) {
cout << (*p)[i] << " ";
}//*/
}
how can i make func<char[32]>("a",100, 0, "") and func<std::string>("a", 100, 0, "") work?
Change
if (rdflag == EmShmOpenMode::MODE_CREATE) std::fill((T*)ret, ((T*)ret) + n, v);
to
if (rdflag == EmShmOpenMode::MODE_CREATE) std::uninitialized_fill((T*)ret, ((T*)ret) + n, v);
That should help with the std::string case.
std::fill can only be used on memory that already contains objects. In your case you have uninitialised (raw) memory containing no constructed objects, so std::uninitialized_fill should be used instead.
First of all it is not a safe practice to allocate memory inside a function and return its pointer. Because one may forget to free the memory!
Second since there is no way to assign a default value for arrays and the desired default value is somewhat empty anyway, you could do it by having 2 function overloads, one for assigning a default value and another for just allocating memory like:
#include <stdio.h>
#include <type_traits>
// Catch-all type: implicitly constructible from a value of any type, which it discards.
struct all {
    template<typename Any>
    all(Any) {}
};
//First overload
// Allocates n zero-filled elements with calloc and then assigns defaultvalue to each one.
// Returns nullptr when calloc fails (the elements are not touched in that case).
// The caller owns the result and releases it with free().
// NOTE(review): elements are assigned, not constructed, so this assumes T is a
// trivially constructible type such as int or double.
template <typename T>
T* pre_func(size_t n, T defaultvalue) {
    T *pointer = static_cast<T*>(calloc(n, sizeof(T)));
    if (pointer == nullptr) {
        return nullptr;   // allocation failed: nothing to initialise
    }
    for (size_t index = 0; index < n; ++index) {
        pointer[index] = defaultvalue;
    }
    return pointer;
}
//second overload
// Allocation only: returns n zero-filled elements obtained from calloc
// (or whatever calloc returns on failure). No default value is applied.
template <typename T>
T* pre_func(size_t n) {
    return static_cast<T*>(calloc(n, sizeof(T)));
}
//overload selector
// Chooses between the two pre_func overloads at compile time.
// For array element types the default value parameter has type `all`, so any
// argument is accepted and ignored; otherwise it has type T and is forwarded.
template <typename T>
T* func(size_t n, typename std::conditional<std::is_array<T>::value, all, T>::type defaultvalue){
    if constexpr(!std::is_array<T>::value){
        return pre_func<T>(n, defaultvalue);
    }else{
        return pre_func<T>(n);
    }
}
then in your program call the function overloads any way you want for normal types and array types like:
func<int>(100, 0);
func<char[32]>(100, "");

Problem with cudaMalloc and cudaMemcpy in separate template function [duplicate]

This question already has an answer here:
Cuda allocation and return array from gpu to cpu
(1 answer)
Closed 2 years ago.
I am working on the basic CUDA program that only calculates square and cube. But I do not want to write all code in main thus I have separated into the functions some of them are template. No special purpose to create a template function. Only, I want to try it. The problem is related to if I call the function as naked such as cudaMalloc it is okay. If I call with my function, it fails. Let me show;
kernel.cuh
#ifndef KERNEL_CUH_
#define KERNEL_CUH_
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <exception>
#include <iostream>
// Immutable pair of launch dimensions: a block size and a thread size.
struct GPUVars
{
    GPUVars(size_t block, size_t thread) : block_sz(block), thread_sz(thread) {}

    // Value given as `block` at construction.
    size_t GetBlockSize() const { return block_sz; }
    // Value given as `thread` at construction.
    size_t GetThreadSize() const { return thread_sz; }

private:
    size_t block_sz;
    size_t thread_sz;
};
inline bool check_device()
{
auto cuda_device_count{ 0 };
cudaGetDeviceCount(&cuda_device_count);
return cuda_device_count > 0;
}
// Calls cudaMalloc for SIZE_BYTE bytes and throws std::bad_alloc if it does not succeed.
// NOTE(review): `arr` is a by-value copy of the caller's pointer, and cudaMalloc writes
// through the address of that copy; the accepted fix later in this thread takes T** instead.
template <typename T>
void AllocateMem(T* arr, size_t SIZE_BYTE)
{
if (cudaMalloc(&arr, SIZE_BYTE) != cudaSuccess)
{
throw std::bad_alloc();
}
}
// Copies SIZE_BYTE bytes from host_arr to device_arr with cudaMemcpy (host-to-device).
// Throws std::bad_alloc when the copy does not report cudaSuccess.
template <typename T>
void CopyMemToDevice(const T* host_arr, T* device_arr, size_t SIZE_BYTE)
{
    const bool copied =
        cudaMemcpy(device_arr, host_arr, SIZE_BYTE, cudaMemcpyHostToDevice) == cudaSuccess;
    if (!copied)
    {
        throw std::bad_alloc();
    }
}
#endif
main.cpp
#include <iostream>
#include <random>
#include <iomanip>
#include <cassert>
#include "timer.h"
#include "cpu_calc.h"
#include "kernel.cuh"
// Fills arr[0 .. SIZE_ARR) with pseudo-random values drawn from a
// std::uniform_real_distribution<> over (lower, upper), using a std::mt19937
// seeded from std::random_device. Each draw is converted to T on assignment.
template <typename T>
void RandNumberGen(T lower, T upper, T* arr, size_t SIZE_ARR)
{
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_real_distribution<> distribution(lower, upper);
    T* const stop = arr + SIZE_ARR;
    for (T* slot = arr; slot != stop; ++slot)
    {
        *slot = distribution(engine);
    }
}
// Driver from the question: generates 1024 random doubles, times the CPU square
// routine, then tries to allocate two device buffers and copy the input to the device.
int main()
{
assert(check_device() == true);
constexpr size_t SIZE_ARR{ 1024 };
double input_arr[SIZE_ARR]{ 0 };
RandNumberGen(1.0, 10000.0, input_arr, SIZE_ARR);
constexpr size_t SIZE_BYTE = SIZE_ARR * sizeof(double);
std::cout << std::setprecision(9) << std::fixed;
double cpu_output[SIZE_ARR]{ 0 };
// SQUARE
auto time = CPUTimer(&cpu_output[0], &input_arr[0], SIZE_ARR, &CPUSquare);
std::cout << "CPU square opeartion with " << SIZE_ARR << " size array takes " << std::setw(18) << time << " ns\n";
GPUVars gpu_vars{ 0, 1024 };
double* pgpu_input = nullptr;
double gpu_output[SIZE_ARR];
double* pgpu_output = nullptr;
// The device pointers are handed to AllocateMem by value here.
AllocateMem(pgpu_input, SIZE_BYTE);
AllocateMem(pgpu_output, SIZE_BYTE);
// This is the call the question reports as failing with cudaErrorInvalidValue.
CopyMemToDevice(input_arr, pgpu_input, SIZE_BYTE);
}
When I call the CopyMemToDevice function, it throws because cudaMemcpy returns cudaErrorInvalidValue.
Also, if I change CopyMemToDevice function to this still same;
// Variant tried in the question: allocate through AllocateMem, then copy host-to-device.
// Throws std::bad_alloc when cudaMemcpy does not report cudaSuccess.
template <typename T>
void CopyMemToDevice(const T* host_arr, T* device_arr, size_t SIZE_BYTE)
{
// device_arr is passed to AllocateMem by value.
AllocateMem(device_arr, SIZE_BYTE);
if (cudaMemcpy(device_arr, host_arr, SIZE_BYTE, cudaMemcpyHostToDevice) != cudaSuccess) // return 1 which is equal to cudaErrorInvalidValue
{
throw std::bad_alloc();
}
}
When I write this function as below, it works perfectly;
// Variant the question reports as working: cudaMalloc is called directly on this
// function's own device_arr parameter, and the copy then uses that same variable.
// NOTE(review): device_arr is a by-value parameter, so the allocated address is
// only visible inside this function; the cudaMalloc status is not checked.
template <typename T>
void CopyMemToDevice(const T* host_arr, T* device_arr, size_t SIZE_BYTE)
{
cudaMalloc(&device_arr, SIZE_BYTE);
if (cudaMemcpy(device_arr, host_arr, SIZE_BYTE, cudaMemcpyHostToDevice) != cudaSuccess)
{
throw std::bad_alloc();
}
}
Also, I know that AllocateMem function works, cudaMalloc return 0 which is cudaSuccess.
My question is: what is the difference between calling cudaMalloc and cudaMemcpy in the same function and calling them in different functions? Why do I get cudaErrorInvalidValue ("This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.") when I call them in separate functions? Thanks in advance.
I am using Visual Studio 2019 16.7.1 and CUDA 10.1
As Igor Tandetnik mentioned in the comments, the problem is simply that the pointer was passed by value. I updated the AllocateMem function like this:
// Allocates SIZE_BYTE bytes with cudaMalloc and stores the resulting address in *arr.
//   arr       - address of the caller's pointer variable, so the caller sees the result
//   SIZE_BYTE - number of bytes requested
// Throws std::bad_alloc when cudaMalloc does not report cudaSuccess.
template <typename T>
void AllocateMem(T** arr, size_t SIZE_BYTE)
{
    if (cudaMalloc(arr, SIZE_BYTE) != cudaSuccess)
    {
        throw std::bad_alloc();
    }
}
And call like this,
AllocateMem(&pgpu_output, SIZE_BYTE);
It works.

Variadic templates and variadic functions combination

I'm creating a logging function that on the one hand needs to be able to parse any number of parameters, but on the other hand will always be passed with __func__ and this(of the calling object).
If I use only variadic templates like this
// Variadic logging front end from the question: converts `object` and each argument
// with to_str(...).c_str() and forwards everything to log().
// As written, `format` is passed to log() twice and `func` is never used.
// NOTE(review): `const class *object` looks like a placeholder for the real class name — confirm.
template<typename... Args>
void
log_device_message_template(const class *object,
char const *func,
char const *const format,
Args const &... args)noexcept
{
log(format, to_str(object).c_str(),format, to_str(args).c_str()...);
}
I have to call log_device_message_template like this :
log_device_message_template(this,__func__,some format,some parameters)
so i've added the macro:
// Call-site helper: supplies `this` and __func__ automatically and wraps the caller's
// format string as "%s::%s(" format ")". `format` must be a string literal so that the
// adjacent literals concatenate; ##__VA_ARGS__ allows an empty argument list.
#define log_api_call(format, ...) \
log_device_message_template( \
this, __func__, "%s::%s(" format ")", ##__VA_ARGS__)
The thing is that I'm getting a seg fault, probably due to bad formatting somewhere. Adding __attribute__((format)) doesn't work because of the variadic template...
here is the error from the python test testing the logger:
lookup in file=***** [0]
28371: symbol=_ZN5***6logger27log_device_message_templateIINS_14*****E13****EEEvNS_21logger_component_eENS_17logger_level_eEPKcPKNS_9objectES7_DpRT_; lookup in file=**** [0]
28371: ****: error: symbol lookup error: undefined symbol: _ZN5***6logger27log_device_message_templateIINS_14****E13****EEEvNS_21logger_component_eENS_17logger_level_eEPKcPKNS_9objectES7_DpRT_ (fatal)
Here's another way to approach it which neatly avoids macros.
Note in this case I am emitting log records to stdout, but that can easily be altered. I am also using std::ostringstream to build the log message. It's not renowned for performance, so is a good candidate for a customisation point.
However, this should get you started if you like the approach:
#include <iostream>
#include <sstream>
#include <tuple>
#include <utility>
// Compile-time index tag used to select overloads by position.
template<std::size_t N> using index_c = std::integral_constant<std::size_t, N>;

// Position 0 of an argument list: the value is written with no separator.
template<class T>
void emit_item(std::ostream &out, index_c<0>, T const &value)
{
    out << value;
}

// Any later position: a ", " separator goes first, then the value.
template<std::size_t N, class T>
void emit_item(std::ostream &out, index_c<N>, T const &value)
{
    out << ", ";
    out << value;
}
// Writes tuple elements Is... to `sink` through emit_item, in index order.
template<class Tuple, std::size_t...Is>
void emit_arglist(std::ostream &sink, Tuple &&tuple, std::index_sequence<Is...>)
{
    // Elements of a braced initialiser are evaluated left to right, which gives
    // the ordered expansion; the leading 0 keeps the array non-empty for an empty pack.
    const int in_order[] = {
        0,
        (emit_item(sink, index_c<Is>(), std::get<Is>(tuple)), 0)...
    };
    (void)in_order;
}
// emitting a 'more' at index 0 has a prefix
template<class T>
void emit_more(std::ostream &os, index_c<0>, T const &t)
{
os << " : " << t;
}
// emitting a 'more' at index N (!= 0) has a space prefix
template<std::size_t N, class T>
void emit_more(std::ostream &os, index_c<N>, T const &t)
{
os << " " << t;
}
template<class Tuple, std::size_t...Is>
void emit_more(std::ostream &sink, Tuple &&tuple, std::index_sequence<Is...>)
{
using expand = int[];
void(expand{0,
(emit_more(sink, index_c<Is>(), std::get<Is>(tuple)), 0)...
});
};
// Builds "<object>::<func>(<args, comma separated>)" followed by the optional
// trailing items (formatted by emit_more) and returns it as a std::string.
//   object, func - names written before the parenthesised list
//   args         - tuple of references to the call arguments
//   morestuff    - extra values appended after the closing parenthesis
template<typename... Args, typename ...MoreStuff>
std::string
make_log_string(const char *object,
                char const *func,
                std::tuple<Args const &...> const& args,
                MoreStuff&&...morestuff) noexcept
{
    std::ostringstream text;
    text << object;
    text << "::";
    text << func;
    text << '(';
    emit_arglist(text, args, std::make_index_sequence<sizeof...(Args)>{});
    text << ')';
    emit_more(text, std::tie(morestuff...), std::make_index_sequence<sizeof...(MoreStuff)>{});
    return text.str();
}
// Call-site sugar: bundles the given values into a tuple of const references.
// The tuple only refers to the arguments; it does not copy them.
template<class...Arg>
decltype(auto) args(Arg const&...values)
{
    return std::tie(values...);
}
// Demonstration: prints four log strings covering integer arguments, mixed arguments,
// one trailing item, and an empty argument list with several trailing items.
int main()
{
int a = 0, b = 1, c = 2;
std::string sa = "xxx", sb = "yyy", sc = "zzz";
const char* Class = "foo";
const char* Func = "var";
std::cout << make_log_string(Class, Func, args(a, b, c)) << std::endl;
std::cout << make_log_string(Class, Func, args(sa, b, sc)) << std::endl;
std::cout << make_log_string(Class, Func, args(sa, b, sc), "more stuff") << std::endl;
std::cout << make_log_string(Class, Func, args(), "empty", "argument", "list") << std::endl;
}
expected output:
foo::var(0, 1, 2)
foo::var(xxx, 1, zzz)
foo::var(xxx, 1, zzz) : more stuff
foo::var() : empty argument list
And with a bit more boilerplate we can write this:
std::cout << make_log_string(method(Class, Func)(a, b, c)) << std::endl;
std::cout << make_log_string(method(Class, Func)(sa, b, sc)) << std::endl;
std::cout << make_log_string(method(Class, Func)(sa, b, sc), "more stuff") << std::endl;
std::cout << make_log_string(method(Class, Func)(), "empty", "argument", "list") << std::endl;
Here it is:
#include <iostream>
#include <sstream>
#include <tuple>
#include <utility>
// Compile-time index tag used to select overloads by position.
template<std::size_t N> using index_c = std::integral_constant<std::size_t, N>;

// Position 0 of an argument list: the value is written with no separator.
template<class T>
void emit_item(std::ostream &out, index_c<0>, T const &value)
{
    out << value;
}

// Any later position: a ", " separator goes first, then the value.
template<std::size_t N, class T>
void emit_item(std::ostream &out, index_c<N>, T const &value)
{
    out << ", ";
    out << value;
}
// Writes tuple elements Is... to `sink` through emit_item, in index order.
template<class Tuple, std::size_t...Is>
void emit_arglist(std::ostream &sink, Tuple &&tuple, std::index_sequence<Is...>)
{
    // Elements of a braced initialiser are evaluated left to right, which gives
    // the ordered expansion; the leading 0 keeps the array non-empty for an empty pack.
    const int in_order[] = {
        0,
        (emit_item(sink, index_c<Is>(), std::get<Is>(tuple)), 0)...
    };
    (void)in_order;
}
template<class T>
void emit_more(std::ostream &os, index_c<0>, T const &t)
{
os << " : " << t;
}
template<std::size_t N, class T>
void emit_more(std::ostream &os, index_c<N>, T const &t)
{
os << " " << t;
}
template<class Tuple, std::size_t...Is>
void emit_more(std::ostream &sink, Tuple &&tuple, std::index_sequence<Is...>)
{
using expand = int[];
void(expand{0,
(emit_more(sink, index_c<Is>(), std::get<Is>(tuple)), 0)...
});
};
template<class...Args>
struct method_with_args;

// Names a member function as "<klass>::<func>". Calling the object with arguments
// yields a method_with_args that also refers to those arguments.
struct method
{
    constexpr method(const char* c, const char* f) : klass(c), func(f) {}

    const char* klass;
    const char* func;

    // Binds references to the given arguments; defined after method_with_args.
    template<class...Args>
    method_with_args<Args...> operator()(Args const&...args);

    // Streams "<klass>::<func>".
    friend std::ostream& operator<<(std::ostream& out, const method& self)
    {
        out << self.klass;
        out << "::";
        out << self.func;
        return out;
    }
};

// A method name plus a tuple of references to the call's arguments.
// The tuple does not own the arguments.
template<class...Args>
struct method_with_args
{
    // Streams "<klass>::<func>(<arguments formatted by emit_arglist>)".
    friend std::ostream& operator<<(std::ostream& out, method_with_args const& call)
    {
        out << call.m;
        out << '(';
        emit_arglist(out, call.args, std::make_index_sequence<sizeof...(Args)>{});
        out << ')';
        return out;
    }

    method m;
    std::tuple<Args const&...> args;
};

template<class...Args>
method_with_args<Args...> method::operator()(Args const&...args)
{
    method_with_args<Args...> bound = {*this, std::tie(args...)};
    return bound;
}
// Holds a free-function name. Not referenced by the rest of this listing.
struct function
{
const char* name;
};
// Streams `m` (anything with operator<<, e.g. a method or method_with_args) and then
// the optional trailing items formatted by emit_more; returns the text as a std::string.
template<typename Method, typename ...MoreStuff>
std::string
make_log_string(Method m,
                MoreStuff &&...morestuff) noexcept
{
    std::ostringstream text;
    text << m;
    emit_more(text, std::tie(morestuff...), std::make_index_sequence<sizeof...(MoreStuff)>{});
    return text.str();
}
// Demonstration of the method(...)(...) call syntax: prints four log strings covering
// integer arguments, mixed arguments, one trailing item, and an empty argument list.
int main()
{
int a = 0, b = 1, c = 2;
std::string sa = "xxx", sb = "yyy", sc = "zzz";
const char *Class = "foo";
const char *Func = "var";
std::cout << make_log_string(method(Class, Func)(a, b, c)) << std::endl;
std::cout << make_log_string(method(Class, Func)(sa, b, sc)) << std::endl;
std::cout << make_log_string(method(Class, Func)(sa, b, sc), "more stuff") << std::endl;
std::cout << make_log_string(method(Class, Func)(), "empty", "argument", "list") << std::endl;
}

How to handle multi-type variables with templates?

I was trying to make a function that assigns y to x regardless whether x, y are int or std::string. I wrote this code:
#include <iostream>
#include <string>
#include <typeinfo>
// Attempt from the question: pick the assignment path at run time from typeid names.
// Both branches are ordinary statements, so each is compiled for every T; that is
// why static_cast<int>(y) is rejected when T is std::string.
// Note: typeid(...).name() returns const char*, so `== "Ss"` compares pointers, and
// the final else branch ends without returning a value.
template <typename T>
T& assign(T& x, T& y){
if ( typeid(x).name() == "Ss" && typeid(y).name() == "Ss" ){
std::string k = static_cast<std::string>(y);
x = k;
return x;
}
else if ( typeid(x).name() == "i" && typeid(y).name() == "i" ){
int k = static_cast<int>(y);
x = k;
return x;
}
else{
std::cout << "uncorrect assignment" << std::endl;
}
}
// Caller from the question: assigns one std::string to another through assign() and prints it.
int main(){
std::string a = "empty_string";
std::string b = "Hi there";
assign(a, b);
std::cout << a << std::endl;
}
But it doesn’t work.
It gives the error:
[Error] invalid static_cast from type ‘std::basic_string<char>’ to type
at line 14:
int k = static_cast<int>(y);
I can’t understand, what is the problem?
I know the objection: I might have just defined function assign as:
// Assigns y to x and returns a reference to x.
// The explicit return is required: flowing off the end of a function that is
// declared to return T& is undefined behaviour.
template <typename T>
T& assign(T& x, T& y){
    x = y;
    return x;
}
which works. However, I was working on an other more complex function on which I have to (or at least I haven’t found any way other than) use static_cast.
So, if you could, please, explain to me what is the mistake in this example, I may try to fix the function I am working on.
Thank you very much,
Simone.
To do what you want, you need C++17 and if constexpr, together with something that works at compile time rather than typeid, which works at run time.
The problem is that in your code typeid selects, at run time, the if or the else branch, but the compiler must still compile both branches. So it must compile
int k = static_cast<int>(y);
x = k;
when T is std::string. This give an error.
You need a type trait (std::is_same, for example) that is evaluated at compile time, and a construct that avoids compiling the wrong branch. This construct is if constexpr ( <test> ) (where <test> can be evaluated at compile time) but, unfortunately, it is only available from C++17.
So, in C++17 you can write
// Assigns y to x when T is std::string or int; for any other T it prints
// "uncorrect assignment" and leaves x untouched. The branch is selected at
// compile time, so only the code for the matching type is instantiated.
template <typename T>
void assign (T & x, T const & y)
{
    constexpr bool is_text = std::is_same<T, std::string>::value;
    constexpr bool is_integer = std::is_same<T, int>::value;

    if constexpr ( is_text ) {
        std::string copy = static_cast<std::string>(y);
        x = copy;
    }
    else if constexpr ( is_integer ) {
        int copy = static_cast<int>(y);
        x = copy;
    }
    else {
        std::cout << "uncorrect assignment" << std::endl;
    }
}
but, before C++17, you have to take a different approach.
To handle different types separately inside a function, an option is to define a local struct with overloaded function call operators to different types:
#include <iostream>
#include <string>
// Assigns y to x through a local functor that has one operator() overload per
// supported type (int and std::string), then returns x. Each overload announces the
// type on std::cout. Using any other T fails to compile: no overload matches.
template<typename T>
T& assign(T& x, const T& y) {
    struct Assigner {
        void operator()(int& target, const int& source) {
            std::cout << "Type is int." << std::endl;
            target = source;
        }
        void operator()(std::string& target, const std::string& source) {
            std::cout << "Type is std::string." << std::endl;
            target = source;
        }
    };

    Assigner dispatch;
    dispatch(x, y);
    return x;
}
// Exercises assign() once with std::string and once with int, printing each result.
int main() {
/* Test No. 1 */ {
std::string dest, src = "Foo";
std::cout << "Test result: " << assign(dest, src) << std::endl;
}
/* Test No. 2 */ {
int dest, src = 32;
std::cout << "Test result: " << assign(dest, src) << std::endl;
}
}
The code above will work on C++98 and above but its disadvantage is that it will raise compiler errors if you try to use it with unhandled types.

C++ Return data from a Variant according to the given typename

I have following function template to return specific type of data from a VARIANT, according to the given typename.
// Attempt from the question: choose which VARIANT member to return by comparing
// typeid(T).name() with hard-coded type-name strings at run time.
// Both return statements are compiled for every T, and there is no return when
// neither string matches.
template <typename T>
T VariantGetValue(VARIANT Variant) {
std::string S(typeid(T).name());
if (S == "wchar_t* __ptr64") { return Variant.bstrVal; }
if (S == "unsigned int") { return Variant.uintVal; }
}
So, as I needed to return an unsigned int type from a VARIANT, I tried using above function like:
return VariantGetValue<unsigned int>(CV);
But, unfortunately compiler seems to ignore if (S == "....) case here and gives me error:
C2440 - 'return': cannot convert from 'BSTR' to 'unsigned int'
But, if I remove the line if (S == "wchar_t* __ptr64") { return Variant.bstrVal; }, compiler only gives me following warning:
C4715 - 'VariantGetValue': not all control paths return a value
Can I suppress this error and continue? Is it safe or are there any alternate ways to do this without compiler errors?
You cannot have multiple return types based on a branch the code is going to take at runtime. Your best bet here is to work with explicit specializations.
// Primary template is deleted: only the explicit specialisations below can be called.
template < typename T >
T VariantGetValue(VARIANT) = delete;
// unsigned int: builds a reference VARIANT with InitVariantFromUInt32 and compares its
// vt tag with the argument's; throws std::runtime_error("bad get") when they differ.
template <>
unsigned int VariantGetValue<unsigned int>(VARIANT Variant)
{
VARIANT var;
InitVariantFromUInt32(unsigned int{}, &var);
if (Variant.vt != var.vt)
throw std::runtime_error("bad get");
return Variant.uintVal;
}
// BSTR: the condition is left as a placeholder comment in the answer, so this
// specialisation is a sketch rather than compilable code.
template <>
BSTR VariantGetValue<BSTR>(VARIANT Variant)
{
if (/* check that Variant stores wchar_t* __ptr64 */)
throw std::runtime_error("bad get");
return Variant.bstrVal;
}
This, by the way, is what std::get does for std::variant.
#include <iostream>
#include <variant>
using Variant = std::variant<int,std::string>;
// Shows std::get on a std::variant: the held alternative (int) is retrieved; asking for
// the other alternative is left commented out with the exception it would raise.
int main()
{
Variant v(13);
std::cout << std::get<int>(v) << '\n'; // 13
//std::cout << std::get<std::string>(v) << '\n'; // std::bad_variant_access
}
I have implemented a full example to perhaps clarify some questions raised in the comments.
#include <iostream>
#include <stdlib.h>
#include <string.h>
// Implement a mock VARIANT, don't take this code too seriously
typedef unsigned int VARTYPE;
typedef char* BSTR;
enum { VT_UI4, VT_BSTR };

// Mock tagged union: `vt` records which union member is currently meaningful.
struct VARIANT
{
    // Starts out as a null string, so vt always matches the initialised member
    // and the init functions below can trust it.
    VARIANT() : vt(VT_BSTR), bstrVal(nullptr) {}
    VARTYPE vt;
    union {
        unsigned int uintVal;
        BSTR bstrVal;
    };
};

// Stores `u` in `*v` and tags it VT_UI4. A string held before the call is released.
void InitVariantFromUInt32(unsigned int u, VARIANT * v)
{
    if (v->vt == VT_BSTR)
        delete[] v->bstrVal;   // delete[] on nullptr is a no-op
    v->vt = VT_UI4;
    v->uintVal = u;
}

// Stores a heap copy of the NUL-terminated string `s` in `*v` and tags it VT_BSTR.
// A string held before the call is released; the new buffer comes from new[].
void InitVariantFromString(char const * s, VARIANT * v)
{
    // +1 for the terminating NUL that strcpy writes.
    char * copy = new char[strlen(s) + 1];
    strcpy(copy, s);
    // Only treat the union as a pointer when the tag says it is one; the copy is
    // made first so `s` may alias the string currently held.
    if (v->vt == VT_BSTR)
        delete[] v->bstrVal;
    v->vt = VT_BSTR;
    v->bstrVal = copy;
}
// Typed accessors for the mock VARIANT.
// The primary template is deleted, so requesting an unsupported type is a compile error.
template < typename T >
T VariantGetValue(VARIANT) = delete;

// Returns the unsigned value; throws std::runtime_error("bad get") unless vt is VT_UI4.
template <>
unsigned int VariantGetValue<unsigned int>(VARIANT Variant)
{
    const bool holds_uint = (Variant.vt == VT_UI4);
    if (!holds_uint)
    {
        throw std::runtime_error("bad get");
    }
    return Variant.uintVal;
}

// Returns the string pointer; throws std::runtime_error("bad get") unless vt is VT_BSTR.
template <>
BSTR VariantGetValue<BSTR>(VARIANT Variant)
{
    const bool holds_string = (Variant.vt == VT_BSTR);
    if (!holds_string)
    {
        throw std::runtime_error("bad get");
    }
    return Variant.bstrVal;
}
// Walk-through of the mock: reads back an unsigned value, shows that asking for the
// wrong type throws, then stores and reads back a string.
int main()
{
VARIANT v;
InitVariantFromUInt32(14, &v);
std::cout << VariantGetValue<unsigned int>(v) << '\n';
// v holds an unsigned int, so the BSTR accessor is expected to throw here.
try {
std::cout << VariantGetValue<BSTR>(v) << '\n';
} catch (std::exception const& e) {
std::cout << "Get failed!" << '\n';
}
VARIANT w;
InitVariantFromString("Hello World!", &w);
std::cout << VariantGetValue<BSTR>(w) << '\n';
//std::cout << VariantGetValue<bool>(w) << '\n'; // error: call to deleted function 'VariantGetValue'
}