Pointer exception while getting RGB values from a (video) frame, Intel RealSense - C++

I'm trying to get the different RGB values from a frame with the Realsense SDK. This is for a 3D depth camera with RGB. According to https://github.com/IntelRealSense/librealsense/issues/3364 I need to use
int i = 100, j = 100; // fetch pixel 100,100
rs2::frame rgb = ...
auto ptr = (uint8_t*)rgb.get_data();
auto stride = rgb.as<rs2::video_frame>().stride();
cout << "R=" << ptr[3*(i * stride + j)];
cout << ", G=" << ptr[3*(i * stride + j) + 1];
cout << ", B=" << ptr[3*(i * stride + j) + 2];
In my code I get a read access violation when I try to fetch the values for pixel (x, y) = (1000, 1000). With (x, y) = (100, 100) it works every time... Error: Exception thrown: read access violation. ptr was 0x11103131EB9192A.
I enable the stream with cfg.enable_stream(RS2_STREAM_COLOR, WIDTH_COLOR_FRAME, HEIGTH_COLOR_FRAME, RS2_FORMAT_RGB8, 15); where the .h file contains:
#define WIDTH_COLOR_FRAME 1920
#define HEIGTH_COLOR_FRAME 1080
This is my code. Maybe it has something to do with the RS2_FORMAT_RGB8?
frameset frames = pl.wait_for_frames();
frame color = frames.get_color_frame();
uint8_t* ptr = (uint8_t*)color.get_data();
int stride = color.as<video_frame>().get_stride_in_bytes();
int i = 1000, j = 1000; // fetch pixel 1000,1000
cout << "R=" << int(ptr[3 * (i * stride + j)]);
cout << ", G=" << int(ptr[3 * (i * stride + j) + 1]);
cout << ", B=" << int(ptr[3 * (i * stride + j) + 2]);
cout << endl;
Thanks in advance!

stride is already in bytes (the length of a row in bytes), so multiplying it by 3 is not required; only the column index needs the factor of 3:
cout << " R= " << int(ptr[i * stride + (3*j) ]);
cout << ", G= " << int(ptr[i * stride + (3*j) + 1]);
cout << ", B= " << int(ptr[i * stride + (3*j) + 2]);

I had the same problem, and even with the answer above I still got segfaults.
I found out that when you do
uint8_t* ptr = (uint8_t*)color.get_data();
the RealSense SDK won't increase or track some internal reference, and the pointer became invalid after some time, causing the segfaults.
My fix is to copy the content to a local buffer:
allocate a new buffer of the RGB frame's size, and right after get_data(), copy the data into that buffer.
That fixed all my issues.
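A minimal sketch of that copy-out approach (the helper name and use of std::vector are mine, just for illustration):
#include <librealsense2/rs.hpp>
#include <cstdint>
#include <cstring>
#include <vector>
// Copies the frame's pixels into memory we own, right after get_data(),
// so later reads do not depend on the SDK keeping the frame alive.
std::vector<uint8_t> copy_frame(const rs2::video_frame& vf)
{
    const int size = vf.get_stride_in_bytes() * vf.get_height(); // bytes
    std::vector<uint8_t> buffer(size);
    std::memcpy(buffer.data(), vf.get_data(), size);
    return buffer;
}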
All the best.

Related

Use .resize() without losing the elements

I want to make a dynamic matrix and assign values to specific indices, so I have to set the size of the matrix before indexing into it. Please see the following code:
void Visual_Servoing::Pose_callback(const geometry_msgs::PoseStamped::ConstPtr &msg)
{
    Rotations.resize(index + 3, 3);
    Translations.resize(3, iterator + 1);
    std::cout << " #### I'm in Pose_Callback #### " << std::endl;
    this->hole_detection(frame);
    this->generate_line();
    Translation << msg->pose.position.x, msg->pose.position.y, msg->pose.position.z;
    Translations.col(iterator) = Translation;
    std::cout << "iterator =" << iterator << std::endl;
    QuatX = msg->pose.orientation.x;
    QuatY = msg->pose.orientation.y;
    QuatZ = msg->pose.orientation.z;
    QuatW = msg->pose.orientation.w;
    Rotation << (1 - (2*pow(QuatY,2)) - (2*pow(QuatZ,2))), (2*QuatX*QuatY + 2*QuatW*QuatZ), (2*QuatX*QuatZ - 2*QuatW*QuatY),
                (2*QuatX*QuatY - 2*QuatW*QuatZ), (1 - (2*pow(QuatX,2)) - (2*pow(QuatZ,2))), (2*QuatY*QuatZ + 2*QuatW*QuatX),
                (2*QuatX*QuatZ + 2*QuatW*QuatY), (2*QuatY*QuatZ - 2*QuatW*QuatX), (1 - (2*pow(QuatX,2)) - (2*pow(QuatY,2)));
    Rotations.block(index, 0, 3, 3) = Rotation;
    std::cout << Translation << std::endl;
    std::cout << Translations << std::endl;
    std::cout << Rotation << std::endl;
    std::cout << Rotations << std::endl;
    if (iterator > 10)
    {
        Eigen::MatrixXf VectorsCam(3, theta1.size()); // Matrix contains the vectors in camera frame
        VecsinInertial.resize(3, theta1.size());
        CirclePosinFrame();
        for (int i = 0; i < theta1.size(); i++)
        {
            VecsinInertial.col(i) = Translations.col(i) + Rotations.block(index, 0, 3, 3) * VectorsCam.col(i); // Each column represents a vector in the inertial frame
        }
        VectorCam = Eigen::MatrixXf::Zero(3, 1);
        VectorsCam = Eigen::MatrixXf::Zero(3, theta1.size());
        theta1.clear();
        theta2.clear();
        iterator = 0;
    }
    iterator = iterator + 1;
    index = index + 3;
}
How can I update the sizes of the Translations and Rotations matrices without losing the values already in them? I already declared Translations and Rotations in the header file as:
Eigen::MatrixXf Translations; // Matrix of Camera Translations Vectors
Eigen::MatrixXf Rotations;
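In case it helps: Eigen's conservativeResize() does this; unlike resize(), it keeps the coefficients that are already there. A minimal sketch, assuming float matrices as in the question:
#include <Eigen/Dense>
Eigen::MatrixXf Translations(3, 1);
Translations.col(0) << 1.f, 2.f, 3.f;
// Grow by one column; the existing column keeps its values, and only
// the newly added column is uninitialized until we fill it.
Translations.conservativeResize(3, 2);
Translations.col(1) << 4.f, 5.f, 6.f;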

Efficient zero padding using cudaMemcpy3D

I would like to transfer a 3D array stored in linear memory on the host into a larger (3D) array on the device. As an example (see below), I tried to transfer a (3x3x3) array into a (5x5x3) array.
I expect that (after copying back to the host for printing) I get 2D slices with the following pattern:
x x x 0 0
x x x 0 0
x x x 0 0
0 0 0 0 0
0 0 0 0 0
where x are the values of my array. However, I get something like this, where y are the values of the next 2D slice:
x x x 0 0
x x x 0 0
x x x 0 0
y y y 0 0
y y y 0 0
From the cudaMemcpy3D documentation I would have expected the extent parameter to take the padding in the vertical axis into account, but apparently it does not.
Am I mistaken in my understanding of the documentation? If so, is there another way to perform this operation? The final transfer will be a 60x60x900 array into an array of size 1100x1500x900. I use the zero padding to prepare a Fourier transform.
Here is the simplified code that I used:
cudaError_t cuda_status;
cudaPitchedPtr d_ptr;
cudaExtent d_extent = make_cudaExtent(sizeof(int)*5,sizeof(int)*5,sizeof(int)*3);
cudaExtent h_extent = make_cudaExtent(sizeof(int)*3,sizeof(int)*3,sizeof(int)*3);
int* h_array = (int*) malloc(27*sizeof(int));
int* h_result = (int*) malloc(512*sizeof(int)*5*3);
for (int i = 0; i < 27; i++)
{
    h_array[i] = i;
}
cuda_status = cudaMalloc3D(&d_ptr, d_extent);
cout << cudaGetErrorString(cuda_status) << endl;
cudaMemcpy3DParms myParms = {0};
myParms.extent = h_extent;
myParms.srcPtr.ptr = h_array;
myParms.srcPtr.pitch = 3*sizeof(int);
myParms.srcPtr.xsize = 3*sizeof(int);
myParms.srcPtr.ysize = 3*sizeof(int);
myParms.dstPtr = d_ptr;
myParms.kind = cudaMemcpyHostToDevice;
cuda_status = cudaMemcpy3D(&myParms);
cout << cudaGetErrorString(cuda_status) << endl;
cout << "Pitch: " << d_ptr.pitch << " / xsize:" << d_ptr.xsize << " / ysize:" << d_ptr.ysize << endl; // returns Pitch: 512 / xsize:20 / ysize:20 which is as expected
// Copy array to host to be able to print the values - may not be necessary
cout << cudaMemcpy(h_result, (int*) d_ptr.ptr, 512*5*3, cudaMemcpyDeviceToHost) << endl;
cout << h_result[128] << " " << h_result[3*128] << " " << h_result[5*128] << " " << endl; // output : 3 9 15 / expected 3 0 9
The problems here have to do with your extents and sizes.
When an extent is used with cudaMemcpy3D for the non-cudaArray case, it is intended to provide the size of the region in bytes. A way to think about this is that the product of the 3 dimensions of the extent should yield the size of the region in bytes.
What you're doing however is scaling each of the 3 dimensions by the element size, which is not correct:
cudaExtent h_extent = make_cudaExtent(sizeof(int)*3,sizeof(int)*3,sizeof(int)*3);
                                      ^^^^^^^^^^^^^
                                      this is the only element scaling expected
You've made a similar error here:
myParms.srcPtr.xsize = 3*sizeof(int); // correct
myParms.srcPtr.ysize = 3*sizeof(int); // incorrect
We only scale the x (width) dimension by the element size, we don't scale the y (height) or z (depth) dimensions.
I haven't fully verified your code, but with those 2 changes, your code produces the output you indicate is expected:
$ cat t1593.cu
#include <iostream>
using namespace std;
int main(){
    cudaError_t cuda_status;
    cudaPitchedPtr d_ptr;
    cudaExtent d_extent = make_cudaExtent(sizeof(int)*5,5,3);
    cudaExtent h_extent = make_cudaExtent(sizeof(int)*3,3,3);
    int* h_array = (int*) malloc(27*sizeof(int));
    int* h_result = (int*) malloc(512*sizeof(int)*5*3);
    for (int i = 0; i < 27; i++)
    {
        h_array[i] = i;
    }
    cuda_status = cudaMalloc3D(&d_ptr, d_extent);
    cout << cudaGetErrorString(cuda_status) << endl;
    cudaMemcpy3DParms myParms = {0};
    myParms.extent = h_extent;
    myParms.srcPtr.ptr = h_array;
    myParms.srcPtr.pitch = 3*sizeof(int);
    myParms.srcPtr.xsize = 3*sizeof(int);
    myParms.srcPtr.ysize = 3;
    myParms.dstPtr = d_ptr;
    myParms.kind = cudaMemcpyHostToDevice;
    cuda_status = cudaMemcpy3D(&myParms);
    cout << cudaGetErrorString(cuda_status) << endl;
    cout << "Pitch: " << d_ptr.pitch << " / xsize:" << d_ptr.xsize << " / ysize:" << d_ptr.ysize << endl; // returns Pitch: 512 / xsize:20 / ysize:5 which is as expected
    // Copy array to host to be able to print the values - may not be necessary
    cout << cudaMemcpy(h_result, (int*) d_ptr.ptr, d_ptr.pitch*5*3, cudaMemcpyDeviceToHost) << endl;
    cout << h_result[128] << " " << h_result[3*128] << " " << h_result[5*128] << " " << endl; // output: 3 0 9, as expected
}
$ nvcc -o t1593 t1593.cu
$ cuda-memcheck ./t1593
========= CUDA-MEMCHECK
no error
no error
Pitch: 512 / xsize:20 / ysize:5
0
3 0 9
========= ERROR SUMMARY: 0 errors
$
I should also point out that the strided memcpy operations in CUDA (e.g. cudaMemcpy2D, cudaMemcpy3D) are not necessarily the fastest way to conduct such a transfer. You can find write-ups of this characteristic in various questions about cudaMemcpy2D under the SO cuda tag.
The net of it is that it may be faster to transfer the data to the device in an unstrided, unpadded linear transfer, then write a CUDA kernel to take the data that is now on the device, and place it in the array of interest, with appropriate striding/padding.
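A minimal sketch of that alternative, using the 3x3x3 -> 5x5x3 int example above (the kernel name and launch configuration are mine): copy the packed source to the device with one linear cudaMemcpy, zero the padded destination with cudaMemset, then let a kernel scatter the elements into the padded layout.
// One thread per source element; all indices are in elements, not bytes.
__global__ void pad3d(const int* src, int* dst,
                      int sw, int sh, int sd, // source width/height/depth
                      int dw, int dh)         // padded width/height
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = blockIdx.z * blockDim.z + threadIdx.z;
    if (x < sw && y < sh && z < sd)
        dst[(z * dh + y) * dw + x] = src[(z * sh + y) * sw + x];
}
// Usage sketch: d_src holds the 27 packed ints, d_dst is 5*5*3 ints
// cleared with cudaMemset; a single 3x3x3 block covers the source:
// pad3d<<<dim3(1,1,1), dim3(3,3,3)>>>(d_src, d_dst, 3, 3, 3, 5, 5);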

Subtracting two integers causes integer-underflow in device code

In my CUDA device code I do a check where I subtract blockDim.x from the thread's id to see whether or not the data I might want to use is in range. But when this number goes below 0, it seems to wrap back around to the maximum value instead.
#include <iostream>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
float input[] =
{
1.5f, 2.5f, 3.5f,
4.5f, 5.5f, 6.5f,
7.5f, 8.5f, 9.5f,
};
__global__ void underflowCausingFunction(float* in, float* out)
{
    int id = (blockDim.x * blockIdx.x) + threadIdx.x;
    out[id] = id - blockDim.x;
}

int main()
{
    float* in;
    float* out;
    cudaMalloc(&in, sizeof(float) * 9);
    cudaMemcpy(in, input, sizeof(float) * 9, cudaMemcpyHostToDevice);
    cudaMalloc(&out, sizeof(float) * 9);
    underflowCausingFunction<<<3, 3>>>(in, out);
    float recivedOut[9];
    cudaMemcpy(recivedOut, out, sizeof(float) * 9, cudaMemcpyDeviceToHost);
    cudaDeviceSynchronize();
    std::cout << recivedOut[0] << " " << recivedOut[1] << " " << recivedOut[2] << "\n"
              << recivedOut[3] << " " << recivedOut[4] << " " << recivedOut[5] << "\n"
              << recivedOut[6] << " " << recivedOut[7] << " " << recivedOut[8] << "\n";
    cudaFree(in);
    cudaFree(out);
    std::cin.get();
}
The output of this is:
4.29497e+09 4.29497e+09 4.29497e+09
0 1 2
3 4 5
I'm not sure why it's acting like an unsigned int.
If it is relevant, I am using a GTX 970 and the NVCC compiler that comes with the Visual Studio plugin. If somebody could explain what's happening or what I'm doing wrong, that would be great.
The built-in variables like threadIdx and blockIdx are composed of unsigned quantities.
In C++, when you subtract an unsigned quantity from a signed integer quantity:
out[id] = id - blockDim.x;
the arithmetic that gets performed is unsigned arithmetic.
Since you want signed arithmetic (apparently) the correct thing to do is to make sure both quantities being subtracted are of signed type (let's use int in this case):
out[id] = id - (int)blockDim.x;
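A quick host-side illustration of the same promotion rule (plain C++, for demonstration only):
#include <iostream>
int main()
{
    int id = 0;
    unsigned int dim = 3;
    // id is converted to unsigned int, so the result wraps around:
    std::cout << id - dim << "\n";      // prints 4294967293
    // casting to int keeps the arithmetic signed, as intended:
    std::cout << id - (int)dim << "\n"; // prints -3
}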

Getting the pitch of a rotation in C++

I have inherited this function in my code, written by someone else. I don't understand the theory behind it, but it seems to be working.
Could somebody point me in the right direction?
We are calculating the slope of a road from a point cloud with RANSAC.
rotation is the world-to-local matrix, so plane_normal_rot is the normal vector of the plane in the world.
But after that I don't understand what is going on.
ransac.getModelCoefficients(model_coefficients);
std::cout << "#############################" << std::endl;
std::cout << "PLANE MODEL: " << model_coefficients[0] << " " << model_coefficients[1] << " " << model_coefficients[2] << " " << model_coefficients[3];
std::cout << "#############################" << std::endl;
double a = model_coefficients[0];
double b = model_coefficients[1];
double c = model_coefficients[2];
tf::Vector3 plane_normal(a, b, c);
tf::Vector3 plane_normal_rot(0, 0, 0);
//tf::Matrix3x3 rotation_tr = rotation.transpose();
tf::Matrix3x3 rotation_tr = rotation;
plane_normal_rot.setX( (plane_normal.getX() * rotation_tr[0][0])
                     + (plane_normal.getY() * rotation_tr[0][1])
                     + (plane_normal.getZ() * rotation_tr[0][2]));
plane_normal_rot.setY( (plane_normal.getX() * rotation_tr[1][0])
                     + (plane_normal.getY() * rotation_tr[1][1])
                     + (plane_normal.getZ() * rotation_tr[1][2]));
plane_normal_rot.setZ( (plane_normal.getX() * rotation_tr[2][0])
                     + (plane_normal.getY() * rotation_tr[2][1])
                     + (plane_normal.getZ() * rotation_tr[2][2]));
// Check sign
if (plane_normal_rot.getZ() < 0)
{
    plane_normal_rot *= (-1);
}
pitch = asin(plane_normal_rot.getX());
If I haven't been clear, or you feel like you're missing info, please tell me.
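For what it's worth, the element-wise block above is just a matrix-vector product: it applies rotation_tr to the plane normal, flips the normal so it points upward, and then reads the pitch off the x-component. A compact sketch assuming tf's operator overloads (and normalizing in case the RANSAC coefficients are not unit length):
tf::Vector3 plane_normal_rot = rotation_tr * plane_normal.normalized();
if (plane_normal_rot.getZ() < 0)
    plane_normal_rot *= -1; // make the normal point upward
// For a unit normal, the x-component is the sine of the angle between
// the road plane and horizontal, i.e. the pitch.
pitch = asin(plane_normal_rot.getX());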

Verifying essential matrix

I'm trying to code a simple structure-from-motion scenario, using only 2 images taken with the same camera.
I use SIFT to find matching points between the images (72 matches in total), of which 62 are correct.
I use OpenCV to calculate the fundamental matrix, then the essential matrix. When I try to verify the essential matrix by computing p2^T * E * p1, I get very high values instead of values close to zero.
Am I doing something wrong?
Here's the code: (pts1, pts2 are std::vector<Point2f>. dmat is Mat_<double>)
int n = pts1.size();
std::cout << "Total point matches: " << n << std::endl;
std::vector<unsigned char> status(n);
std::cout << "K=" << K << std::endl;
F = findFundamentalMat(pts1, pts2,FM_RANSAC,3,0.99,status);
std::cout << "F=" << F << std::endl;
std::cout << "Total inliers: " << std::accumulate(status.begin(),status.end(),0) << std::endl;
E = K.t() * F * K;
std::cout << "E=" << E << std::endl;
for (int i = 0; i < n; ++i)
{
    dmat p1(3,1), p2(3,1);
    p1 << pts1[i].x, pts1[i].y, 1;
    p2 << pts2[i].x, pts2[i].y, 1;
    dmat mv = p2.t() * E * p1;
    double v = mv(0, 0);
    std::cout << v << std::endl;
}
and here is the output from this code:
Total point matches: 72
K=[390.0703661671206, 0, 319.5;
0, 390.0703661671206, 239.5;
0, 0, 1]
F=[-2.723736291531157e-007, 7.660367616625481e-005, -0.01766345189507435;
-4.219955880897177e-005, 9.025976628215733e-006, -0.04376995849516735;
0.009562535474535394, 0.03723116011143099, 1]
Total inliers: 62
E=[-0.04144297973569942, 11.65562396370436, 0.2325229628055823;
-6.420869252333299, 1.373346486079092, -21.48936503378938;
-0.2462444924550576, 24.91291898830852, -0.03174504032716108]
188648
-38467.5
-34880.7
289671
257263
87504.7
462472
-30138.1
-30569.3
174520
-32342.8
-32342.8
-37543.4
241378
-36875.4
-36899
-38796.4
-38225.2
-38120.9
394285
-440986
396805
455397
543629
14281.6
630398
-29714.6
191699
-37854.1
-39295.8
-3395.93
-3088.56
629769
-28132.9
178537
878596
-58957.9
-31034.5
-30677.3
-29854.5
165689
-13575.9
-13294.3
-6607.96
-3446.41
622355
-31803
-35149
-38455.4
2068.12
82164.6
-35731.2
-36252.7
-36746.9
-35325.3
414185
-35216.3
-126107
-5551.84
100196
2.29755e+006
177785
-31991.8
-31991.8
100340
108897
108897
84660.4
-7828.65
225817
225817
295423
The equation v2^T * E * v1 = 0 holds for the essential matrix only when v2 and v1 are in normalized coordinates, i.e. v1 = K^(-1) * p1, with p1 the observed point in pixels. The same goes for v2 and p2.
If you have it, you can refer to definition 9.16 on page 257 of Hartley and Zisserman's book. But note that this makes sense, given the relation E = K.t() * F * K.
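A minimal sketch of the corrected check, reusing the question's variables (K, E, pts1, pts2, n, dmat): normalize the pixel coordinates with K^(-1) before evaluating the epipolar constraint.
dmat Kinv = K.inv();
for (int i = 0; i < n; ++i)
{
    dmat p1(3, 1), p2(3, 1);
    p1 << pts1[i].x, pts1[i].y, 1;
    p2 << pts2[i].x, pts2[i].y, 1;
    dmat v1 = Kinv * p1; // normalized coordinates
    dmat v2 = Kinv * p2;
    dmat mv = v2.t() * E * v1;
    std::cout << mv(0, 0) << std::endl; // should now be close to zero for inliers
}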