I'm trying to get every frame of the stream produced by the RGB camera of the Kinect (using SDK version 1.8) into an OpenCV (2.4.10) Mat_<Vec3b>. This is my current algorithm, which is not at all fast:
Mat_<Vec3b> mat = Mat::zeros(480, 640, CV_8UC3);
NUI_IMAGE_FRAME imageFrame;
NUI_LOCKED_RECT lockedRect;
if (sensor->NuiImageStreamGetNextFrame(colorStream, 0, &imageFrame) < 0) { return; }
INuiFrameTexture* texture = imageFrame.pFrameTexture;
texture->LockRect(0, &lockedRect, NULL, 0);
if (lockedRect.Pitch != 0)
{
BYTE* upperLeftCorner = (BYTE*)lockedRect.pBits;
BYTE* pointerToTheByteBeingRead = upperLeftCorner;
for (int i = 0; i < 480; i++)
{
for (int j = 0; j < 640; j++)
{
unsigned char r = *pointerToTheByteBeingRead;
pointerToTheByteBeingRead += 1;
unsigned char g = *pointerToTheByteBeingRead;
pointerToTheByteBeingRead += 1;
unsigned char b = *pointerToTheByteBeingRead;
pointerToTheByteBeingRead += 2; // +2 so as to skip the alpha channel
mat.at<Vec3b>(Point(j, i))[0] = r;
mat.at<Vec3b>(Point(j, i))[1] = g;
mat.at<Vec3b>(Point(j, i))[2] = b;
}
}
}
texture->UnlockRect(0);
sensor->NuiImageStreamReleaseFrame(colorStream, &imageFrame);
I checked the OpenCV documentation and I understand I'm supposed to use pointer access to increase efficiency. Are Mat_<Vec3b>s stored in memory the same way as Mats, or should I do some other pointer arithmetic?
Also, I understand that updating every single pixel on every frame is not the most efficient way of displaying the stream through a Mat. What else could I do?
Finally figured out how to use pointer arithmetic. The code is self-explanatory:
Mat_<Vec3b> mat = Mat::zeros(480, 640, CV_8UC3);
NUI_IMAGE_FRAME imageFrame;
NUI_LOCKED_RECT lockedRect;
if (sensor->NuiImageStreamGetNextFrame(colorStream, 0, &imageFrame) < 0) { return; }
INuiFrameTexture* texture = imageFrame.pFrameTexture;
texture->LockRect(0, &lockedRect, NULL, 0);
if (lockedRect.Pitch != 0)
{
BYTE* upperLeftCorner = (BYTE*)lockedRect.pBits;
BYTE* pointerToTheByteBeingRead = upperLeftCorner;
for (int i = 0; i < 480; i++)
{
Vec3b *pointerToRow = mat.ptr<Vec3b>(i);
for (int j = 0; j < 640; j++)
{
unsigned char r = *pointerToTheByteBeingRead;
pointerToTheByteBeingRead += 1;
unsigned char g = *pointerToTheByteBeingRead;
pointerToTheByteBeingRead += 1;
unsigned char b = *pointerToTheByteBeingRead;
pointerToTheByteBeingRead += 2; // +2 so as to skip the alpha channel
pointerToRow[j] = Vec3b(r, g, b);
}
}
}
texture->UnlockRect(0);
sensor->NuiImageStreamReleaseFrame(colorStream, &imageFrame);
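For what it's worth, the per-pixel copy can be avoided entirely by wrapping the locked buffer in a Mat header and letting cvtColor do the BGRA-to-BGR conversion. This is only a minimal sketch (untested): it assumes the color stream really is 640x480 BGRA, and it passes lockedRect.Pitch as the row step so any padding at the end of each row is handled.
#include <opencv2/opencv.hpp>
// Sketch: wrap the Kinect buffer instead of copying pixel by pixel.
if (lockedRect.Pitch != 0)
{
    cv::Mat bgra(480, 640, CV_8UC4, lockedRect.pBits, lockedRect.Pitch);
    cv::cvtColor(bgra, mat, cv::COLOR_BGRA2BGR); // mat becomes CV_8UC3 (Vec3b)
    // cvtColor allocates mat's own storage, so it stays valid after UnlockRect().
}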
Related
I define a function void segRgb(Mat &src, Mat &dst, Rect roi), in which I try to segment the region of interest (ROI) of an input RGB image by simply thresholding a lumped pixel intensity derived from the R, G and B channels. Below is the code of the function:
void segRgb(Mat &src, Mat &dst, Rect roi)
{
uchar *bgrdata = src.data;
uchar *outdata = dst.data;
int ystart = roi.y;
int yend = roi.y + roi.height;
int xstart = roi.x;
int xend = roi.x+roi.width;
int step1 = src.cols-roi.width;
int step3 = 3*step1;
int start1 = roi.y*src.cols+roi.x;
int start3 = 3*start1;
bgrdata += start3;
outdata += start1;
uchar r, g, b;
double score=0.0;
for(int i=ystart; i<yend; i++)
{
qDebug()<<"Rows: "<<i;
for(int j=xstart; j<xend; j++)
{
b = *bgrdata++;
g = *bgrdata++;
r = *bgrdata++;
score = 0.21*r+0.72*g+0.07*b; //a simple rule to lump RGB values
if(score>100)
{
*outdata = 255;
}
else
{
*outdata = 0;
}
outdata++;
}
outdata+=step1;
bgrdata+=step3;
}
}
Following is my test code for the function:
Rect cvRect = Rect(10,50,256,256);
Mat dst;
segRgb(im, dst, cvRect); //im is a loaded Matrix of 427*640*3, CV_8UC3
namedWindow("Thresholded");
imshow("Thresholed", dst);
I run the code above, but the function segRgb does not work for some reason: no image is shown, and the loop inside segRgb does not even proceed. Can anyone point out the problem and help me debug my code a bit? Thanks!
void segRgb(Mat &src, Mat &dst, Rect roi)
{
uchar *bgrdata = src.data;
uchar *outdata = dst.data;
int ystart = roi.y;
int yend = roi.y + roi.height;
int xstart = roi.x;
int xend = roi.x + roi.width;
int step1 = src.cols - roi.width;
int step3 = 3 * step1;
int start1 = roi.y*src.cols + roi.x;
int start3 = 3 * start1;
bgrdata += start3;
outdata += start1;
uchar r, g, b;
double score = 0.0;
for (int i = ystart; i < yend; i++)
{
cout << "Rows: " << i;
for (int j = xstart; j < xend; j++)
{
b = *bgrdata++;
g = *bgrdata++;
r = *bgrdata++;
score = 0.21*r + 0.72*g + 0.07*b; //a simple rule to lump RGB values
if (score > 100)
{
*outdata = 255;
}
else
{
*outdata = 0;
}
outdata++;
}
outdata += step1;
bgrdata += step3;
}
}
int main() {
Mat im = imread("urimage");
Rect cvRect = Rect(10, 50, 256, 256);
// you have to allocate the dst Mat first, otherwise the uchar* you write through above points to garbage
Mat dst(im.size(),im.type());
segRgb(im, dst, cvRect); //im is a loaded Matrix of 427*640*3, CV_8UC3
// Crop dst to the ROI here, or change the function parameters a bit to get it directly
dst=Mat(dst, cvRect);
namedWindow("Thresholded");
imshow("Thresholed", dst);
waitKey(0);
}
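If raw pointer walking is not a requirement, the same segmentation can also be written with OpenCV primitives, which sidesteps the stride bookkeeping entirely. Below is only a rough sketch: the 0.21/0.72/0.07 weights and the threshold of 100 come from the question, while the function name and intermediate Mats are mine.
#include <opencv2/opencv.hpp>
// Sketch: weighted-sum threshold on a ROI, returning a single-channel 8-bit mask.
cv::Mat segRgbSimple(const cv::Mat& src, cv::Rect roi)
{
    std::vector<cv::Mat> bgr;
    cv::split(src(roi), bgr);                                // bgr[0]=B, bgr[1]=G, bgr[2]=R
    cv::Mat score, mask;
    cv::addWeighted(bgr[2], 0.21, bgr[1], 0.72, 0.0, score); // 0.21*R + 0.72*G
    cv::addWeighted(score, 1.0, bgr[0], 0.07, 0.0, score);   // ... + 0.07*B
    cv::threshold(score, mask, 100, 255, cv::THRESH_BINARY);
    return mask;
}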
I am using OpenCV to display a webcam feed on the screen.
The video is displayed, but there is one problem: noise appears in the output, and I do not know why.
How can I fix this?
The following is the initialization function.
The imshow call was added only for testing.
void CRtspMgr::Init() {
// init video capture
if (m_capture == NULL) {
m_capture = new VideoCapture();
}
// set address
m_capture->open(0);
namedWindow("video", 1);
}
And it is the part that draws the screen.
void CRtspMgr::OnPaint()
{
// draw video mat
CPaintDC dc(this);
if (m_capture != NULL) {
if (m_capture->isOpened()) {
RECT r;
GetClientRect(&r);
cv::Size winSize(r.right, r.bottom);
Mat mat;
m_capture->read(mat);
// test
imshow("video", mat);
int bpp = 8 * mat.elemSize();
assert((bpp == 8 || bpp == 24 || bpp == 32));
BITMAPINFO bitInfo;
bitInfo.bmiHeader.biBitCount = bpp;
bitInfo.bmiHeader.biWidth = mat.cols;
bitInfo.bmiHeader.biHeight = mat.rows;
bitInfo.bmiHeader.biPlanes = 1;
bitInfo.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
bitInfo.bmiHeader.biCompression = BI_RGB;
bitInfo.bmiHeader.biClrImportant = 0;
bitInfo.bmiHeader.biClrUsed = 0;
bitInfo.bmiHeader.biSizeImage = 0;
bitInfo.bmiHeader.biXPelsPerMeter = 0;
bitInfo.bmiHeader.biYPelsPerMeter = 0;
int destx = 0, desty = 0;
int destw = winSize.width;
int desth = winSize.height;
int imgx = 0, imgy = 0;
int imgWidth = mat.cols;
int imgHeight = mat.rows;
StretchDIBits(dc,
destx, desty, destw, desth,
imgx, imgy, imgWidth, imgHeight,
mat.data, &bitInfo, DIB_RGB_COLORS, SRCCOPY);
}
}
Invalidate();
}
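A common cause of this kind of "noise" (a sheared, garbage-looking image) is that StretchDIBits expects every scanline to be DWORD-aligned, which a cv::Mat row of width*3 bytes often is not (it happens to work out for 640-pixel-wide frames, but not for arbitrary resolutions); a positive biHeight also means a bottom-up bitmap, while cv::Mat data is top-down, which flips the image. A minimal sketch of handling both, assuming the capture delivers ordinary 8UC3 BGR frames (GDI's 24-bit DIB byte order is also BGR, so no channel swap is needed):
#include <windows.h>
#include <vector>
#include <cstring>
#include <opencv2/opencv.hpp>
// Sketch: copy the frame into a DWORD-aligned buffer and draw it top-down.
void drawMat(HDC dc, const cv::Mat& mat, int destW, int destH)
{
    CV_Assert(mat.type() == CV_8UC3);
    int alignedStep = ((mat.cols * 3 + 3) / 4) * 4;   // GDI scanline alignment
    std::vector<unsigned char> buf(alignedStep * mat.rows);
    for (int y = 0; y < mat.rows; ++y)
        memcpy(&buf[y * alignedStep], mat.ptr(y), mat.cols * 3);
    BITMAPINFO bi = {};
    bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bi.bmiHeader.biWidth = mat.cols;
    bi.bmiHeader.biHeight = -mat.rows;                // negative: top-down, like cv::Mat
    bi.bmiHeader.biPlanes = 1;
    bi.bmiHeader.biBitCount = 24;
    bi.bmiHeader.biCompression = BI_RGB;
    StretchDIBits(dc, 0, 0, destW, destH,
                  0, 0, mat.cols, mat.rows,
                  buf.data(), &bi, DIB_RGB_COLORS, SRCCOPY);
}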
I'm trying to create a dynamic array of arrays (of arrays). But for some reason the data gets corrupted. I'm using the data to generate a texture in a OpenGL application.
The following code works fine:
unsigned char imageData[64][64][3];
for (int i = 0; i < 64; i++)
{
for (int j = 0; j < 64; j++)
{
unsigned char r = 0, g = 0, b = 0;
if (i < 32)
{
if (j < 32)
r = 255;
else
b = 255;
}
else
{
if (j < 32)
g = 255;
}
imageData[i][j][0] = r;
imageData[i][j][1] = g;
imageData[i][j][2] = b;
}
std::cout << std::endl;
}
glTexImage2D(target, 0, GL_RGB, 64, 64, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData);
Problem is, I want to be able to create a texture of any size (not just 64*64). So I'm trying this:
unsigned char*** imageData = new unsigned char**[64]();
for (int i = 0; i < 64; i++)
{
imageData[i] = new unsigned char*[64]();
for (int j = 0; j < 64; j++)
{
imageData[i][j] = new unsigned char[3]();
unsigned char r = 0, g = 0, b = 0;
if (i < 32)
{
if (j < 32)
r = 255;
else
b = 255;
}
else
{
if (j < 32)
g = 255;
}
imageData[i][j][0] = r;
imageData[i][j][1] = g;
imageData[i][j][2] = b;
}
std::cout << std::endl;
}
glTexImage2D(target, 0, GL_RGB, 64, 64, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData);
But that doesn't work; the image gets all messed up, so I assume I'm creating the array of arrays (of arrays) incorrectly. What am I doing wrong?
Also, I guess I should be using vectors instead. But how can I cast the vector of vectors of vectors data into a (void*)?
This line contains multiple bugs:
unsigned char* pixel = &(imageData[(y * height) + x]);
You should multiply x by height and add y. There's also the fact that each pixel is actually 3 bytes. Some issues that led to this bug in your code (and will lead to others):
- You should be using std::vector. You can call std::vector::data to get a pointer to the underlying data when interfacing with C APIs.
- You should have a class that represents a pixel. This handles the offsetting correctly, gives things names, and makes the code clearer.
- Whenever you encode a multi-dimensional array into a single-dimensional one, try to carefully write an access function that takes care of the indexing so you can test it separately.
#include <vector>
struct Pixel {
unsigned char red;
unsigned char green;
unsigned char blue;
}; // member order matches the GL_RGB byte layout
struct TwoDimPixelArray {
TwoDimPixelArray(int width, int height)
: m_width(width), m_height(height)
{
m_vector.resize(m_width * m_height);
}
Pixel& get(int x, int y) {
return m_vector[y*m_width + x]; // row-major: row y, column x
}
Pixel* data() { return m_vector.data(); }
private:
int m_width;
int m_height;
std::vector<Pixel> m_vector;
};
int width = 64;
int height = 64;
TwoDimPixelArray imageData(width, height);
for (int x = 0; x != width ; ++ x) {
for (int y = 0; y != height ; ++y) {
auto& pixel = imageData.get(x, y);
// ... pixel.red = something, pixel.blue = something, etc
}
}
glTexImage2D(target, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData.data());
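One caveat if the texture width is not a multiple of 4: OpenGL's default row alignment (GL_UNPACK_ALIGNMENT) is 4 bytes, so for a tightly packed RGB buffer like this it is safer to call glPixelStorei(GL_UNPACK_ALIGNMENT, 1); before glTexImage2D, otherwise the rows can end up skewed.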
You need to use contiguous memory for it to work with OpenGL.
My solution is inspired by the previous answers, with a different indexing system:
unsigned char* imageData = new unsigned char[width*height*3];
unsigned char r = 0, g = 0, b = 0; // set per-pixel values as needed
const unsigned int row_size_bytes = width * 3;
for( unsigned int x = 0; x < width; x++ ) {
unsigned int current_row_offset_bytes = x * 3;
for( unsigned int y = 0; y < height; y++ ) {
unsigned int one_dim_offset = y * row_size_bytes + current_row_offset_bytes;
unsigned char* pixel = &(imageData[one_dim_offset]);
pixel[0] = r;
pixel[1] = g;
pixel[2] = b;
}
}
Unfortunately it's untested, but I'm fairly confident, assuming sizeof(char) is 1.
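Since the question also asks about vectors: the same flat layout works with a std::vector, and vector::data() can be handed to glTexImage2D directly. A minimal sketch along those lines (untested; target is assumed to be the texture target from the question, and the quadrant colours just mirror the original example):
#include <vector>
// Sketch: flat, contiguous RGB buffer indexed as (y * width + x) * 3.
const unsigned int width = 64, height = 64;      // any size works
std::vector<unsigned char> imageData(width * height * 3);
for (unsigned int y = 0; y < height; ++y)
{
    for (unsigned int x = 0; x < width; ++x)
    {
        unsigned char* pixel = &imageData[(y * width + x) * 3];
        pixel[0] = (y < height / 2 && x < width / 2) ? 255 : 0;  // red quadrant
        pixel[1] = (y >= height / 2 && x < width / 2) ? 255 : 0; // green quadrant
        pixel[2] = (y < height / 2 && x >= width / 2) ? 255 : 0; // blue quadrant
    }
}
glTexImage2D(target, 0, GL_RGB, width, height, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData.data());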
Currently I am working with Faster RCNN using C++. I am trying to load a cv::Mat object (a color image) into net_->blob_by_name("data"). I followed the instructions given here https://github.com/YihangLou/FasterRCNN-Encapsulation-Cplusplus but the result is really bad.
I didn't change anything from the original code, so I suspect that loading the data into the blob might be the issue.
Code:
float im_info[3];
float data_buf[height*width*3];
float *boxes = NULL;
float *pred = NULL;
float *pred_per_class = NULL;
float *sorted_pred_cls = NULL;
int *keep = NULL;
const float* bbox_delt;
const float* rois;
const float* pred_cls;
int num;
for (int h = 0; h < cv_img.rows; ++h )
{
for (int w = 0; w < cv_img.cols; ++w)
{
cv_new.at<cv::Vec3f>(cv::Point(w, h))[0] = float(cv_img.at<cv::Vec3b>(cv::Point(w, h))[0])-float(102.9801);
cv_new.at<cv::Vec3f>(cv::Point(w, h))[1] = float(cv_img.at<cv::Vec3b>(cv::Point(w, h))[1])-float(115.9465);
cv_new.at<cv::Vec3f>(cv::Point(w, h))[2] = float(cv_img.at<cv::Vec3b>(cv::Point(w, h))[2])-float(122.7717);
}
}
cv::resize(cv_new, cv_resized, cv::Size(width, height));
im_info[0] = cv_resized.rows;
im_info[1] = cv_resized.cols;
im_info[2] = img_scale;
for (int h = 0; h < height; ++h )
{
for (int w = 0; w < width; ++w)
{
data_buf[(0*height+h)*width+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[0]);
data_buf[(1*height+h)*width+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[1]);
data_buf[(2*height+h)*width+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[2]);
}
}
net_->blob_by_name("data")->Reshape(1, 3, height, width);
net_->blob_by_name("data")->set_cpu_data(data_buf);
net_->blob_by_name("im_info")->set_cpu_data(im_info);
net_->ForwardFrom(0);
bbox_delt = net_->blob_by_name("bbox_pred")->cpu_data();
num = net_->blob_by_name("rois")->num();
Any advice?
Can you please modify the code and check ...
cv::resize(cv_new, cv_resized, cv::Size(width, height));
im_info[0] = cv_resized.rows;
im_info[1] = cv_resized.cols;
im_info[2] = img_scale;
net_->blob_by_name("data")->Reshape(1, 3, height, width);
const shared_ptr<Blob<float> >& data_blob = net_->blob_by_name("data");
float* data_buf = data_blob->mutable_cpu_data();
for (int h = 0; h < height; ++h )
{
for (int w = 0; w < width; ++w)
{
data_buf[(0*height+h)*width+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[0]);
data_buf[(1*height+h)*width+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[1]);
data_buf[(2*height+h)*width+w] = float(cv_resized.at<cv::Vec3f>(cv::Point(w, h))[2]);
}
}
net_->Forward();
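The important change here is that data_buf now points into the blob's own storage (mutable_cpu_data() after the Reshape) rather than being a local stack array handed over via set_cpu_data, so the values the net reads are guaranteed to be the ones written above. If the per-pixel loop itself ever becomes a concern, the HWC-to-CHW copy can also be done with cv::split writing straight into the blob, in the spirit of Caffe's cpp_classification example. This is a sketch only, assuming cv_resized is CV_32FC3 of size width x height:
// Sketch: wrap each blob channel in a Mat header, then let cv::split write
// the three channels of the HWC image directly into the CHW blob.
const shared_ptr<Blob<float> >& data_blob = net_->blob_by_name("data");
data_blob->Reshape(1, 3, height, width);
float* blob_data = data_blob->mutable_cpu_data();
std::vector<cv::Mat> channels;
for (int c = 0; c < 3; ++c)
    channels.push_back(cv::Mat(height, width, CV_32FC1, blob_data + c * height * width));
cv::split(cv_resized, channels); // works because each Mat already has the right size/type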
I am acquiring images using a digital camera. At first, I was using a mono camera, but recently I upgraded to a color camera. With the mono camera I was having some palette issues until I found this bit of code to alter the palette to a grayscale palette:
for(int i=0; i<256; i++)
{
pbmi->bmiColors[i].rgbRed = BYTE(i);
pbmi->bmiColors[i].rgbGreen = BYTE(i);
pbmi->bmiColors[i].rgbBlue = BYTE(i);
pbmi->bmiColors[i].rgbReserved = BYTE(0);
}
where pbmi is a BITMAPINFO*.
This worked just fine for the mono camera. But now with the color camera I obviously don't want to make the images grayscale. However, if I remove that chunk of code I get the same palette issues that I was getting before with the mono camera. So it seems to me like I need to do something similar as I did before and create a palette, only this time a color palette.
For reference, here is the rest of the pbmi's settings:
//// INFO ////
BITMAPINFO* pbmi = (BITMAPINFO*)alloca( sizeof(BITMAPINFOHEADER) +
sizeof(RGBQUAD)*256);
pbmi->bmiHeader.biSize = sizeof (pbmi->bmiHeader);
pbmi->bmiHeader.biWidth = 2752;
pbmi->bmiHeader.biHeight = -2200;
pbmi->bmiHeader.biPlanes = 1;
pbmi->bmiHeader.biBitCount = 8;
pbmi->bmiHeader.biCompression = BI_RGB;
pbmi->bmiHeader.biSizeImage = 0;
pbmi->bmiHeader.biXPelsPerMeter = 14173;
pbmi->bmiHeader.biYPelsPerMeter = 14173;
pbmi->bmiHeader.biClrUsed = 0;
pbmi->bmiHeader.biClrImportant = 0;
So far, I have tried the following:
for(int i=0,a = 0; i < 64; i++)
{
pbmi->bmiColors[i].rgbRed = BYTE(a);
pbmi->bmiColors[i+64].rgbGreen = BYTE(a);
pbmi->bmiColors[i+64+64].rgbBlue = BYTE(a);
pbmi->bmiColors[i+64+64+64].rgbReserved = BYTE(0);
a += 4;
}
//This created a palette consisting only of cyan, yellow, and magenta colors.
//Didn't work.
for(int i=0,r=0,g=0,b=0; b <= 255; i++)
{
if(r >= 256)
{
r = 0;
g++;
}
if(g >= 256)
{
g = 0;
b++;
}
pbmi->bmiColors[i].rgbRed = BYTE(r);
pbmi->bmiColors[i].rgbGreen = BYTE(g);
pbmi->bmiColors[i].rgbBlue = BYTE(b);
pbmi->bmiColors[i].rgbReserved = BYTE(0);
r++;
}
//Here I was trying to basically count up hexadecimally from 000000 to FFFFFF.
//Caused an access violation error.
I've also tried each of those after changing pbmi->bmiHeader.biBitCount to 16, 24, and 32, none of which worked.
So my question is: How do I create a color palette based on the BITMAPINFO settings I have provided?
If you are trying to create a simple RGB palette, you just need to vary the R, G and B values from 0 to 255:
const int MaxIndex = 255;
for(int r=0; r <= MaxIndex; r++)
for(int g=0; g <= MaxIndex; g++)
for(int b=0; b <= MaxIndex; b++)
{
int i = (r * (MaxIndex + 1) + g) * (MaxIndex + 1) + b;
pbmi->bmiColors[i].rgbRed = BYTE(r);
pbmi->bmiColors[i].rgbGreen = BYTE(g);
pbmi->bmiColors[i].rgbBlue = BYTE(b);
pbmi->bmiColors[i].rgbReserved = BYTE(0);
}
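Note, however, that with biBitCount = 8 the bmiColors table can only hold 256 entries, so a full 256x256x256 RGB palette cannot fit, and the loop above would write far past the RGBQUAD[256] array that was allocated. Two common ways out are switching to a 24-bit DIB (biBitCount = 24, three bytes per pixel, no palette at all) or keeping 8-bit pixels and using an approximate palette such as 3-3-2 RGB. A sketch of the latter, assuming the 8-bit pixel values coming from the camera are themselves encoded as RRRGGGBB:
// Sketch: 256-entry palette where index i is read as 3 bits red, 3 bits green, 2 bits blue.
for (int i = 0; i < 256; i++)
{
    pbmi->bmiColors[i].rgbRed      = BYTE(((i >> 5) & 0x07) * 255 / 7);
    pbmi->bmiColors[i].rgbGreen    = BYTE(((i >> 2) & 0x07) * 255 / 7);
    pbmi->bmiColors[i].rgbBlue     = BYTE((i & 0x03) * 255 / 3);
    pbmi->bmiColors[i].rgbReserved = 0;
}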