I'm working into breaking an image(540x360) to 60x60 matrix referred by cv::Mat imga in my code, then normalizing in cv::Mat imga_normalized and finally passing each 60x60 normalized to a std::vector<cv::Mat> arrayimganorm, which has 54 elements (540 / 60 = 9; 360 / 60 = 6; 9x6 = 54).
for (int r = 0; r < src.rows; r += 60) //src.rows = 540
{
for (int c = 0; c < src.cols; c += 60) //src.cols = 360
{
cv::Mat imga = src(cv::Range(r, (r + 60)), cv::Range(c, (c + 60))).clone();
imga.convertTo(imga_normalize, CV_32F, 1.0 / 255, 0);
arrayimganorm.push_back(imga_normalize);
}
}
After this I need to copy the data from arrayimganorm to a input tensor, but I don't know how.
I already tried this 2 ways, but they don't work as well:
Without memcpy:
std::vector<cv::Mat>* data = &arrayimganorm;
for(int w = 0; w < 54; w++)
{
interpreter->typed_input_tensor<float>(0)[w] = *(data++);
}
// This gaves me there is no proper conversion function for
// std::vector<cv::Mat> to float. What I misunderstood is that
// imga_normalize it's an CV_32 and the first inputs dimension is 54.
With memcpy:
for(int w = 0; w < 54; w++)
{
memcpy(interpreter->typed_input_tensor<float>(0)[w], arrayimganorm[w].data , arrayimganorm[w].total() * arrayimganorm[w].elemSize());
}
// This gives me type argument is incompatible with type parameter
Some extra information:
I'm just a beginner in OpenCV and Tensorflow Lite in C++, so sorry If my doubt sounds weird.
model = tflite::FlatBufferModel::BuildFromBuffer(ptr, sizeof(ptr));
if (model == nullptr)
{
fprintf(stderr, "Failed to load model\n");
exit(-1);
}
tflite::ops::builtin::BuiltinOpResolver resolver;
tflite::InterpreterBuilder(*model.get(), resolver)(&interpreter);
if (interpreter == nullptr)
{
fprintf(stderr, "Failed to initiate the interpreter\n");
exit(-1);
}
interpreter->ResizeInputTensor(0, { 54, 60, 60, 1 });
// I did this because my standard input tensor has 1 x 60 x 60 x 1
// dimensions (converted .pb to tflite, which is another misunderstood
// question, maybe for other day.. but just sayin in .pb I got
// {? x 60 x 60 x 1} dimensions and the conversion got me
// 1 x 60 x 60 x 1), but I need to get faster results and invoking
// interpreter to one by one input were consuming a lot of time, so I
// changed to 54, because it's 54 60x60 matrix.
...
auto valor = interpreter->tensor(0)->dims->data[0]; //prints 54
auto height = interpreter->tensor(0)->dims->data[1]; //prints 60
auto width = interpreter->tensor(0)->dims->data[2]; //prints 60
auto channels = interpreter->tensor(0)->dims->data[3]; //prints 1
Any idea of how can I pass this array of array to the input tensor?
Related
I was trying to pack multiple images in a single image, using Bin Packing algorithm. In the part of adding images in a single image I was trying with collecting all the image pixel values and put them in the empty frame, but this is not working. Is there any suggestions?
Hi Edited the question,
` FIBITMAP *out_bmp = FreeImage_Allocate(4096, 4096, 32, 0, 0, 0);
BYTE *out_bits = FreeImage_GetBits(out_bmp);
int out_pitch = FreeImage_GetPitch(out_bmp);
// copy all the images to the final one
for (int i = 0; i < files.size(); i++) {
string s = "PathToFile" + files[i];
FIBITMAP* img0 = FreeImage_Load(FreeImage_GetFileType(s.c_str(), 0), s.c_str());
// make sure the input picture is 32-bits
if (FreeImage_GetBPP(img0) != 32) {
FIBITMAP *new_bmp = FreeImage_ConvertTo32Bits(img0);
FreeImage_Unload(img0);
img0 = new_bmp;
}
int img_pitch = FreeImage_GetPitch(img0);
BYTE *img_bits = FreeImage_GetBits(img0);
BYTE *out_bits_ptr = out_bits + out_pitch *
FreeImage_GetHeight(img0) + 4 * FreeImage_GetWidth(img0);
for (int y = 0; y < FreeImage_GetHeight(img0); y += 1) {
memcpy(out_bits_ptr, img_bits, FreeImage_GetWidth(img0) * 4);
out_bits_ptr += out_pitch;
img_bits += img_pitch;
}
}`
I am trying to use the vl_slic_segment function of the VLFeat library using an input image stored in an OpenCV Mat. My code is compiling and running, but the output superpixel values do not make sense. Here is my code so far :
Mat bgrUChar = imread("/pathtowherever/image.jpg");
Mat bgrFloat;
bgrUChar.convertTo(bgrFloat, CV_32FC3, 1.0/255);
cv::Mat labFloat;
cvtColor(bgrFloat, labFloat, CV_BGR2Lab);
Mat labels(labFloat.size(), CV_32SC1);
vl_slic_segment(labels.ptr<vl_uint32>(),labFloat.ptr<const float>(),labFloat.cols,labFloat.rows,labFloat.channels(),30,0.1,25);
I have tried not converting it to the Lab colorspace and setting different regionSize/regularization, but the output is always very glitchy. I am able to retrieve the label values correctly, the thing is the every labels is usually scattered on a little non-contiguous area.
I think the problem is the format of my input data is wrong but I can't figure out how to send it properly to the vl_slic_segment function.
Thank you in advance!
EDIT
Thank you David, as you helped me understand, vl_slic_segment wants data ordered as [LLLLLAAAAABBBBB] whereas OpenCV is ordering its data [LABLABLABLABLAB] for the LAB color space.
In the course of my bachelor thesis I have to use VLFeat's SLIC implementation as well. You can find a short example applying VLFeat's SLIC on Lenna.png on GitHub: https://github.com/davidstutz/vlfeat-slic-example.
Maybe, a look at main.cpp will help you figuring out how to convert the images obtained by OpenCV to the right format:
// OpenCV can be used to read images.
#include <opencv2/opencv.hpp>
// The VLFeat header files need to be declared external.
extern "C" {
#include "vl/generic.h"
#include "vl/slic.h"
}
int main() {
// Read the Lenna image. The matrix 'mat' will have 3 8 bit channels
// corresponding to BGR color space.
cv::Mat mat = cv::imread("Lenna.png", CV_LOAD_IMAGE_COLOR);
// Convert image to one-dimensional array.
float* image = new float[mat.rows*mat.cols*mat.channels()];
for (int i = 0; i < mat.rows; ++i) {
for (int j = 0; j < mat.cols; ++j) {
// Assuming three channels ...
image[j + mat.cols*i + mat.cols*mat.rows*0] = mat.at<cv::Vec3b>(i, j)[0];
image[j + mat.cols*i + mat.cols*mat.rows*1] = mat.at<cv::Vec3b>(i, j)[1];
image[j + mat.cols*i + mat.cols*mat.rows*2] = mat.at<cv::Vec3b>(i, j)[2];
}
}
// The algorithm will store the final segmentation in a one-dimensional array.
vl_uint32* segmentation = new vl_uint32[mat.rows*mat.cols];
vl_size height = mat.rows;
vl_size width = mat.cols;
vl_size channels = mat.channels();
// The region size defines the number of superpixels obtained.
// Regularization describes a trade-off between the color term and the
// spatial term.
vl_size region = 30;
float regularization = 1000.;
vl_size minRegion = 10;
vl_slic_segment(segmentation, image, width, height, channels, region, regularization, minRegion);
// Convert segmentation.
int** labels = new int*[mat.rows];
for (int i = 0; i < mat.rows; ++i) {
labels[i] = new int[mat.cols];
for (int j = 0; j < mat.cols; ++j) {
labels[i][j] = (int) segmentation[j + mat.cols*i];
}
}
// Compute a contour image: this actually colors every border pixel
// red such that we get relatively thick contours.
int label = 0;
int labelTop = -1;
int labelBottom = -1;
int labelLeft = -1;
int labelRight = -1;
for (int i = 0; i < mat.rows; i++) {
for (int j = 0; j < mat.cols; j++) {
label = labels[i][j];
labelTop = label;
if (i > 0) {
labelTop = labels[i - 1][j];
}
labelBottom = label;
if (i < mat.rows - 1) {
labelBottom = labels[i + 1][j];
}
labelLeft = label;
if (j > 0) {
labelLeft = labels[i][j - 1];
}
labelRight = label;
if (j < mat.cols - 1) {
labelRight = labels[i][j + 1];
}
if (label != labelTop || label != labelBottom || label!= labelLeft || label != labelRight) {
mat.at<cv::Vec3b>(i, j)[0] = 0;
mat.at<cv::Vec3b>(i, j)[1] = 0;
mat.at<cv::Vec3b>(i, j)[2] = 255;
}
}
}
// Save the contour image.
cv::imwrite("Lenna_contours.png", mat);
return 0;
}
In addition, have a look at README.md within the GitHub repository. The following figures show some example outputs of setting the regularization to 1 (100,1000) and setting the region size to 30 (20,40).
Figure 1: Superpixel segmentation with region size set to 30 and regularization set to 1.
Figure 2: Superpixel segmentation with region size set to 30 and regularization set to 100.
Figure 3: Superpixel segmentation with region size set to 30 and regularization set to 1000.
Figure 4: Superpixel segmentation with region size set to 20 and regularization set to 1000.
Figure 5: Superpixel segmentation with region size set to 20 and regularization set to 1000.
currently i m working on the Tizen IDE.
I had read the input data from the microPhone and apply the FFT on it... but everytime i gets the nan output.
here is my code..
ShortBuffer *pBuffer1 = pData->AsShortBufferN();
fft = new KissFFT(BUFFER_SIZE);
std::vector<short> input(pBuffer1->GetPointer(),
pBuffer1->GetPointer() + BUFFER_SIZE); // this contains audio data
std::vector<float> specturm(BUFFER_SIZE);
fft->spectrum(input, specturm);
applying FFT..
void KissFFT::spectrum(KissFFTO* fft, std::vector<short>& samples2,
std::vector<float>& spectrum) {
int len = fft->numSamples / 2 + 1;
kiss_fft_scalar* samples = (kiss_fft_scalar*) &samples2[0];
kiss_fftr(fft->config, samples, fft->spectrum);
for (int i = 0; i < len; i++) {
float re = scale(fft->spectrum[i].r) * fft->numSamples;
float im = scale(fft->spectrum[i].i) * fft->numSamples;
if (i > 0)
spectrum[i] = sqrtf(re * re + im * im) / (fft->numSamples / 2);
else
spectrum[i] = sqrtf(re * re + im * im) / (fft->numSamples);
AppLog("specturm %d",spectrum[i]); // everytime returns returns nan output
}
}
KissFFTO* KissFFT::create(int numSamples) {
KissFFTO* fft = new KissFFTO();
fft->config = kiss_fftr_alloc(numSamples/2, 0, NULL, NULL);
fft->spectrum = new kiss_fft_cpx[numSamples / 2 + 1];
fft->numSamples = numSamples;
return fft;
}
In fft->config there should be some parameters about the size of FFT like 2048, 4096, i.e. powers of 2. If you increase this value, you can get more resolution in frequency.
I am using opencv to achieve object tracking. I read that YUV image is better option to use than RGB image. My problem is that I fail to understand about the YUV format although i spend much time read notes. Y is the brightness which i believe is calculated from the combination of R, G, B component.
My main problem is how can I access and manipulate the pixels in YUV image format. In RGB format its easy to access the component and therefore change it using simple operatin like
src.at<Vec3b>(j,i).val[0] = 0; for example
But this is not the case in YUV. I need help in accessing and changing the pixel values in YUV image. For example if pixel in RGB is red, then I want to only keep the corresponding pixel in YUV and the rest is removed. Please help me with this.
I would suggest operating on your image in HSV or LAB rather than RGB.
The raw image from the camera will be in YCbCr (sometimes called YUV, which I think is incorrect, but I may be wrong), and laid out in a way that resembles something like YUYV (repeating), so if you can convert directly from that to HSV, you will avoid additional copy and conversion operations which will save you some time. That may only matter to you if you're processing video or batches of images however.
Here's some C++ code for converting between YCbCr and RGB (one uses integer math, the other floating point):
Colour::bgr Colour::YCbCr::toBgrInt() const
{
int c0 = 22987;
int c1 = -11698;
int c2 = -5636;
int c3 = 29049;
int y = this->y;
int cb = this->cb - 128;
int cr = this->cr - 128;
int b = y + (((c3 * cb) + (1 << 13)) >> 14);
int g = y + (((c2 * cb + c1 * cr) + (1 << 13)) >> 14);
int r = y + (((c0 * cr) + (1 << 13)) >> 14);
if (r < 0)
r = 0;
else if (r > 255)
r = 255;
if (g < 0)
g = 0;
else if (g > 255)
g = 255;
if (b < 0)
b = 0;
else if (b > 255)
b = 255;
return Colour::bgr(b, g, r);
}
Colour::bgr Colour::YCbCr::toBgrFloat() const
{
float y = this->y;
float cb = this->cb;
float cr = this->cr;
int r = y + 1.40200 * (cr - 0x80);
int g = y - 0.34414 * (cb - 0x80) - 0.71414 * (cr - 0x80);
int b = y + 1.77200 * (cb - 0x80);
if (r < 0)
r = 0;
else if (r > 255)
r = 255;
if (g < 0)
g = 0;
else if (g > 255)
g = 255;
if (b < 0)
b = 0;
else if (b > 255)
b = 255;
return Colour::bgr(b, g, r);
}
And a conversion from BGR to HSV:
Colour::hsv Colour::bgr2hsv(bgr const& in)
{
Colour::hsv out;
int const hstep = 255 / 3; // Hue step size between red -> green -> blue
int min = in.r < in.g ? in.r : in.g;
min = min < in.b ? min : in.b;
int max = in.r > in.g ? in.r : in.g;
max = max > in.b ? max : in.b;
out.v = max; // v
int chroma = max - min;
if (max > 0)
{
out.s = 255 * chroma / max; // s
}
else
{
// r = g = b = 0 // s = 0, v is undefined
out.s = 0;
out.h = 0;
out.v = 0; // it's now undefined
return out;
}
if (chroma == 0)
{
out.h = 0;
return out;
}
const int chroma2 = chroma * 2;
int offset;
int diff;
if (in.r == max)
{
offset = 3 * hstep;
diff = in.g - in.b;
}
else if (in.g == max)
{
offset = hstep;
diff = in.b - in.r;
}
else
{
offset = 2 * hstep;
diff = in.r - in.g;
}
int h = offset + (diff * (hstep + 1)) / chroma2;
// Rotate such that red has hue 0
if (h >= 255)
h -= 255;
assert(h >= 0 && h < 256);
out.h = h;
return out;
Unfortunately I do not have code to do this in one step.
You can also use the built-in OpenCV functions for colour conversion.
cvtColor(img, img, CV_BGR2HSV);
Also the U and V components are calculated as linear combinations of RGB values. Then it means, that different intensities of red (R,0,0) are mapped to some (y*R + a,u*R + b, v*R + c), which again means that to detect "red" in YUV one can calculate if the distance of the pixel to that line determined by y,u,v,a,b,c (some of which are redundant) is close to zero. That's achievable with a single dot product. Then set the remaining pixels to the (0,128,128) in YUV space (I think that's R=0,G=0,B=0 in almost all varieties of YCrCb, YUV and such).
There are several YUV formats, but the common ones keep Y at the same resolution as the original image, but U and V are half size, and are saved as separate or interlaced planes/channels after the single channel Y image buffer.
This allows you to efficiently access Y as a 1-channel 8-bit greyscale image.
Access and manipulate pixels does not know the colorformat so the same code applies for color components Y U and V. If you need to access in RGB mode, best is probably calling cv::cvtColor for your region of interest first.
I have searched internet and stackoverflow thoroughly, but I haven't found answer to my question:
How can I get/set (both) RGB value of certain (given by x,y coordinates) pixel in OpenCV? What's important-I'm writing in C++, the image is stored in cv::Mat variable. I know there is an IplImage() operator, but IplImage is not very comfortable in use-as far as I know it comes from C API.
Yes, I'm aware that there was already this Pixel access in OpenCV 2.2 thread, but it was only about black and white bitmaps.
EDIT:
Thank you very much for all your answers. I see there are many ways to get/set RGB value of pixel. I got one more idea from my close friend-thanks Benny! It's very simple and effective. I think it's a matter of taste which one you choose.
Mat image;
(...)
Point3_<uchar>* p = image.ptr<Point3_<uchar> >(y,x);
And then you can read/write RGB values with:
p->x //B
p->y //G
p->z //R
Try the following:
cv::Mat image = ...do some stuff...;
image.at<cv::Vec3b>(y,x); gives you the RGB (it might be ordered as BGR) vector of type cv::Vec3b
image.at<cv::Vec3b>(y,x)[0] = newval[0];
image.at<cv::Vec3b>(y,x)[1] = newval[1];
image.at<cv::Vec3b>(y,x)[2] = newval[2];
The low-level way would be to access the matrix data directly. In an RGB image (which I believe OpenCV typically stores as BGR), and assuming your cv::Mat variable is called frame, you could get the blue value at location (x, y) (from the top left) this way:
frame.data[frame.channels()*(frame.cols*y + x)];
Likewise, to get B, G, and R:
uchar b = frame.data[frame.channels()*(frame.cols*y + x) + 0];
uchar g = frame.data[frame.channels()*(frame.cols*y + x) + 1];
uchar r = frame.data[frame.channels()*(frame.cols*y + x) + 2];
Note that this code assumes the stride is equal to the width of the image.
A piece of code is easier for people who have such problem. I share my code and you can use it directly. Please note that OpenCV store pixels as BGR.
cv::Mat vImage_;
if(src_)
{
cv::Vec3f vec_;
for(int i = 0; i < vHeight_; i++)
for(int j = 0; j < vWidth_; j++)
{
vec_ = cv::Vec3f((*src_)[0]/255.0, (*src_)[1]/255.0, (*src_)[2]/255.0);//Please note that OpenCV store pixels as BGR.
vImage_.at<cv::Vec3f>(vHeight_-1-i, j) = vec_;
++src_;
}
}
if(! vImage_.data ) // Check for invalid input
printf("failed to read image by OpenCV.");
else
{
cv::namedWindow( windowName_, CV_WINDOW_AUTOSIZE);
cv::imshow( windowName_, vImage_); // Show the image.
}
The current version allows the cv::Mat::at function to handle 3 dimensions. So for a Mat object m, m.at<uchar>(0,0,0) should work.
uchar * value = img2.data; //Pointer to the first pixel data ,it's return array in all values
int r = 2;
for (size_t i = 0; i < img2.cols* (img2.rows * img2.channels()); i++)
{
if (r > 2) r = 0;
if (r == 0) value[i] = 0;
if (r == 1)value[i] = 0;
if (r == 2)value[i] = 255;
r++;
}
const double pi = boost::math::constants::pi<double>();
cv::Mat distance2ellipse(cv::Mat image, cv::RotatedRect ellipse){
float distance = 2.0f;
float angle = ellipse.angle;
cv::Point ellipse_center = ellipse.center;
float major_axis = ellipse.size.width/2;
float minor_axis = ellipse.size.height/2;
cv::Point pixel;
float a,b,c,d;
for(int x = 0; x < image.cols; x++)
{
for(int y = 0; y < image.rows; y++)
{
auto u = cos(angle*pi/180)*(x-ellipse_center.x) + sin(angle*pi/180)*(y-ellipse_center.y);
auto v = -sin(angle*pi/180)*(x-ellipse_center.x) + cos(angle*pi/180)*(y-ellipse_center.y);
distance = (u/major_axis)*(u/major_axis) + (v/minor_axis)*(v/minor_axis);
if(distance<=1)
{
image.at<cv::Vec3b>(y,x)[1] = 255;
}
}
}
return image;
}