i want to transport the follow codes into c++:
gaussFilter = fspecial('gaussian', 2*neighSize+1, 0.5*neighSize);
pointFeature = imfilter(pointFeature, gaussFilter, 'symmetric');
where the pointFeature is a [height, width, 24] array.
i try to use filter2D, but it only support the 2D array.
so i want to know if there are functions in opencv that can filtering the multi-dimensional array?
You can use separable kernel filters for make anydimentional filter.
If you are using OpenCV, you could try this for a 3 Dimensional MatND:
void Smooth3DHist(cv::MatND &hist, const int& kernDimension)
{
assert(hist.dims == 3);
int x_size = hist.size[0];
int y_size = hist.size[1];
int z_size = hist.size[2];
int xy_size = x_size*y_size;
cv::Mat kernal = cv::getGaussianKernel(kernDimension, -1, CV_32F);
// Filter XY dimensions for every Z
for (int z = 0; z < z_size; z++)
{
float *ind = (float*)hist.data + z * xy_size; // sub-matrix pointer
cv::Mat subMatrix(2, hist.size, CV_32F, ind);
cv::sepFilter2D(subMatrix, subMatrix, CV_32F, kernal.t(), kernal, Point(-1,-1), 0.0, cv::BORDER_REPLICATE);
}
// Filter Z dimension
float* kernGauss = (float *)kernal.data;
unsigned kernSize = kernal.total();
int kernMargin = (kernSize - 1)/2;
float* lineBuffer = new float[z_size + 2*kernMargin];
for (int y = 0; y < y_size; y++)
{
for (int x = 0; x < x_size; x++)
{
// Copy along Z dimension into a line buffer
float* z_ptr = (float*)hist.data + y * x_size + x;//same as hist.ptr<float>(0, y, x)
for (int z = 0; z < z_size; z++, z_ptr += xy_size)
{
lineBuffer[z + kernMargin] = *z_ptr;
}
// Replicate borders
for (int m = 0; m < kernMargin; m++)
{
lineBuffer[m] = lineBuffer[kernMargin];// replicate left side
lineBuffer[z_size + 2*kernMargin - 1 - m] = lineBuffer[kernMargin + z_size - 1];//replicate right side
}
// Filter line buffer 1D - convolution
z_ptr = (float*)hist.data + y * x_size + x;
for (int z = 0; z < z_size; z++, z_ptr += xy_size)
{
*z_ptr = 0.0f;
for (unsigned k = 0; k < kernSize; k++)
{
*z_ptr += lineBuffer[z+k]*kernGauss[k];
}
}
}
}
delete [] lineBuffer;
}
Related
I'm trying to use GPU Delegate in Tensorflow Lite on iOS. My model has inputs and outputs as OpenCV BGR image ([258, 540, 3]). How can I set inputs and outputs in C++ tensorflow lite interpreter? I tried to use this code
int input = interpreter->inputs()[0];
float* out = interpreter->typed_tensor<float>(input);
NSData* slicedData = [self inputDataFromCvMat:slicedImage];
uint8_t* in = (uint8_t*) slicedData.bytes;
ProcessInputWithFloatModel(in, out, WIDTH, HEIGHT, CHANNELS);
void ProcessInputWithFloatModel(uint8_t* input, float* buffer, int image_width, int image_height, int image_channels) {
for (int y = 0; y < wanted_input_height; ++y) {
float* out_row = buffer + (y * wanted_input_width * wanted_input_channels);
for (int x = 0; x < wanted_input_width; ++x) {
const int in_x = (y * image_width) / wanted_input_width;
const int in_y = (x * image_height) / wanted_input_height;
uint8_t* input_pixel =
input + (in_y * image_width * image_channels) + (in_x * image_channels);
float* out_pixel = out_row + (x * wanted_input_channels);
for (int c = 0; c < wanted_input_channels; ++c) {
out_pixel[c] = (input_pixel[c] - input_mean) / input_std;
}
}
}
}
- (NSData *)inputDataFromCvMat:(Mat)image {
NSMutableData *inputData = [[NSMutableData alloc] initWithCapacity:0];
for (int row = 0; row < HEIGHT + 10; row++) {
for (int col = 0; col < WIDTH + 10; col++) {
Vec3b intensity = image.at<Vec3b>(row, col);
int blue = intensity.val[0];
int green = intensity.val[1];
int red = intensity.val[2];
// we need to put pixel values in BGR (model was trained with opencv)
[inputData appendBytes:&blue length:sizeof(blue)];
[inputData appendBytes:&green length:sizeof(green)];
[inputData appendBytes:&red length:sizeof(red)];
}
}
return inputData;
}
but I don't know what is wrong
After some research, I managed to get it working
const int wanted_input_width = 258;
const int wanted_input_height = 540;
const int wanted_input_channels = 3;
Mat image = ...
// write to input
int input = interpreter->inputs()[0];
float* out = interpreter->typed_tensor<float>(input);
uint8_t* in = image.ptr<uint8_t>(0);
ProcessInputWithFloatModel(in, out);
// run interpreter
if (interpreter->Invoke() != kTfLiteOk) {
LOG(FATAL) << "Failed to invoke!";
}
// get output
int output_idx = interpreter->outputs()[0];
float* output = interpreter->typed_output_tensor<float>(output_idx);
Mat outputMat = ProcessOutputWithFloatModel(output);
/// Preprocess the input image and feed the TFLite interpreter buffer for a float model.
void ProcessInputWithFloatModel(uint8_t* input, float* buffer) {
for (int y = 0; y < wanted_input_height; ++y) {
float* out_row = buffer + (y * wanted_input_width * wanted_input_channels);
for (int x = 0; x < wanted_input_width; ++x) {
uint8_t* input_pixel = input + (y * wanted_input_width * wanted_input_channels) + (x * wanted_input_channels);
float* out_pixel = out_row + (x * wanted_input_channels);
for (int c = 0; c < wanted_input_channels; ++c) {
out_pixel[c] = input_pixel[c] / 255.0f;
}
}
}
}
Mat ProcessOutputWithFloatModel(float* input) {
cv::Mat image = cv::Mat::zeros(wanted_input_height, wanted_input_width, CV_8UC3);
for (int y = 0; y < wanted_input_height; ++y) {
for (int x = 0; x < wanted_input_width; ++x) {
float* input_pixel = input + (y * wanted_input_width * wanted_input_channels) + (x * wanted_input_channels);
cv::Vec3b & color = image.at<cv::Vec3b>(cv::Point(x, y));
color[0] = (uchar) floor(input_pixel[0] * 255.0f);
color[1] = (uchar) floor(input_pixel[1] * 255.0f);
color[2] = (uchar) floor(input_pixel[2] * 255.0f);
}
}
return image;
}
I was learning filters in OpenCV, but I'm a little confused about the Laplacian filter. My result is very different from the Laplacian filter in OpenCV lib.
For first, I use a Gaussian filter for the image:
Mat filtroGauss(Mat src){
Mat gauss = src.clone();
Mat temp(src.rows+2,src.cols+2,DataType<uchar>::type);
int y,x;
for (y=0; y<src.rows; y++){
for (x=0; x<src.cols; x++) temp.at<uchar>(y+1,x+1) = src.at<uchar>(y,x);
}
int mask[lenMask*lenMask];
mask[0] = mask[2] = mask[6] = mask[8] = 1;
mask[1] = mask[3] = mask[5] = mask[7] = 2;
mask[4] = 4;
int denominatore = 0;
for (int i=0; i<lenMask*lenMask; i++) denominatore += mask[i];
int value[lenMask*lenMask];
for(y=0; y<src.rows; y++){
for (x=0; x<src.cols; x++){
value[0] = temp.at<uchar>(y-1,x-1)*mask[0];
value[1] = temp.at<uchar>(y-1,x)*mask[1];
value[2] = temp.at<uchar>(y-1,x+1)*mask[2];
value[3] = temp.at<uchar>(y,x-1)*mask[3];
value[4] = temp.at<uchar>(y,x)*mask[4];
value[5] = temp.at<uchar>(y,x+1)*mask[5];
value[6] = temp.at<uchar>(y+1,x-1)*mask[6];
value[7] = temp.at<uchar>(y+1,x)*mask[7];
value[8] = temp.at<uchar>(y+1,x+1)*mask[8];
int avg = 0;
for(int i=0; i<lenMask*lenMask; i++)avg+=value[i];
avg = avg/denominatore;
gauss.at<uchar>(y,x) = avg;
}
}
return gauss;
}
Then I use the Laplacian function:
L(y,x) = f(y-1,x) + f(y+1,x) + f(y,x-1) + f(y,x+1) + 4*f(y,x)
Mat filtroLaplace(Mat src){
Mat output = src.clone();
Mat temp = src.clone();
int y,x;
for (y =1; y<src.rows-1; y++){
for(x =1; x<src.cols-1; x++){
output.at<uchar>(y,x) = temp.at<uchar>(y-1,x) + temp.at<uchar>(y+1,x) + temp.at<uchar>(y,x-1) + temp.at<uchar>(y,x+1) -4*( temp.at<uchar>(y,x));
}
}
return output;
}
And here is the final result from my code:
OpenCV result:
Let's rewrite the function a little, so it's easier to discuss:
cv::Mat filtroLaplace(cv::Mat src)
{
cv::Mat output = src.clone();
for (int y = 1; y < src.rows - 1; y++) {
for (int x = 1; x < src.cols - 1; x++) {
int sum = src.at<uchar>(y - 1, x)
+ src.at<uchar>(y + 1, x)
+ src.at<uchar>(y, x - 1)
+ src.at<uchar>(y, x + 1)
- 4 * src.at<uchar>(y, x);
output.at<uchar>(y, x) = sum;
}
}
return output;
}
The source of your problem is sum. Let's examine its range in scope of this algorithm, by taking the two extremes:
Black pixel, surrounded by 4 white. That means 255 + 255 + 255 + 255 - 4 * 0 = 1020.
White pixel, surrounded by 4 black. That means 0 + 0 + 0 + 0 - 4 * 255 = -1020.
When you perform output.at<uchar>(y, x) = sum; there's an implicit cast of the int back to unsigned char -- the high order bits simply get chopped off and the value overflows.
The correct approach to handle this situation (which OpenCV takes), is to perform saturation before the actual cast. Essentially
if (sum < 0) {
sum = 0;
} else if (sum > 255) {
sum = 255;
}
OpenCV provides function cv::saturate_cast<T> to do just this.
There's an additional problem that you're not handling the edge rows/columns of the input image -- you just leave them at the original value. Since you're not asking about that, I'll leave solving that as an excercise to the reader.
Code:
cv::Mat filtroLaplace(cv::Mat src)
{
cv::Mat output = src.clone();
for (int y = 1; y < src.rows - 1; y++) {
for (int x = 1; x < src.cols - 1; x++) {
int sum = src.at<uchar>(y - 1, x)
+ src.at<uchar>(y + 1, x)
+ src.at<uchar>(y, x - 1)
+ src.at<uchar>(y, x + 1)
- 4 * src.at<uchar>(y, x);
output.at<uchar>(y, x) = cv::saturate_cast<uchar>(sum);
}
}
return output;
}
Sample input:
Output of corrected filtroLaplace:
Output of cv::Laplacian:
I want to find dominant N colors on the picture. For this purpose I decided to use KMeans algorithm. My project written on C, that is way I used cvKMeans2 algorithm. But it gives me very strange results. Then I decided to try kmeans algorithm on OpenCV C++. It gives me more accurate results. So, where is my fault? Could someone explain it to me?
1. I used this image for test.
2. Implementation on C.
#include <cv.h>
#include <highgui.h>
#define CLUSTERS 3
int main(int argc, char **argv) {
const char *filename = "test_12.jpg";
IplImage *tmp = cvLoadImage(filename);
if (!tmp) {
return -1;
}
IplImage *src = cvCloneImage(tmp);
cvCvtColor(tmp, src, CV_BGR2RGB);
CvMat *samples = cvCreateMat(src->height * src->width, 3, CV_32F);
for (int i = 0; i < samples->height; i++) {
samples->data.fl[i * 3 + 0] = (uchar) src->imageData[i * 3 + 0];
samples->data.fl[i * 3 + 1] = (uchar) src->imageData[i * 3 + 1];
samples->data.fl[i * 3 + 2] = (uchar) src->imageData[i * 3 + 2];
}
CvMat *labels = cvCreateMat(samples->height, 1, CV_32SC1);
CvMat *centers = cvCreateMat(CLUSTERS, 3, CV_32FC1);
int flags = 0;
int attempts = 5;
cvKMeans2(samples, CLUSTERS, labels,
cvTermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 10000, 0.005),
attempts, 0, flags, centers);
int rows = 40;
int cols = 300;
IplImage *des = cvCreateImage(cvSize(cols, rows), 8, 3);
int part = 4000;
int r = 0;
int u = 0;
for (int y = 0; y < 300; ++y) {
for (int x = 0; x < 40; ++x) {
if (u >= part) {
r++;
part = (r + 1) * part;
}
des->imageData[(300 * x + y) * 3 + 0] = static_cast<char>(centers->data.fl[r * 3 + 0]);
des->imageData[(300 * x + y) * 3 + 1] = static_cast<char>(centers->data.fl[r * 3 + 1]);
des->imageData[(300 * x + y) * 3 + 2] = static_cast<char>(centers->data.fl[r * 3 + 2]);
u++;
}
}
IplImage *dominant_colors = cvCloneImage(des);
cvCvtColor(des, dominant_colors, CV_BGR2RGB);
cvNamedWindow("dominant_colors", CV_WINDOW_AUTOSIZE);
cvShowImage("dominant_colors", dominant_colors);
cvWaitKey(0);
cvDestroyWindow("dominant_colors");
cvReleaseImage(&src);
cvReleaseImage(&des);
cvReleaseMat(&labels);
cvReleaseMat(&samples);
return 0;
}
3. Implementation on C++.
#include <cv.h>
#include <opencv/cv.hpp>
#define CLUSTERS 3
int main(int argc, char **argv) {
const cv::Mat &tmp = cv::imread("test_12.jpg");
cv::Mat src;
cv::cvtColor(tmp, src, CV_BGR2RGB);
cv::Mat samples(src.rows * src.cols, 3, CV_32F);
for (int y = 0; y < src.rows; y++)
for (int x = 0; x < src.cols; x++)
for (int z = 0; z < 3; z++)
samples.at<float>(y + x * src.rows, z) = src.at<cv::Vec3b>(y, x)[z];
int attempts = 5;
cv::Mat labels;
cv::Mat centers;
kmeans(samples, CLUSTERS, labels, cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 1000, 0.005),
attempts, cv::KMEANS_PP_CENTERS, centers);
cv::Mat colors(cv::Size(CLUSTERS * 100, 30), tmp.type());
int p = 100;
int cluster_id = 0;
for (int x = 0; x < CLUSTERS * 100; x++) {
for (int y = 0; y < 30; y++) {
if (x >= p) {
cluster_id++;
p = (cluster_id + 1) * 100;
}
colors.at<cv::Vec3b>(y, x)[0] = static_cast<uchar>(centers.at<float>(cluster_id, 0));
colors.at<cv::Vec3b>(y, x)[1] = static_cast<uchar>(centers.at<float>(cluster_id, 1));
colors.at<cv::Vec3b>(y, x)[2] = static_cast<uchar>(centers.at<float>(cluster_id, 2));
}
}
cv::Mat dominant_colors;
cv::cvtColor(colors, dominant_colors, CV_RGB2BGR);
cv::imshow("dominant_colors", dominant_colors);
cv::waitKey(0);
return 0;
}
4. Result of code on C.
5. Result of code on C++.
I found my mistake. It is related to IplImage's widthStep field. As I read here widthStep gets padded up to a multiple of 4 for performance reasons. If widthStep is equal to 30 it will padded up to 32.
int h = src->height;
int w = src->width;
int c = 3;
int delta = 0;
for (int i = 0, y = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
for (int k = 0; k < c; ++k, y++) {
samples->data.fl[i * w * c + c * j + k] = (uchar) src->imageData[delta + i * w * c + c * j + k];
}
}
delta += src->widthStep - src->width * src->nChannels;
}
With pointers
for (int x = 0, i = 0; x < src->height; ++x) {
auto *ptr = (uchar *) (src->imageData + x * src->widthStep);
for (int y = 0; y < src->width; ++y, i++) {
for (int j = 0; j < 3; ++j) {
samples->data.fl[i * 3 + j] = ptr[3 * y + j];
}
}
}
I'm trying to create this code in c++/ I'm using openCV but don't have to.
wi = size(Gr, 2);
he = size(Gr, 1);
cropFactor = 0.10;
[x, y] = meshgrid( round(cropFactor * wi):round( (1-cropFactor)*wi ), round(cropFactor * he):round((1-cropFactor)*he) );
xy = sub2ind(size(Gr), y(:), x(:));
here is what I have so far
int width = dst.cols;
int height = dst.rows;
double cropFactor = 0.10;
cv::Mat1i X,Y;
Utilities::Meshgrid(Utilities::MatlabRound(cropFactor * width), Utilities::MatlabRound((1 - cropFactor) * width), Utilities::MatlabRound(cropFactor * height), Utilities::MatlabRound((1-cropFactor) * height),X, Y);
Utilities::Sub2Ind(width, height, X, Y);
round() function
int Utilities::MatlabRound(double numberToRound)
{
return floor( numberToRound + 0.5);
}
this is my meshgrid() function it works as expected
void Utilities::Meshgrid(int startX, int endX, int startY, int endY, cv::Mat1i &X, cv::Mat1i & Y)
{
std::vector<int> vec_x, vec_y;
for (int i = startX; i <= endX; i++)
{
vec_x.push_back(i);
}
for (int i = startY; i <= endY; i++)
{
vec_y.push_back(i);
}
cv::Mat x = cv::Mat(vec_x);
cv::Mat y = cv::Mat(vec_y);
cv::repeat(x.reshape(1,1), y.total(), 1, X);
cv::repeat(y.reshape(1,1).t(), 1, x.total(), Y);
}
however i'm having trouble understanding what are subscripts and how to implement Sub2Ind function
Can you please explain?
update I have implemented sub2ind please see my answer
I have implemented sub2Ind for 2D matrix
it is tested and work fine
cv::Mat Utilities::Sub2Ind(int width, int height, cv::Mat X, cv::Mat Y)
{
/*sub2ind(size(a), rowsub, colsub)
sub2ind(size(a), 2 , 3 ) = 6
a = 1 2 3 ;
4 5 6
rowsub + colsub-1 * numberof rows in matrix*/
std::vector<int> index;
cv::transpose(Y,Y);
cv::MatConstIterator_<int> iterX = X.begin<int>(), it_endX = X.end<int>();
cv::MatConstIterator_<int> iterY = Y.begin<int>(), it_endY = Y.end<int>();
for (int j = 0; j < X.cols; ++j,++iterX)
{
//running on each col of y matrix
for (int i =0 ;i < Y.cols; ++i,++iterY )
{
int rowsub = *iterY;
int colsub = *iterX;
int res = rowsub + ((colsub-1)*height);
index.push_back(res);
}
int x = 5;
}
cv::Mat M(index) ;
return M;
}
Iterating through 1D array (pseudo 2D) with step of 3:
arr = new int[height * width * 3];
for (int i = 0; i < height * width * 3; i+=3) {
arr[i] = 1;
}
I have tried this, but what I got is column of one third:
for (int y = 0; y < height * 3; y++) {
for (int x = 0; x < width; x+=3) {
arr[x + width * y] = 1;
}
}
Assuming your cells have a 'size' of 3 entries, you should use the * 3 on the inner loop. Otherwise you miss 2 thirds of your cells on each row.
You also need to multiply width by 3 to get the correct row.
for (int y = 0; y < height; y++) {
for (int x = 0; x < width * 3; x+=3) {
arr[x + width * 3 * y] = 1;
}
}
In general you need the following structure for such situations:
for (int y = 0; y < height; y++) {
for (int x = 0; x < width * cellWidth; x+= cellWidth) {
arr[x + width * cellWidth * y] = 1;
}
}
(Were cellWidth is 3 in your case)
To slightly simplify this, you could assume in the loops that your cells have a width of 1 (like a normal situation) and multiply by cellWidth when actually assigning the values:
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int index = (x + width * y) * cellWidth;
arr[index + 0] = 1; // First 'cell entry'
arr[index + 1] = 1; // Second
...
arr[index + cellWidth - 1] = 1; // Last
}
}
Another solution is to create larger 'items' using a struct for example:
typedef struct { int r, int g, int b } t_rgb;
t_rgb* arr = new t_rgb[height * width];
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
arr[x + width * y].r = 1;
}
}
and you are able to use it as a regular array (the compiler does all calculations for you). This also makes it more clear what is happening in your code.
What are you trying to accomplish exactly? Setting a channel in a RGB image?
I usually do it like this:
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++)
arr[(x + width * y) * 3] = 1;
In general, to set RGB values, you can simply add an offset like this:
for (int y = 0; y < height; y++)
for (int x = 0; x < width; x++)
{
size_t base = (x + width * y) * 3;
arr[base + 0] = r;
arr[base + 1] = g;
arr[base + 2] = b;
}