TensorFlow add new op: slicing the output tensor in C++

I checked the file here:
// get the corresponding Eigen tensors for data access
auto input_tensor = input.matrix<double>();
auto weights_tensor = weights.matrix<double>();
auto biases_tensor = biases.matrix<double>();
auto output_tensor = output->matrix<double>();
for (int ix_sample = 0; ix_sample < batch_samples; ix_sample++) {
  for (int ix_unit = 0; ix_unit < units; ix_unit++) {
    output_tensor(ix_sample, ix_unit) = 0;
    for (int ix_input = 0; ix_input < input_feature_width; ix_input++) {
      output_tensor(ix_sample, ix_unit) += input_tensor(ix_sample, ix_input) * weights_tensor(ix_input, ix_unit);
    }
    output_tensor(ix_sample, ix_unit) += biases_tensor(0, ix_unit);
  }
}
And the one in the TensorFlow tutorial:
// Grab the input tensor
auto input = input_tensor.flat<int32>();
// Create an output tensor
Tensor* output_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
                                                 &output_tensor));
auto output_flat = output_tensor->flat<int32>();
// Set all but the first element of the output tensor to 0.
const int N = input.size();
for (int i = 1; i < N; i++) {
  output_flat(i) = 0;
}
I am wondering: if the output tensor is a 3-D tensor, how can I slice it and assign its values vector-wise?
I found the slice method for Eigen tensors:
Eigen::Tensor<int, 2> a(4, 3);
a.setValues({{0, 100, 200}, {300, 400, 500},
             {600, 700, 800}, {900, 1000, 1100}});
Eigen::array<int, 2> offsets = {1, 0};
Eigen::array<int, 2> extents = {2, 2};
Eigen::Tensor<int, 2> slice = a.slice(offsets, extents);
cout << "a" << endl << a << endl;
=>
a
0 100 200
300 400 500
600 700 800
900 1000 1100
cout << "slice" << endl << slice << endl;
=>
slice
300 400
600 700
But it is not clear to me how I could use it here.
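One way (a sketch, not tested against the op above; the float dtype and the 3-D output shape are my assumptions) is to take a rank-3 Eigen view of the allocated output tensor and assign to it with chip() for whole planes or slice() for rectangular blocks:

// Inside Compute(), assuming output was allocated with a 3-D shape.
auto output_3d = output->tensor<float, 3>();  // rank-3 Eigen view

// (a) chip() fixes one index and yields a writable rank-2 view:
Eigen::Tensor<float, 2> plane(output_3d.dimension(1), output_3d.dimension(2));
plane.setZero();
for (int ix = 0; ix < output_3d.dimension(0); ++ix) {
  output_3d.chip(ix, 0) = plane;  // assign plane ix in one go
}

// (b) slice() assigns a block vector-wise, e.g. one row along the last dim:
Eigen::array<Eigen::Index, 3> offsets = {0, 1, 0};
Eigen::array<Eigen::Index, 3> extents = {1, 1, output_3d.dimension(2)};
Eigen::Tensor<float, 3> row_values(1, 1, output_3d.dimension(2));
row_values.setConstant(1.0f);
output_3d.slice(offsets, extents) = row_values;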

Related

What is the risk of using struct dataContent decimate(struct dataContent)?

I have written the following code to retrieve the content from an audio file. This is just part of the full project. I just want to know: would there be any risk in using struct dataContent decimate(struct dataContent)? If yes, what are the risks, and how can I improve this code to reduce them?
struct dataContent
{
    DoubleArrayPtr data;
    DoubleArrayPtr memorydata;
    int numberofvalues;
    int datasize;
    long int sizeoffile;
};

struct dataContent decimate(struct dataContent dataprocess)
{
    int i = 0, j = 0, k = 0, l = 0, m = 0, n = 0, p = 0, q = 0, r = 0, s = 0, t = 0;
    cout << "Total number of blocks is: " << dataprocess.datasize << endl;
    size_t size = dataprocess.datasize;
    vector<double> sum(size);
    vector<double> mean(size);          // the mean is the arithmetic average of a set of given numbers
    vector<double> secondmoment(size);
    vector<double> fourthmoment(size);
    vector<double> kurtosis(size);
    sum[0] = 0.0;
    secondmoment[0] = 0.0;
    fourthmoment[0] = 0.0;
    kurtosis[0] = 0.0;
    // Finding statistical moments for the data: mean, second- and fourth-order moments, and kurtosis
    for (j = 0; j < size; ++j)
    {
        // Mean value
        for (i = k; i < (k + BUFFER_SIZE); i++)
        {
            sum[j] = sum[j] + abs(dataprocess.memorydata[i]);
            //sum[j] = sum[j] + dataprocess.memorydata[i];
        }
        mean[j] = sum[j] / BUFFER_SIZE;
        cout << "The mean of the absolute value of data in block " << (j + 1) << " is: " << mean[j] << endl;
        k = k + BUFFER_SIZE;
    }
    return dataprocess;
} // End of decimate()
Thanks for your time.
The obvious issue is that you stride along the data array in BUFFER_SIZE-sized pieces with no concern about running off the end.
I assume DoubleArrayPtr is double*; why not vector<double>, given that you use vector elsewhere?
Also, don't do this:
int i = 0, j = 0, k = 0, l = 0, m = 0, n = 0, p = 0, q = 0, r = 0, s = 0, t = 0;
Create and initialize variables at the point of use, like this:
for (int j = 0; j < size; ++j)
In C++, structs are types, so you can write:
dataContent decimate(dataContent dataprocess)
Pass in a reference to the struct, though; at the moment you are copying it:
dataContent decimate(dataContent &dataprocess)
All of this really belongs on Code Review, though. A sketch putting these points together follows.
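A minimal sketch of the revised function, under two assumptions the post does not confirm: DoubleArrayPtr is double*, and the caller guarantees at least datasize * BUFFER_SIZE elements in memorydata (the bound the original loop silently relies on):

#include <cmath>
#include <iostream>
#include <vector>

// BUFFER_SIZE and dataContent as defined elsewhere in the project.
void decimate(dataContent& dataprocess)   // reference: the struct is no longer copied
{
    std::cout << "Total number of blocks is: " << dataprocess.datasize << std::endl;
    const size_t size = dataprocess.datasize;
    std::vector<double> sum(size, 0.0);   // explicit zero-init (vector value-initializes anyway)
    std::vector<double> mean(size, 0.0);

    size_t k = 0;
    for (size_t j = 0; j < size; ++j)     // loop variables declared at the point of use
    {
        // NB: the caller must guarantee memorydata holds k + BUFFER_SIZE doubles;
        // with std::vector<double> you could check i < memorydata.size() here.
        for (size_t i = k; i < k + BUFFER_SIZE; ++i)
        {
            sum[j] += std::abs(dataprocess.memorydata[i]);
        }
        mean[j] = sum[j] / BUFFER_SIZE;
        std::cout << "The mean of the absolute value of data in block "
                  << (j + 1) << " is: " << mean[j] << std::endl;
        k += BUFFER_SIZE;
    }
}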

Problem while running "mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite" on an Edge TPU using TensorFlow Lite in C++

const std::string model_path = "/home/pi/mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite";
std::unique_ptr<tflite::FlatBufferModel> model =
    tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
// creating edge TPU
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context =
    edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
// Build the interpreter
tflite::ops::builtin::BuiltinOpResolver resolver;
std::unique_ptr<tflite::Interpreter> interpreter;
resolver.AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
tflite::InterpreterBuilder(*model.get(), resolver)(&interpreter);
interpreter->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context.get());
interpreter->SetNumThreads(1);
if (edgetpu_context == nullptr)
{
    cout << "TPU cannot be found or opened!";
}
// Resize input tensors, if desired.
TfLiteTensor* output_locations = nullptr;
TfLiteTensor* output_classes = nullptr;
TfLiteTensor* num_detections = nullptr;
// TfLiteTensor* scores = nullptr;
auto cam = cv::VideoCapture(0);
// auto cam = cv::VideoCapture("../demo.mp4");
std::vector<std::string> labels;
auto file_name = "/home/pi/labelmap.txt";
std::ifstream input(file_name);
for (std::string line; getline(input, line); )
{
    labels.push_back(line);
}
auto cam_width = cam.get(CV_CAP_PROP_FRAME_WIDTH);
auto cam_height = cam.get(CV_CAP_PROP_FRAME_HEIGHT);
while (true) {
    cv::Mat image0;
    auto success = cam.read(image0);
    if (!success) {
        std::cout << "cam fail" << std::endl;
        break;
    }
    cv::Mat image;
    resize(image0, image, Size(300, 300));
    interpreter->AllocateTensors();
    auto input = interpreter->typed_tensor<float>(0);
    //uchar* input = interpreter->typed_input_tensor<uchar>(0);

    // feed input
    auto image_height = image.rows;
    auto image_width = image.cols;
    auto image_channels = 3;
    int number_of_pixels = image_height * image_width * image_channels;
    int base_index = 0;
    // copy image to input as input tensor
    //memcpy(interpreter->typed_input_tensor<uchar>(0), image.data, image.total() * image.elemSize());
    //interpreter->SetAllowFp16PrecisionForFp32(true);
    int count1 = 0;
    for (int i = 0; i < image.rows; i++) {
        for (int j = 0; j < image.cols; j++) {
            cv::Vec3f pixel = image.at<cv::Vec3f>(i, j);
            for (int k = 0; k < image.channels(); k++) {
                input[count1] = pixel.val[k];
                count1++;
            }
        }
    }
    interpreter->SetNumThreads(1);
    interpreter->Invoke();

    output_locations = interpreter->tensor(interpreter->outputs()[0]);
    auto output_data = output_locations->data.f;
    std::vector<float> locations;
    std::vector<float> cls;
    output_classes = interpreter->tensor(interpreter->outputs()[1]);
    auto out_cls = output_classes->data.f;
    num_detections = interpreter->tensor(interpreter->outputs()[3]);
    auto nums = num_detections->data.f;
    for (int i = 0; i < 20; i++) {
        auto output = output_data[i];
        locations.push_back(output);
        cls.push_back(out_cls[i]);
    }
    int count = 0;
    std::vector<Object> objects;
    for (int j = 0; j < locations.size(); j += 4) {
        auto ymin = locations[j] * cam_height;
        auto xmin = locations[j + 1] * cam_width;
        auto ymax = locations[j + 2] * cam_height;
        auto xmax = locations[j + 3] * cam_width;
        auto width = xmax - xmin;
        auto height = ymax - ymin;
        // auto rec = Rect(xmin, ymin, width, height);
        float score = expit(nums[count]); // How has this to be done?
        // std::cout << "score: " << score << std::endl;
        // if (score < 0.5f) continue;
        // auto id = outputClasses;
        Object object;
        object.class_id = cls[count];
        object.rec.x = xmin;
        object.rec.y = ymin;
        object.rec.width = width;
        object.rec.height = height;
        object.prob = score;
        objects.push_back(object);
        count += 1;
    }
    nms(objects, 0.5);
    RNG rng(12345);
    std::cout << "size: " << objects.size() << std::endl;
    for (int l = 0; l < objects.size(); l++)
    {
        Object object = objects.at(l);
        auto score = object.prob;
        if (score < 0.60f) continue;
        Scalar color = Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
        auto cls = object.class_id;
        cv::rectangle(image0, object.rec, color, 1);
        cv::putText(image0, labels[cls + 1], cv::Point(object.rec.x, object.rec.y - 5),
                    cv::FONT_HERSHEY_COMPLEX, .8, cv::Scalar(10, 255, 30));
        std::cout << cls << std::endl;
    }
    cv::imshow("cam", image0);
    auto k = cv::waitKey(30);
    if (k != 255) {
        break;
    }
}
After running this code, I am getting the following error:
ERROR: Internal: Unsupported data type in custom op handler: 0
ERROR: Node number 0 (edgetpu-custom-op) failed to prepare.
There was recently an update to the libedgetpu.so runtime (version 13) that can cause this. I would check to make sure that you are building with the latest version of libtensorflow-lite.a, as well as linking to the newest version of libedgetpu.so.
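To confirm which runtime you are actually linking at run time, the Edge TPU C++ API exposes a version string via EdgeTpuManager. A quick check along these lines (from memory; verify the exact accessor against your edgetpu.h):

#include <iostream>
#include "edgetpu.h"

int main() {
    // Prints the libedgetpu build/runtime info, e.g. a RuntimeVersion(13)-style string.
    std::cout << edgetpu::EdgeTpuManager::GetSingleton()->Version() << std::endl;
    return 0;
}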

Check failed: 1 == NumElements() (1 vs. 1792) Must have a one element tensor in TensorFlow C++

In the Python code, the image data is assigned to tensor image_batch:
Part of the code:
image_data = misc.imread(image_path)
image_batch = graph.get_tensor_by_name("input:0")
phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
embeddings = graph.get_tensor_by_name("embeddings:0")
feed_dict = {image_batch: np.expand_dims(image_data, 0), phase_train_placeholder: False}
rep = sess.run(embeddings, feed_dict=feed_dict)
C++ code:
const float* source_data = (float*) image.data;
Tensor image_batch(DT_FLOAT, TensorShape({1, 160, 160, 3}));
auto input = image_batch.tensor<float, 4>();
for (int y = 0; y < height; ++y) {
    const float* source_row = source_data + (y * width * depth);
    for (int x = 0; x < width; ++x) {
        const float* source_pixel = source_row + (x * depth);
        for (int c = 0; c < depth; ++c) {
            const float* source_value = source_pixel + c;
            //std::cout << *source_value << std::endl;
            input(0, y, x, c) = *source_value;
        }
    }
}
Tensor phase_train(DT_BOOL, TensorShape());
phase_train.scalar<bool>()() = false;
std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
    { "input:0", image_batch },
    { "phase_train:0", phase_train },
};
std::vector<Tensor> outputs;
Status run_status = session->Run(inputs, {"embeddings:0"}, {}, &outputs);
if (!run_status.ok()) {
    std::cout << run_status.ToString() << "\n";
    return 1;
}
auto output_c = outputs[0].scalar<float>(); //Error here
std::cerr << "SHOW\n";
// Print the results
std::cout << outputs[0].DebugString() << "\n";
std::cout << output_c() << "\n"; // 30
Error:
F tensorflow/core/framework/tensor.cc:493] Check failed: 1 == NumElements() (1 vs. 1792)Must have a one element tensor
Process finished with exit code 6
What is the reason for this error?
Before that, I had made a stupid mistake and did not find the key line of code. Replacing:
auto output_c = outputs[0].scalar<float>();
with:
auto output_c = outputs[0].flat<float>();
solved all the problems.
https://github.com/tensorflow/tensorflow/issues/3362 (this issue prompted me to check which call was failing)
@Aziuth Thanks.
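For context: scalar<T>() asserts that the tensor contains exactly one element, while the embeddings output here contains 1792, which is exactly what the check failure reports. flat<T>() instead gives a 1-D Eigen view of the data regardless of shape. A short sketch of reading the values that way:

// outputs[0] holds the 1792-element embedding; flat<float>() views it as 1-D.
auto output_c = outputs[0].flat<float>();
for (int i = 0; i < output_c.size(); ++i) {
    std::cout << output_c(i) << " ";  // print each embedding component
}
std::cout << "\n";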

Understanding of planes in NAryMatIterator

I have a 3-dimensional matrix:
const int n_mat_size = 5;
const int n_mat_sz[] = { n_mat_size , n_mat_size, n_mat_size };
cv::Mat m1(3, n_mat_sz, CV_32FC1);
Now I'd like to iterate over its planes and expect that there should be three two-dimensional matrices:
const cv::Mat* arrays[] = { &m1, 0 };
cv::Mat planes[3];
cv::NAryMatIterator it(arrays, planes);
std::cout << it.nplanes << ", " << it.planes[0].rows << ", " << it.planes[0].cols;
I expect to get the output "3, 5, 5", but instead I get "1, 1, 125". Where are the slices of the matrix?
Because the matrix m1 is continuous, there is only one plane (or slice).
Please refer to the documentation for NAryMatIterator:
It iterates through the slices (or planes), not the elements, where "slice" is a continuous part of the arrays.
For example, the matrix m2 in the following code is not continuous:
const int n_mat_size = 5;
const int n_mat_sz[] = { n_mat_size , n_mat_size, n_mat_size };
cv::Mat m1(3, n_mat_sz, CV_32FC1);
// Get plane 2 and 3 of m1
// and row 2, row 3 and row 4 of every selected plane
// m2 is not continuous
cv::Mat m2 = m1(cv::Range(2,4), cv::Range(2,5));
const cv::Mat* arrays[] = { &m2, 0 };
cv::Mat planes[3];
cv::NAryMatIterator it(arrays, planes);
std::cout << it.nplanes << ", " << it.planes[0].rows << ", " << it.planes[0].cols << std::endl;
The output of the above code is: 2, 1, 15.
Note that the number of rows of every plane is always 1 and the number of columns is the number of elements contained in the plane.
There is a statement:
planes[i] = Mat(1, (int)size, A.type(), A.data);
in the function void NAryMatIterator::init, which can be found at https://github.com/opencv/opencv/blob/master/modules/core/src/matrix.cpp#L4596 .
The above statement sets the size of the plane.
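For completeness, the usual idiom for visiting every slice is to increment the iterator nplanes times; a minimal sketch using the non-continuous m2 from above:

const cv::Mat* arrays[] = { &m2, 0 };
cv::Mat planes[1];
cv::NAryMatIterator it(arrays, planes);
for (size_t p = 0; p < it.nplanes; ++p, ++it)
{
    // it.planes[0] is a continuous 1 x N view of the current slice
    std::cout << it.planes[0] << std::endl;
}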
To separate the matrix you gave into planes, you could use cv::InputArray::getMatVector.
The following code shows its usage.
int main()
{
    const int n_mat_size = 3;
    const int n_mat_sz[] = { n_mat_size, n_mat_size, n_mat_size };
    cv::Mat m1(3, n_mat_sz, CV_8U);
    cv::MatIterator_<uchar> it = m1.begin<uchar>();
    cv::MatIterator_<uchar> end = m1.end<uchar>();
    for (uchar i = 0; it != end; ++it, ++i)
    {
        *it = i;
    }
    cv::InputArray arr(m1);
    std::vector<cv::Mat> planes;
    arr.getMatVector(planes);
    for (size_t i = 0; i < planes.size(); ++i)
    {
        std::cout << "-------" << std::endl
                  << planes[i] << std::endl << "******" << std::endl;
    }
}
Its output is as follows:
-------
[ 0, 1, 2;
3, 4, 5;
6, 7, 8]
******
-------
[ 9, 10, 11;
12, 13, 14;
15, 16, 17]
******
-------
[ 18, 19, 20;
21, 22, 23;
24, 25, 26]
******
Maybe the easiest way is to use the method cv::Mat::row(int). The corresponding code is:
int main()
{
    const int n_mat_size = 3;
    const int n_mat_sz[] = { n_mat_size, n_mat_size, n_mat_size };
    cv::Mat m1(3, n_mat_sz, CV_8U);
    cv::MatIterator_<uchar> it = m1.begin<uchar>();
    cv::MatIterator_<uchar> end = m1.end<uchar>();
    for (uchar i = 0; it != end; ++it, ++i)
    {
        *it = i;
    }
    int n = m1.size[0];
    for (int i = 0; i < n; ++i)
    {
        cv::Mat three_d_plane = m1.row(i);
        // three_d_plane has size 1x3x3.
        // std::cout supports only 2-D matrices, so we view it as 2-D here.
        cv::Mat two_d_plane(three_d_plane.size[1], three_d_plane.size[2], three_d_plane.type(), three_d_plane.data);
        std::cout << two_d_plane << std::endl << "----" << std::endl;
    }
}
The output is
[ 0, 1, 2;
3, 4, 5;
6, 7, 8]
----
[ 9, 10, 11;
12, 13, 14;
15, 16, 17]
----
[ 18, 19, 20;
21, 22, 23;
24, 25, 26]
----

Keep the signed value that has the minimal absolute value in two matrices in OpenCV

In OpenCV, I have two matrices, One and Two, which are the same size. I want to find the signed value that has the minimal absolute value between the two matrices and keep it in matrix One. For this, I use the following code:
for (int i = 0; i < One.rows; ++i)
{
    float* p = One.ptr<float>(i);
    float* p_two = Two.ptr<float>(i);
    for (int j = 0; j < One.cols; ++j)
    {
        if (fabsf(p_two[j]) < fabsf(p[j]))
            p[j] = p_two[j];
    }
}
This code seems to be the bottleneck in my program. Does anyone know how to improve the performance? Thanks a lot!
Your code is not the bottleneck of your program. It's indeed very fast. You need to profile your code to see where the actual bottleneck is.
You can optimize it a little in case your matrices are continuous (which is very often the case in practice), like this:
int rows = one.rows;
int cols = one.cols;
if (one.isContinuous() && two.isContinuous())
{
    cols = rows * cols;
    rows = 1;
}
for (int r = 0; r < rows; ++r)
{
    float* pone = one.ptr<float>(r);
    float* ptwo = two.ptr<float>(r);
    for (int c = 0; c < cols; ++c)
    {
        if (fabs(ptwo[c]) < fabs(pone[c]))
        {
            pone[c] = ptwo[c];
        }
    }
}
Here is a small evaluation, also against the good alternative method proposed by @s1h in the comments:
two.copyTo(one, abs(two) < abs(one));
Here abs(two) < abs(one) builds a mask that is non-zero exactly where two has the smaller absolute value, and copyTo copies only those elements into one.
Time (in ms):
Size             Yuanhao      s1h        Miki
[3 x 3]          0.000366543  0.117294   0.000366543
[10 x 10]        0.00109963   0.0157614  0.00109963
[100 x 100]      0.0964009    0.139653   0.112529
[1280 x 720]     8.70577      11.0267    8.65372
[1000 x 1000]    9.66538      13.5068    9.02026
[1920 x 1080]    16.5681      26.9706    15.7412
[4096 x 3112]    104.423      135.629    102.595
[5000 x 5000]    196.124      277.457    187.203
You can see that your method is very fast. Mine is a little bit faster. @s1h's is slower, but more concise and easier to read.
Code
You can evaluate the results on your PC with this:
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace std;
using namespace cv;

int main()
{
    vector<Size> sizes{ Size(3, 3), Size(10, 10), Size(100, 100), Size(1280, 720),
                        Size(1000, 1000), Size(1920, 1080), Size(4096, 3112), Size(5000, 5000) };
    cout << "Size: \t\tYuanhao \ts1h \t\tMiki" << endl;
    for (int is = 0; is < sizes.size(); ++is)
    {
        Size sz = sizes[is];
        cout << sz << "\t";

        Mat1f img1(sz);
        randu(img1, Scalar(-100), Scalar(100));
        Mat1f img2(sz);
        randu(img2, Scalar(-100), Scalar(100));

        {
            Mat1f one = img1.clone();
            Mat1f two = img2.clone();
            double tic = double(getTickCount());
            for (int r = 0; r < one.rows; ++r)
            {
                float* pone = one.ptr<float>(r);
                float* ptwo = two.ptr<float>(r);
                for (int c = 0; c < one.cols; ++c)
                {
                    if (fabs(ptwo[c]) < fabs(pone[c]))
                    {
                        pone[c] = ptwo[c];
                    }
                }
            }
            double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
            cout << toc << " \t";
        }
        {
            Mat1f one = img1.clone();
            Mat1f two = img2.clone();
            double tic = double(getTickCount());
            two.copyTo(one, abs(two) < abs(one));
            double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
            cout << toc << " \t";
        }
        {
            Mat1f one = img1.clone();
            Mat1f two = img2.clone();
            double tic = double(getTickCount());
            int rows = one.rows;
            int cols = one.cols;
            if (one.isContinuous() && two.isContinuous())
            {
                cols = rows * cols;
                rows = 1;
            }
            for (int r = 0; r < rows; ++r)
            {
                float* pone = one.ptr<float>(r);
                float* ptwo = two.ptr<float>(r);
                for (int c = 0; c < cols; ++c)
                {
                    if (fabs(ptwo[c]) < fabs(pone[c]))
                    {
                        pone[c] = ptwo[c];
                    }
                }
            }
            double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
            cout << toc << " \t";
        }
        cout << endl;
    }
    getchar();
    return 0;
}