I'm trying to use some example code for C++ in Qt. In this example, there's a function get_top_n from tflite::label_image, declared in tensorflow/lite/examples/label_image/get_top_n.h. But Qt Creator doesn't find the function.
Error: main.cpp:104 (and 107): error: no matching function for call to 'get_top_n'
What am I doing wrong here?
#include <chrono>
#include <cstring>
#include <fstream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/examples/label_image/get_top_n.h"
#include "tensorflow/lite/model.h"
std::vector<std::string> load_labels(std::string labels_file)
{
std::ifstream file(labels_file.c_str());
if (!file.is_open())
{
fprintf(stderr, "unable to open label file\n");
exit(-1);
}
std::string label_str;
std::vector<std::string> labels;
while (std::getline(file, label_str))
{
if (label_str.size() > 0)
labels.push_back(label_str);
}
file.close();
return labels;
}
int main(int argc, char *argv[])
{
// Get Model label and input image
if (argc != 4)
{
fprintf(stderr, "TfliteClassification.exe modelfile labels image\n");
exit(-1);
}
const char *modelFileName = argv[1];
const char *labelFile = argv[2];
const char *imageFile = argv[3];
// Load Model
auto model = tflite::FlatBufferModel::BuildFromFile(modelFileName);
if (model == nullptr)
{
fprintf(stderr, "failed to load model\n");
exit(-1);
}
// Initiate Interpreter
std::unique_ptr<tflite::Interpreter> interpreter;
tflite::ops::builtin::BuiltinOpResolver resolver;
tflite::InterpreterBuilder(*model, resolver)(&interpreter);
if (interpreter == nullptr)
{
fprintf(stderr, "Failed to initiate the interpreter\n");
exit(-1);
}
if (interpreter->AllocateTensors() != kTfLiteOk)
{
fprintf(stderr, "Failed to allocate tensor\n");
exit(-1);
}
// Configure the interpreter
interpreter->SetAllowFp16PrecisionForFp32(true);
interpreter->SetNumThreads(1);
// Get Input Tensor Dimensions
int input = interpreter->inputs()[0];
auto height = interpreter->tensor(input)->dims->data[1];
auto width = interpreter->tensor(input)->dims->data[2];
auto channels = interpreter->tensor(input)->dims->data[3];
// Load Input Image
cv::Mat image;
auto frame = cv::imread(imageFile);
if (frame.empty())
{
fprintf(stderr, "Failed to load iamge\n");
exit(-1);
}
// Copy image to input tensor
cv::resize(frame, image, cv::Size(width, height), 0, 0, cv::INTER_NEAREST);
memcpy(interpreter->typed_input_tensor<unsigned char>(0), image.data, image.total() * image.elemSize());
// Inference
std::chrono::steady_clock::time_point start, end;
start = std::chrono::steady_clock::now();
interpreter->Invoke();
end = std::chrono::steady_clock::now();
auto inference_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
// Get Output
int output = interpreter->outputs()[0];
TfLiteIntArray *output_dims = interpreter->tensor(output)->dims;
auto output_size = output_dims->data[output_dims->size - 1];
std::vector<std::pair<float, int>> top_results;
float threshold = 0.01f;
switch (interpreter->tensor(output)->type)
{
case kTfLiteFloat32:
tflite::label_image::get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size, 1, threshold, &top_results, kTfLiteFloat32);
break;
case kTfLiteUInt8:
tflite::label_image::get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0), output_size, 1, threshold, &top_results, kTfLiteUInt8);
break;
default:
fprintf(stderr, "cannot handle output type\n");
exit(-1);
}
// Print inference ms in input image
cv::putText(frame, "Infernce Time in ms: " + std::to_string(inference_time), cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 0, 255), 2);
// Load Labels
auto labels = load_labels(labelFile);
// Print labels with confidence in input image
for (const auto &result : top_results)
{
const float confidence = result.first;
const int index = result.second;
std::string output_txt = "Label :" + labels[index] + " Confidence : " + std::to_string(confidence);
cv::putText(frame, output_txt, cv::Point(10, 60), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 0, 255), 2);
}
// Display image
cv::imshow("Output", frame);
cv::waitKey(0);
return 0;
}
The lines affected:
104: tflite::label_image::get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size, 1, threshold, &top_results, kTfLiteFloat32);
107: tflite::label_image::get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0), output_size, 1, threshold, &top_results, kTfLiteUInt8);
Content of tensorflow/lite/examples/label_image/get_top_n.h:
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. ...*/
#ifndef TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
#include "tensorflow/lite/examples/label_image/get_top_n_impl.h"
namespace tflite {
namespace label_image {
template <class T>
void get_top_n(T* prediction, int prediction_size, size_t num_results,
float threshold, std::vector<std::pair<float, int>>* top_results,
TfLiteType input_type);
// explicit instantiation so that we can use them otherwhere
template void get_top_n<float>(float*, int, size_t, float,
std::vector<std::pair<float, int>>*, TfLiteType);
template void get_top_n<int8_t>(int8_t*, int, size_t, float,
std::vector<std::pair<float, int>>*,
TfLiteType);
template void get_top_n<uint8_t>(uint8_t*, int, size_t, float,
std::vector<std::pair<float, int>>*,
TfLiteType);
} // namespace label_image
} // namespace tflite
#endif // TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
Content of tensorflow/lite/examples/label_image/get_top_n_impl.h:
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. ...*/
#ifndef TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
#include <algorithm>
#include <functional>
#include <queue>
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace label_image {
extern bool input_floating;
// Returns the top N confidence values over threshold in the provided vector,
// sorted by confidence in descending order.
template <class T>
void get_top_n(T* prediction, int prediction_size, size_t num_results,
float threshold, std::vector<std::pair<float, int>>* top_results,
TfLiteType input_type) {
// Will contain top N results in ascending order.
std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>,
std::greater<std::pair<float, int>>>
top_result_pq;
const long count = prediction_size; // NOLINT(runtime/int)
float value = 0.0;
for (int i = 0; i < count; ++i) {
switch (input_type) {
case kTfLiteFloat32:
value = prediction[i];
break;
case kTfLiteInt8:
value = (prediction[i] + 128) / 256.0;
break;
case kTfLiteUInt8:
value = prediction[i] / 255.0;
break;
default:
break;
}
// Only add it if it beats the threshold and has a chance at being in
// the top N.
if (value < threshold) {
continue;
}
top_result_pq.push(std::pair<float, int>(value, i));
// If at capacity, kick the smallest value out.
if (top_result_pq.size() > num_results) {
top_result_pq.pop();
}
}
// Copy to output vector and reverse into descending order.
while (!top_result_pq.empty()) {
top_results->push_back(top_result_pq.top());
top_result_pq.pop();
}
std::reverse(top_results->begin(), top_results->end());
}
} // namespace label_image
} // namespace tflite
#endif // TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
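For reference, here is a minimal standalone call that matches the declaration above. This is only a sketch with made-up values to show the expected argument types; if a file containing just this fails the same way, the label_image example sources or include paths are likely missing from the Qt project rather than the call itself being wrong.
#include <utility>
#include <vector>
#include "tensorflow/lite/examples/label_image/get_top_n.h"
int main() {
    // Hypothetical buffer standing in for interpreter->typed_output_tensor<float>(0).
    float prediction[4] = {0.10f, 0.70f, 0.15f, 0.05f};
    std::vector<std::pair<float, int>> top_results;
    // Arguments: prediction, prediction_size, num_results, threshold, top_results, input_type.
    tflite::label_image::get_top_n<float>(prediction, 4, 1, 0.01f, &top_results, kTfLiteFloat32);
    return 0;
}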
I am trying to make a program that uses servos connected to an Arduino to follow your face. I'm using Visual C++ in Visual Studio 2017 with OpenCV 4.1 to do the facial recognition, then sending the location of the identified face to an Arduino via a serial connection.
I'm new to OpenCV, so I've been going through many tutorials to try to make code that works.
The plan is to do the facial recognition and calculate the location of the face in Visual Studio, combine the x and y locations into a single string, and send it to the Arduino over serial. The Arduino then splits the coordinates, separated by a colon, using strtok(). It then moves the servos accordingly to keep the tracked face in the center of the screen.
I have tested the serial comm in a separate C++ project and it seems to work fine with the current Arduino code (not anywhere near finished, because I ran into a hiccup with serial comm), except that when I send the location string, the servo moves to the desired spot and then returns to its starting location.
When I try to implement the serial communication in the project with OpenCV, it sends the location once, then appears to stop sending serial commands. I've tried debugging manually, by calling the sendSerial function in other locations, to see if I can get it to send. I've looked around for solutions but haven't found anything definite, other than that it may be the waitKey(10) function. If so, is there a way around this?
Thanks.
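As a side note, the "x:y" protocol can be checked in isolation before involving the serial link. This is a minimal standalone sketch with hypothetical values, mirroring the zero-padded format used below and the strtok() parsing done on the Arduino:
#include <cstdio>
#include <cstdlib>
#include <cstring>
int main() {
    // Format side (PC): zero-padded, colon-separated, newline-terminated.
    char msg[16];
    int centerX = 320, centerY = 240; // hypothetical face center
    std::snprintf(msg, sizeof(msg), "%03d:%03d\n", centerX, centerY);
    // Parse side (Arduino equivalent): split on the delimiters with strtok().
    char* x = std::strtok(msg, ":\n");
    char* y = std::strtok(nullptr, ":\n");
    int xloc = x ? std::atoi(x) : 0;
    int yloc = y ? std::atoi(y) : 0;
    std::printf("xloc=%d yloc=%d\n", xloc, yloc);
    return 0;
}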
###############SerialPort.h##############
#ifndef SERIALPORT_H
#define SERIALPORT_H
#define ARDUINO_WAIT_TIME 2000
#define MAX_DATA_LENGTH 255
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
class SerialPort
{
private:
HANDLE handler;
bool connected;
COMSTAT status;
DWORD errors;
public:
SerialPort(char *portName);
~SerialPort();
int readSerialPort(char *buffer, unsigned int buf_size);
bool writeSerialPort(char *buffer, unsigned int buf_size);
bool isConnected();
};
#endif // SERIALPORT_H
#################SerialSource.cpp##################
#include "SerialPort.h"
SerialPort::SerialPort(char *portName)
{
this->connected = false;
this->handler = CreateFileA(static_cast<LPCSTR>(portName),
GENERIC_READ | GENERIC_WRITE,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL);
if (this->handler == INVALID_HANDLE_VALUE) {
if (GetLastError() == ERROR_FILE_NOT_FOUND) {
printf("ERROR: Handle was not attached. Reason: %s not available\n", portName);
}
else
{
printf("ERROR!!!");
}
}
else {
DCB dcbSerialParameters = { 0 };
if (!GetCommState(this->handler, &dcbSerialParameters)) {
printf("failed to get current serial parameters");
}
else {
dcbSerialParameters.BaudRate = CBR_9600;
dcbSerialParameters.ByteSize = 8;
dcbSerialParameters.StopBits = ONESTOPBIT;
dcbSerialParameters.Parity = NOPARITY;
dcbSerialParameters.fDtrControl = DTR_CONTROL_ENABLE;
if (!SetCommState(handler, &dcbSerialParameters))
{
printf("ALERT: could not set Serial port parameters\n");
}
else {
this->connected = true;
PurgeComm(this->handler, PURGE_RXCLEAR | PURGE_TXCLEAR);
Sleep(ARDUINO_WAIT_TIME);
}
}
}
}
SerialPort::~SerialPort()
{
if (this->connected) {
this->connected = false;
CloseHandle(this->handler);
}
}
int SerialPort::readSerialPort(char *buffer, unsigned int buf_size)
{
DWORD bytesRead;
unsigned int toRead = 0;
ClearCommError(this->handler, &this->errors, &this->status);
if (this->status.cbInQue > 0) {
if (this->status.cbInQue > buf_size) {
toRead = buf_size;
}
else toRead = this->status.cbInQue;
}
if (ReadFile(this->handler, buffer, toRead, &bytesRead, NULL)) return bytesRead;
return 0;
}
bool SerialPort::writeSerialPort(char *buffer, unsigned int buf_size)
{
DWORD bytesSend;
if (!WriteFile(this->handler, (void*)buffer, buf_size, &bytesSend, 0)) {
ClearCommError(this->handler, &this->errors, &this->status);
return false;
}
else return true;
}
bool SerialPort::isConnected()
{
return this->connected;
}
###################faceDetect.cpp################
// CPP program to detects face in a video
// Include required header files from OpenCV directory
#include <opencv2/objdetect.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <string>
#include <stdlib.h>
#include "SerialPort.h"
#include <sstream>
#include <iomanip>
using namespace std;
using namespace cv;
//Set up serial comm
char output[MAX_DATA_LENGTH];
char port[] = "\\\\.\\COM3";
char incoming[MAX_DATA_LENGTH];
// Function for Face Detection
void detectAndDraw(Mat& img, CascadeClassifier& cascade, double scale, SerialPort arduino);
string cascadeName;
// Function for sending locations to arduino
void sendSerial(string locations, SerialPort arduino);
int main(int argc, const char** argv)
{
//Establish connection to serial
SerialPort arduino(port);
if (arduino.isConnected()) {
cout << "COnnection Established" << endl;
}
else {
cout << "Error in port name" << endl;
}
// VideoCapture class for playing video for which faces to be detected
VideoCapture capture;
Mat frame, image;
// PreDefined trained XML classifiers with facial features
CascadeClassifier cascade;
double scale = 1;
// Change path before execution
cascade.load("C:/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml");
// Start Video..1) 0 for WebCam 2) "Path to Video" for a Local Video
capture.open(CAP_MSMF);
//sendSerial("400:100", arduino);
if (capture.isOpened())
{
// Capture frames from video and detect faces
cout << "Face Detection Started...." << endl;
while (1)
{
capture >> frame;
if (frame.empty())
break;
Mat frame1 = frame.clone();
detectAndDraw(frame1, cascade, scale, arduino);
char c = (char)waitKey(10);
// Press q to exit from window
if (c == 27 || c == 'q' || c == 'Q')
break;
}
}
else
cout << "Could not Open Camera";
return 0;
}
void sendSerial(string locations, SerialPort arduino) {
//string command;
//command = to_string(xloc);
cout << locations << endl;
char *charArray = new char[locations.size() + 1];
copy(locations.begin(), locations.end(), charArray);
charArray[locations.size()] = '\n';
arduino.writeSerialPort(charArray, locations.size() + 1); // send only the message bytes, not MAX_DATA_LENGTH
//arduino.readSerialPort(output, MAX_DATA_LENGTH);
//cout << output;
delete[] charArray;
//
//command = to_string(yloc);
//copy(command.begin(), command.end(), charArray);
//charArray[command.size()] = '\n';
//arduino.writeSerialPort(charArray, MAX_DATA_LENGTH);
////arduino.readSerialPort(output, MAX_DATA_LENGTH);
////cout << output;
//delete[] charArray;
}
void detectAndDraw(Mat& img, CascadeClassifier& cascade,
double scale, SerialPort arduino)
{
vector<Rect> faces;
Mat gray, smallImg;
cvtColor(img, gray, COLOR_BGR2GRAY); // Convert to Gray Scale
double fx = 1 / scale;
// Resize the Grayscale Image
resize(gray, smallImg, Size(), fx, fx, INTER_LINEAR);
equalizeHist(smallImg, smallImg);
// Detect faces of different sizes using cascade classifier
cascade.detectMultiScale(smallImg, faces, 1.1,
2, 0 | CASCADE_SCALE_IMAGE, Size(30, 30));
// Draw circles around the faces
for (size_t i = 0; i < faces.size(); i++)
{
Rect r = faces[i];
Mat smallImgROI;
int x = faces[i].x;
int y = faces[i].y;
int h = y + faces[i].height;
int w = x + faces[i].width;
int centerX = x + (.5* faces[i].width);
int centerY = y + (.5* faces[i].height);
if (abs(320 - centerX) <= 50) {
if (abs(240 - centerY) <= 50) {
rectangle(img,
Point(x, y),
Point(w, h),
Scalar(0, 0, 275),
2,
8,
0);
}
}
else {
rectangle(img,
Point(x, y),
Point(w, h),
Scalar(275, 275, 275),
2,
8,
0);
}
stringstream stringX;
stringstream stringY;
stringX << std::setw(3) << std::setfill('0') << centerX;
stringY << std::setw(3) << std::setfill('0') << centerY;
std::stringstream ss;
//ss << std::setw(3) << std::setfill('0') << centerX << ":"<< centerY;
//std::string s = ss.str();
std::string s = stringX.str() + ":" + stringY.str();
//cout << s << endl;
sendSerial(s, arduino);
smallImgROI = smallImg(r);
if (arduino.isConnected()) {
cout << "COnnection Established" << endl;
//sendSerial("400:100", arduino);
}
}
// Show Processed Image with detected faces
imshow("Face Detection", img);
}
#####################arduino code################
#include <Servo.h>
String input;
char array[6];
char *strings[3];
char *ptr = NULL;
int xloc;
int yloc;
int hServoPin = 9;
Servo hServo;
int ledPin = 13;
void setup() {
//set up servos
hServo.attach(hServoPin);
//start serial connection
Serial.begin(9600);
//***** delete later *****
pinMode(ledPin, OUTPUT);
}
void loop() {
if(Serial.available()){
//grab "xloc:yloc" and convert to char array
input = Serial.readStringUntil('\n');
//delete later
//Serial.print("input; ");
//Serial.println(input);
for(int i = 0; i<6; i++){
array[i] = input.charAt(i);
//Serial.print(array[i]);
}
//split char array into two entities
byte index = 0;
ptr = strtok(array, ":;"); // takes a list of delimiters
while(ptr != NULL)
{
strings[index] = ptr;
index++;
ptr = strtok(NULL, ":;"); // takes a list of delimiters
//Serial.println("loop");
}
//set xloc and yloc respectively
xloc = atoi(strings[0]);
yloc = atoi(strings[1]);
}
if((xloc < 214)){
hServo.write(0);
delay(100);
}
else if((xloc > 214) && (xloc < 328)){
hServo.write(90);
delay(100);
}
else if((xloc > 328)){
hServo.write(180);
delay(100);
}
}
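One observation worth checking, offered as a guess rather than a confirmed diagnosis: SerialPort is passed to detectAndDraw and sendSerial by value. Each call makes a shallow copy that shares the same HANDLE, and when a copy goes out of scope its destructor sees connected == true and calls CloseHandle on that shared handle, which would match the "sends once, then stops" symptom. A sketch of the by-reference signatures:
// Pass the port by reference so no temporary copy's destructor closes the shared HANDLE.
void detectAndDraw(Mat& img, CascadeClassifier& cascade, double scale, SerialPort& arduino);
void sendSerial(string locations, SerialPort& arduino);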
I'd like to do a simple DNN example to get to know mlpack.
The simple example is to train a DNN on the MNIST digits - the classic
ML hello-world case :-)
I managed to prepare all images using OpenCV filters - the result is a
single-channel grayscale cv::Mat matrix.
I also managed to convert the pixel values to the Armadillo matrix arma::mat
and to label these pictures.
But somewhere in those two operations, I made a mistake.
I have N examples and M input neurons.
IN means "input neuron"
OL means "output label"
Ex means "example"
The structure of my training data is like this:
arma::mat TrainSet = {{IN_1/Ex_1, IN_1/Ex_2, IN_1/Ex_3, ..., IN_1/Ex_N},
{IN_2/Ex_1, IN_2/Ex_2, IN_2/Ex_3, ..., IN_2/Ex_N},
{...},
{IN_M/Ex_1, IN_M/Ex_2, IN_M/Ex_3, ..., IN_M/Ex_N}}
arma::mat LabelSet = {OL_Ex_1, OL_Ex_2, ..., OL_Ex_N}
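For comparison, here is a tiny hand-built version of that layout with hypothetical values (M = 3 input neurons, N = 2 examples): one column per example, and the labels as a 1 x N row vector:
#include <mlpack/core.hpp>
int main() {
    // 3 input neurons (rows) x 2 examples (columns), hypothetical values.
    arma::mat TrainSet = {{0.0, 1.0},   // IN_1 for Ex_1, Ex_2
                          {0.5, 0.2},   // IN_2 for Ex_1, Ex_2
                          {1.0, 0.0}};  // IN_3 for Ex_1, Ex_2
    // One label per example (hypothetical classes), 1 x N.
    arma::mat LabelSet = {{3.0, 7.0}};
    TrainSet.print("TrainSet:");
    LabelSet.print("LabelSet:");
    return 0;
}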
Training my network leads to an error:
Error training artificial neural network!
Error details: Mat::operator(): index out of bounds
I'm quite sure that the structure of my training and label matrices is not correct.
Does anybody know what I did wrong?
I tried to follow this example and to translate it to my case:
http://www.mlpack.org/docs/mlpack-git/doxygen/cnetutorial.html
(Screenshots from Visual Studio showing the structure of the trainset and of the labels were attached here.)
Thank you for any help.
Yours,
Jan
This is my code:
#pragma region Includings
#include <iostream>
#include <stdlib.h>
#include <exception>
#include <string>
#include "opencv2/opencv.hpp"
#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/core/optimizers/cne/cne.hpp>
#pragma endregion
#pragma region Globals
std::string TrainFolder = "C:\\HomeC\\MNist\\MNist\\train-labels\\";
#pragma endregion
#pragma region Structs
struct TInputPair {
double Value;
int Index;
};
struct TDigitPairExample {
TInputPair* InputPairArray;
int nNonZero;
char OutputValue;
};
#pragma endregion
#pragma region Identifier
void DisplayImage(cv::Mat* Img, std::string Title = "CV::DefaultForm");
std::vector<TDigitPairExample> GenerateTrainingSet(std::string TrainFolder, int nExamplesPerClass, bool DisplayAtWindow = false);
void DisplayImage(cv::Mat* Img, std::string Title, int Delay = 0);
TInputPair* MatToArray(cv::Mat* img, int* nEntries);
int CharToOutputInt(char c);
void TransferDataToMLPack(std::vector<TDigitPairExample>* ExStack, arma::mat* DataStack, arma::mat* LabelStack, int nInput);
typedef uchar Pixel;
#pragma endregion
int main() {
#pragma region Get training examples from images
std::vector<TDigitPairExample> TrainExamples = GenerateTrainingSet(TrainFolder, 101);
#pragma endregion
#pragma region Convert training vector to armadillo matrix
arma::mat trainset, labels;
TransferDataToMLPack(&TrainExamples, &trainset, &labels, 784);
#pragma endregion
#pragma region Define network
mlpack::ann::FFN<mlpack::ann::NegativeLogLikelihood<> > network;
network.Add<mlpack::ann::Linear<> >(784, 784);
network.Add<mlpack::ann::SigmoidLayer<> >();
network.Add<mlpack::ann::Linear<> >(784, 10);
network.Add<mlpack::ann::LogSoftMax<> >();
#pragma endregion
#pragma region Train network
try {
network.Train(trainset, labels);
}catch (const std::exception& e) {
std::cout << "Error training artificial neural network!" << std::endl << "Error details: " << e.what() << std::endl;
}
#pragma endregion
std::cout << "Application finished. Press ENTER to exit..." << std::endl;
std::cin.get();
}
#pragma region Private_regions
void DisplayImage(cv::Mat* Img, std::string Title, int Delay) {
/***************/
/*Define window*/
/***************/
cv::namedWindow(Title, cv::WINDOW_AUTOSIZE);
cv::imshow(Title.c_str(), *Img);
cv::waitKey(Delay);
//cv::destroyWindow(Title.c_str());
return;
}
TInputPair* MatToArray(cv::Mat* img, int* nEntries) {
uchar* ptr = nullptr, *dptr = nullptr;
TInputPair* InPairArr = nullptr;
int j = 0;
if (img->isContinuous()) {
ptr = img->ptr<uchar>();
}else { return nullptr; }
InPairArr = (TInputPair*)malloc((img->cols) * (img->rows) * sizeof(TInputPair));
if (InPairArr == nullptr) { return nullptr; }
for (int i = 0; i < (img->rows)*(img->cols); i++) {
//std::cout << "Index_" + std::to_string(i) + "; " + std::to_string(ptr[i]) << std::endl;
if (ptr[i] != 255) { InPairArr[j].Index = i; InPairArr[j].Value = (double)(255 - ptr[i]) / 255.0; j++; }
}
InPairArr = (TInputPair*)realloc(InPairArr, j * sizeof(TInputPair));
*nEntries = j;
return InPairArr;
}
std::vector<TDigitPairExample> GenerateTrainingSet(std::string TrainFolder, int nExamplesPerClass, bool DisplayAtWindow) {
/********/
/*Locals*/
/********/
int nEntries = 0;
cv::Mat imgMod, imgGrad, imgInv, ptHull, imgHull, imgResize;
std::vector<std::vector<cv::Point>> contours;
std::vector<TDigitPairExample> TrainExamples;
TDigitPairExample TDPE;
for (int i = 1, j = 0;; i++) {
/**************/
/*Reading file*/
/**************/
cv::Mat imgOrig = cv::imread(TrainFolder + std::to_string(j) + "_" + std::to_string(i) + ".bmp", cv::IMREAD_GRAYSCALE);
if (imgOrig.empty() || i > 100) { j++; i = 1; if (j > 9) { break; } continue; }
/****************/
/*Build negative*/
/****************/
cv::subtract(cv::Scalar::all(255.0), imgOrig, imgMod);
/*****************/
/*Cut by threshold*/
/*****************/
cv::threshold(imgMod, imgMod, 230.0, 255.0, cv::THRESH_BINARY);
/**************/
/*Get contours*/
/**************/
//cv::findContours(imgMod, contours, cv::CHAIN_APPROX_NONE, 1);
//cv::Scalar color = cv::Scalar(255, 0, 0);
//cv::drawContours(imgMod, contours, -1, color, 1, 8);
//cv::Laplacian(imgOrig, imgGrad, 16, 1, 1.0, 0.0, cv::BORDER_REFLECT);
/********************/
/*Resize and display*/
/********************/
cv::resize(imgMod, imgResize, cv::Size(300, 300), .0, .0, cv::INTER_LINEAR);
TDPE.InputPairArray = MatToArray(&imgMod, &nEntries);
TDPE.nNonZero = nEntries;
TDPE.OutputValue = std::to_string(j).c_str()[0];
TrainExamples.push_back(TDPE);
if (DisplayAtWindow) { DisplayImage(&imgResize, std::string("After inversion"), 5); }
}
return TrainExamples;
}
int CharToOutputInt(char c) {
switch (c) {
case '0': return 0;
case '1': return 1;
case '2': return 2;
case '3': return 3;
case '4': return 4;
case '5': return 5;
case '6': return 6;
case '7': return 7;
case '8': return 8;
case '9': return 9;
default: throw new std::exception();
}
}
void TransferDataToMLPack(std::vector<TDigitPairExample>* ExStack, arma::mat* DataStack, arma::mat* LabelStack, int nInput) {
*DataStack = arma::zeros(nInput, ExStack->size());
*LabelStack = arma::zeros(1, ExStack->size()); /*...edit...*/
TDigitPairExample DPE;
TInputPair TIP;
/*Looping all digit examples*/
for (int i = 0; i < ExStack->size(); i++) {
DPE = (*ExStack)[i];
/*Looping over all non-zero pixels*/
for (int j = 0; j < DPE.nNonZero; j++) {
TIP = DPE.InputPairArray[j];
try {
(*DataStack)(TIP.Index, i) = TIP.Value;
}catch (std::exception& ex) {
std::cout << "Error adding example[" << std::to_string(j) << "] to training stack!" << std::endl <<
"Error details: " << ex.what() << std::endl;
}
}
/*Adding label*/
try {
(*LabelStack)(0, i) = CharToOutputInt(DPE.OutputValue); /*...edit...*/
}catch (std::exception& ex) {
std::cout << "Error adding example[" << std::to_string(i) << "] to label stack!" << std::endl <<
"Error details: " << ex.what() << std::endl;
}
}
return;
}
#pragma endregion
The NegativeLogLikelihood loss function expects the target to be in the range [1, N], so you would have to increase the return value of CharToOutputInt. In case you haven't seen it already, there is an interesting example: mlpack - DigitRecognizerCNN, which could be helpful as well.
OK, thanks to Marcus, I found the error. A label "0" is not allowed. I simply changed the labels to:
1 ==> Zero
2 ==> One
3 ==> Two
...
10 ==> Nine
That worked.
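A minimal sketch of that fix, assuming the rest of the pipeline stays unchanged: shift every digit label up by one so the targets lie in [1, 10] instead of [0, 9].
int CharToOutputInt(char c) {
    // Throws std::invalid_argument (from <stdexcept>) instead of a raw std::exception.
    if (c < '0' || c > '9') throw std::invalid_argument("not a digit");
    return (c - '0') + 1; // '0' -> 1, '1' -> 2, ..., '9' -> 10
}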
WINDOWS 10, amd64
Built TensorFlow GPU-enabled C++ static libraries with the CMake GUI + MSBuild.
Build successful.
LABEL_IMAGE tutorial example execution times:
... main.cc execution: 9.17 secs
... label_image.py execution (tensorflow): 10.34 secs
... label_image.py execution (tensorflow-gpu): 1.62 secs
Any idea why? Thanks a lot
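One way to narrow this down (a diagnostic sketch, not a confirmed diagnosis) is to enable device-placement logging in the SessionOptions used in the code below; if every op lands on /cpu:0, the C++ build simply isn't using the GPU, which would explain the 9.17 s vs 1.62 s gap:
tensorflow::SessionOptions options;
options.config.set_log_device_placement(true); // logs the device chosen for each op
std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options));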
main.cc with minor customizations:
#define NOMINMAX
#include <fstream>
#include <utility>
#include <vector>
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"
// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;
static Status ReadEntireFile(tensorflow::Env* env, const string& filename, Tensor* output) {
tensorflow::uint64 file_size = 0;
TF_RETURN_IF_ERROR(env->GetFileSize(filename, &file_size));
string contents;
contents.resize(file_size);
std::unique_ptr<tensorflow::RandomAccessFile> file;
TF_RETURN_IF_ERROR(env->NewRandomAccessFile(filename, &file));
tensorflow::StringPiece data;
TF_RETURN_IF_ERROR(file->Read(0, file_size, &data, &(contents)[0]));
if (data.size() != file_size) {
return tensorflow::errors::DataLoss("Truncated read of '", filename, "' expected ", file_size, " got ", data.size());
}
output->scalar<string>()() = data.ToString();
return Status::OK();
}
// Given an image file name, read in the data, try to decode it as an image,
// resize it to the requested size, and then scale the values as desired.
Status ReadTensorFromImageFile(const string file_name, const int input_height, const int input_width, const float input_mean, const float input_std, std::vector<Tensor>* out_tensors) {
auto root = tensorflow::Scope::NewRootScope();
using namespace ::tensorflow::ops; // NOLINT(build/namespaces)
string input_name = "file_reader";
string output_name = "dim";
// read file_name into a tensor named input
Tensor input(tensorflow::DT_STRING, tensorflow::TensorShape());
TF_RETURN_IF_ERROR(ReadEntireFile(tensorflow::Env::Default(), file_name, &input));
// use a placeholder to read input data
auto file_reader = Placeholder(root.WithOpName("input"), tensorflow::DataType::DT_STRING);
std::vector<std::pair<string, tensorflow::Tensor>> inputs = { { "input", input }, };
// Now try to figure out what kind of file it is and decode it.
const int wanted_channels = 3;
tensorflow::Output image_reader;
if (tensorflow::StringPiece(file_name).ends_with(".png")) {
image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels));
}
else if (tensorflow::StringPiece(file_name).ends_with(".gif")) {
// gif decoder returns 4-D tensor, remove the first dim
image_reader = Squeeze(root.WithOpName("squeeze_first_dim"), DecodeGif(root.WithOpName("gif_reader"), file_reader));
}
else if (tensorflow::StringPiece(file_name).ends_with(".bmp")) {
image_reader = DecodeBmp(root.WithOpName("bmp_reader"), file_reader);
}
else {
// Assume if it's neither a PNG nor a GIF then it must be a JPEG.
image_reader = DecodeJpeg(root.WithOpName("jpeg_reader"), file_reader, DecodeJpeg::Channels(wanted_channels));
}
// Now cast the image data to float so we can do normal math on it.
auto uint8_caster = Cast(root.WithOpName("uint8_caster"), image_reader, tensorflow::DT_UINT8);
// The convention for image ops in TensorFlow is that all images are expected
// to be in batches, so that they're four-dimensional arrays with indices of
// [batch, height, width, channel]. Because we only have a single image, we
// have to add a batch dimension of 1 to the start with ExpandDims().
auto dims_expander = ExpandDims(root.WithOpName(output_name), uint8_caster, 0);
// Bilinearly resize the image to fit the required dimensions.
//auto resized = ResizeBilinear(root, dims_expander,Const(root.WithOpName("size"), { input_height, input_width }));
// Subtract the mean and divide by the scale.
//Div(root.WithOpName(output_name), Sub(root, resized, { input_mean }),{ input_std });
// This runs the GraphDef network definition that we've just constructed, and
// returns the results in the output tensor.
tensorflow::GraphDef graph;
TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
tensorflow::SessionOptions options;
std::unique_ptr<tensorflow::Session> session(tensorflow::NewSession(options));
TF_RETURN_IF_ERROR(session->Create(graph));
TF_RETURN_IF_ERROR(session->Run({ inputs }, { output_name }, {}, out_tensors));
return Status::OK();
}
// Reads a model graph definition from disk, and creates a session object you
// can use to run it.
Status LoadGraph(const string& graph_file_name, std::unique_ptr<tensorflow::Session>* session) {
tensorflow::GraphDef graph_def;
Status load_graph_status = ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
if (!load_graph_status.ok()) {return tensorflow::errors::NotFound("Failed to load compute graph at '",graph_file_name, "'");}
tensorflow::SessionOptions options;
session->reset(tensorflow::NewSession(options));
Status session_create_status = (*session)->Create(graph_def);
if (!session_create_status.ok()) {return session_create_status; }
return Status::OK();
}
int main(int argc, char* argv[]) {
// These are the command-line flags the program can understand.
// They define where the graph and input data is located, and what kind of
// input the model expects. If you train your own model, or use something
// other than inception_v3, then you'll need to update these.
string image = "tensorflow/examples/label_image/data/grace_hopper.jpg";
string graph = "tensorflow/examples/label_image/data/faster_rcnn_resnet101_coco_11_06_2017/frozen_inference_graph.pb";
string labels = "/tensorflow/tensorflow/examples/label_image/data/faster_rcnn_resnet101_coco_11_06_2017/graph.pbtxt";
int32 input_width = 299;
int32 input_height = 299;
float input_mean = 0;
float input_std = 255;
string input_layer = "image_tensor:0";
std::vector<string> output_layer = { "detection_boxes:0", "detection_scores:0", "detection_classes:0", "num_detections:0" };
string o_layer = "detection_boxes:0, detection_scores : 0, detection_classes : 0, num_detections : 0"; //dummy for Flag structure
bool self_test = false;
string root_dir = "/tensorflow/";
std::vector<Flag> flag_list = {
Flag("image", &image, "image to be processed"),
Flag("graph", &graph, "graph to be executed"),
Flag("labels", &labels, "name of file containing labels"),
Flag("input_width", &input_width, "resize image to this width in pixels"),
Flag("input_height", &input_height,
"resize image to this height in pixels"),
Flag("input_mean", &input_mean, "scale pixel values to this mean"),
Flag("input_std", &input_std, "scale pixel values to this std deviation"),
Flag("input_layer", &input_layer, "name of input layer"),
Flag("output_layer", &o_layer, "name of output layer"),
Flag("self_test", &self_test, "run a self test"),
Flag("root_dir", &root_dir,
"interpret image and graph file names relative to this directory"),
};
string usage = tensorflow::Flags::Usage(argv[0], flag_list);
const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list);
if (!parse_result) {
LOG(ERROR) << usage;
return -1;
}
// We need to call this to set up global state for TensorFlow.
tensorflow::port::InitMain(argv[0], &argc, &argv);
if (argc > 1) {
LOG(ERROR) << "Unknown argument " << argv[1] << "\n" << usage;
return -1;
}
// First we load and initialize the model.
std::unique_ptr<tensorflow::Session> session;
string graph_path = tensorflow::io::JoinPath(root_dir, graph);
Status load_graph_status = LoadGraph(graph_path, &session);
if (!load_graph_status.ok()) {
LOG(ERROR) << load_graph_status;
return -1;
}
// Get the image from disk as a float array of numbers, resized and normalized
// to the specifications the main graph expects.
std::vector<Tensor> resized_tensors;
string image_path = tensorflow::io::JoinPath(root_dir, image);
//-------------------------------------
LOG(ERROR) << "Detection Basla....";
Status read_tensor_status = ReadTensorFromImageFile(image_path, input_height, input_width, input_mean, input_std, &resized_tensors);
if (!read_tensor_status.ok()) {
LOG(ERROR) << read_tensor_status;
return -1;
}
const Tensor resized_tensor = resized_tensors[0];
// Actually run the image through the model.
std::vector<Tensor> outputs;
Status run_status = session->Run({ { input_layer, resized_tensor } }, { output_layer }, {}, &outputs);
LOG(ERROR) << "Detection Bit......";
//-----------------------------------------
if (!run_status.ok()) {
LOG(ERROR) << "Running model failed: " << run_status;
return -1;
}
tensorflow::TTypes<float>::Flat scores = outputs[1].flat<float>();
tensorflow::TTypes<float>::Flat classes = outputs[2].flat<float>();
tensorflow::TTypes<float>::Flat num_detections = outputs[3].flat<float>();
auto boxes = outputs[0].flat_outer_dims<float, 3>();
LOG(ERROR) << "num_detections:" << num_detections(0) << "," << outputs[0].shape().DebugString();
for (size_t i = 0; i < num_detections(0) && i < 20; ++i)
{
if (scores(i) > 0.5)
{
LOG(ERROR) << i << ",score:" << scores(i) << ",class:" << classes(i) << ",box:" << "," << boxes(0, i, 0) << "," << boxes(0, i, 1) << "," << boxes(0, i, 2) << "," << boxes(0, i, 3);
}
}
return 0;
}
After the successful build I ran the code and got a "_pywrap_tensorflow_internal.pyd not found" message.
I searched the PC and found one in the python/tensorflow path.
I copied that one to the execution path and everything was OK, except GPU usage.
Suddenly something whispered to me:
"Hey, you immortal!! You should take the recently generated
pywrap_tensorflow_internal.dll, rename it _pywrap_tensorflow_internal.pyd,
and copy it to the execution path."
GPU is being used now.
I am new to the OpenCV world and neural networks, but I have some coding experience in C++/Java.
I created my first ANN MLP and taught it XOR:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <iomanip>
using namespace cv;
using namespace ml;
using namespace std;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << "]" << endl;
}
}
}
int main()
{
const int hiddenLayerSize = 4;
float inputTrainingDataArray[4][2] = {
{ 0.0, 0.0 },
{ 0.0, 1.0 },
{ 1.0, 0.0 },
{ 1.0, 1.0 }
};
Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);
float outputTrainingDataArray[4][1] = {
{ 0.0 },
{ 1.0 },
{ 1.0 },
{ 0.0 }
};
Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
100000000,
0.000000000000000001
);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
mlp->train(trainingData
/*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
+ ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
);
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
Mat result;
mlp->predict(sample, result);
cout << sample << " -> ";// << result << endl;
print(result, 0);
cout << endl;
}
return 0;
}
It works very well for this simple problem; I also taught this network the 1-10 to binary conversion.
But I need to use an MLP for simple image classification - road signs. I wrote the code for loading training images and preparing the matrices for learning, but I'm not able to train the network - it "learns" in one second even with 1,000,000 iterations! And it produces garbage results, the same for all inputs!
Here are my test images and the source code:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>
#include <Windows.h>
using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;
const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;
void print(Mat& mat, int prec)
{
for (int i = 0; i<mat.size().height; i++)
{
cout << "[ ";
for (int j = 0; j<mat.size().width; j++)
{
cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
if (j != mat.size().width - 1)
cout << ", ";
else
cout << " ]" << endl;
}
}
}
bool loadImage(string imagePath, Mat& outputImage)
{
// load image in grayscale
Mat image = imread(imagePath, IMREAD_GRAYSCALE);
Mat temp;
// check for invalid input
if (image.empty()) {
cout << "Could not open or find the image" << std::endl;
return false;
}
// resize the image
Size size(WIDTH_SIZE, HEIGHT_SIZE);
resize(image, temp, size, 0, 0, INTER_AREA);
// convert to float 1-channel
temp.convertTo(outputImage, CV_32FC1, 1.0/255.0);
return true;
}
vector<string> getFilesNamesInFolder(string folder)
{
vector<string> names;
char search_path[200];
sprintf(search_path, "%s/*.*", folder.c_str());
WIN32_FIND_DATA fd;
HANDLE hFind = ::FindFirstFile(search_path, &fd);
if (hFind != INVALID_HANDLE_VALUE) {
do {
// read all (real) files in current folder
// , delete '!' read other 2 default folder . and ..
if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
names.push_back(fd.cFileName);
}
} while (::FindNextFile(hFind, &fd));
::FindClose(hFind);
}
return names;
}
class Sign {
public:
enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' };
Mat image;
Category category;
int number;
Sign(Mat& image, string name) :image(image) {
category = static_cast<Category>(name.at(0));
number = stoi(name.substr(2, name.length()));
};
};
vector<Sign> loadSignsFromFolder(String folderName) {
vector<Sign> roadSigns;
for (string fileName : getFilesNamesInFolder(folderName)) {
Mat image;
loadImage(folderName + fileName, image);
roadSigns.emplace_back(image, fileName.substr(0, (fileName.length() - 4))); //cut .png
}
return roadSigns;
}
void showSignsInWindows(vector<Sign> roadSigns) {
for (Sign sign : roadSigns) {
String windowName = "Sign " + to_string(sign.number);
namedWindow(windowName, WINDOW_AUTOSIZE);
imshow(windowName, sign.image);
}
waitKey(0);
}
Mat getInputDataFromSignsVector(vector<Sign> roadSigns) {
Mat roadSignsImageData;
for (Sign sign : roadSigns) {
Mat signImageDataInOneRow = sign.image.reshape(0, 1);
roadSignsImageData.push_back(signImageDataInOneRow);
}
return roadSignsImageData;
}
Mat getOutputDataFromSignsVector(vector<Sign> roadSigns) {
int signsCount = (int) roadSigns.size();
int signsVectorSize = signsCount + 1;
Mat roadSignsData(0, signsVectorSize, CV_32FC1);
int i = 1;
for (Sign sign : roadSigns) {
vector<float> outputTrainingVector(signsVectorSize);
fill(outputTrainingVector.begin(), outputTrainingVector.end(), -1.0);
outputTrainingVector[i++] = 1.0;
Mat tempMatrix(outputTrainingVector, false);
roadSignsData.push_back(tempMatrix.reshape(0, 1));
}
return roadSignsData;
}
int main(int argc, char* argv[])
{
if (argc != 2) {
cout << " Usage: display_image ImageToLoadAndDisplay" << endl;
return -1;
}
const int hiddenLayerSize = 500;
vector<Sign> roadSigns = loadSignsFromFolder("../../../Znaki/A/");
Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
//mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
,100 //(int) INT_MAX
,0.000001
);
mlp->setTermCriteria(termCrit);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
auto start = system_clock::now();
mlp->train(trainingData
//, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
, ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
);
auto duration = duration_cast<milliseconds> (system_clock::now() - start);
cout << "Training time: " << duration.count() << "ms" << endl;
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat result;
//mlp->predict(inputTrainingData.row(i), result);
mlp->predict(roadSigns[i].image.reshape(0, 1), result);
//cout << result << endl;
print(result, 2);
}
//showSignsInWindows(roadSigns);
return 0;
}
What is wrong in this code, that XOR works but images don't? I checked the input and output matrices and they're correct... Could somebody also explain when I should use ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE, and what values of the setActivationFunction and setTrainMethod parameters I should use?
Thanks!
There was a problem with the backprop weight scale parameter - it was too big, and the ANN couldn't learn more difficult things.
I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001); and the hidden layer size to 100 (to speed up the learning) - now it's working!
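A minimal sketch of the two changed lines, assuming everything else in the listing above stays the same:
const int hiddenLayerSize = 100;                                 // was 500
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001); // weight scale was 0.05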
I have the following code, which raises an error on the line m_threads.push_back(boost::thread(boost::ref(*this)));. Could you help me figure out what is wrong?
#include <algorithm>
#include <fstream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include "boost/scoped_ptr.hpp"
#include "boost/thread.hpp"
#include "boost/thread/barrier.hpp"
#include "boost/atomic.hpp"
// #include "gflags/gflags.h"
// #include "glog/logging.h"
// #include "caffe/proto/caffe.pb.h"
// #include "caffe/util/db.hpp"
// #include "caffe/util/io.hpp"
// #include "caffe/util/rng.hpp"
using namespace caffe; // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;
DEFINE_bool(gray, false,
"When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
"Randomly shuffle the order of images and their labels");
DEFINE_int32(threads, 1,
"Build DB using multiple threads.");
DEFINE_string(backend, "lmdb",
"The backend {lmdb, leveldb} for storing the result");
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
"When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,
"When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
"Optional: What type should we encode the image as ('png','jpg',...).");
class ThreadedReader
{
public:
typedef struct {
int line_id;
bool status;
Datum datum;
} data_entry;
ThreadedReader(int threads, std::vector<std::pair<std::string, int> >& lines,
const string& encode_type, const string& root_folder,
int resize_height, int resize_width, bool is_color) :
m_thread_count(threads), m_lines(lines),
m_barrier_read(threads), m_barrier_fetch(2),
m_id(0), m_thd_done(0),
m_done(false),
m_root_folder(root_folder),
m_encode_type(encode_type),
m_resize_height(resize_height), m_resize_width(resize_width),
m_is_color(is_color)
{
m_data.resize(m_thread_count);
for (int i = 0; i < threads; ++i)
m_threads.push_back(boost::thread(boost::ref(*this)));
}
~ThreadedReader() {
// for (int i = 0; i < m_thread_count; ++i)
// m_threads[i].join();
}
void operator()()
{
for (;;) {
int my_id = m_id.fetch_add(1, boost::memory_order_acq_rel);
const int my_base = my_id % m_thread_count;
m_barrier_read.count_down_and_wait();
m_data[my_base].status = false;
m_data[my_base].line_id = my_id;
if (my_id < m_lines.size()) {
m_data[my_base].line_id = my_id;
m_data[my_base].status = ReadImageToDatum(m_root_folder + m_lines[my_id].first,
m_lines[my_id].second, m_resize_height, m_resize_width, m_is_color,
m_encode_type, &m_data[my_base].datum);
}
if (m_thd_done.fetch_add(1, boost::memory_order_acq_rel) + 1 == m_thread_count) {
m_barrier_fetch.count_down_and_wait();
m_thd_done.store(0, boost::memory_order_release);
if (m_id.load(boost::memory_order_relaxed) >= m_lines.size())
m_done.store(true);
m_barrier_fetch.count_down_and_wait();
}
if (m_id.load(boost::memory_order_relaxed) >= m_lines.size())
break;
}
}
// bool fetch_batch(std::vector<data_entry>& vec)
// {
// using std::swap;
// if (m_done.load(boost::memory_order_acquire))
// return false;
// m_barrier_fetch.count_down_and_wait();
// swap(vec, m_data);
// if (m_data.size() < m_thread_count)
// m_data.resize(m_thread_count);
// m_barrier_fetch.count_down_and_wait();
// return true;
// }
private:
const int m_thread_count;
std::vector<std::pair<std::string, int> >& m_lines;
boost::barrier m_barrier_read;
boost::barrier m_barrier_fetch;
boost::atomic<int> m_id;
boost::atomic<int> m_thd_done;
boost::atomic<bool> m_done;
const std::string& m_root_folder;
const std::string& m_encode_type;
const int m_resize_height, m_resize_width;
const bool m_is_color;
std::vector<data_entry> m_data;
std::vector<boost::thread> m_threads;
};
int main(int argc, char** argv) {
return 0;
}
The error:
passing ‘const boost::thread’ as ‘this’ argument of ‘boost::thread::operator boost::detail::thread_move_t<boost::thread>()’ discards qualifiers [-fpermissive]
make: *** [.build_release/tools/convert_imageset_threaded.o] Error 1
Your code compiles. It should just be reduced a lot more.
The error message indicates that in your real code you perhaps have a const- or volatile-qualified ThreadedReader.
And you shouldn't mix signed and unsigned integers in comparisons.
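On the signed/unsigned point, the change in the fixed listing below boils down to this sketch: make the atomic counter unsigned so the comparison against m_lines.size() no longer mixes signedness:
boost::atomic<unsigned> m_id;                                    // was boost::atomic<int>
unsigned my_id = m_id.fetch_add(1, boost::memory_order_acq_rel); // was int my_id
if (my_id < m_lines.size()) { /* unsigned vs size_t: no sign-compare warning */ }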
Live On Coliru
// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
// convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels, in the format as
// subfolder1/file1.JPEG 7
// ....
#include <algorithm>
#include <fstream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include "boost/scoped_ptr.hpp"
#include "boost/thread.hpp"
#include "boost/thread/barrier.hpp"
#include "boost/atomic.hpp"
// #include "gflags/gflags.h"
// #include "glog/logging.h"
// #include "caffe/proto/caffe.pb.h"
// #include "caffe/util/db.hpp"
// #include "caffe/util/io.hpp"
// #include "caffe/util/rng.hpp"
//using namespace caffe; // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;
struct Datum {};
class ThreadedReader
{
public:
typedef struct {
int line_id;
bool status;
Datum datum;
} data_entry;
bool ReadImageToDatum(...) { return false; }
ThreadedReader(int threads, std::vector<std::pair<std::string, int> >& lines,
const std::string& encode_type, const std::string& root_folder,
int resize_height, int resize_width, bool is_color) :
m_thread_count(threads), m_lines(lines),
m_barrier_read(threads), m_barrier_fetch(2),
m_id(0), m_thd_done(0),
m_done(false),
m_root_folder(root_folder),
m_encode_type(encode_type),
m_resize_height(resize_height), m_resize_width(resize_width),
m_is_color(is_color)
{
m_data.resize(m_thread_count);
for (int i = 0; i < threads; ++i)
m_threads.push_back(boost::thread(boost::ref(*this)));
}
~ThreadedReader() {
// for (int i = 0; i < m_thread_count; ++i)
// m_threads[i].join();
}
void operator()()
{
for (;;) {
unsigned my_id = m_id.fetch_add(1, boost::memory_order_acq_rel);
const int my_base = my_id % m_thread_count;
m_barrier_read.count_down_and_wait();
m_data[my_base].status = false;
m_data[my_base].line_id = my_id;
if (my_id < m_lines.size()) {
m_data[my_base].line_id = my_id;
m_data[my_base].status = ReadImageToDatum(m_root_folder + m_lines[my_id].first,
m_lines[my_id].second, m_resize_height, m_resize_width, m_is_color,
m_encode_type, &m_data[my_base].datum);
}
if (m_thd_done.fetch_add(1, boost::memory_order_acq_rel) + 1 == m_thread_count) {
m_barrier_fetch.count_down_and_wait();
m_thd_done.store(0, boost::memory_order_release);
if (m_id.load(boost::memory_order_relaxed) >= m_lines.size())
m_done.store(true);
m_barrier_fetch.count_down_and_wait();
}
if (m_id.load(boost::memory_order_relaxed) >= m_lines.size())
break;
}
}
// bool fetch_batch(std::vector<data_entry>& vec)
// {
// using std::swap;
// if (m_done.load(boost::memory_order_acquire))
// return false;
// m_barrier_fetch.count_down_and_wait();
// swap(vec, m_data);
// if (m_data.size() < m_thread_count)
// m_data.resize(m_thread_count);
// m_barrier_fetch.count_down_and_wait();
// return true;
// }
private:
const int m_thread_count;
std::vector<std::pair<std::string, int> >& m_lines;
boost::barrier m_barrier_read;
boost::barrier m_barrier_fetch;
boost::atomic<unsigned> m_id;
boost::atomic<int> m_thd_done;
boost::atomic<bool> m_done;
const std::string& m_root_folder;
const std::string& m_encode_type;
const int m_resize_height, m_resize_width;
const bool m_is_color;
std::vector<data_entry> m_data;
std::vector<boost::thread> m_threads;
};
int main(int argc, char** argv) {
// ::google::InitGoogleLogging(argv[0]);
// #ifndef GFLAGS_GFLAGS_H_
// namespace gflags = google;
// #endif
// gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
// "format used as input for Caffe.\n"
// "Usage:\n"
// " convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
// "The ImageNet dataset for the training demo is at\n"
// " http://www.image-net.org/download-images\n");
// gflags::ParseCommandLineFlags(&argc, &argv, true);
// if (argc < 4) {
// gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
// return 1;
// }
// const bool is_color = !FLAGS_gray;
// const int threads = FLAGS_threads > 1 ? FLAGS_threads : 1;
// const bool check_size = FLAGS_check_size;
// const bool encoded = FLAGS_encoded;
// const string encode_type = FLAGS_encode_type;
// std::ifstream infile(argv[2]);
// std::vector<std::pair<std::string, int> > lines;
// std::string filename;
// int label;
// while (infile >> filename >> label) {
// lines.push_back(std::make_pair(filename, label));
// }
// if (FLAGS_shuffle) {
// // randomly shuffle data
// LOG(INFO) << "Shuffling data";
// shuffle(lines.begin(), lines.end());
// }
// LOG(INFO) << "A total of " << lines.size() << " images.";
// if (encode_type.size() && !encoded)
// LOG(INFO) << "encode_type specified, assuming encoded=true.";
// int resize_height = std::max<int>(0, FLAGS_resize_height);
// int resize_width = std::max<int>(0, FLAGS_resize_width);
// // Create new DB
// scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
// db->Open(argv[3], db::NEW);
// scoped_ptr<db::Transaction> txn(db->NewTransaction());
// // Storing to db
// std::string root_folder(argv[1]);
// int count = 0;
// const int kMaxKeyLength = 256;
// char key_cstr[kMaxKeyLength];
// int data_size = 0;
// bool data_size_initialized = false;
// if (threads > 1) {
// ThreadedReader rdr(threads, lines, encode_type, root_folder,
// resize_height, resize_width, is_color);
// std::vector<ThreadedReader::data_entry> vec;
// while (rdr.fetch_batch(vec)) {
// for (int i = 0; i < vec.size(); ++i) {
// if (!vec[i].status)
// continue;
// if (check_size) {
// if (!data_size_initialized) {
// data_size = vec[i].datum.channels() * vec[i].datum.height() * vec[i].datum.width();
// data_size_initialized = true;
// } else {
// const std::string& data = vec[i].datum.data();
// CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
// << data.size();
// }
// }
// // sequential
// int length = snprintf(key_cstr, kMaxKeyLength, "%08d_%s", vec[i].line_id,
// lines[vec[i].line_id].first.c_str());
// // Put in db
// string out;
// CHECK(vec[i].datum.SerializeToString(&out));
// txn->Put(string(key_cstr, length), out);
// if (++count % 1000 == 0) {
// // Commit db
// txn->Commit();
// txn.reset(db->NewTransaction());
// LOG(ERROR) << "Processed " << count << " files.";
// }
// }
// }
// } else {
// Datum datum;
// for (int line_id = 0; line_id < lines.size(); ++line_id) {
// bool status;
// std::string enc = encode_type;
// if (encoded && !enc.size()) {
// // Guess the encoding type from the file name
// string fn = lines[line_id].first;
// size_t p = fn.rfind('.');
// if (p == fn.npos)
// LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
// enc = fn.substr(p);
// std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
// }
// status = ReadImageToDatum(root_folder + lines[line_id].first,
// lines[line_id].second, resize_height, resize_width, is_color,
// enc, &datum);
// if (status == false) continue;
// if (check_size) {
// if (!data_size_initialized) {
// data_size = datum.channels() * datum.height() * datum.width();
// data_size_initialized = true;
// } else {
// const std::string& data = datum.data();
// CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
// << data.size();
// }
// }
// // sequential
// int length = snprintf(key_cstr, kMaxKeyLength, "%08d_%s", line_id,
// lines[line_id].first.c_str());
// // Put in db
// string out;
// CHECK(datum.SerializeToString(&out));
// txn->Put(string(key_cstr, length), out);
// if (++count % 1000 == 0) {
// // Commit db
// txn->Commit();
// txn.reset(db->NewTransaction());
// LOG(ERROR) << "Processed " << count << " files.";
// }
// }
// }
// // write the last batch
// if (count % 1000 != 0) {
// txn->Commit();
// LOG(ERROR) << "Processed " << count << " files.";
// }
return 0;
}