I am trying to make a program that uses servos connected to an arduino that follows your face. I'm using visual c++ in visual studio 2017 with opencv 4.1 to do the facial recognition, then sending the location of the identified face to an arduino via serial connection.
I'm new to opencv so I've been going through many tutorials to try to make a code that works.
The plan is to do the facial recognition then calculate the location of the face in visual studio. Then combine the x and y locations to a single string and send it to the arduino with serial. The arduino then splits the coordinates, separated by a colon, using strtok(). It then will move servos accordingly to keep the tracked face in center screen.
I have tested the serial comm in a separate c++ project and it seems to work fine with the current arduino code (not anywhere near finished because I ran into a hiccup with serial comm) except when I send the location string, the servo moves to the desired spot then returns to its starting location.
When I try to implement the serial communication in the project with opencv, it sends the location once, then appears to stop sending serial commands. I've tried debugging manually by calling the sendSerial function in other locations to see if I can get it to send. I've looked around for solutions but haven't found anything definite, other than that it may be caused by the waitKey(10) function. If this is so, is there a way around this?
Thanks.
###############SerialPort.h##############
#ifndef SERIALPORT_H
#define SERIALPORT_H
#define ARDUINO_WAIT_TIME 2000
#define MAX_DATA_LENGTH 255
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
// Wrapper around a Win32 serial-port HANDLE with blocking read/write helpers.
// NOTE(review): no copy constructor or copy assignment is declared, and the
// destructor closes the handle whenever `connected` is true. A by-value copy
// therefore shares the same HANDLE and closes it when the copy is destroyed,
// leaving the original with a dead handle. Pass instances by reference.
class SerialPort
{
private:
// Handle returned by CreateFileA in the constructor.
HANDLE handler;
// True only after the port was opened and its parameters were applied.
bool connected;
// Filled by ClearCommError; cbInQue is used to size reads.
COMSTAT status;
// Error mask filled by ClearCommError.
DWORD errors;
public:
// Opens and configures the port named by portName.
SerialPort(char *portName);
// Closes the handle if the connection was established.
~SerialPort();
// Reads at most buf_size queued bytes into buffer; returns the byte count read, or 0.
int readSerialPort(char *buffer, unsigned int buf_size);
// Writes buf_size bytes from buffer; returns false if WriteFile fails.
bool writeSerialPort(char *buffer, unsigned int buf_size);
// Reports whether the constructor completed successfully.
bool isConnected();
};
#endif // SERIALPORT_H
#################SerialSource.cpp##################
#include "SerialPort.h"
// Opens `portName` for exclusive read/write access and configures it for
// 9600 baud, 8 data bits, one stop bit, no parity, with DTR enabled.
//
// On success `connected` becomes true. On any failure `connected` stays false
// and the handle is closed again, so a half-configured port is never leaked
// (the destructor only closes the handle when `connected` is true).
SerialPort::SerialPort(char *portName)
{
    this->connected = false;
    this->handler = CreateFileA(static_cast<LPCSTR>(portName),
                                GENERIC_READ | GENERIC_WRITE,
                                0,
                                NULL,
                                OPEN_EXISTING,
                                FILE_ATTRIBUTE_NORMAL,
                                NULL);
    if (this->handler == INVALID_HANDLE_VALUE) {
        if (GetLastError() == ERROR_FILE_NOT_FOUND) {
            printf("ERROR: Handle was not attached. Reason: %s not available\n", portName);
        }
        else {
            printf("ERROR!!!");
        }
        return;
    }

    DCB dcbSerialParameters = { 0 };
    dcbSerialParameters.DCBlength = sizeof(dcbSerialParameters);
    bool configured = false;
    if (!GetCommState(this->handler, &dcbSerialParameters)) {
        printf("failed to get current serial parameters");
    }
    else {
        dcbSerialParameters.BaudRate = CBR_9600;
        dcbSerialParameters.ByteSize = 8;
        dcbSerialParameters.StopBits = ONESTOPBIT;
        dcbSerialParameters.Parity = NOPARITY;
        dcbSerialParameters.fDtrControl = DTR_CONTROL_ENABLE;
        if (!SetCommState(this->handler, &dcbSerialParameters)) {
            printf("ALERT: could not set Serial port parameters\n");
        }
        else {
            configured = true;
        }
    }

    if (!configured) {
        // Without this the open handle would stay allocated for the lifetime
        // of the process, because the destructor skips unconnected objects.
        CloseHandle(this->handler);
        this->handler = INVALID_HANDLE_VALUE;
        return;
    }

    this->connected = true;
    // Drop anything already sitting in the driver's RX/TX queues.
    PurgeComm(this->handler, PURGE_RXCLEAR | PURGE_TXCLEAR);
    // Presumably gives the board time to restart after DTR is asserted.
    Sleep(ARDUINO_WAIT_TIME);
}
// Releases the port handle, but only when the constructor reported success.
SerialPort::~SerialPort()
{
    if (!this->connected) {
        return;
    }
    this->connected = false;
    CloseHandle(this->handler);
}
int SerialPort::readSerialPort(char *buffer, unsigned int buf_size)
{
DWORD bytesRead;
unsigned int toRead = 0;
ClearCommError(this->handler, &this->errors, &this->status);
if (this->status.cbInQue > 0) {
if (this->status.cbInQue > buf_size) {
toRead = buf_size;
}
else toRead = this->status.cbInQue;
}
if (ReadFile(this->handler, buffer, toRead, &bytesRead, NULL)) return bytesRead;
return 0;
}
bool SerialPort::writeSerialPort(char *buffer, unsigned int buf_size)
{
DWORD bytesSend;
if (!WriteFile(this->handler, (void*)buffer, buf_size, &bytesSend, 0)) {
ClearCommError(this->handler, &this->errors, &this->status);
return false;
}
else return true;
}
bool SerialPort::isConnected()
{
return this->connected;
}
###################faceDetect.cpp################
// CPP program to detects face in a video
// Include required header files from OpenCV directory
#include <opencv2/objdetect.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <string>
#include <stdlib.h>
#include "SerialPort.h"
#include <sstream>
#include <iomanip>
using namespace std;
using namespace cv;
// Serial link configuration.
char output[MAX_DATA_LENGTH];
char port[] = "\\\\.\\COM3";
char incoming[MAX_DATA_LENGTH];

// Detects faces in `img`, draws them, and reports each face centre over serial.
// The port is taken BY REFERENCE: SerialPort closes its handle in its
// destructor, so a by-value copy would close the shared handle as soon as the
// call returned and every later write would fail.
void detectAndDraw(Mat& img, CascadeClassifier& cascade, double scale, SerialPort& arduino);
string cascadeName;
// Sends one "x:y" location line to the Arduino (port taken by reference, see above).
void sendSerial(string locations, SerialPort& arduino);

// Opens the serial port and the camera, then runs detection on every frame
// until the stream ends or the user presses Esc / q / Q.
int main(int argc, const char** argv)
{
    // Establish connection to serial
    SerialPort arduino(port);
    if (arduino.isConnected()) {
        cout << "COnnection Established" << endl;
    }
    else {
        cout << "Error in port name" << endl;
    }

    // Pre-trained XML classifier with facial features.
    // Change path before execution.
    CascadeClassifier cascade;
    if (!cascade.load("C:/opencv/sources/data/haarcascades/haarcascade_frontalface_default.xml")) {
        cout << "Could not load cascade classifier" << endl;
        return 1;
    }
    double scale = 1;

    // Start Video..1) 0 for WebCam 2) "Path to Video" for a Local Video
    VideoCapture capture;
    capture.open(CAP_MSMF);
    if (!capture.isOpened()) {
        cout << "Could not Open Camera";
        return 0;
    }

    cout << "Face Detection Started...." << endl;
    Mat frame;
    while (true)
    {
        capture >> frame;
        if (frame.empty())
            break;
        Mat frame1 = frame.clone();
        detectAndDraw(frame1, cascade, scale, arduino);
        // waitKey also services the HighGUI window, so it must stay in the loop.
        const char c = static_cast<char>(waitKey(10));
        if (c == 27 || c == 'q' || c == 'Q')
            break;
    }
    return 0;
}

// Echoes `locations` to stdout and writes it, followed by '\n', to the port.
// Exactly locations.size() + 1 bytes are written; the previous code asked for
// MAX_DATA_LENGTH bytes from a much smaller heap buffer, which read past the
// allocation and sent garbage after the real message.
void sendSerial(string locations, SerialPort& arduino) {
    cout << locations << endl;
    locations.push_back('\n');
    arduino.writeSerialPort(&locations[0], static_cast<unsigned int>(locations.size()));
}

// Runs the cascade on a grayscale, histogram-equalised copy of `img`, draws a
// rectangle around every detected face, sends each face centre to the Arduino
// as "XXX:YYY" (zero-padded to at least three digits), and shows the frame.
//
// The rectangle is red when the face centre lies within 50 px of (320, 240)
// on both axes and white otherwise. Previously nothing was drawn when only
// the X axis was centred.
// NOTE(review): (320, 240) presumably assumes a 640x480 frame, and face
// coordinates are not scaled back when scale != 1 -- confirm before changing
// either.
void detectAndDraw(Mat& img, CascadeClassifier& cascade,
    double scale, SerialPort& arduino)
{
    vector<Rect> faces;
    Mat gray, smallImg;
    cvtColor(img, gray, COLOR_BGR2GRAY); // Convert to Gray Scale
    double fx = 1 / scale;
    // Resize the Grayscale Image
    resize(gray, smallImg, Size(), fx, fx, INTER_LINEAR);
    equalizeHist(smallImg, smallImg);
    // Detect faces of different sizes using cascade classifier
    cascade.detectMultiScale(smallImg, faces, 1.1,
        2, 0 | CASCADE_SCALE_IMAGE, Size(30, 30));

    for (const Rect& face : faces)
    {
        const int x = face.x;
        const int y = face.y;
        const int right = x + face.width;
        const int bottom = y + face.height;
        const int centerX = static_cast<int>(x + (.5 * face.width));
        const int centerY = static_cast<int>(y + (.5 * face.height));

        const bool centered = abs(320 - centerX) <= 50 && abs(240 - centerY) <= 50;
        const Scalar color = centered ? Scalar(0, 0, 255) : Scalar(255, 255, 255);
        rectangle(img, Point(x, y), Point(right, bottom), color, 2, 8, 0);

        stringstream stringX;
        stringstream stringY;
        stringX << std::setw(3) << std::setfill('0') << centerX;
        stringY << std::setw(3) << std::setfill('0') << centerY;
        std::string s = stringX.str() + ":" + stringY.str();
        sendSerial(s, arduino);

        if (arduino.isConnected()) {
            cout << "COnnection Established" << endl;
        }
    }
    // Show Processed Image with detected faces
    imshow("Face Detection", img);
}
#####################arduino code################
#include <Servo.h>
// Most recent line read from the serial port.
String input;
// Scratch buffer the received text is copied into before tokenising.
// NOTE(review): 6 bytes cannot hold a 7-character "xxx:yyy" message plus a
// terminating NUL -- confirm the intended message length.
char array[6];
// Token pointers produced by strtok ([0] = x, [1] = y).
char *strings[3];
// Cursor used while walking strtok results.
char *ptr = NULL;
// Last parsed face-centre coordinates.
int xloc;
int yloc;
// Pin the horizontal servo is attached to.
int hServoPin = 9;
Servo hServo;
// Pin configured as an output in setup().
int ledPin = 13;
// One-time initialisation: servo, serial link at 9600 baud, and the LED pin.
void setup()
{
  // Attach the horizontal servo to its pin.
  hServo.attach(hServoPin);

  // Open the serial connection.
  Serial.begin(9600);

  // ***** delete later *****
  pinMode(ledPin, OUTPUT);
}
// Reads one "xloc:yloc" line from serial (when available), updates xloc/yloc,
// and points the horizontal servo left (0), centre (90) or right (180)
// depending on xloc.
//
// Fixes over the previous version:
//  * the text is copied into a buffer large enough for the whole message and
//    is NUL-terminated before strtok sees it (the 6-byte global dropped the
//    last digit and had no terminator);
//  * xloc/yloc are only updated when both tokens were found, so a malformed
//    line no longer feeds NULL or stale pointers to atoi;
//  * xloc == 214 and xloc == 328 now fall into the centre band instead of
//    matching no branch at all.
void loop() {
  if (Serial.available()) {
    // grab "xloc:yloc"
    input = Serial.readStringUntil('\n');

    // toCharArray copies at most sizeof(buf) - 1 characters and terminates.
    char buf[16];
    input.toCharArray(buf, sizeof(buf));

    // split into the two coordinates; ':' and ';' are both accepted
    char *xTok = strtok(buf, ":;");
    char *yTok = strtok(NULL, ":;");
    if (xTok != NULL && yTok != NULL) {
      xloc = atoi(xTok);
      yloc = atoi(yTok);
    }
  }

  if (xloc < 214) {
    hServo.write(0);
  }
  else if (xloc <= 328) {
    hServo.write(90);
  }
  else {
    hServo.write(180);
  }
  delay(100);
}
Related
main
#include "stdafx.h"
#include <iostream>
#include "SerialPort.h"
#include <WinBase.h>
using namespace std;
// Asks for a connection method; for "Serial" it lists the COM ports that
// exist, lets the user pick one by its list number, and connects to exactly
// that port (e.g. choosing "3" from "1.COM1 2.COM3 3.COM4" opens COM4).
int main()
{
    cout << "Monitoring System \n";
    cout << "Please select a connection method\n\n";
    cout << "1.Serial 2.TCP \n";
    int num;
    while (!(cin >> num))
    {
        cin.clear();
        cin.ignore(INT_MAX, '\n');
        cout << "Please enter a number only ";
    }
    if (num == 1)
    {
        // found[n - 1] is the COM number shown next to list entry n.
        int found[30];
        int k = 0;
        cout << "Scan all ports that are currently...\n";
        for (int i = 1; i < 30; ++i)
        {
            if (COM_exists(i))
            {
                found[k] = i;
                cout << ++k << ".COM" << i << "\n";
            }
        }
        if (k == 0)
        {
            cout << "No COM ports found.\n";
            return 0;
        }

        cout << "\nselect a port to connect.\n";
        int SN;
        while (!(cin >> SN) || SN < 1 || SN > k)
        {
            cin.clear();
            cin.ignore(INT_MAX, '\n');
            cout << "Please enter a number only ";
        }

        // The "\\.\" prefix is required for COM10 and above and is accepted
        // for the lower port numbers as well.
        char portName[16];
        snprintf(portName, sizeof portName, "\\\\.\\COM%d", found[SN - 1]);

        char command[] = "#or";
        SerialPort PortOpen(portName, CBR_115200, 8, command, 24);
        PortOpen.getData();
        PortOpen.getData();
        // SerialBuffer is not guaranteed to be NUL-terminated, so print
        // exactly the bytes the last read delivered.
        cout.write(PortOpen.SerialBuffer, PortOpen.NoBytesRead) << endl;
        return 0;
    }
}
serial cpp
#include "stdafx.h"
#include <string.h>
#include "SerialPort.h"
using namespace std;
// Returns TRUE when "COM<port>" exists, for 1 <= port <= 30; FALSE otherwise.
// The port is not opened: existence is inferred from GetDefaultCommConfig.
// The explicit ANSI entry point is used because the name is built in a char
// buffer; the TCHAR macro would resolve to the wide version in UNICODE builds.
BOOL COM_exists(int port)
{
    if (port < 1 || port > 30)
    {
        return FALSE;
    }

    char buffer[7]; // "COM30" plus terminator fits
    snprintf(buffer, sizeof buffer, "COM%d", port);

    COMMCONFIG CommConfig;
    DWORD size = sizeof CommConfig;
    // COM port exists if GetDefaultCommConfig returns TRUE
    // or changes <size> to indicate COMMCONFIG buffer too small.
    return (GetDefaultCommConfigA(buffer, &CommConfig, &size)
        || size > sizeof CommConfig);
}
// Allocates the receive buffer and immediately connects to `COM`.
// One extra zero-initialised byte is allocated past BufSize, so that
// SerialBuffer is always NUL-terminated even after a read fills all BufSize
// bytes (callers stream it as a C string).
SerialPort::SerialPort(LPCSTR COM,int setBaudRate, int setByteSize, char* commandBuffer, int BufSize) {
    this->BufSize = BufSize;
    this->SerialBuffer = new char[BufSize + 1]();
    this->NoBytesRead = BufSize;
    Connect(COM, setBaudRate, setByteSize, commandBuffer);
}
// Opens the named port for exclusive, non-overlapped read/write access and,
// on success, hands over to sendCommand.
// CreateFileA is called explicitly because COM is an LPCSTR; the CreateFile
// macro would select the wide-character version in UNICODE builds.
void SerialPort::Connect(LPCSTR COM ,int setBaudRate, int setByteSize, char* commandBuffer) {
    this->COMport = COM;
    this->hComm = CreateFileA(COM,
                              GENERIC_READ | GENERIC_WRITE, // read/write
                              0,                            // no sharing
                              NULL,                         // no security
                              OPEN_EXISTING,                // open existing port only
                              0,                            // non overlapped i/o
                              NULL);                        // null for comm devices
    if (this->hComm == INVALID_HANDLE_VALUE) {
        cout << "PortOpen Fail." << endl;
        return;
    }
    cout << "PortOpen Success." << endl;
    sendCommand(setBaudRate, setByteSize, commandBuffer);
}
void SerialPort::sendCommand(int setBaudRate, int setByteSize, char* commandBuffer) {
this->commandBuffer = commandBuffer;
this->dNoOFBytestoWrite = sizeof(commandBuffer);
this->Status = WriteFile(hComm, commandBuffer, dNoOFBytestoWrite, &dNoOfBytesWritten, NULL);
DCB dcbSerialParams = { 0 };
dcbSerialParams.DCBlength = sizeof(dcbSerialParams);
dcbSerialParams.BaudRate = setBaudRate;
dcbSerialParams.ByteSize = setByteSize;
dcbSerialParams.StopBits = ONESTOPBIT;
dcbSerialParams.Parity = NOPARITY;
COMMTIMEOUTS timeouts = { 0 };
timeouts.ReadIntervalTimeout = 10;
timeouts.ReadTotalTimeoutConstant = 10;
timeouts.ReadTotalTimeoutMultiplier = 10;
timeouts.WriteTotalTimeoutConstant = 50;
timeouts.WriteTotalTimeoutMultiplier = 10;
if (this->Status)
cout << "Command Successful" << endl;
else
{
cout << "fail to Command" << endl;
}
}
void SerialPort::getData() {
ReadFile(this->hComm, &*this->SerialBuffer, this->BufSize, &this->NoBytesRead, NULL);
}
SerialPort.h
#pragma once
#include <windows.h>
#include <iostream>
#include <stdlib.h>
#include <WinBase.h>
using namespace std;
// Serial-port helper: opens a COM port, sends one command string and reads
// replies into a heap buffer. All state is public.
// NOTE(review): SerialBuffer is allocated with new[] in the constructor and no
// destructor is declared here, so neither the buffer nor hComm is released.
class SerialPort {
public:
// Port handle assigned in Connect().
HANDLE hComm;
// Port name passed to Connect(); the pointer is stored, not copied.
LPCSTR COMport;
// Byte count requested from / reported by WriteFile in sendCommand().
DWORD dNoOFBytestoWrite;
DWORD dNoOfBytesWritten;
// Capacity requested for SerialBuffer and the maximum read size.
int BufSize;
// Result of the WriteFile call in sendCommand().
int Status;
// Receive buffer filled by getData().
char* SerialBuffer;
// Command string passed to sendCommand(); the pointer is stored, not copied.
char* commandBuffer;
// Byte count reported by ReadFile in getData().
DWORD NoBytesRead;
// Not referenced by the member functions shown alongside this header.
int port;
// Allocates the buffer and connects (see Connect).
SerialPort(LPCSTR COMport,int setBaudRate, int setByteSize, char* commandBuffer, int BufSize);
// Opens the port and, on success, calls sendCommand.
void Connect(LPCSTR COMport, int setBaudRate, int setByteSize, char* commandBuffer);
// Writes commandBuffer to the port.
void sendCommand(int setBaudRate, int setByteSize, char* commandBuffer);
// Reads from the port into SerialBuffer.
void getData();
};
BOOL COM_exists(int port);
I am writing a code to connect by selecting a serial port.
example, 1.COM1 / 2.COM3 / 3.COM4
How do I modify the main to connect COM4 that matches No. 3?
How do I automatically read and use matching ports to match numbers?
I am writing a code to connect by selecting a serial port.
example, 1.COM1 / 2.COM3 / 3.COM4
How do I modify the main to connect COM4 that matches No. 3?
How do I automatically read and use matching ports to match numbers?
I would personally never use the old-school approach of opening and closing a COM port just to detect whether it exists.
Assuming you are running on Windows, I would use the SetupDi API to find the serial ports without opening them.
A little snippet to adapt:
#include <initguid.h>
#include <windows.h> // Data Type
#include <setupapi.h> // ::SetupDi*********
#include <devguid.h> // Device
// Global Variables
HDEVINFO m_hDevInfo; //!< Reference to device information set
SP_DEVINFO_DATA m_spDevInfoData; //!< Device information structure (references a device instance that is a member of a device information set)
BOOL m_bDevInfo; //!< Tested to ensure EnumDeviceInfo has been called
short m_MemberIndex = -1; //!< Preserves state between EnumDeviceInfo calls
// Advances the global member index and loads that device from m_hDevInfo into
// m_spDevInfoData. Returns the result of SetupDiEnumDeviceInfo, which is also
// kept in m_bDevInfo.
bool WIN_EnumDeviceInfo ()
{
    ++m_MemberIndex;
    m_spDevInfoData.cbSize = sizeof(SP_DEVINFO_DATA);
    m_bDevInfo = ::SetupDiEnumDeviceInfo(m_hDevInfo, m_MemberIndex, &m_spDevInfoData);
    return m_bDevInfo;
}
// Fetches registry property `Property` of the device currently held in
// m_spDevInfoData into PropertyBuffer.
//
// PropertyBufferSize is the size of PropertyBuffer in BYTES. It defaults to
// 2048, the value that used to be hard-coded, so existing two-argument calls
// behave as before; callers with a smaller buffer must pass its real size or
// the API may write past the end of it.
// Returns true when the property was retrieved.
bool WIN_GetDeviceRegistryProperty(DWORD Property, PBYTE PropertyBuffer,
                                   DWORD PropertyBufferSize = 2048)
{
    BOOL bGotRegProp = ::SetupDiGetDeviceRegistryProperty(m_hDevInfo, &m_spDevInfoData,
        Property,
        0L,
        PropertyBuffer,
        PropertyBufferSize,
        0);
    return bGotRegProp != FALSE;
}
void ScanSerial()
{
WORD Flags;
PCWSTR Enumerator=0;
const GUID *ClassGuid;
HWND m_hWnd;
ClassGuid = &GUID_DEVCLASS_PORTS;
Flags = DIGCF_PROFILE;
m_hDevInfo = SetupDiGetClassDevs(ClassGuid, Enumerator, m_hWnd, Flags);
while(WIN_EnumDeviceInfo())
{
wchar_t szBuf[MAX_PATH] = {0};
if(WIN_GetDeviceRegistryProperty(SPDRP_CLASS, (PBYTE)szBuf))
{
wchar_t szFriendlyName[MAX_PATH] = {0};
WIN_GetDeviceRegistryProperty (SPDRP_FRIENDLYNAME, (PBYTE)szFriendlyName);
}
}
}
From szFriendlyName you can extract the COMx number without opening the port and without mixing the ports up.
Try and enjoy...
I'm trying to use an "example" code for c++ on qt. In this example, there's a function "get_top_n" from tflite::label_image, in tensorflow/lite/examples/label_image/get_top_n.h. But, qt creator doesn't find the function.
Error: main.cpp:104 (and 107): erreur : no matching function for call to 'get_top_n'
What am I doing wrong here ?
#include <fstream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/examples/label_image/get_top_n.h"
#include "tensorflow/lite/model.h"
// Reads a label file, one label per line, and returns the non-empty lines in
// file order. Prints a message to stderr and exits with status -1 when the
// file cannot be opened.
std::vector<std::string> load_labels(std::string labels_file)
{
    std::ifstream input(labels_file.c_str());
    if (!input.is_open())
    {
        fprintf(stderr, "unable to open label file\n");
        exit(-1);
    }

    std::vector<std::string> result;
    for (std::string line; std::getline(input, line);)
    {
        if (line.empty())
            continue;
        result.push_back(line);
    }
    input.close();
    return result;
}
int main(int argc, char *argv[])
{
// Get Model label and input image
if (argc != 4)
{
fprintf(stderr, "TfliteClassification.exe modelfile labels image\n");
exit(-1);
}
const char *modelFileName = argv[1];
const char *labelFile = argv[2];
const char *imageFile = argv[3];
// Load Model
auto model = tflite::FlatBufferModel::BuildFromFile(modelFileName);
if (model == nullptr)
{
fprintf(stderr, "failed to load model\n");
exit(-1);
}
// Initiate Interpreter
std::unique_ptr<tflite::Interpreter> interpreter;
tflite::ops::builtin::BuiltinOpResolver resolver;
tflite::InterpreterBuilder(*model, resolver)(&interpreter);
if (interpreter == nullptr)
{
fprintf(stderr, "Failed to initiate the interpreter\n");
exit(-1);
}
if (interpreter->AllocateTensors() != kTfLiteOk)
{
fprintf(stderr, "Failed to allocate tensor\n");
exit(-1);
}
// Configure the interpreter
interpreter->SetAllowFp16PrecisionForFp32(true);
interpreter->SetNumThreads(1);
// Get Input Tensor Dimensions
int input = interpreter->inputs()[0];
auto height = interpreter->tensor(input)->dims->data[1];
auto width = interpreter->tensor(input)->dims->data[2];
auto channels = interpreter->tensor(input)->dims->data[3];
// Load Input Image
cv::Mat image;
auto frame = cv::imread(imageFile);
if (frame.empty())
{
fprintf(stderr, "Failed to load iamge\n");
exit(-1);
}
// Copy image to input tensor
cv::resize(frame, image, cv::Size(width, height), cv::INTER_NEAREST);
memcpy(interpreter->typed_input_tensor<unsigned char>(0), image.data, image.total() * image.elemSize());
// Inference
std::chrono::steady_clock::time_point start, end;
start = std::chrono::steady_clock::now();
interpreter->Invoke();
end = std::chrono::steady_clock::now();
auto inference_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
// Get Output
int output = interpreter->outputs()[0];
TfLiteIntArray *output_dims = interpreter->tensor(output)->dims;
auto output_size = output_dims->data[output_dims->size - 1];
std::vector<std::pair<float, int>> top_results;
float threshold = 0.01f;
switch (interpreter->tensor(output)->type)
{
case kTfLiteInt32:
tflite::label_image::get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size, 1, threshold, &top_results, kTfLiteFloat32);
break;
case kTfLiteUInt8:
tflite::label_image::get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0), output_size, 1, threshold, &top_results, kTfLiteUInt8);
break;
default:
fprintf(stderr, "cannot handle output type\n");
exit(-1);
}
// Print inference ms in input image
cv::putText(frame, "Infernce Time in ms: " + std::to_string(inference_time), cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 0, 255), 2);
// Load Labels
auto labels = load_labels(labelFile);
// Print labels with confidence in input image
for (const auto &result : top_results)
{
const float confidence = result.first;
const int index = result.second;
std::string output_txt = "Label :" + labels[index] + " Confidence : " + std::to_string(confidence);
cv::putText(frame, output_txt, cv::Point(10, 60), cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 0, 255), 2);
}
// Display image
cv::imshow("Output", frame);
cv::waitKey(0);
return 0;
}
The lines affected :
104: tflite::label_image::get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size, 1, threshold, &top_results, kTfLiteFloat32);
107: tflite::label_image::get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0), output_size, 1, threshold, &top_results, kTfLiteUInt8);
Content of tensorflow/lite/examples/label_image/get_top_n.h:
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. ...*/
#ifndef TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
#include "tensorflow/lite/examples/label_image/get_top_n_impl.h"
namespace tflite {
namespace label_image {
// Declaration of get_top_n; the definition is in get_top_n_impl.h, which this
// header includes. Takes a prediction array of prediction_size elements, the
// maximum number of results, a score threshold, the output vector of
// (score, index) pairs, and the tensor element type used to scale the scores.
template <class T>
void get_top_n(T* prediction, int prediction_size, size_t num_results,
float threshold, std::vector<std::pair<float, int>>* top_results,
TfLiteType input_type);
// Explicit instantiations for float, int8_t and uint8_t so that they can be
// used elsewhere.
template void get_top_n<float>(float*, int, size_t, float,
std::vector<std::pair<float, int>>*, TfLiteType);
template void get_top_n<int8_t>(int8_t*, int, size_t, float,
std::vector<std::pair<float, int>>*,
TfLiteType);
template void get_top_n<uint8_t>(uint8_t*, int, size_t, float,
std::vector<std::pair<float, int>>*,
TfLiteType);
} // namespace label_image
} // namespace tflite
#endif // TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_H_
Content of tensorflow/lite/examples/label_image/get_top_n_impl.h:
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. ...*/
#ifndef TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
#include <algorithm>
#include <functional>
#include <queue>
#include "tensorflow/lite/c/common.h"
namespace tflite {
namespace label_image {
extern bool input_floating;
// Collects up to `num_results` (score, index) pairs whose score is not below
// `threshold` and appends them to `top_results`, best score first.
// Scores are taken as-is for kTfLiteFloat32, mapped with (v + 128) / 256 for
// kTfLiteInt8 and v / 255 for kTfLiteUInt8; for any other type the score
// variable keeps whatever value it had from the previous element.
template <class T>
void get_top_n(T* prediction, int prediction_size, size_t num_results,
               float threshold, std::vector<std::pair<float, int>>* top_results,
               TfLiteType input_type) {
  using Scored = std::pair<float, int>;
  // Min-heap: the weakest retained candidate sits on top, ready for eviction.
  std::priority_queue<Scored, std::vector<Scored>, std::greater<Scored>> heap;

  const long total = prediction_size;  // NOLINT(runtime/int)
  float score = 0.0;
  for (int idx = 0; idx < total; ++idx) {
    if (input_type == kTfLiteFloat32) {
      score = prediction[idx];
    } else if (input_type == kTfLiteInt8) {
      score = (prediction[idx] + 128) / 256.0;
    } else if (input_type == kTfLiteUInt8) {
      score = prediction[idx] / 255.0;
    }

    // Written as a negated "<" so NaN scores are treated as in the original.
    if (!(score < threshold)) {
      heap.push(Scored(score, idx));
      // Over capacity: drop the smallest score.
      if (heap.size() > num_results) {
        heap.pop();
      }
    }
  }

  // The heap yields ascending order; append, then flip to descending.
  for (; !heap.empty(); heap.pop()) {
    top_results->push_back(heap.top());
  }
  std::reverse(top_results->begin(), top_results->end());
}
} // namespace label_image
} // namespace tflite
#endif // TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_GET_TOP_N_IMPL_H_
Github link
I wrote some C++ code to take a snapshot with a uEye camera, but the picture that gets saved is just white; sometimes you can see a tiny bit of the road, but it is still almost entirely white. I believe it's an issue with an auto parameter, but I feel as if I have tried everything.
Below is my code:
void MainWindow::CaptureImage(){
int initcamera = is_InitCamera(&hCam, hWndDisplay);
if(initcamera != IS_SUCCESS)
{
cout<<endl<<"Failed to initialize the camera"<<endl;
exit(-1);
}else{
cout<<endl<<"Initialized Camera"<<endl;
}
int camerainfo = is_GetCameraInfo (hCam, &camera_info);
if(camerainfo != IS_SUCCESS)
{
cout<<endl<<"Unable to acquire camera information"<<endl;
exit(-1);
}else{
cout<<endl<<"Camera information required"<<endl;
}
int sensorinfo = is_GetSensorInfo (hCam, &sInfo);
if(sensorinfo != IS_SUCCESS)
{
cout<<endl<<"Unable to acquire sensor information"<<endl;
exit(-1);
}else{
cout<<endl<<"Sensor information acquired"<<endl;
}
int colormode = is_SetColorMode(hCam, IS_CM_BGR8_PACKED);
if(colormode != IS_SUCCESS)
{
cout<<endl<<"Unable to set the color mode"<<endl;
exit(-1);
}else{
cout<<endl<<"Color mode set"<<endl;
}
int pXPos = (sInfo.nMaxWidth);
int pYPos = (sInfo.nMaxHeight);
int rit = is_AllocImageMem (hCam,pXPos,pYPos, 24, &m_pcImageMemory, &m_lMemoryId);
if(rit != IS_SUCCESS)
{
cout<<endl<<"UNABLE TO INITIALIZE MEMORY"<<endl;
system("PAUSE");
exit(-1);
}else{
cout<<endl<<"INITIALIZED MEMORY"<<endl;
}
int rat = is_SetImageMem (hCam, m_pcImageMemory, m_lMemoryId);
if(rat != IS_SUCCESS)
{
cout<<endl<<"UNABLE TO ACTIVATE MEMORY"<<endl;
system("PAUSE");
exit(-1);
}else{
cout<<endl<<"ACTIVATE MEMORY"<<endl;
}
double strenght_factor = 1.0;
int colorcorrection = is_SetColorCorrection(hCam, IS_CCOR_ENABLE, &strenght_factor);
double pval = 1;
int whiteb = is_SetAutoParameter(hCam, IS_SET_ENABLE_AUTO_WHITEBALANCE, &pval, 0);
double gval = 1;
int gains = is_SetAutoParameter(hCam, IS_SET_ENABLE_AUTO_GAIN, &gval, 0);
int dummy;
char *pMem, *pLast;
IMAGE_FILE_PARAMS ImageFileParams;
ImageFileParams.pwchFileName = L"./TestImage.bmp"; /// <-- Insert name and location of the image
ImageFileParams.pnImageID = NULL;
ImageFileParams.ppcImageMem = NULL;
ImageFileParams.nQuality = 0;
ImageFileParams.nFileType = IS_IMG_BMP;
int sho = is_FreezeVideo(hCam, IS_WAIT);
if(sho != IS_SUCCESS)
{
cout<<endl<<"UNABLE TO ACQUIRE FROM THE CAMERA"<<endl;
system("PAUSE");
exit(-1);
}
if (sho == IS_SUCCESS){
int m_Ret = is_GetActiveImageMem(hCam, &pLast, &dummy);
int n_Ret = is_GetImageMem(hCam, (void**)&pLast);
}
if (is_ImageFile(hCam, IS_IMAGE_FILE_CMD_SAVE, (void*)&ImageFileParams, sizeof(ImageFileParams)) == IS_SUCCESS)
{
cout << endl << "An Image was saved" << endl;
}
else
{
cout << endl << "something went wrong" << endl;
}
// Releases an image memory that was allocated
//is_FreeImageMem(hCam, pcImageMemory, nMemoryId);
// Disables the hCam camera handle and releases the data structures and memory areas taken up by the uEye camera
is_ExitCamera(hCam);
}
I have tried many things, such as the parameters below, but the image is still just white. When I had the camera in a darker room the image came out OK, so I definitely think it's due to the daylight outside.
// Fragment listing settings that were tried; not a complete function.
// Widen the parameter-file name and ask the camera to load that parameter set.
const wstring filenameU(filename.begin(), filename.end());
is_ParameterSet(hCam, IS_PARAMETERSET_CMD_LOAD_FILE,(void*) filenameU.c_str(), 0);
// Query the pixel-clock range, then set the clock to range[1].
// NOTE(review): range[1] is presumably the maximum (the variable is named
// maximumPixelClock) -- confirm against the uEye manual.
unsigned int range[3];
memset(range, 0, sizeof(range));
is_PixelClock(hCam, IS_PIXELCLOCK_CMD_GET_RANGE, (void*)range, sizeof(range));
unsigned int maximumPixelClock = range[1];
is_PixelClock(hCam, IS_PIXELCLOCK_CMD_SET, (void*)&maximumPixelClock, sizeof(maximumPixelClock));
// NOTE(review): pval1/pval2 are declared twice below; these look like two
// alternative attempts, and only one of the two lines can be kept in a build.
double pval1 = auto_exposure, pval2 = 0;
double pval1 = 1, pval2 = 0;
// Pass pval1/pval2 to the auto-shutter, auto-framerate and white-balance
// offset controls.
is_SetAutoParameter(hCam, IS_SET_ENABLE_AUTO_SENSOR_SHUTTER, &pval1, &pval2);
is_SetAutoParameter(hCam, IS_SET_ENABLE_AUTO_SHUTTER,&pval1, &pval2);
is_SetAutoParameter(hCam, IS_SET_ENABLE_AUTO_SENSOR_FRAMERATE,&pval1, &pval2);
is_SetAutoParameter(hCam, IS_SET_AUTO_WB_OFFSET, &pval1, &pval2);
// Request 60 frames per second; the rate actually applied is written to m_actualFrameRate.
int desiredFrameRate = 60;
is_SetFrameRate(hCam, desiredFrameRate, &m_actualFrameRate);
You cannot just take one picture, because the exposure might be too high. You have to capture a few images to give the auto exposure control (AEC) a chance to lower the exposure. The reason you have to capture some frames is that the AEC is done in software and can only start changing parameters once it has received some frames.
I am new in OpenCV world and neural networks but I have some coding experience in C++/Java.
I created my first ANN MLP and taught it XOR:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <iomanip>
using namespace cv;
using namespace ml;
using namespace std;
// Prints a float matrix row by row as "[a, b, ...]" with `prec` decimals.
void print(Mat& mat, int prec)
{
    const int rowCount = mat.size().height;
    const int colCount = mat.size().width;
    for (int r = 0; r < rowCount; ++r)
    {
        cout << "[";
        for (int c = 0; c < colCount; ++c)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(r, c);
            if (c == colCount - 1)
                cout << "]" << endl;
            else
                cout << ", ";
        }
    }
}
// Trains a small multilayer perceptron on the four XOR samples and prints the
// network's prediction for each training input.
int main()
{
const int hiddenLayerSize = 4;
// Four 2-input samples, one per row.
float inputTrainingDataArray[4][2] = {
{ 0.0, 0.0 },
{ 0.0, 1.0 },
{ 1.0, 0.0 },
{ 1.0, 1.0 }
};
// The Mat wraps the array above; it does not copy it.
Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);
// Expected XOR output for each sample row.
float outputTrainingDataArray[4][1] = {
{ 0.0 },
{ 1.0 },
{ 1.0 },
{ 0.0 }
};
Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
// Layer sizes: input columns, hidden layer, output columns (2-4-1 here).
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);
// Stop on whichever comes first: the iteration cap or the epsilon below.
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
100000000,
0.000000000000000001
);
mlp->setTermCriteria(termCrit);
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);
// Each row of inputTrainingData is one sample.
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
// Trained with default flags; the alternatives are left commented out.
mlp->train(trainingData
/*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
+ ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
);
// Run every training sample back through the network and print the result.
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
Mat result;
mlp->predict(sample, result);
cout << sample << " -> ";// << result << endl;
print(result, 0);
cout << endl;
}
return 0;
}
It works very well for this simple problem; I also taught this network the 1-10 to binary conversion.
But I need to use an MLP for simple image classification - road signs. I wrote the code for loading the training images and preparing the matrix for learning, but I'm not able to train the network - it "learns" in one second even with 1 000 000 iterations! And it produces garbage results, the same for all inputs!
Here are my test images and the source code:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <iostream>
#include <chrono>
#include <memory>
#include <iomanip>
#include <climits>
#include <Windows.h>
using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;
// Width, in pixels, that every loaded image is resized to (see loadImage).
const int WIDTH_SIZE = 50;
// Height the images are resized to. The cast binds before the division, so
// this is int(WIDTH_SIZE * sqrt(3)) / 2 with integer division (43 for a width
// of 50).
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
// Number of pixels in one resized image.
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;
// Prints a float matrix row by row as "[ a, b, ... ]" with `prec` decimals.
void print(Mat& mat, int prec)
{
    const int rowCount = mat.size().height;
    const int colCount = mat.size().width;
    for (int r = 0; r < rowCount; ++r)
    {
        cout << "[ ";
        for (int c = 0; c < colCount; ++c)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(r, c);
            if (c == colCount - 1)
                cout << " ]" << endl;
            else
                cout << ", ";
        }
    }
}
// Loads `imagePath` as grayscale, resizes it to WIDTH_SIZE x HEIGHT_SIZE with
// area interpolation and stores it in `outputImage` as single-channel float
// scaled to [0, 1].
// Returns false (leaving `outputImage` untouched) when the file cannot be read.
//
// Uses the C++ enumerator INTER_AREA instead of the legacy C macro
// CV_INTER_AREA, which the C++ headers included here do not provide in
// current OpenCV versions; both name the same interpolation mode.
bool loadImage(string imagePath, Mat& outputImage)
{
    // load image in grayscale
    Mat image = imread(imagePath, IMREAD_GRAYSCALE);
    // check for invalid input
    if (image.empty()) {
        cout << "Could not open or find the image" << std::endl;
        return false;
    }

    // resize the image
    Mat resized;
    resize(image, resized, Size(WIDTH_SIZE, HEIGHT_SIZE), 0, 0, INTER_AREA);

    // convert to float 1-channel
    resized.convertTo(outputImage, CV_32FC1, 1.0 / 255.0);
    return true;
}
// Returns the names (without path) of all regular files directly inside
// `folder`; sub-directories, including "." and "..", are skipped. An empty
// list is returned when the folder cannot be searched.
//
// The search pattern is built in a std::string, so a long folder path can no
// longer overflow a fixed 200-byte buffer, and the ANSI API variants are
// called explicitly because the pattern is a narrow string.
vector<string> getFilesNamesInFolder(string folder)
{
    vector<string> names;
    const string searchPattern = folder + "/*.*";

    WIN32_FIND_DATAA fd;
    HANDLE hFind = ::FindFirstFileA(searchPattern.c_str(), &fd);
    if (hFind == INVALID_HANDLE_VALUE) {
        return names;
    }

    do {
        // keep real files only
        if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
            names.push_back(fd.cFileName);
        }
    } while (::FindNextFileA(hFind, &fd));
    ::FindClose(hFind);
    return names;
}
// A road-sign sample: its image plus the category and number parsed from a
// name, where the first character is the category letter and the text from
// index 2 onwards is the sign number.
class Sign {
public:
    enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' };

    Mat image;
    Category category;
    int number;

    // `name` is the file name without extension. Throws if it is empty,
    // shorter than two characters, or has no number starting at index 2.
    Sign(Mat& image, string name)
        : image(image),
          category(static_cast<Category>(name.at(0))),
          number(stoi(name.substr(2)))
    {
    }
};
// Loads every file in `folderName` as a Sign; the sign's name is the file
// name with its last four characters (the ".png" extension) removed.
// Files that loadImage cannot read are skipped, so no empty image ends up in
// the training data (the load result used to be ignored).
vector<Sign> loadSignsFromFolder(String folderName) {
    vector<Sign> roadSigns;
    for (string fileName : getFilesNamesInFolder(folderName)) {
        Mat image;
        if (!loadImage(folderName + fileName, image)) {
            continue;
        }
        roadSigns.emplace_back(image, fileName.substr(0, (fileName.length() - 4))); //cut .png
    }
    return roadSigns;
}
// Opens one auto-sized window per sign, titled "Sign <number>", then blocks
// until a key is pressed.
void showSignsInWindows(vector<Sign> roadSigns) {
    for (const Sign& current : roadSigns) {
        const String title = "Sign " + to_string(current.number);
        namedWindow(title, WINDOW_AUTOSIZE);
        imshow(title, current.image);
    }
    waitKey(0);
}
// Builds the training-input matrix: each sign's image is flattened to a
// single row and the rows are stacked in the order of `roadSigns`.
Mat getInputDataFromSignsVector(vector<Sign> roadSigns) {
    Mat stacked;
    for (const Sign& current : roadSigns) {
        stacked.push_back(current.image.reshape(0, 1));
    }
    return stacked;
}
// Builds the training-target matrix: one row per sign, each row holding
// (number of signs + 1) floats set to -1 except for a single 1. The 1 sits at
// column k for the k-th sign, counting from 1, so column 0 is never set.
Mat getOutputDataFromSignsVector(vector<Sign> roadSigns) {
    const int rowLength = static_cast<int>(roadSigns.size()) + 1;
    Mat targets(0, rowLength, CV_32FC1);
    for (int hot = 1; hot < rowLength; ++hot) {
        vector<float> row(rowLength, -1.0f);
        row[hot] = 1.0;
        // Wraps `row` without copying; push_back then copies it into `targets`.
        Mat rowView(row, false);
        targets.push_back(rowView.reshape(0, 1));
    }
    return targets;
}
// Trains an ANN_MLP on the road-sign images found in a hard-coded folder and
// prints the network's prediction for every training sample.
// Returns -1 when the argument count is wrong, 0 otherwise.
// NOTE(review): exactly one command-line argument is required, but argv is
// never read below; the sign folder is hard-coded.
int main(int argc, char* argv[])
{
if (argc != 2) {
cout << " Usage: display_image ImageToLoadAndDisplay" << endl;
return -1;
}
const int hiddenLayerSize = 500;
vector<Sign> roadSigns = loadSignsFromFolder("../../../Znaki/A/");
// One row per sign: flattened pixels as input, +1/-1 target vector as output.
Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);
Ptr<ANN_MLP> mlp = ANN_MLP::create();
// Three layers: input width, one hidden layer, output width.
Mat layersSize = Mat(3, 1, CV_16U);
layersSize.row(0) = Scalar(inputTrainingData.cols);
layersSize.row(1) = Scalar(hiddenLayerSize);
layersSize.row(2) = Scalar(outputTrainingData.cols);
mlp->setLayerSizes(layersSize);
mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);
// NOTE(review): the answer accompanying this code reports that the first
// BACKPROP parameter (weight scale) of 0.05 was too large for this data and
// that 0.0001 trained successfully.
mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
//mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);
// Stop after at most 100 iterations; the EPS criterion is commented out.
TermCriteria termCrit = TermCriteria(
TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
,100 //(int) INT_MAX
,0.000001
);
mlp->setTermCriteria(termCrit);
Ptr<TrainData> trainingData = TrainData::create(
inputTrainingData,
SampleTypes::ROW_SAMPLE,
outputTrainingData
);
// Time the training run.
auto start = system_clock::now();
mlp->train(trainingData
//, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
, ANN_MLP::TrainFlags::NO_INPUT_SCALE
+ ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
);
auto duration = duration_cast<milliseconds> (system_clock::now() - start);
cout << "Training time: " << duration.count() << "ms" << endl;
// Run the trained network on each training image and print the raw outputs.
for (int i = 0; i < inputTrainingData.rows; i++) {
Mat result;
//mlp->predict(inputTrainingData.row(i), result);
mlp->predict(roadSigns[i].image.reshape(0, 1), result);
//cout << result << endl;
print(result, 2);
}
//showSignsInWindows(roadSigns);
return 0;
}
What is wrong with this code, such that XOR works but images do not? I checked the input and output matrices and they're correct... Could somebody also explain when I should use ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE, and which parameter values I should use for setActivationFunction and setTrainMethod?
Thanks!
The problem was the backprop weight scale parameter - it was too big, so the ANN couldn't learn more difficult things.
I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001); and the hidden layer size to 100 (to speed up the learning) - now it's working!
I am processing video with OpenCV, but at the same time I need to play the audio and have simple control over it, such as the volume or the current frame number.
I think I should create a parallel process with ffmpeg, but I don't know how to do so. Can you explain what to do?
Or do you know another solution?
I think ffmpeg should be used to play audio and SDL for video in this case.
After opening the file with OpenCV and processing the frame, you can use OpenCV -> SDL to display it while retrieving the audio frames through ffmpeg and playing them with SDL.
Here is a nice collection of ffmpeg/SDL tutorials!
I also found a nice post that shows how to capture frames from a video file using ffmpeg, store them in an OpenCV cv::Mat and display the result in an OpenCV window. But this way you can't play audio, since OpenCV doesn't deal with that.
You might be interested in reading this post as well: How to avoid a growing delay with ffmpeg between sound and raw video data ?
EDIT:
I spent the last 4hrs coding a prototype to demonstrate how it's done. This demo reads video frames through OpenCV (so you can process them) and audio through ffmpeg, and SDL is used to play both! There are 2 limitations in this demo you must be aware of: 1 - it assumes you are working with an OpenCV image packed as BGR (24 bits), and 2 - audio and video are not synchronized! Yes, I have left some work for you to do (yeeeey). But don't panic, page 6 has some ideas!
It's important to sync audio and video because you will be doing some processing on the frames, and that will certainly make the video and audio go out of sync real fast since they are being played independently of each other.
The ffmpeg tutorials I suggested above are very very important to understand the code, a lot of code from this demo came from there. They show how to deal with SDL, and how to read packets of audio/video streams.
#include <highgui.h>
#include <cv.h>
extern "C"
{
#include <SDL.h>
#include <SDL_thread.h>
#include <avcodec.h>
#include <avformat.h>
}
#include <iostream>
#include <stdio.h>
//#include <malloc.h>
using namespace cv;
#define SDL_AUDIO_BUFFER_SIZE 1024
// Singly linked FIFO of AVPackets, guarded by an SDL mutex and paired with a
// condition variable that packet_queue_put() signals after appending.
struct PacketQueue
{
    AVPacketList *first_pkt; // head: next packet to hand out
    AVPacketList *last_pkt;  // tail: most recently appended packet
    int nb_packets;          // number of packets currently queued
    int size;                // sum of pkt.size over the queued packets
    SDL_mutex *mutex;
    SDL_cond *cond;
};
// Audio packets read by main() are queued here and consumed by
// audio_decode_frame().
PacketQueue audioq;
// Indices of the first audio / video stream found by setup_ffmpeg();
// -1 until one has been found.
int audioStream = -1;
int videoStream = -1;
// Non-zero asks packet_queue_get() and audio_decode_frame() to stop and
// return -1.
int quit = 0;
// SDL display surface, created on first use by show_frame().
SDL_Surface* screen = NULL;
// NOTE(review): show_frame() works on a local surface and does not assign
// this global.
SDL_Surface* surface = NULL;
// Demuxer context and the audio / video codec contexts, set by setup_ffmpeg().
AVFormatContext* pFormatCtx = NULL;
AVCodecContext* aCodecCtx = NULL;
AVCodecContext* pCodecCtx = NULL;
// Displays one frame in the SDL window.
//
// img: frame to show; assumed to be packed BGR, 24 bits per pixel.
// The SDL video mode is created lazily from the first frame's dimensions;
// the process exits with status 1 if that fails.
void show_frame(IplImage* img)
{
    if (!screen)
    {
        screen = SDL_SetVideoMode(img->width, img->height, 0, 0);
        if (!screen)
        {
            fprintf(stderr, "SDL: could not set video mode - exiting\n");
            exit(1);
        }
    }

    // Wrap the frame's pixel buffer in a temporary SDL surface (no pixel copy).
    // Assuming IplImage packed as BGR 24bits
    SDL_Surface* frame_surface = SDL_CreateRGBSurfaceFrom((void*)img->imageData,
        img->width,
        img->height,
        img->depth * img->nChannels,
        img->widthStep,
        0xff0000, 0x00ff00, 0x0000ff, 0
    );
    if (!frame_surface)
    {
        fprintf(stderr, "SDL: could not create a surface for the frame\n");
        return;
    }

    SDL_BlitSurface(frame_surface, 0, screen, 0);
    SDL_Flip(screen);

    // Release the wrapper; previously one surface leaked per displayed frame.
    // The pixel data itself stays owned by `img`.
    SDL_FreeSurface(frame_surface);
}
// Resets `q` to an empty queue and creates its mutex and condition variable.
void packet_queue_init(PacketQueue *q)
{
    // Zero every field first: null head/tail pointers and zero counters.
    memset(q, 0, sizeof(*q));

    q->mutex = SDL_CreateMutex();
    q->cond = SDL_CreateCond();
}
// Appends a copy of `pkt` to the tail of `q` and signals the queue's
// condition variable.
// Returns 0 on success, -1 if av_dup_packet() fails or the list node cannot
// be allocated.
int packet_queue_put(PacketQueue *q, AVPacket *pkt)
{
    if (av_dup_packet(pkt) < 0)
    {
        return -1;
    }

    // The node is allocated with malloc(); packet_queue_get() releases it
    // with free().
    AVPacketList *node = (AVPacketList*) malloc(sizeof(AVPacketList));
    if (node == NULL)
    {
        return -1;
    }
    node->pkt = *pkt;
    node->next = NULL;

    SDL_LockMutex(q->mutex);
    if (q->last_pkt)
    {
        q->last_pkt->next = node;
    }
    else
    {
        // Empty queue: the new node is also the head.
        q->first_pkt = node;
    }
    q->last_pkt = node;
    q->nb_packets += 1;
    q->size += node->pkt.size;
    SDL_CondSignal(q->cond);
    SDL_UnlockMutex(q->mutex);

    return 0;
}
// Removes the packet at the head of `q` and copies it into `*pkt`.
//
// block: when non-zero, wait on the queue's condition variable until a
//        packet is available; when zero, return immediately if it is empty.
// Returns 1 when a packet was delivered, 0 when the queue is empty and
// `block` is zero, and -1 when the global `quit` flag is set.
static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
AVPacketList *pkt1;
int ret;
SDL_LockMutex(q->mutex);
for (;;)
{
// `quit` is re-checked on every pass, including after each wake-up.
if( quit)
{
ret = -1;
break;
}
pkt1 = q->first_pkt;
if (pkt1)
{
// Unlink the head node and update the bookkeeping.
q->first_pkt = pkt1->next;
if (!q->first_pkt)
q->last_pkt = NULL;
q->nb_packets--;
q->size -= pkt1->pkt.size;
*pkt = pkt1->pkt;
//av_free(pkt1);
// The node was allocated with malloc() in packet_queue_put().
free(pkt1);
ret = 1;
break;
}
else if (!block)
{
ret = 0;
break;
}
else
{
// Releases the mutex while waiting; packet_queue_put() signals the condition.
SDL_CondWait(q->cond, q->mutex);
}
}
SDL_UnlockMutex(q->mutex);
return ret;
}
// Decodes audio from the global `audioq` into `audio_buf`.
//
// aCodecCtx: codec context passed to avcodec_decode_audio2().
// audio_buf: destination buffer for the decoded samples.
// buf_size:  capacity of `audio_buf` in bytes.
// Returns the number of bytes written to `audio_buf`, or -1 when `quit` is
// set or packet_queue_get() reports failure.
// The current packet and the read position inside it are kept in static
// variables, so a partially consumed packet is resumed on the next call.
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size)
{
static AVPacket pkt;
static uint8_t *audio_pkt_data = NULL;
static int audio_pkt_size = 0;
int len1, data_size;
for (;;)
{
// Drain whatever is left of the current packet first.
while (audio_pkt_size > 0)
{
data_size = buf_size;
len1 = avcodec_decode_audio2(aCodecCtx, (int16_t*)audio_buf, &data_size,
audio_pkt_data, audio_pkt_size);
if (len1 < 0)
{
/* if error, skip frame */
audio_pkt_size = 0;
break;
}
// Advance past the bytes the decoder consumed.
audio_pkt_data += len1;
audio_pkt_size -= len1;
if (data_size <= 0)
{
/* No data yet, get more frames */
continue;
}
/* We have data, return it and come back for more later */
return data_size;
}
// Current packet exhausted (or skipped): release it and block for the next one.
if (pkt.data)
av_free_packet(&pkt);
if (quit) return -1;
if (packet_queue_get(&audioq, &pkt, 1) < 0) return -1;
audio_pkt_data = pkt.data;
audio_pkt_size = pkt.size;
}
}
void audio_callback(void *userdata, Uint8 *stream, int len)
{
AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
int len1, audio_size;
static uint8_t audio_buf[(AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2];
static unsigned int audio_buf_size = 0;
static unsigned int audio_buf_index = 0;
while (len > 0)
{
if (audio_buf_index >= audio_buf_size)
{
/* We have already sent all our data; get more */
audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
if(audio_size < 0)
{
/* If error, output silence */
audio_buf_size = 1024; // arbitrary?
memset(audio_buf, 0, audio_buf_size);
}
else
{
audio_buf_size = audio_size;
}
audio_buf_index = 0;
}
len1 = audio_buf_size - audio_buf_index;
if (len1 > len)
len1 = len;
memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
len -= len1;
stream += len1;
audio_buf_index += len1;
}
}
// Opens `filename` with ffmpeg, locates the first video and audio streams,
// opens both decoders, and starts SDL audio playback fed by audio_callback().
//
// On success the globals pFormatCtx, videoStream, audioStream, aCodecCtx,
// pCodecCtx and audioq are initialised. Any failure prints a message to
// stderr and terminates the process with exit(-1).
void setup_ffmpeg(char* filename)
{
    if (av_open_input_file(&pFormatCtx, filename, NULL, 0, NULL) != 0)
    {
        fprintf(stderr, "FFmpeg failed to open file %s!\n", filename);
        exit(-1);
    }
    if (av_find_stream_info(pFormatCtx) < 0)
    {
        fprintf(stderr, "FFmpeg failed to retrieve stream info!\n");
        exit(-1);
    }
    // Dump information about file onto standard error
    dump_format(pFormatCtx, 0, filename, 0);

    // Find the first video stream and the first audio stream
    for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++)
    {
        if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO && videoStream < 0)
        {
            videoStream = static_cast<int>(i);
        }
        if (pFormatCtx->streams[i]->codec->codec_type == CODEC_TYPE_AUDIO && audioStream < 0)
        {
            audioStream = static_cast<int>(i);
        }
    }
    if (videoStream == -1)
    {
        fprintf(stderr, "No video stream found in %s!\n", filename);
        exit(-1);
    }
    if (audioStream == -1)
    {
        fprintf(stderr, "No audio stream found in %s!\n", filename);
        exit(-1);
    }

    // Get a pointer to the codec context for the audio stream
    aCodecCtx = pFormatCtx->streams[audioStream]->codec;

    // Set audio settings from codec info
    SDL_AudioSpec wanted_spec;
    wanted_spec.freq = aCodecCtx->sample_rate;
    wanted_spec.format = AUDIO_S16SYS;
    wanted_spec.channels = aCodecCtx->channels;
    wanted_spec.silence = 0;
    wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
    wanted_spec.callback = audio_callback;
    wanted_spec.userdata = aCodecCtx;
    SDL_AudioSpec spec;
    if (SDL_OpenAudio(&wanted_spec, &spec) < 0)
    {
        fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
        exit(-1);
    }

    AVCodec* aCodec = avcodec_find_decoder(aCodecCtx->codec_id);
    if (!aCodec)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1);
    }
    // Check the result just as for the video codec below; previously a failed
    // open went unnoticed and the callback decoded with an unopened codec.
    if (avcodec_open(aCodecCtx, aCodec) < 0)
    {
        fprintf(stderr, "Could not open audio codec!\n");
        exit(-1);
    }

    // The queue must exist before playback starts pulling from it.
    packet_queue_init(&audioq);
    SDL_PauseAudio(0);

    // Get a pointer to the codec context for the video stream
    pCodecCtx = pFormatCtx->streams[videoStream]->codec;
    // Find the decoder for the video stream
    AVCodec* pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
    if (pCodec == NULL)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1); // Codec not found
    }
    // Open codec
    if (avcodec_open(pCodecCtx, pCodec) < 0)
    {
        fprintf(stderr, "Unsupported codec!\n");
        exit(-1); // Could not open codec
    }
}
int main(int argc, char* argv[])
{
if (argc < 2)
{
std::cout << "Usage: " << argv[0] << " <video>" << std::endl;
return -1;
}
av_register_all();
// Init SDL
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER))
{
fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
return -1;
}
// Init ffmpeg and setup some SDL stuff related to Audio
setup_ffmpeg(argv[1]);
VideoCapture cap(argv[1]); // open the default camera
if (!cap.isOpened()) // check if we succeeded
{
std::cout << "Failed to load file!" << std::endl;
return -1;
}
AVPacket packet;
while (av_read_frame(pFormatCtx, &packet) >= 0)
{
if (packet.stream_index == videoStream)
{
// Actually this is were SYNC between audio/video would happen.
// Right now I assume that every VIDEO packet contains an entire video frame, and that's not true. A video frame can be made by multiple packets!
// But for the time being, assume 1 video frame == 1 video packet,
// so instead of reading the frame through ffmpeg, I read it through OpenCV.
Mat frame;
cap >> frame; // get a new frame from camera
// do some processing on the frame, either as a Mat or as IplImage.
// For educational purposes, applying a lame grayscale conversion
IplImage ipl_frame = frame;
for (int i = 0; i < ipl_frame.width * ipl_frame.height * ipl_frame.nChannels; i += ipl_frame.nChannels)
{
ipl_frame.imageData[i] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //B
ipl_frame.imageData[i+1] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //G
ipl_frame.imageData[i+2] = (ipl_frame.imageData[i] + ipl_frame.imageData[i+1] + ipl_frame.imageData[i+2])/3; //R
}
// Display it on SDL window
show_frame(&ipl_frame);
av_free_packet(&packet);
}
else if (packet.stream_index == audioStream)
{
packet_queue_put(&audioq, &packet);
}
else
{
av_free_packet(&packet);
}
SDL_Event event;
SDL_PollEvent(&event);
switch (event.type)
{
case SDL_QUIT:
SDL_FreeSurface(surface);
SDL_Quit();
break;
default:
break;
}
}
// the camera will be deinitialized automatically in VideoCapture destructor
// Close the codec
avcodec_close(pCodecCtx);
// Close the video file
av_close_input_file(pFormatCtx);
return 0;
}
On my Mac I compiled it with:
g++ ffmpeg_snd.cpp -o ffmpeg_snd -D_GNU_SOURCE=1 -D_THREAD_SAFE -I/usr/local/include/opencv -I/usr/local/include -I/usr/local/include/SDL -Wl,-framework,Cocoa -L/usr/local/lib -lopencv_core -lopencv_imgproc -lopencv_highgui -lopencv_ml -lopencv_video -lopencv_features2d -lopencv_calib3d -lopencv_objdetect -lopencv_contrib -lopencv_legacy -lopencv_flann -lSDLmain -lSDL -L/usr/local/lib -lavfilter -lavcodec -lavformat -I/usr/local/Cellar/ffmpeg/HEAD/include/libavcodec -I/usr/local/Cellar/ffmpeg/HEAD/include/libavformat