I am trying to create a bot to recognize text in an inactive window. To do this, I capture each frame using Bitmap. And I want Tesseract to scan the text (if there is one) on this frame and display it on the screen.
#include <iostream>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <Windows.h>
#include <string>
#include <tesseract/capi.h>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
using namespace cv;
Mat getMat(HWND hwnd) {
HDC deviceContext = GetDC(hwnd);
HDC memoryDeviceContext = CreateCompatibleDC(deviceContext);
RECT windowRect;
GetClientRect(hwnd, &windowRect);
int height = 500; /// windowRect.bottom windowRect.right
int width = 500;
HBITMAP bitmap = CreateCompatibleBitmap(deviceContext, width, height);
SelectObject(memoryDeviceContext, bitmap);
// Copy data into bitmap
BitBlt(memoryDeviceContext, 0, 0, width, height, deviceContext, 750, 359, SRCCOPY);
// Spesify format by using bitmapinfoheader
BITMAPINFOHEADER bi;
bi.biSize = sizeof(BITMAPINFOHEADER);
bi.biWidth = width;
bi.biHeight = -height;
bi.biPlanes = 1;
bi.biBitCount = 32;
bi.biCompression = BI_RGB;
bi.biSizeImage = 0; // No compression
bi.biXPelsPerMeter = 1;
bi.biYPelsPerMeter = 2;
bi.biClrUsed = 3;
bi.biClrImportant = 4;
Mat mat = Mat(height, width, CV_8UC4); // 8 bit unsigned ints 4 channels -> RGBA
// Transform data and store into mat.data
GetDIBits(memoryDeviceContext, bitmap, 0, height, mat.data, (BITMAPINFO*)&bi, DIB_RGB_COLORS);
// Clean up
DeleteObject(bitmap);
DeleteDC(memoryDeviceContext);
ReleaseDC(hwnd, deviceContext);
return mat;
}
int main() {
LPCWSTR windowTitle = L"Discord";
HWND hwnd = FindWindow(NULL, windowTitle);
/// String outText, imPath = argv[1];
/// tesseract::TessBaseAPI* temp = new tesseract::TessBaseAPI();
std::cout << "Start" << "\n";
while (true) {
Mat temp = getMat(hwnd);
cv::imshow("output", temp);
cv::waitKey(2);
}
std::cout << "Done\n";
return 1;
}
My attempts were unsuccessful:
while (true) {
Mat temp = getMat(hwnd);
cv::imshow("output", temp);
cv::waitKey(2);
String outText, imPath = argv[1];
tesseract::TessBaseAPI* temp = new tesseract::TessBaseAPI();
cv::Mat img = cv::imread((const unsigned char*)temp);
ocr->Init(NULL, "eng", tesseract::OEM_LSTM_ONLY);
ocr->SetPageSegMode(tesseract::PSM_AUTO);
ocr->SetImage(img);
outText = String(ocr->GetUTF8Text());
}
Another attempt
while (true) {
Mat temp = getMat(hwnd);
cv::imshow("output", temp);
cv::waitKey(2);
tesseract::TessBaseAPI* ocr = new tesseract::TessBaseAPI();
char* outText;
ocr->SetPageSegMode(tesseract::PSM_AUTO);
ocr->Init(NULL, "eng", tesseract::OEM_LSTM_ONLY);
ocr->SetImage(temp, 0, 0, 0, 0);
outText = ocr->GetUTF8Text();
ocr->End();
delete ocr;
return outText;
}
I just want every frame from Bitmap to be scanned for the presence of text. And this text was displayed. I will be glad of any help. Thanks
Just google for it. e.g. blogpost OpenCV and tesseract or example in tesseract doc.
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 10 months ago.
Improve this question
**Requirements: **
(1) Build OpenCV with Cuda and compile in C++
(2) Version = OpenCV Latest Version
(3) Build and Compile OpenCV Link: https://techawarey.com/programming/install-opencv-c-c-in-ubuntu-18-04-lts-step-by-step-guide/#Summary
(4) Library samples_utility is here: https://github.com/opencv/opencv_contrib/blob/master/modules/tracking/samples/samples_utility.hpp
(4) Compile Program Command: g++ test.cpp -o testoutput -std=c++11 'pkg-config --cflags --libs opencv'
(5) Run Program Command: ./testoutput
Code is working fine but not accurate
Step: 1
Read Frame from Camera
Select ROI(Region of Interest)
After that start KCF tracker with Sobal Features Extractor
Tracking the selected object.
Step: 2
Failure detect
After that call template matching function called MatchingMethod()
Run template matching
Get x, y value from template matching
After that reinitialize KCF tracker with Sobal Features Extractor.
This code is fine for still object when the object is moving the tracker false detection. I want to improve accuracy and reduce false detection.
#include <opencv2/core/utility.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
#include <iostream>
#include <cstring>
#include <unistd.h>
#include "sample_utility.hpp"
#include <thread>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/cudaarithm.hpp>
#include <iomanip>
#include <stdlib.h>
#include <unistd.h>
//////////////////////////////
using namespace cv;
using namespace std;
////////////////////////////
// Convert to string
#define SSTR( x ) static_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
/// Global Variables
struct Array {
int arr[2];
};
Mat img;
Mat templ;
Mat result_h;
bool flag = true;
int match_method = 5;
int i=0;
int max_Trackbar = 5;
float fps;
int seconds = 0;
// Function Headers
void delay();
// prototype of the functino for feature extractor
void sobelExtractor(const Mat img, const Rect roi, Mat& feat);
struct Array MatchingMethod( int, void* );
int main(int argc, char **argv)
{
TrackerKCF::Params param;
param.compress_feature = true;
param.compressed_size = 2;
param.desc_npca = 0;
param.desc_pca = TrackerKCF::GRAY | TrackerKCF::CN;
param.detect_thresh = 0.8;
// create a tracker object
Ptr<TrackerKCF> tracker = TrackerKCF::create(param);
tracker->setFeatureExtractor(sobelExtractor);
VideoCapture cap(0);
// Exit if video is not opened
if(!cap.isOpened())
{
//cout << "Could not read video file" << endl;
return 1;
}
// Read first frame
Mat frame;
bool ok = cap.read(frame);
// Define initial bounding box
//Rect bbox(x, y, w, h);
// Uncomment the line below to select a different bounding box
Rect bbox = selectROI(frame, false);
// Display bounding box.
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
///////////////////////////
int H, W, cW, cH;
// print(f"hight {H} , Width {W}")
H = display_height;
W = display_width;
// Center point of the screen
cW = int(W / 2);
cH = int(H / 2);
Point p1(cW, cH);
// get bounding box
Mat imCrop = frame(bbox);
imwrite("1.png", imCrop);
//quit if ROI was not selected
if(bbox.width==0 || bbox.height==0)
return 0;
//////////////////////////
//imshow("Tracking", frame);
tracker->init(frame, bbox);
while(true)
{
Mat frame;
cap >> frame;
circle(frame, p1, 3, Scalar(0,255,0), -1);
// Start timer
if(bbox.width!=0 || bbox.height!=0){
double timer = (double)getTickCount();
// Update the tracking result
/////////////////////////////////////
bool ok = tracker->update(frame, bbox);
//////////////////////////////////////
//ok, bbox = tracker->update(frame);
// Calculate Frames per second (FPS)
fps = getTickFrequency() / ((double)getTickCount() - timer);
if (ok)
{
// Tracking success : Draw the tracked object
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
///////////////////////////////////////////////////
int xxx, yyy, height, width;
xxx = bbox.x;
yyy = bbox.y;
height = bbox.height;
width = bbox.width;
int diffX, diffY;
float cxROI, cyROI;
cxROI = int((xxx + (xxx + width)) / 2);
cyROI = int((yyy + (yyy + height)) / 2);
diffX = cxROI - cW;
diffY = cH - cyROI;
//cout<<diffX<<"\n";
//cout<<diffY<<"\n";
Point p(cxROI, cyROI);
circle(frame, p, 3, Scalar(128,0,0), -1);
putText(frame, "FPS : " + SSTR(int(fps)), Point(100,20), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(50,170,50), 2);
putText(frame, "Difference From X-Axis: "+SSTR(int(diffX)), Point(100, 50), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
putText(frame, "Difference From Y-Axis: "+SSTR(int(diffY)), Point(100, 80), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
}
else
{
// Tracking failure detected.
putText(frame, "Tracking failure detected", Point(100,110), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0,0,255),2);
templ = imread( "1.png", 1 );
img=frame.clone();
struct Array a = MatchingMethod( 0, 0 );
cout<<"X: "<<a.arr[0]<<"\n";
cout<<"Y: "<<a.arr[1]<<"\n";
cout<<"Width: "<<w<<"\n";
cout<<"Height: "<<h<<"\n";
int xx, yy, ww, hh;
xx = a.arr[0];
yy = a.arr[1];
ww = w;
hh = h;
Rect bbox(xx, yy, ww, hh);
tracker.release();
tracker = TrackerKCF::create(param);
tracker->setFeatureExtractor(sobelExtractor);
tracker->init(frame, bbox);
//roi.x = MatchingMethod.
//waitKey(30);
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
////////////////////////////////////////////////////////////////////////
int diffX, diffY;
float cxROI, cyROI;
cxROI = int((xx + (xx + ww)) / 2);
cyROI = int((yy + (yy + hh)) / 2);
diffX = cxROI - cW;
diffY = cH - cyROI;
//cout<<diffX<<"\n";
//cout<<diffY<<"\n";
Point p(cxROI, cyROI);
circle(frame, p, 3, Scalar(128,0,0), -1);
///////////////////////////////////////////////////////////////////////////
}
}
else{
}
// Display frame.
imshow("Tracking", frame);
// Exit if ESC pressed.
int k = waitKey(1);
if(k == 27)
{
break;
}
}
return 0;
}
///////////////////////
void sobelExtractor(const Mat img, const Rect roi, Mat& feat){
Mat sobel[2];
Mat patch;
Rect region=roi;
// extract patch inside the image
if(roi.x<0){region.x=0;region.width+=roi.x;}
if(roi.y<0){region.y=0;region.height+=roi.y;}
if(roi.x+roi.width>img.cols)region.width=img.cols-roi.x;
if(roi.y+roi.height>img.rows)region.height=img.rows-roi.y;
if(region.width>img.cols)region.width=img.cols;
if(region.height>img.rows)region.height=img.rows;
patch=img(region).clone();
cvtColor(patch,patch, COLOR_BGR2GRAY);
// add some padding to compensate when the patch is outside image border
int addTop,addBottom, addLeft, addRight;
addTop=region.y-roi.y;
addBottom=(roi.height+roi.y>img.rows?roi.height+roi.y-img.rows:0);
addLeft=region.x-roi.x;
addRight=(roi.width+roi.x>img.cols?roi.width+roi.x-img.cols:0);
copyMakeBorder(patch,patch,addTop,addBottom,addLeft,addRight,BORDER_REPLICATE);
Sobel(patch, sobel[0], CV_32F,1,0,1);
Sobel(patch, sobel[1], CV_32F,0,1,1);
merge(sobel,2,feat);
feat=feat/255.0-0.5; // normalize to range -0.5 .. 0.5
}
////////////////////////////////////////////////////
struct Array MatchingMethod( int, void* )
{
/// Source image to display
Mat frame;
struct Array a;
/////////
for(int i=1; i<=4; i++){
img.copyTo( frame );
// break;
//}
//////////////////////////
cv::cuda::setDevice(0); // initialize CUDA
// convert from mat to gpumat
cv::cuda::GpuMat image_d(img);
cv::cuda::GpuMat templ_d(templ);
cv::cuda::GpuMat result;
// GPU -> NG
cv::Ptr<cv::cuda::TemplateMatching> alg =
cv::cuda::createTemplateMatching(image_d.type(), cv::TM_CCOEFF_NORMED);
alg->match(image_d, templ_d, result); // no return.
//cv::cuda::normalize(result, result, 0, 1, cv::NORM_MINMAX, -1);
double max_value, min_value;
cv::Point location;
cv::cuda::minMaxLoc(result, &min_value, &max_value, 0, &location);
/////////////////////////
double THRESHOLD = 3e-09; //0.3;
if( min_value <= THRESHOLD) {
//struct Array a;
a.arr[0] = location.x;
a.arr[1] = location.y;
cout<<"Hi"<<endl;
}
}
if(flag==true){
return a;
flag = false;
}
//}
}
Okey here is my answer to your question.
First of all, you are making a mistake by applying template matching when the tracker misses. Because template matching matches the feature if and only if it is totally same with the reference feature. So in your case, there will be shadows, light issues etc. in the environment, and you will never be able to get success results.
Secondly, if you delete the template matching scope, tracker will continue to search the target in the image effectively. Which changements I did in your code is listed below. With these changes, I got better results:
Delete the template matching scope
Decrease the detection threshold(param.detect_thresh) to 0.5
Create more tracker objects to catch the target: This change is the most important part. What I am suggesting is that create more and more tracker objects(in my case I did 4 tracker objects, but you can increase the number). Each tracker should get as input rectangle similar to ROI user chose but not the same coordinates. For example, if user chooses cv::Rect(200,200,400,400) then other tracker should get target as cv::Rect(180,190,400,400) , cv::Rect(220,180,400,400) ... and so on. Why you should do it because, tracker algorithm is feature based, so it will always try to get a similar features to the reference. By doing this, you will increase the feature references.
And here is my code to guide you:
#include <opencv2/core/utility.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
#include <iostream>
#include <cstring>
#include <unistd.h>
#include <thread>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/cudaarithm.hpp>
#include <iomanip>
#include <stdlib.h>
#include <unistd.h>
//////////////////////////////
using namespace cv;
using namespace std;
////////////////////////////
// Convert to string
#define SSTR( x ) static_cast< std::ostringstream & >( \
( std::ostringstream() << std::dec << x ) ).str()
/// Global Variables
struct Array {
int arr[2];
};
Mat img;
Mat templ;
Mat result_h;
bool flag = true;
int match_method = 5;
int i=0;
int max_Trackbar = 5;
float fps;
int seconds = 0;
// Function Headers
void delay();
// prototype of the functino for feature extractor
void sobelExtractor(const Mat img, const Rect roi, Mat& feat);
struct Array MatchingMethod( int, void* );
int main(int argc, char **argv)
{
TrackerKCF::Params param;
param.compress_feature = true;
param.compressed_size = 2;
param.desc_npca = 0;
param.desc_pca = TrackerKCF::GRAY | TrackerKCF::CN;
param.detect_thresh = 0.5;
// create a tracker object
Ptr<TrackerKCF> tracker = TrackerKCF::create(param);
tracker->setFeatureExtractor(sobelExtractor);
Ptr<TrackerKCF> tracker2 = TrackerKCF::create(param);
tracker2->setFeatureExtractor(sobelExtractor);
Ptr<TrackerKCF> tracker3 = TrackerKCF::create(param);
tracker3->setFeatureExtractor(sobelExtractor);
Ptr<TrackerKCF> tracker4 = TrackerKCF::create(param);
tracker4->setFeatureExtractor(sobelExtractor);
VideoCapture cap(0);
// Exit if video is not opened
if(!cap.isOpened())
{
//cout << "Could not read video file" << endl;
return 1;
}
cv::imshow("Tracking",0);
// Read first frame
Mat frame;
bool ok = cap.read(frame);
// Define initial bounding box
//Rect bbox(x, y, w, h);
// Uncomment the line below to select a different bounding box
Rect2d bbox = selectROI(frame, false);
// Display bounding box.
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
///////////////////////////
int H, W, cW, cH;
// print(f"hight {H} , Width {W}")
H = frame.rows;
W = frame.cols;
// Center point of the screen
cW = int(W / 2);
cH = int(H / 2);
Point p1(cW, cH);
//quit if ROI was not selected
if(bbox.width==0 || bbox.height==0)
return 0;
//////////////////////////
//imshow("Tracking", frame);
tracker->init(frame, bbox);
tracker2->init(frame, cv::Rect2d(bbox.x-10,bbox.y-10, bbox.width,bbox.height));
tracker3->init(frame, cv::Rect2d(bbox.x+10,bbox.y+10, bbox.width,bbox.height));
tracker4->init(frame, cv::Rect2d(bbox.x+20,bbox.y+20, bbox.width,bbox.height));
while(true)
{
Mat frame;
cap >> frame;
circle(frame, p1, 3, Scalar(0,255,0), -1);
// Start timer
if(bbox.width!=0 || bbox.height!=0){
double timer = (double)getTickCount();
// Update the tracking result
/////////////////////////////////////
bool ok = tracker->update(frame, bbox);
bool ok2 = tracker->update(frame, bbox);
bool ok3 = tracker->update(frame, bbox);
bool ok4 = tracker->update(frame, bbox);
//////////////////////////////////////
//ok, bbox = tracker->update(frame);
// Calculate Frames per second (FPS)
fps = getTickFrequency() / ((double)getTickCount() - timer);
if (ok || ok2 || ok3 || ok4)
{
// Tracking success : Draw the tracked object
rectangle(frame, bbox, Scalar( 255, 0, 0 ), 2, 1 );
///////////////////////////////////////////////////
int xxx, yyy, height, width;
xxx = bbox.x;
yyy = bbox.y;
height = bbox.height;
width = bbox.width;
int diffX, diffY;
float cxROI, cyROI;
cxROI = int((xxx + (xxx + width)) / 2);
cyROI = int((yyy + (yyy + height)) / 2);
diffX = cxROI - cW;
diffY = cH - cyROI;
//cout<<diffX<<"\n";
//cout<<diffY<<"\n";
Point p(cxROI, cyROI);
circle(frame, p, 3, Scalar(128,0,0), -1);
putText(frame, "FPS : " + SSTR(int(fps)), Point(100,20), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(50,170,50), 2);
putText(frame, "Difference From X-Axis: "+SSTR(int(diffX)), Point(100, 50), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
putText(frame, "Difference From Y-Axis: "+SSTR(int(diffY)), Point(100, 80), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(100, 200, 200), 2);
}
}
else{
}
// Display frame.
imshow("Tracking", frame);
// Exit if ESC pressed.
int k = waitKey(1);
if(k == 27)
{
break;
}
}
return 0;
}
///////////////////////
void sobelExtractor(const Mat img, const Rect roi, Mat& feat){
Mat sobel[2];
Mat patch;
Rect region=roi;
// extract patch inside the image
if(roi.x<0){region.x=0;region.width+=roi.x;}
if(roi.y<0){region.y=0;region.height+=roi.y;}
if(roi.x+roi.width>img.cols)region.width=img.cols-roi.x;
if(roi.y+roi.height>img.rows)region.height=img.rows-roi.y;
if(region.width>img.cols)region.width=img.cols;
if(region.height>img.rows)region.height=img.rows;
patch=img(region).clone();
cvtColor(patch,patch, COLOR_BGR2GRAY);
// add some padding to compensate when the patch is outside image border
int addTop,addBottom, addLeft, addRight;
addTop=region.y-roi.y;
addBottom=(roi.height+roi.y>img.rows?roi.height+roi.y-img.rows:0);
addLeft=region.x-roi.x;
addRight=(roi.width+roi.x>img.cols?roi.width+roi.x-img.cols:0);
copyMakeBorder(patch,patch,addTop,addBottom,addLeft,addRight,BORDER_REPLICATE);
Sobel(patch, sobel[0], CV_32F,1,0,1);
Sobel(patch, sobel[1], CV_32F,0,1,1);
merge(sobel,2,feat);
feat=feat/255.0-0.5; // normalize to range -0.5 .. 0.5
}
////////////////////////////////////////////////////
struct Array MatchingMethod( int, void* )
{
/// Source image to display
Mat frame;
struct Array a;
/////////
for(int i=1; i<=4; i++){
img.copyTo( frame );
// break;
//}
//////////////////////////
cv::cuda::setDevice(0); // initialize CUDA
// convert from mat to gpumat
cv::cuda::GpuMat image_d(img);
cv::cuda::GpuMat templ_d(templ);
cv::cuda::GpuMat result;
// GPU -> NG
cv::Ptr<cv::cuda::TemplateMatching> alg =
cv::cuda::createTemplateMatching(image_d.type(), cv::TM_CCOEFF_NORMED);
alg->match(image_d, templ_d, result); // no return.
//cv::cuda::normalize(result, result, 0, 1, cv::NORM_MINMAX, -1);
double max_value, min_value;
cv::Point location;
cv::cuda::minMaxLoc(result, &min_value, &max_value, 0, &location);
/////////////////////////
double THRESHOLD = 3e-09; //0.3;
if( min_value <= THRESHOLD) {
//struct Array a;
a.arr[0] = location.x;
a.arr[1] = location.y;
cout<<"Hi"<<endl;
}
}
if(flag==true){
return a;
flag = false;
}
//}
}
I am working on screen capture object recognition system.
My code:
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/dnn.hpp"
#include <opencv2/core/utils/trace.hpp>
#include <Windows.h>
#include <iostream>
using namespace std;
using namespace cv;
using namespace cv::dnn;
Mat hwnd2mat(HWND hwnd) {
HDC hwindowDC,hwindowCompatibleDC;
int height,width,srcheight,srcwidth;
HBITMAP hbwindow;
Mat src;
BITMAPINFOHEADER bi;
hwindowDC=GetDC(hwnd);
hwindowCompatibleDC=CreateCompatibleDC(hwindowDC);
SetStretchBltMode(hwindowCompatibleDC,COLORONCOLOR);
RECT windowsize; // get the height and width of the screen
GetClientRect(hwnd, &windowsize);
srcheight = windowsize.bottom;
srcwidth = windowsize.right;
height = windowsize.bottom/1; //change this to whatever size you want to resize to
width = windowsize.right/1;
src.create(height,width,CV_8UC4);
// create a bitmap
hbwindow = CreateCompatibleBitmap( hwindowDC, width, height);
bi.biSize = sizeof(BITMAPINFOHEADER); //http://msdn.microsoft.com/en-us/library/windows/window/dd183402%28v=vs.85%29.aspx
bi.biWidth = width;
bi.biHeight = -height; //this is the line that makes it draw upside down or not
bi.biPlanes = 1;
bi.biBitCount = 32;
bi.biCompression = BI_RGB;
bi.biSizeImage = 0;
bi.biXPelsPerMeter = 0;
bi.biYPelsPerMeter = 0;
bi.biClrUsed = 0;
bi.biClrImportant = 0;
// use the previously created device context with the bitmap
SelectObject(hwindowCompatibleDC, hbwindow);
// copy from the window device context to the bitmap device context
StretchBlt( hwindowCompatibleDC, 0,0, width, height, hwindowDC, 0, 0,srcwidth,srcheight, SRCCOPY); //change SRCCOPY to NOTSRCCOPY for wacky colors !
GetDIBits(hwindowCompatibleDC,hbwindow,0,height,src.data,(BITMAPINFO *)&bi,DIB_RGB_COLORS); //copy from hwindowCompatibleDC to hbwindow
// avoid memory leak
DeleteObject (hbwindow);
DeleteDC(hwindowCompatibleDC);
ReleaseDC(hwnd, hwindowDC);
return src;
}
string CLASSES[] = {"background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"};
float confidenceThreshold = 0.2;
int main(int argc, char **argv) {
CV_TRACE_FUNCTION();
String modelTxt = "resources/Caffe/MobileNetSSD_deploy.prototxt";
String modelBin = "resources/Caffe/MobileNetSSD_deploy.caffemodel";
Net net = readNetFromCaffe(modelTxt, modelBin);
if (net.empty()) {
std::cerr << "Can't load network by using the following files: " << std::endl;
std::cerr << "prototxt: " << modelTxt << std::endl;
std::cerr << "caffemodel: " << modelBin << std::endl;
exit(-1);
}
HWND hwndDesktop = GetDesktopWindow();
// namedWindow("output", WINDOW_NORMAL); // Zeby mozna bylo zmieniac rozmiar okna "real-time"
int key = 0;
// while( key != 27 ) {
Mat frame = hwnd2mat(hwndDesktop);
// resize(frame, frame, Size(800, 450));
// Mat frame = imread("resources/auto.png");
Mat img2;
resize(frame, img2, Size(300,300));
Mat inputBlob = blobFromImage(img2, 0.007843, Size(300,300), Scalar(127.5, 127.5, 127.5), false);
net.setInput(inputBlob, "data");
Mat detection = net.forward("detection_out");
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
ostringstream ss;
for (int i = 0; i < detectionMat.rows; i++) {
float confidence = detectionMat.at<float>(i, 2);
if (confidence > confidenceThreshold) {
int idx = static_cast<int>(detectionMat.at<float>(i, 1));
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);
Rect object((int)xLeftBottom, (int)yLeftBottom,
(int)(xRightTop - xLeftBottom),
(int)(yRightTop - yLeftBottom));
rectangle(frame, object, Scalar(0, 255, 0), 2);
cout << CLASSES[idx] << ": " << confidence << endl;
ss.str("");
ss << confidence;
String conf(ss.str());
String label = CLASSES[idx] + ": " + conf;
int baseLine = 0;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
putText(frame, label, Point(xLeftBottom, yLeftBottom-10),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,255,0));
}
}
imshow("output", frame);
// key = waitKey(60);
waitKey();
// }
}
When i use exteranl image everything works fine but when i use function called hwnd2mat it throws me exception:
Error: Assertion failed (inputs[0]->size[1] % blobs[0].size[1] == 0) in forward
I don't know what does it mean. I tried to resize image but it doesnt work.
I am using OpenCV-3.4.1 and MobileNetSSD model.
Thanks in advance.
Ok, i finally did it!
Code for taking screenshots:
Mat getScreenshot() {
HWND hwnd = GetDesktopWindow();
HDC hwindowDC,hwindowCompatibleDC;
int height,width,srcheight,srcwidth;
HBITMAP hbwindow;
Mat src;
BITMAPINFOHEADER bi;
hwindowDC=GetDC(hwnd);
hwindowCompatibleDC=CreateCompatibleDC(hwindowDC);
SetStretchBltMode(hwindowCompatibleDC,COLORONCOLOR);
RECT windowsize; // get the height and width of the screen
GetClientRect(hwnd, &windowsize);
srcheight = windowsize.bottom;
srcwidth = windowsize.right;
height = windowsize.bottom/1; //change this to whatever size you want to resize to
width = windowsize.right/1;
src.create(height,width,CV_8UC3); // This was problem
// create a bitmap
int iBits = GetDeviceCaps(hwindowDC, BITSPIXEL) * GetDeviceCaps(hwindowDC, PLANES);
WORD wBitCount;
if (iBits <= 1)
wBitCount = 1;
else if (iBits <= 4)
wBitCount = 4;
else if (iBits <= 8)
wBitCount = 8;
else
wBitCount = 24;
hbwindow = CreateCompatibleBitmap( hwindowDC, width, height);
bi.biSize = sizeof(BITMAPINFOHEADER); //http://msdn.microsoft.com/en-us/library/windows/window/dd183402%28v=vs.85%29.aspx
bi.biWidth = width;
bi.biHeight = -height; //this is the line that makes it draw upside down or not
bi.biPlanes = 1;
bi.biBitCount = wBitCount;
bi.biCompression = BI_RGB;
bi.biSizeImage = 0;
bi.biXPelsPerMeter = 0;
bi.biYPelsPerMeter = 0;
bi.biClrUsed = 256;
bi.biClrImportant = 0;
// use the previously created device context with the bitmap
SelectObject(hwindowCompatibleDC, hbwindow);
// copy from the window device context to the bitmap device context
StretchBlt( hwindowCompatibleDC, 0,0, width, height, hwindowDC, 0, 0,srcwidth,srcheight, SRCCOPY); //change SRCCOPY to NOTSRCCOPY for wacky colors !
GetDIBits(hwindowCompatibleDC,hbwindow,0,height,src.data,(BITMAPINFO *)&bi,DIB_RGB_COLORS); //copy from hwindowCompatibleDC to hbwindow
// avoid memory leak
DeleteObject (hbwindow);
DeleteDC(hwindowCompatibleDC);
ReleaseDC(hwnd, hwindowDC);
return src;
}
I am trying to display video for in a separate function for this i am using vector i push_back each frame in and then pass this vector to function but my function displays a single frame repetitively. My code is below. Please tell me what i am doing wrong.
// newproject.cpp : Defines the entry point for the console application.
#include "stdafx.h"
#include "highgui.h"
#include <stdio.h>
#include <cv.h>
#include <highgui.h>
#include <stdio.h>
#include <conio.h>
#include <opencv2/imgproc/imgproc.hpp> // Gaussian Blur
#include <opencv2/core/core.hpp> // Basic OpenCV structures (cv::Mat, Scalar)
#include <opencv2/highgui/highgui.hpp>
#include <iostream>
#include <conio.h>
using namespace cv;
using namespace std;
class frameprocessing{
Mat hsv_base;
MatND hist_base;
public:
void hsv_histogram(Mat Frame)
{
cvtColor( Frame, hsv_base, CV_BGR2HSV );
int h_bins = 50;
int s_bins = 32;
int histSize[] = { h_bins, s_bins };
float h_ranges[] = { 0, 256 };
float s_ranges[] = { 0, 180 };
const float* ranges[] = { h_ranges, s_ranges };
int channels[] = { 0, 1 };
calcHist( &hsv_base, 1, channels, Mat(), hist_base, 2, histSize, ranges, true, false );
}
};
class video{
Mat frame;
string filename;
double dWidth;
double dHeight;
public:
video()
{
}
video(string videoname)
{
vector<Mat> videoframes;
std::vector<Mat>::iterator it;
it = videoframes.begin();
filename = videoname;
VideoCapture capture(filename);
if( !capture.isOpened() )
{
exit(0);
}
dWidth = capture.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
dHeight = capture.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
frameprocessing obj;
for( ; ; )
{
capture >> frame;
if(frame.empty())
break;
obj.hsv_histogram(frame);
videoframes.push_back(frame);
}
displayvideo(videoframes);
// waitKey(0); // key press to close window
}
void writer()
{
Size frameSize(static_cast<int>(dWidth), static_cast<int>(dHeight));
VideoWriter oVideoWriter ("D:/MyVideo.avi", CV_FOURCC('P','I','M','1'), 20, frameSize, true); //initialize the VideoWriter object
if ( !oVideoWriter.isOpened() ) //if not initialize the VideoWriter successfully, exit the program
{
cout << "ERROR: Failed to write the video" << endl;
exit(0);
}
}
void displayvideo(vector<Mat> videoframe)
{
Mat tempframe;
while(!videoframe.empty()) //Show the image captured in the window and repeat
{
tempframe = videoframe.back();
imshow("video", tempframe);
videoframe.pop_back();
waitKey(20); // waits to display frame
}
// waitKey(0);
}
void displayframe(Mat frame)
{
imshow("video", frame);
waitKey(20); // waits to display frame
}
};
int _tmain(int argc, _TCHAR* argv[])
{
video obj("video.avi");
//obj.readvideo();
}
You need to copy or clone your frame to another Mat then push to your vector, change your code like
for( ; ; )
{
capture >> frame;
if(frame.empty())
break;
Mat tmp=frame.clone();
obj.hsv_histogram(tmp);
videoframes.push_back(tmp);
}
In your code your passing the same pointer allocated for frame to your vector every time, so you will get array of Mat pointing single(same) memory location in your vector. To know more about OpenCV Mat and memory allocation See documantation