Related
I'm a total beginner. I have a C++ GUI project in Visual Studio. It's working fine.
I would like to launch a function (when pressing a button in the GUI) from an existing cpp file, opencvtesti.cpp, which I'm trying to include in this GUI project.
I added the existing file by right-clicking "Source Files" and adding "opencvtesti.cpp". This .cpp was also working fine before I combined it with this GUI project. I also created opencvtesti.h and moved the #includes from the .cpp into it.
My code looks like this, but something goes wrong. Are there some duplicated variables? I can't find them... I think I have done something wrong.
Errors:
Error LNK1169 one or more multiply defined symbols found Project5 C:\Users\....Project5.exe 1
Error LNK2005 "class Pylon::CPylonImage __cdecl SampleImageCreator::CreateMandelbrotFractal(enum Pylon::EPixelType,unsigned int,unsigned int)" (?CreateMandelbrotFractal@SampleImageCreator@@$$FYA?AVCPylonImage@Pylon@@W4EPixelType@3@II@Z) already defined in MyForm.obj Project5 C:\Users\....\source\repos\Project5\Project5\opencvtesti.obj 1
Error LNK2005 "class Pylon::CPylonImage __cdecl SampleImageCreator::CreateJuliaFractal(enum Pylon::EPixelType,unsigned int,unsigned int)" (?CreateJuliaFractal@SampleImageCreator@@$$FYA?AVCPylonImage@Pylon@@W4EPixelType@3@II@Z) already defined in MyForm.obj Project5 C:\Users\....\source\repos\Project5\Project5\opencvtesti.obj 1
Error LNK2005 "class Pylon::CPylonImage __cdecl SampleImageCreator::CreateMandelbrotFractal(enum Pylon::EPixelType,unsigned int,unsigned int)" (?CreateMandelbrotFractal@SampleImageCreator@@YA?AVCPylonImage@Pylon@@W4EPixelType@3@II@Z) already defined in MyForm.obj Project5 C:\Users\...\source\repos\Project5\Project5\opencvtesti.obj 1
Error LNK2005 "class Pylon::CPylonImage __cdecl SampleImageCreator::CreateJuliaFractal(enum Pylon::EPixelType,unsigned int,unsigned int)" (?CreateJuliaFractal@SampleImageCreator@@YA?AVCPylonImage@Pylon@@W4EPixelType@3@II@Z) already defined in MyForm.obj Project5 C:\Users\...\source\repos\Project5\Project5\opencvtesti.obj 1
MyForm.cpp:
#include "MyForm.h"
#include <iostream>
using namespace System;
using namespace System::Windows::Forms;
// Entry point of the Windows Forms application: enables visual styles,
// selects the text rendering default, then runs the application with MyForm
// as its main form.
[STAThreadAttribute]
void main(array<String^>^ args)
{
    Application::EnableVisualStyles();
    Application::SetCompatibleTextRenderingDefault(false);

    // Form instance declared with stack semantics; % yields a handle to it.
    Project5::MyForm mainForm;
    Application::Run(%mainForm);
}
MyForm.h:
#pragma once
#include <fstream>
#include <iostream>
#include "Python.h"
#include "opencvtesti.h"
namespace Project5 {
    using namespace System;
    using namespace System::ComponentModel;
    using namespace System::Collections;
    using namespace System::Windows::Forms;
    using namespace System::Data;
    using namespace System::Drawing;

    /// <summary>
    /// Main window of the GUI test. It hosts a single button whose click
    /// handler writes a marker file and runs the Python script "aukesko.py".
    /// </summary>
    public ref class MyForm : public System::Windows::Forms::Form
    {
    public:
        MyForm(void)
        {
            InitializeComponent();
            //
            //TODO: Add the constructor code here
            //
        }
    protected:
        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        ~MyForm()
        {
            if (components)
            {
                delete components;
            }
        }
    protected:
    private: System::Windows::Forms::Button^ button1;
    private:
        /// <summary>
        /// Required designer variable.
        /// </summary>
        System::ComponentModel::Container ^components;
#pragma region Windows Form Designer generated code
        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        void InitializeComponent(void)
        {
            this->button1 = (gcnew System::Windows::Forms::Button());
            this->SuspendLayout();
            //
            // button1
            //
            this->button1->Location = System::Drawing::Point(13, 67);
            this->button1->Name = L"button1";
            this->button1->Size = System::Drawing::Size(75, 23);
            this->button1->TabIndex = 2;
            this->button1->Text = L"Press this";
            this->button1->UseVisualStyleBackColor = true;
            this->button1->Click += gcnew System::EventHandler(this, &MyForm::button1_Click);
            //
            // MyForm
            //
            this->AutoScaleDimensions = System::Drawing::SizeF(6, 13);
            this->AutoScaleMode = System::Windows::Forms::AutoScaleMode::Font;
            this->ClientSize = System::Drawing::Size(187, 180);
            this->Controls->Add(this->button1);
            this->Name = L"MyForm";
            this->Text = L"GUI test";
            this->ResumeLayout(false);
        }
#pragma endregion
        /// <summary>
        /// Click handler for button1: writes "aukeaako.txt", then runs the
        /// Python script "aukesko.py" through the embedded interpreter.
        /// </summary>
    private: System::Void button1_Click(System::Object^ sender, System::EventArgs^ e) {
        std::ofstream outfile("aukeaako.txt");
        outfile << "No aukeniko? :)" << std::endl;
        outfile.close();

        Py_Initialize();

        // Binary mode is what the Python C API documentation recommends on
        // Windows so that scripts with LF line endings are read correctly.
        FILE* fd = fopen("aukesko.py", "rb");
        if (fd == NULL)
        {
            // Never hand a null FILE* to the interpreter.
            MessageBox::Show(L"Could not open aukesko.py");
            return;
        }

        // closeit == 0: the interpreter leaves the file open, so it is closed
        // here by the same runtime that opened it.
        PyRun_SimpleFileEx(fd, "aukesko.py", 0);
        fclose(fd);
    }
    };
}
opencvtesti.h:
#ifndef OPENCVTESTI_H
#define OPENCVTESTI_H
#pragma once
#include <pylon/PylonIncludes.h>
#include <pylon/PylonGUI.h>
#include <SampleImageCreator.h>
#include <conio.h>
// Runs the pylon grab-and-display routine implemented in opencvtesti.cpp and
// returns its exit code (0 on success, 1 when a pylon exception was caught).
int opencvtesti();
#endif
opencvtesti.cpp:
#include "opencvtesti.h"
// Namespace for using pylon objects.
using namespace Pylon;
// Namespace for using cout.
using namespace std;
// Number of images to be grabbed.
static const uint32_t c_countOfImagesToGrab = 1000;
// Grabs up to c_countOfImagesToGrab images from the first camera found and
// shows each image split into cNumTilesX x cNumTilesY tiles, one pylon image
// window per tile.
//
// The two parameters are unused; they are kept so existing callers of this
// signature keep compiling.
//
// Returns 0 on success, 1 if a pylon GenericException was caught.
int opencvtesti(int /*argc*/, char* /*argv*/[])
{
    // The exit code of the sample application.
    int exitCode = 0;

    // Before using any pylon methods, the pylon runtime must be initialized.
    PylonInitialize();

    try
    {
        // Define constants.
        static const uint32_t cNumTilesX = 3;
        static const uint32_t cNumTilesY = 2;
        static const uint32_t cWindowBorderSizeX = 25;
        static const uint32_t cWindowBorderSizeY = 125;
        static const uint32_t cScreenStartX = 40;
        static const uint32_t cScreenStartY = 40;
        static const uint32_t cMaxIndex = 31;
        static const size_t cNumWindows = cNumTilesY * cNumTilesX;
        static const uint32_t cMaxWidth = 640;
        static const uint32_t cMaxHeight = 480;

        // Create an array of image windows.
        CPylonImageWindow imageWindows[cNumWindows];

        // Create an Instant Camera object.
        CInstantCamera camera(CTlFactory::GetInstance().CreateFirstDevice());

        // Print the model name of the camera.
        cout << "Using device " << camera.GetDeviceInfo().GetModelName() << endl;

        // Start the grab. Only display the latest image.
        camera.StartGrabbing(c_countOfImagesToGrab, GrabStrategy_LatestImageOnly);

        // This smart pointer will receive the grab result data.
        CGrabResultPtr ptrGrabResult;

        // Grab images and show the tiles of each image in separate image windows.
        while (camera.IsGrabbing())
        {
            // Wait for an image and then retrieve it. A timeout of 5000 ms is used.
            camera.RetrieveResult(5000, ptrGrabResult, TimeoutHandling_ThrowException);

            // If the image was grabbed successfully.
            if (ptrGrabResult->GrabSucceeded())
            {
                // This image object is used for splitting the grabbed image into tiles.
                CPylonImage image;

                // Attach the grab result to a pylon image.
                image.AttachGrabResultBuffer(ptrGrabResult);

                // Compute tile sizes.
                uint32_t imageTileWidth = min(image.GetWidth(), cMaxWidth) / cNumTilesX;
                uint32_t imageTileHeight = min(image.GetHeight(), cMaxHeight) / cNumTilesY;
                imageTileWidth -= imageTileWidth % GetPixelIncrementX(image.GetPixelType());
                // Align the HEIGHT to the vertical pixel increment (the original
                // mistakenly used the tile width here).
                imageTileHeight -= imageTileHeight % GetPixelIncrementY(image.GetPixelType());

                uint32_t windowTileWidth = imageTileWidth + cWindowBorderSizeX;
                uint32_t windowTileHeight = imageTileHeight + cWindowBorderSizeY;

                // Create and display the tiles of the grabbed image.
                for (uint32_t indexTileX = 0; indexTileX < cNumTilesX; ++indexTileX)
                {
                    for (uint32_t indexTileY = 0; indexTileY < cNumTilesY; ++indexTileY)
                    {
                        size_t arrayIndex = indexTileY * cNumTilesX + indexTileX;
                        bool windowCreated = false;

                        if (!imageWindows[arrayIndex].IsValid())
                        {
                            // Create the image window and position the image window as a tile on the screen.
                            // The Image Window stores the last size and position.
                            // The last Image Window indices are used here to avoid changing
                            // the settings of the windows used for other samples.
                            size_t windowIndex = cMaxIndex - arrayIndex;
                            imageWindows[arrayIndex].Create(windowIndex,
                                cScreenStartX + indexTileX * windowTileWidth,
                                cScreenStartY + indexTileY * windowTileHeight,
                                windowTileWidth,
                                windowTileHeight
                            );
                            windowCreated = true;
                        }

                        // Get the image area of interest (Image AOI) that includes the tile. This is a zero copy operation.
                        CPylonImage tile = image.GetAoi(indexTileX * imageTileWidth, indexTileY * imageTileHeight, imageTileWidth, imageTileHeight);

                        // Set the tile image.
                        imageWindows[arrayIndex].SetImage(tile);

                        // Show the image.
                        imageWindows[arrayIndex].Show();

                        if (windowCreated)
                        {
                            // Wait a little to show how the windows appear on the screen.
                            ::Sleep(200);
                        }
                    }
                }
            }
            else
            {
                throw RUNTIME_EXCEPTION("Error image grab failed: %hs", ptrGrabResult->GetErrorDescription().c_str());
            }
        }

        // Destroy the windows.
        for (size_t arrayIndex = 0; arrayIndex < cNumWindows; ++arrayIndex)
        {
            // Close() closes and destroys the window.
            imageWindows[arrayIndex].Close();

            // Wait a little to show how the windows are removed from the screen.
            ::Sleep(200);
        }
    }
    catch (const GenericException& e)
    {
        // Error handling.
        cerr << "An exception occurred." << endl
            << e.GetDescription() << endl;
        exitCode = 1;

        cerr << endl << "Press enter to exit." << endl;
        // Stop at end-of-input as well: without the stream check this loop
        // never terminates when stdin is closed or absent.
        while (cin && cin.get() != '\n') {}
    }

    // Releases all pylon resources.
    PylonTerminate();

    return exitCode;
}

// Parameterless overload matching the `int opencvtesti();` declaration in
// opencvtesti.h, so code that includes the header links against a definition.
int opencvtesti()
{
    return opencvtesti(0, nullptr);
}
Edit:
SampleImageCreator.h looks like this:
// Contains functions for creating sample images.
#ifndef INCLUDED_SAMPLEIMAGECREATOR_H_2792867
#define INCLUDED_SAMPLEIMAGECREATOR_H_2792867
#include <pylon/PylonImage.h>
#include <pylon/Pixel.h>
#include <pylon/ImageFormatConverter.h>
namespace SampleImageCreator
{
    // Both functions are defined in this header, so they are declared `inline`:
    // otherwise every translation unit that includes the header emits its own
    // external definition and the linker reports multiply defined symbols.

    // Renders a Julia-set fractal of width x height pixels as RGB8packed and
    // converts it to pixelType if that differs. Returns the resulting image.
    inline Pylon::CPylonImage CreateJuliaFractal( Pylon::EPixelType pixelType, uint32_t width, uint32_t height )
    {
        // Allow all the names in the namespace Pylon to be used without qualification.
        using namespace Pylon;

        // Define Constants.
        static const SRGB8Pixel palette[] =
        {
            {0, 28, 50}, {0, 42, 75}, {0, 56, 100}, {0, 70, 125}, {0, 84, 150},
            {0, 50, 0}, {0, 100, 0}, {0, 150, 0}, {0, 200, 0}, {0, 250, 0},
            {50, 0, 0}, {100, 0, 0}, {150, 0, 0}, {200, 0, 0}, {250, 0, 0}
        };
        uint32_t numColors = sizeof( palette ) / sizeof( palette[0] );

        const double cX = -0.735;
        const double cY = 0.11;
        const double cMaxX = 1.6;
        const double cMinX = -1.6;
        const double cMaxY = 1;
        const double cMinY = -1;
        const uint32_t cMaxIterations = 50;

        // Create image.
        CPylonImage juliaFractal( CPylonImage::Create( PixelType_RGB8packed, width, height ) );

        // Get the pointer to the first pixel.
        SRGB8Pixel* pCurrentPixel = (SRGB8Pixel*) juliaFractal.GetBuffer();

        // Compute the fractal.
        for (uint32_t pixelY = 0; pixelY < height; ++pixelY)
        {
            for (uint32_t pixelX = 0; pixelX < width; ++pixelX, ++pCurrentPixel)
            {
                // Map the pixel position into the complex-plane window.
                long double x = ((cMaxX - cMinX) / width) * pixelX + cMinX;
                long double y = cMaxY - pixelY * ((cMaxY - cMinY) / height);
                long double xd = 0;
                long double yd = 0;
                uint32_t i = 0;

                // Iterate z = z^2 + c until it escapes (|z|^2 > 4) or the limit is hit.
                for (; i < cMaxIterations; ++i)
                {
                    xd = x * x - y * y + cX;
                    yd = 2 * x * y + cY;
                    x = xd;
                    y = yd;
                    if ((x * x + y * y) > 4)
                    {
                        break;
                    }
                }

                // Points that never escaped get palette[0]; the others are
                // coloured by their escape iteration.
                if (i >= cMaxIterations)
                {
                    *pCurrentPixel = palette[0];
                }
                else
                {
                    *pCurrentPixel = palette[i % numColors];
                }
            }
        }

        // Convert the image to the target format if needed.
        if (juliaFractal.GetPixelType() != pixelType)
        {
            CImageFormatConverter converter;
            converter.OutputPixelFormat = pixelType;
            converter.OutputBitAlignment = OutputBitAlignment_MsbAligned;
            converter.Convert( juliaFractal, CPylonImage( juliaFractal ) );
        }

        // Return the image.
        return juliaFractal;
    }

    // Renders a Mandelbrot-set fractal of width x height pixels as RGB8packed
    // and converts it to pixelType if that differs. Returns the resulting image.
    inline Pylon::CPylonImage CreateMandelbrotFractal( Pylon::EPixelType pixelType, uint32_t width, uint32_t height )
    {
        // Allow all the names in the namespace Pylon to be used without qualification.
        using namespace Pylon;

        // Define constants.
        static const SRGB8Pixel palette[] =
        {
            {0, 28, 50}, {0, 42, 75}, {0, 56, 100}, {0, 70, 125}, {0, 84, 150},
            {0, 50, 0}, {0, 100, 0}, {0, 150, 0}, {0, 200, 0}, {0, 250, 0},
            {50, 0, 0}, {100, 0, 0}, {150, 0, 0}, {200, 0, 0}, {250, 0, 0}
        };
        uint32_t numColors = sizeof( palette ) / sizeof( palette[0] );

        const double cMaxX = 1.0;
        const double cMinX = -2.0;
        const double cMaxY = 1.2;
        const double cMinY = -1.2;
        const uint32_t cMaxIterations = 50;

        // Create image.
        CPylonImage mandelbrotFractal( CPylonImage::Create( PixelType_RGB8packed, width, height ) );

        // Get the pointer to the first pixel.
        SRGB8Pixel* pCurrentPixel = (SRGB8Pixel*) mandelbrotFractal.GetBuffer();

        // Compute the fractal.
        for (uint32_t pixelY = 0; pixelY < height; ++pixelY)
        {
            for (uint32_t pixelX = 0; pixelX < width; ++pixelX, ++pCurrentPixel)
            {
                // The pixel position, mapped into the complex plane, is the constant c.
                long double xStart = ((cMaxX - cMinX) / width) * pixelX + cMinX;
                long double yStart = cMaxY - pixelY * ((cMaxY - cMinY) / height);
                long double x = xStart;
                long double y = yStart;
                long double xd = 0;
                long double yd = 0;
                uint32_t i = 0;

                // Iterate z = z^2 + c until it escapes (|z|^2 > 4) or the limit is hit.
                for (; i < cMaxIterations; ++i)
                {
                    xd = x * x - y * y + xStart;
                    yd = 2 * x * y + yStart;
                    x = xd;
                    y = yd;
                    if ((x * x + y * y) > 4)
                    {
                        break;
                    }
                }

                // Points that never escaped get palette[0]; the others are
                // coloured by their escape iteration.
                if (i >= cMaxIterations)
                {
                    *pCurrentPixel = palette[0];
                }
                else
                {
                    *pCurrentPixel = palette[i % numColors];
                }
            }
        }

        // Convert the image to the target format if needed.
        if (mandelbrotFractal.GetPixelType() != pixelType)
        {
            CImageFormatConverter converter;
            converter.OutputPixelFormat = pixelType;
            converter.OutputBitAlignment = OutputBitAlignment_MsbAligned;
            converter.Convert( mandelbrotFractal, CPylonImage( mandelbrotFractal ) );
        }

        // Return the image.
        return mandelbrotFractal;
    }
}
#endif /* INCLUDED_SAMPLEIMAGECREATOR_H_2792867 */
If you provide a definition of your function within a header file, you must declare it with inline keyword:
namespace SampleImageCreator
{
inline Pylon::CPylonImage CreateJuliaFractal( Pylon::EPixelType pixelType, uint32_t width, uint32_t height )
{
Also, function templates do not need the inline keyword: like inline functions, they may be defined in every translation unit that uses them.
In your case you have an ODR (One Definition Rule) violation: your functions are defined in a header, so a definition is added to every translation unit that includes that header. Multiple definitions of a non-inline function are not allowed, because the linker would not know which definition to use.
If you declare the functions inline, however, the linker is allowed to see multiple identical definitions and keeps just one.
Try to wrap code in opencvtesti files in personal namespace to avoid problems with linking
Check all your headers, especially SampleImageCreator.h and make sure that there is no redefinition of the variables specified in the errors
It could be a problem with calling conventions, such as __cdecl. The chance is about 0.01%, but you can "play" with the settings in Properties -> C/C++ -> Other -> Call conventions.
I am trying to play a sin wave sound with SDL2 by using the audio queue on C++. In order to do that, I have created a class "Speaker", which has a pushBeep function that is called every time a beep needs to be generated. I have created an AudioDevice successfully, and it is also successful when I do the QueueAudio to the device (I have checked on the debugger) but I can't seem to get any sound out of it.
I have tried changing the way I generate the samples in numerous ways, also, as I said previously, I have checked that the device is properly opened and the QueueAudio returns 0 for success.
This is the class
// Opens the default audio playback device for mono 32-bit float samples at
// Speaker::SPEAKER_FREQUENCY and starts it, so that audio pushed with
// SDL_QueueAudio is played. On failure `dev` is 0 and the SDL error is printed.
Speaker::Speaker()
{
    // Value-initialize so the fields not set below are not left as garbage.
    SDL_AudioSpec ds = {};
    ds.freq = Speaker::SPEAKER_FREQUENCY;
    ds.format = AUDIO_F32;
    ds.channels = 1;
    ds.samples = 4096;
    ds.callback = NULL;   // no callback: audio is supplied through SDL_QueueAudio
    ds.userdata = this;

    SDL_AudioSpec os = {};

    // The last argument is an int bitmask of allowed spec changes, not a
    // pointer; 0 means the device must behave exactly as requested in `ds`.
    this->dev = SDL_OpenAudioDevice(NULL, 0, &ds, &os, 0);
    std::cout << "DEVICE: " << this->dev << std::endl;

    if (this->dev == 0)
    {
        std::cerr << "SDL_OpenAudioDevice failed: " << SDL_GetError() << std::endl;
        return;
    }

    // Devices are opened paused; unpause so queued audio starts playing.
    SDL_PauseAudioDevice(this->dev, 0);
}
// Closes the audio device that the constructor opened.
Speaker::~Speaker()
{
    SDL_CloseAudioDevice(dev);
}
// Queues a sine tone on the audio device.
//   freq     - tone frequency in Hz
//   duration - tone length in milliseconds
// Prints the SDL_QueueAudio return code (0 on success).
void Speaker::pushBeep(double freq, int duration) {
    const int nSamples = duration * Speaker::SPEAKER_FREQUENCY / 1000;
    if (nSamples <= 0) {
        return;   // nothing to queue; also avoids a negative array size
    }

    float* samples = new float[nSamples];

    // `v` accumulates freq per sample, so v / SPEAKER_FREQUENCY is the number
    // of elapsed cycles. A constant sample value (as before) is a DC signal
    // and produces no audible tone.
    // NOTE(review): for AUDIO_F32 the amplitude presumably has to stay within
    // [-1.0, 1.0] -- confirm the value of SPEAKER_AMPLITUDE.
    double v = 0.0;
    for (int idx = 0; idx < nSamples; idx++) {
        // Cast the whole double expression once to avoid the
        // double -> float conversion diagnostic.
        samples[idx] = static_cast<float>(
            Speaker::SPEAKER_AMPLITUDE * std::sin(v * 2 * M_PI / Speaker::SPEAKER_FREQUENCY));
        v += freq;
    }

    // SDL copies the queued data, so the buffer can be released right away.
    int a = SDL_QueueAudio(this->dev, samples, nSamples * sizeof(float));
    std::cout << a << std::endl;

    delete[] samples;
}
And this is how I call it
// Construct the speaker (its constructor opens and unpauses the audio device),
// then queue a 440 Hz beep lasting 1000 ms.
// NOTE(review): presumably `s` has to stay alive until the queued audio has
// been played, because its destructor closes the device -- confirm at the call site.
Speaker s;
s.pushBeep(440.0, 1000);
When I try with the sin wave generation code (commented) it gives me a "double to float loss of precision" error. When I use the fixed value (not commented) it does not give the error, but it still does not work.
I expect the program to output the sound.
There are a couple of things you are missing, or that you perhaps left out of your code snippet. You didn't specify an audio callback, so I'm fairly sure that when you called SDL_QueueAudio() it didn't know what to do with the data. You also weren't calling SDL_PauseAudioDevice() together with a delay, as the example below does.
#include <math.h>
#include <SDL2/SDL.h>
#include <SDL2/SDL_audio.h>
#include <iostream>
namespace AudioGen
{
const int AMPLITUDE = 1;
const int SAMPLE_RATE = 44000;
// Globals
float *in_buffer;
SDL_atomic_t callback_sample_pos;
SDL_Event event;
SDL_bool running = SDL_TRUE;
/**
* Structure for holding audio metadata such as frequency
*/
struct AudioData
{
int sampleNum;
float frequency;
};
void audio_callback(void *user_data, Uint8 *raw_buffer, int bytes)
{
float *buffer = (float*)raw_buffer;
AudioData &audio_data(*static_cast<AudioData*>(user_data));
int nSamples = bytes / 4; // For F32
std::cout << nSamples << std::endl;
for(int i = 0; i < nSamples; i++, audio_data.sampleNum++)
{
double time = (double)audio_data.sampleNum / (double)SAMPLE_RATE;
buffer[i] = (float)(AMPLITUDE * sin(2.0f * M_PI * audio_data.frequency * time));
}
}
int buffer_length;
void callback(void *user_data, Uint8 *raw_buffer, int bytes)
{
float *buffer = (float*)raw_buffer;
int nSamples = bytes/4;
auto local_sample_pos = SDL_AtomicGet(&callback_sample_pos);
for(int i = 0; i < nSamples; ++i)
{
// Stop running audio if all samples are finished playing
if(buffer_length == local_sample_pos)
{
running = SDL_FALSE;
break;
}
buffer[i] = in_buffer[local_sample_pos];
++local_sample_pos;
}
SDL_AtomicSet(&callback_sample_pos, local_sample_pos);
}
class Speaker
{
public:
Speaker()
{
SDL_Init(SDL_INIT_AUDIO);
SDL_AudioSpec ds;
ds.freq = SAMPLE_RATE;
ds.format = AUDIO_F32;
ds.channels = 1;
ds.samples = 4096;
ds.callback = callback;
ds.userdata = &ad; // metadata for frequency
SDL_AudioSpec os;
dev = SDL_OpenAudioDevice(NULL, 0, &ds, &os, SDL_AUDIO_ALLOW_FORMAT_CHANGE);
}
~Speaker()
{
SDL_CloseAudioDevice(dev);
SDL_Quit();
}
void pushBeep(float frequency, int duration)
{
ad.frequency = frequency; // set the frequency for the beep
SDL_PauseAudioDevice(dev, 0);
SDL_Delay(duration); // wait while sound is playing
SDL_PauseAudioDevice(dev, 1);
}
void pushBeep2(float frequency, int duration )
{
int nSamples = duration * SAMPLE_RATE / 1000;
in_buffer = new float[nSamples];
buffer_length = nSamples;
for (int idx = 0; idx < nSamples; idx++) {
double time = (double)idx / (double)SAMPLE_RATE;
in_buffer[idx] = (float)(AMPLITUDE * std::sin(2.0f * M_PI * frequency * time));
}
SDL_QueueAudio(dev, in_buffer, nSamples * sizeof(float));
SDL_PauseAudioDevice(dev, 0);
while(running){
while(SDL_PollEvent(&event)!=0);
}
delete[] in_buffer;
}
private:
SDL_AudioDeviceID dev;
AudioData ad;
int sampleNum = 0;
};
} // End of namespace AudioGen
int main(int argc, char *argv[])
{
AudioGen::Speaker speaker;
//speaker.pushBeep(440, 1000);
speaker.pushBeep2(440.0f, 1000);
return 0;
}
I have an image and want to encode it with OpenH264.
So far this is the code I derived from their wiki:
#include <fstream>
#include <iterator>
#include <iostream>
#include <codec_api.h> //standard api for openh264
//additional libaries used by sample code
#include <codec_app_def.h>
#include <codec_def.h>
#include <codec_ver.h>
#include <assert.h>
#include <vector>
#include <cstring>
int main()
{
//parameter values
int width = 1920;
int height = 1080;
int framerate = 60;
int bitrate = 5000000;
int total_num = 500; //what does this value do?
//end parameter values
//Read in the File from bmp
std::vector<char> buf; //to store the image information
std::basic_ifstream<char> file("/home/megamol/Git/h264_sample/build/test.bmp", std::ios::binary); //opens bitstream to source
buf = std::vector<char>((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); // reads in data to the vector
std::cout << "sizeof buf: " << buf.size() << std::endl;
//Step 1: set up Encoder
ISVCEncoder* encoder_; //declaration of encoder pointer
int rv = WelsCreateSVCEncoder (&encoder_);
//Step 2: initialize with basic parameter
SEncParamBase param;
memset(¶m, 0, sizeof (SEncParamBase));
param.iUsageType = EUsageType::SCREEN_CONTENT_REAL_TIME;
param.fMaxFrameRate = framerate;
param.iPicWidth = width;
param.iPicHeight = height;
param.iTargetBitrate = bitrate; //default value of example
encoder_->Initialize(¶m);
//Step 3: set video format
int videoFormat = videoFormatI420;
encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat);
//Step 4: encocode and store output bitstream
int frameSize = width * height * 3 / 2;
buf.resize(frameSize);
SFrameBSInfo info;
std::vector<char> compressedData;
memset (&info, 0, sizeof (SFrameBSInfo));
SSourcePicture pic;
memset (&pic, 0, sizeof (SSourcePicture));
pic.iPicWidth = width;
pic.iPicHeight = height;
pic.iColorFormat = videoFormatI420;
pic.iStride[0] = pic.iPicWidth;
pic.iStride[1] = pic.iStride[2] = pic.iPicWidth >> 1;
pic.pData[0] = reinterpret_cast<unsigned char*>(&buf[0]);
pic.pData[1] = pic.pData[0] + width * height;
pic.pData[2] = pic.pData[1] + (width * height >> 2);
//encodes the frame
rv = encoder_->EncodeFrame (&pic, &info); // encodes the Frame
//encoding done encoded Frame should be stored in &info
//begin decoding block
ISVCDecoder *pSvcDecoder;
unsigned char *pBuf= &info;
return 0;
}
I'm not entirely sure whether this is the correct usage of OpenH264 but I'm also not sure how to test it properly.
Now the code example is kind of poorly documented.
What is BufferedData buf; for example? I get that that's supposed to be the input but what is that type? Like how do I load my test.bmp as BufferedData? I don't think that I'm doing that correctly yet.
Another thing I'm pretty confused about is how do I access the output after the encoding? In the example it just says //output bitstream and nothing about saving this output anywhere. I thought the output was info like it says in the codec_api.h header file:
/**
* @brief Encode one frame
* @param kpSrcPic the pointer to the source luminance plane
* chrominance data:
* CbData = kpSrc + m_iMaxPicWidth * m_iMaxPicHeight;
* CrData = CbData + (m_iMaxPicWidth * m_iMaxPicHeight)/4;
* the application calling this interface needs to ensure the data validation between the location
* @param pBsInfo output bit stream
* @return 0 - success; otherwise -failed;
*/
virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo) = 0;
But apparently it only stores information about the output. I'm just really confused about all of this.
Based on https://github.com/cisco/openh264/blob/master/codec/console/enc/src/welsenc.cpp
#include <codec_api.h>
#include <cassert>
#include <cstring>
#include <vector>
#include <fstream>
#include <iostream>
//Tested with OpenCV 3.3
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
using namespace std;
using namespace cv;
int main()
{
ISVCEncoder *encoder_ = nullptr;
int rv = WelsCreateSVCEncoder (&encoder_);
assert (0==rv);
assert (encoder_ != nullptr);
int width = 640;
int height = 480;
int total_num = 100;
SEncParamBase param;
memset (¶m, 0, sizeof (SEncParamBase));
param.iUsageType = CAMERA_VIDEO_REAL_TIME;
param.fMaxFrameRate = 30;
param.iPicWidth = width;
param.iPicHeight = height;
param.iTargetBitrate = 5000000;
encoder_->Initialize (¶m);
Mat image = imread("test.jpg", IMREAD_COLOR );
Mat imageResized, imageYuv, imageYuvMini;
resize(image, imageResized, Size(width, height));
Mat imageYuvCh[3], imageYuvMiniCh[3];
cvtColor(imageResized, imageYuv, cv::COLOR_BGR2YUV);
split(imageYuv, imageYuvCh);
resize(imageYuv, imageYuvMini, Size(width/2, height/2));
split(imageYuvMini, imageYuvMiniCh);
SFrameBSInfo info;
memset (&info, 0, sizeof (SFrameBSInfo));
SSourcePicture pic;
memset (&pic, 0, sizeof (SSourcePicture));
pic.iPicWidth = width;
pic.iPicHeight = height;
pic.iColorFormat = videoFormatI420;
pic.iStride[0] = imageYuvCh[0].step;
pic.iStride[1] = imageYuvMiniCh[1].step;
pic.iStride[2] = imageYuvMiniCh[2].step;
pic.pData[0] = imageYuvCh[0].data;
pic.pData[1] = imageYuvMiniCh[1].data;
pic.pData[2] = imageYuvMiniCh[2].data;
ofstream outFi;
outFi.open ("test.264", ios::out | ios::binary);
for(int num = 0; num<total_num; num++)
{
//prepare input data
rv = encoder_->EncodeFrame (&pic, &info);
assert (rv == cmResultSuccess);
if (info.eFrameType != videoFrameTypeSkip /*&& cbk != nullptr*/)
{
//output bitstream
for (int iLayer=0; iLayer < info.iLayerNum; iLayer++)
{
SLayerBSInfo* pLayerBsInfo = &info.sLayerInfo[iLayer];
int iLayerSize = 0;
int iNalIdx = pLayerBsInfo->iNalCount - 1;
do {
iLayerSize += pLayerBsInfo->pNalLengthInByte[iNalIdx];
--iNalIdx;
} while (iNalIdx >= 0);
unsigned char *outBuf = pLayerBsInfo->pBsBuf;
outFi.write((char *)outBuf, iLayerSize);
}
}
}
if (encoder_) {
encoder_->Uninitialize();
WelsDestroySVCEncoder (encoder_);
}
outFi.close();
}
I try to use extern function in Halide. In my context, I want to do it on GPU.
I compile in AOT compilation with opencl statement.
Of course, opencl can still use CPU, so I use this:
halide_set_ocl_device_type("gpu");
For now, everything is scheduled with compute_root().
First question: if I use compute_root() together with the OpenCL GPU target, will my pipeline be computed on the device, with some host-to-device and device-to-host copies (CopyHtoD / DtoH)? Or will it run on the host buffers?
Second question, more related to the extern functions: we use some extern calls because some of our algorithms are not written in Halide.
Extern call:
foo.define_extern("cool_foo", args, Float(32), 4);
Extern retrieve:
extern "C" int cool_foo(buffer_t * in, int w, int h, int z, buffer_t * out){ .. }
But in the cool_foo function, my buffer_t structures are only loaded in host memory. The dev address is 0 (the default).
If I try to copy the memory before the algorithm:
halide_copy_to_dev(NULL, &in);
It does nothing.
If I make available only the device memory:
in.host = NULL;
My host pointers are null, but the device address is still 0.
(In my case dev_dirty is true and host_dirty is false.)
Any idea?
EDIT (To answer dsharlet)
Here's the structure of my code:
Parse data correctly on CPU. --> Sent the buffer on the GPU (Using halide_copy_to_dev...) --> Enter in Halide structure, read parameter and Add a boundary condition --> Go in my extern function -->...
I don't have a valid buffer_t in my extern function.
I schedule everything in compute_root(), but use HL_TARGET=host-opencl and set ocl to gpu.
Before entering in Halide, I can read my device address and it's ok.
Here's my code:
Before Halide, everything was CPU stuff(The pointer) and we transfert it to GPU
buffer_t k = { 0, (uint8_t *) k_full, {w_k, h_k, num_patch_x * num_patch_y * 3}, {1, w_k, w_k * h_k}, {0}, sizeof(float), };
#if defined( USEGPU )
// Transfer into GPU
halide_copy_to_dev(NULL, &k);
k.host_dirty = false;
k.dev_dirty = true;
//k.host = NULL; // It's k_full
#endif
halide_func(&k)
Inside Halide:
ImageParam ...
Func process;
process = halide_sub_func(k, width, height, k.channels());
process.compute_root();
...
// Builds the sub-pipeline that feeds k through an extern stage.
//   k       - input image parameter
//   width, height, patches - extents passed to repeat_image and to extern_func
// Returns a Func whose value is a two-element Tuple built from khat(0, ...)
// and khat(1, ...).
// x, y, pi and extern_func are defined elsewhere (not visible in this snippet).
Func halide_sub_func(ImageParam k, Expr width, Expr height, Expr patches)
{
Func kBounded("kBounded"), kShifted("kShifted"), khat("khat"), khat_tuple("khat_tuple");
// Boundary condition: constant 0.0f outside k, then repeated over the
// [0, width) x [0, height) x [0, patches) box.
kBounded = repeat_image(constant_exterior(k, 0.0f), 0, width, 0, height, 0, patches);
// Shift the read position by half of k's width and height.
kShifted(x, y, pi) = kBounded(x + k.width() / 2, y + k.height() / 2, pi);
// Extern stage; its result is indexed with one extra leading dimension below.
khat = extern_func(kShifted, width, height, patches);
khat_tuple(x, y, pi) = Tuple(khat(0, x, y, pi), khat(1, x, y, pi));
// Both the input of the extern stage and the extern stage itself are
// scheduled at root.
kShifted.compute_root();
khat.compute_root();
return khat_tuple;
}
Outside Halide(Extern function):
inline ....
{
//The buffer_t.dev and .host are 0 and null. I expect a null from the host, but the dev..
}
I found the solution to my problem.
I am posting the answer as code below. (Since I did a little offline test, the variable names don't match the question.)
Inside Halide: (Halide_func.cpp)
#include <Halide.h>
using namespace Halide;
using namespace Halide::BoundaryConditions;
Func thirdPartyFunction(ImageParam f);
Func fourthPartyFunction(ImageParam f);
Var x, y;
// AOT generator: builds one of four test pipelines over the 2-D float input
// "f" and compiles it to the files named "halide_func".
//   argv[1] == 1 : f + 1 with a GPU tile schedule
//   argv[1] == 2 : the same on a repeat_image-bounded, shifted input
//   argv[1] == 3 : thirdPartyFunction with a GPU tile schedule
//   otherwise    : fourthPartyFunction (the pipeline with the extern stage)
// Returns 1 when the test number is missing, 0 otherwise.
int main(int argc, char **argv) {
    // Input:
    ImageParam f( Float( 32 ), 2, "f" );
    printf(" Argument: %d\n",argc);

    // argv[1] is only valid when an argument was actually passed.
    if (argc < 2) {
        fprintf(stderr, "Usage: <program> <test number 1-4>\n");
        return 1;
    }
    int test = atoi(argv[1]);

    // Every variant takes the single image parameter f.
    std::vector<Argument> args( 1 );
    args[ 0 ] = f;

    if (test == 1) {
        Func f1;
        f1(x, y) = f(x, y) + 1.0f;
        f1.gpu_tile(x, 256);
        f1.compile_to_file("halide_func", args);
    } else if (test == 2) {
        Func fOutput("fOutput");
        Func fBounded("fBounded");
        fBounded = repeat_image(f, 0, f.width(), 0, f.height());
        fOutput(x, y) = fBounded(x-1, y) + 1.0f;
        fOutput.gpu_tile(x, 256);
        fOutput.compile_to_file("halide_func", args);
    } else if (test == 3) {
        Func h("hOut");
        h = thirdPartyFunction(f);
        h.gpu_tile(x, 256);
        h.compile_to_file("halide_func", args);
    } else {
        Func h("hOut");
        h = fourthPartyFunction(f);
        h.compile_to_file("halide_func", args);
    }
    return 0;
}
// Builds a two-stage pipeline on f and returns its last stage h:
//   g(x, y) = fBounded(x-1, y) + 1.0f
//   h(x, y) = g(x, y) - 1.0f
// where fBounded is f wrapped with repeat_image over its own width and height.
// No stage is scheduled here (the compute_root calls are commented out).
Func thirdPartyFunction(ImageParam f) {
Func g("g");
Func fBounded("fBounded");
Func h("h");
//Boundary
fBounded = repeat_image(f, 0, f.width(), 0, f.height());
g(x, y) = fBounded(x-1, y) + 1.0f;
h(x, y) = g(x, y) - 1.0f;
// Need to be comment out if you want to use GPU schedule.
//g.compute_root(); //At least one stage schedule alone
//h.compute_root();
return h;
}
// Builds a pipeline whose last stage h is the extern function "extern_func":
//   g(x, y) = fBounded(x-1, y) + 1.0f   (computed at root with a GPU tile schedule)
//   h       = extern_func(g, f.width(), f.height())
// Returns h, which is itself computed at root.
Func fourthPartyFunction(ImageParam f) {
Func fBounded("fBounded");
Func g("g");
Func h("h");
//Boundary
fBounded = repeat_image(f, 0, f.width(), 0, f.height());
// Preprocess
g(x, y) = fBounded(x-1, y) + 1.0f;
g.compute_root();
g.gpu_tile(x, y, 256, 1);
// Extern
// Arguments handed to the extern stage: the Func g plus f's width and height.
std::vector < ExternFuncArgument > args = { g, f.width(), f.height() };
// The extern output is declared as Int(16) with 3 dimensions.
// NOTE(review): g is a 2-D float Func -- confirm that this output type and
// dimensionality are what extern_func really produces.
h.define_extern("extern_func", args, Int(16), 3);
h.compute_root();
return h;
}
The external function: (external_func.h)
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cinttypes>
#include <cstring>
#include <fstream>
#include <map>
#include <vector>
#include <complex>
#include <chrono>
#include <iostream>
#include <clFFT.h> // All OpenCL I need are include.
using namespace std;
// Useful stuff.
void completeDetails2D(buffer_t buffer) {
// Read all elements:
std::cout << "Buffer information:" << std::endl;
std::cout << "Extent: " << buffer.extent[0] << ", " << buffer.extent[1] << std::endl;
std::cout << "Stride: " << buffer.stride[0] << ", " << buffer.stride[1] << std::endl;
std::cout << "Min: " << buffer.min[0] << ", " << buffer.min[1] << std::endl;
std::cout << "Elem size: " << buffer.elem_size << std::endl;
std::cout << "Host dirty: " << buffer.host_dirty << ", Dev dirty: " << buffer.dev_dirty << std::endl;
printf("Host pointer: %p, Dev pointer: %" PRIu64 "\n\n\n", buffer.host, buffer.dev);
}
extern cl_context _ZN6Halide7Runtime8Internal11weak_cl_ctxE;
extern cl_command_queue _ZN6Halide7Runtime8Internal9weak_cl_qE;
// Extern stage called by the Halide pipeline.
//   in            - input buffer
//   width, height - extents of the input requested when in->dev is 0
//   out           - output buffer
// Returns 0 on success, otherwise the failing OpenCL error code.
// Deliberately NOT inline: the compiled pipeline refers to this symbol by name.
extern "C" int extern_func(buffer_t * in, int width, int height, buffer_t * out)
{
    printf("In extern\n");
    completeDetails2D(*in);
    printf("Out extern\n");
    completeDetails2D(*out);

    if(in->dev == 0) {
        // Boundary stuff
        // NOTE(review): presumably this is Halide's bounds-query call; the
        // region required from the input is reported as the full
        // width x height image starting at (0, 0).
        in->min[0] = 0;
        in->min[1] = 0;
        in->extent[0] = width;
        in->extent[1] = height;
        return 0;
    }

    // Super awesome stuff on GPU
    // ...
    cl_context & ctx = _ZN6Halide7Runtime8Internal11weak_cl_ctxE; // Found by zougloub
    cl_command_queue & queue = _ZN6Halide7Runtime8Internal9weak_cl_qE; // Same
    printf("ctx: %p\n", ctx);
    printf("queue: %p\n", queue);

    cl_mem buffer_in = (cl_mem) in->dev;
    cl_mem buffer_out = (cl_mem) out->dev;

    // Just copying data from one buffer to another.
    // The size is derived from the arguments instead of the former hard-coded
    // 256*256*4 (identical for a 256x256 float input).
    // NOTE(review): assumes the device allocation behind `out` holds at least
    // this many bytes -- confirm against the output type of the extern stage.
    const size_t copyBytes = static_cast<size_t>(width) * static_cast<size_t>(height)
                           * static_cast<size_t>(in->elem_size);
    int err = clEnqueueCopyBuffer(queue, buffer_in, buffer_out, 0, 0, copyBytes, 0, NULL, NULL);
    printf("copy: %d\n", err);
    if (err != CL_SUCCESS) {
        return err; // report the failure to the pipeline instead of hiding it
    }

    err = clFinish(queue);
    printf("finish: %d\n\n", err);
    return err;
}
Finally, the non-Halide stuff: (Halide_test.cpp)
#include <halide_func.h>
#include <iostream>
#include <cinttypes>
#include <external_func.h>
// Extern function available inside the .o generated.
#include "HalideRuntime.h"
// Test driver: fills a 256x256 float image, uploads the input and output
// buffers to the device, runs halide_func on the device copies, downloads the
// result and prints its first ten values.
// Returns 0 on success, 1 on allocation or pipeline failure.
int main(int argc, char **argv) {
    // Init the kernel in GPU
    halide_set_ocl_device_type("gpu");

    // Create a buffer
    int width = 256;
    int height = 256;
    float * bufferHostIn = (float*) malloc(sizeof(float) * width * height);
    float * bufferHostOut = (float*) malloc(sizeof(float) * width * height);
    if (bufferHostIn == NULL || bufferHostOut == NULL) {
        fprintf(stderr, "out of memory\n");
        free(bufferHostIn);
        free(bufferHostOut);
        return 1;
    }

    for( int j = 0; j < height; ++j) {
        for( int i = 0; i < width; ++i) {
            bufferHostIn[i + j * width] = i+j;
        }
    }

    buffer_t bufferHalideIn = {0, (uint8_t *) bufferHostIn, {width, height}, {1, width, width * height}, {0, 0}, sizeof(float), true, false};
    buffer_t bufferHalideOut = {0, (uint8_t *) bufferHostOut, {width, height}, {1, width, width * height}, {0, 0}, sizeof(float), true, false};

    printf("IN\n");
    completeDetails2D(bufferHalideIn);
    printf("Data (host): ");
    for(int i = 0; i < 10; ++ i) {
        printf(" %f, ", bufferHostIn[i]);
    }
    printf("\n");

    printf("OUT\n");
    completeDetails2D(bufferHalideOut);

    // Send to GPU
    halide_copy_to_dev(NULL, &bufferHalideIn);
    halide_copy_to_dev(NULL, &bufferHalideOut);
    bufferHalideIn.host_dirty = false;
    bufferHalideIn.dev_dirty = true;
    bufferHalideOut.host_dirty = false;
    bufferHalideOut.dev_dirty = true;

    // TRICKS Halide to force the use of device.
    bufferHalideIn.host = NULL;
    bufferHalideOut.host = NULL;

    printf("IN After device\n");
    completeDetails2D(bufferHalideIn);

    // Halide function
    const int status = halide_func(&bufferHalideIn, &bufferHalideOut);

    // Get back to HOST
    bufferHalideIn.host = (uint8_t*)bufferHostIn;
    bufferHalideOut.host = (uint8_t*)bufferHostOut;

    int exitCode = 0;
    if (status != 0) {
        // Do not validate output that the pipeline never produced.
        fprintf(stderr, "halide_func failed: %d\n", status);
        exitCode = 1;
    } else {
        halide_copy_to_host(NULL, &bufferHalideOut);
        halide_copy_to_host(NULL, &bufferHalideIn);

        // Validation
        printf("\nOUT\n");
        completeDetails2D(bufferHalideOut);
        printf("Data (host): ");
        for(int i = 0; i < 10; ++ i) {
            printf(" %f, ", bufferHostOut[i]);
        }
        printf("\n");
    }

    // Free all
    free(bufferHostIn);
    free(bufferHostOut);
    return exitCode;
}
You can compile halide_func with test 4 to use all of the extern functionality.
Here's some of the conclusion I have. (Thanks to Zalman and zougloub)
compute_root() does not call the device if you use it alone.
We need gpu() or gpu_tile() in the code to call the GPU routine. (BTW, you need to put all your variables inside.)
gpu_tile less than your item will crash your stuff.
BoundaryCondition works well in GPU.
Before calling the extern function, the Func that goes in as an input needs to be:
f.compute_root(); f.gpu_tile(x,y,...,...); The compute_root in the middle stage is not implicit.
If the dev address is 0, it's normal, we resend the dimension and the extern will be called again.
The last stage has an implicit compute_root().
Are you aware of the bounds inference protocol for external array functions? This takes place when the host pointer of any buffer is NULL. (Briefly, in this case, you need to fill in the extent fields of the buffer_t structures that have NULL host pointers and do nothing else.) If you have already taken care of that, then ignore the above.
If you've tested that the host pointers are non-NULL for all buffers, then calling halide_copy_to_dev should work. You may need to explicitly set host_dirty to true beforehand to get the copy part to happen, depending where the buffer came from. (I would hope Halide gets this right and it is already set if the buffer came from a previous pipeline stage on the CPU. But if the buffer came from something outside Halide, the dirty bits are probably false from initialization. It seems halide_dev_malloc should set dev_dirty if it allocates device memory, and currently it does not.)
I would expect the dev field to be populated after a call to halide_copy_to_dev as the first thing it does is call halide_dev_malloc. You can try calling halide_dev_malloc explicitly yourself, setting host_dirty and then calling halide_copy_to_dev.
Is the previous stage on the host or on the GPU? If it is on the GPU, I'd expect the input buffer to be on the GPU as well.
This API needs work. I am in the middle of a first refactoring of some things that will help, but ultimately it will require changing the buffer_t structure. It is possible to get most things to work, but it requires modifying the host_dirty and dev_dirty bits as well as calling the halide_dev* APIs in just the right way. Thank you for your patience.
The instructions for libjpeg-turbo here describes the TurboJPEG API thus: "This API wraps libjpeg-turbo and provides an easy-to-use interface for compressing and decompressing JPEG images in memory". Great, but are there some solid examples of using this API available? Just looking to decompress a fairly vanilla jpeg in memory.
I've found a few bits such as https://github.com/erlyvideo/jpeg/blob/master/c_src/jpeg.c, which appears to be using the TurboJPEG API, but are there any more solid/varied examples?
The source for libjpeg-turbo is well documented, so that does help.
Ok, I know that you did already solve your problem, but as some people, just like me, could be searching some simple example I will share what I created.
It is an example, compressing and decompressing an RGB image. Otherwise I think that the API documentation of TurboJPEG is quite easy to understand!
Compression:
#include <turbojpeg.h>
const int JPEG_QUALITY = 75;
const int COLOR_COMPONENTS = 3;
int _width = 1920;
int _height = 1080;
long unsigned int _jpegSize = 0;
unsigned char* _compressedImage = NULL; //!< Memory is allocated by tjCompress2 if _jpegSize == 0
unsigned char buffer[_width*_height*COLOR_COMPONENTS]; //!< Contains the uncompressed image
tjhandle _jpegCompressor = tjInitCompress();
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &_jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
tjDestroy(_jpegCompressor);
//to free the memory allocated by TurboJPEG (either by tjAlloc(),
//or by the Compress/Decompress) after you are done working on it:
tjFree(&_compressedImage);
After that you have the compressed image in _compressedImage.
To decompress you have to do the following:
Decompression:
#include <turbojpeg.h>
long unsigned int _jpegSize; //!< _jpegSize from above
unsigned char* _compressedImage; //!< _compressedImage from above
int jpegSubsamp, width, height;
unsigned char buffer[width*height*COLOR_COMPONENTS]; //!< will contain the decompressed image
tjhandle _jpegDecompressor = tjInitDecompress();
tjDecompressHeader2(_jpegDecompressor, _compressedImage, _jpegSize, &width, &height, &jpegSubsamp);
tjDecompress2(_jpegDecompressor, _compressedImage, _jpegSize, buffer, width, 0/*pitch*/, height, TJPF_RGB, TJFLAG_FASTDCT);
tjDestroy(_jpegDecompressor);
Some random thoughts:
I just came back over this as I am writing my bachelor thesis, and I noticed that if you run the compression in a loop it is preferable to store the biggest size of the JPEG buffer to not have to allocate a new one every turn. Basically, instead of doing:
// Size 0 on entry: tjCompress2 is left to allocate the JPEG buffer itself.
long unsigned int _jpegSize = 0;
tjCompress2(
    _jpegCompressor,
    buffer, _width, 0, _height, TJPF_RGB,   // source image (pitch 0)
    &_compressedImage, &_jpegSize,          // destination buffer and its size
    TJSAMP_444, JPEG_QUALITY, TJFLAG_FASTDCT);
we would add an object variable, holding the size of the allocated memory long unsigned int _jpegBufferSize = 0; and before every compression round we would set the jpegSize back to that value:
long unsigned int jpegSize = _jpegBufferSize;
tjCompress2(_jpegCompressor, buffer, _width, 0, _height, TJPF_RGB,
&_compressedImage, &jpegSize, TJSAMP_444, JPEG_QUALITY,
TJFLAG_FASTDCT);
_jpegBufferSize = _jpegBufferSize >= jpegSize? _jpegBufferSize : jpegSize;
after the compression one would compare the memory size with the actual jpegSize and set it to the jpegSize if it is higher than the previous memory size.
I ended up using the code below as a working example for both JPEG encoding and decoding. It is the best example that I could find: it is self-contained, initializes a dummy image and outputs the encoded image to a local file.
The code below is NOT my own, credit goes to https://sourceforge.net/p/libjpeg-turbo/discussion/1086868/thread/e402d36f/#8722 . Posting it here again to help anyone who finds it difficult to get libjpeg-turbo working.
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <vector>
#include "turbojpeg.h"
using namespace std;
int main(void)
{
unsigned char *srcBuf; //passed in as a param containing pixel data in RGB pixel interleaved format
tjhandle handle = tjInitCompress();
if(handle == NULL)
{
const char *err = (const char *) tjGetErrorStr();
cerr << "TJ Error: " << err << " UNABLE TO INIT TJ Compressor Object\n";
return -1;
}
int jpegQual =92;
int width = 128;
int height = 128;
int nbands = 3;
int flags = 0;
unsigned char* jpegBuf = NULL;
int pitch = width * nbands;
int pixelFormat = TJPF_GRAY;
int jpegSubsamp = TJSAMP_GRAY;
if(nbands == 3)
{
pixelFormat = TJPF_RGB;
jpegSubsamp = TJSAMP_411;
}
unsigned long jpegSize = 0;
srcBuf = new unsigned char[width * height * nbands];
for(int j = 0; j < height; j++)
{
for(int i = 0; i < width; i++)
{
srcBuf[(j * width + i) * nbands + 0] = (i) % 256;
srcBuf[(j * width + i) * nbands + 1] = (j) % 256;
srcBuf[(j * width + i) * nbands + 2] = (j + i) % 256;
}
}
int tj_stat = tjCompress2( handle, srcBuf, width, pitch, height,
pixelFormat, &(jpegBuf), &jpegSize, jpegSubsamp, jpegQual, flags);
if(tj_stat != 0)
{
const char *err = (const char *) tjGetErrorStr();
cerr << "TurboJPEG Error: " << err << " UNABLE TO COMPRESS JPEG IMAGE\n";
tjDestroy(handle);
handle = NULL;
return -1;
}
FILE *file = fopen("out.jpg", "wb");
if (!file) {
cerr << "Could not open JPEG file: " << strerror(errno);
return -1;
}
if (fwrite(jpegBuf, jpegSize, 1, file) < 1) {
cerr << "Could not write JPEG file: " << strerror(errno);
return -1;
}
fclose(file);
//write out the compress date to the image file
//cleanup
int tjstat = tjDestroy(handle); //should deallocate data buffer
handle = 0;
}
In the end I used a combination of random code found on the internet (e.g. https://github.com/erlyvideo/jpeg/blob/master/c_src/jpeg.c) and the .c and header files for libjpeg-turbo, which are well documented.
This official API is a good information source as well.
Here's a fragment of code that I use to load JPEGs from memory. It may require a bit of fixing, because I extracted it from different files in my project. It will load both grayscale and RGB images (bpp will be set to either 1 or 3).
/// Decoded raster image as filled in by Image_LoadJpeg.
struct Image
{
    /// Number of components per pixel (taken from the decoder's output_components).
    int bpp;
    /// Width in pixels.
    int width;
    /// Height in pixels.
    int height;
    /// Pixel storage allocated with new[]; NULL when no image is loaded.
    unsigned char *data;
};
/// libjpeg error manager extended with a jump target for error recovery.
struct jerror_mgr
{
    /// Standard libjpeg error manager. jerror_exit casts a pointer to this
    /// member back to jerror_mgr*, so it has to stay the first member.
    jpeg_error_mgr base;
    /// Recovery point armed with setjmp and taken by jerror_exit.
    jmp_buf jmp;
};
/// error_exit handler: jumps back to the setjmp point stored in the
/// jerror_mgr that is installed as the object's error manager.
METHODDEF(void) jerror_exit(j_common_ptr jinfo)
{
    // jinfo->err points at the `base` member of a jerror_mgr, so the pointer
    // can be reinterpreted as a pointer to the enclosing structure.
    longjmp(reinterpret_cast<jerror_mgr*>(jinfo->err)->jmp, 1);
}
/// output_message handler that discards the message.
METHODDEF(void) joutput_message(j_common_ptr /*jinfo*/)
{
    // Intentionally empty: nothing is printed.
}
/// Decode a JPEG held in memory into a newly allocated pixel buffer.
///
/// Only images with one or three components are accepted. On success
/// `image->data` points to a new[]-allocated buffer of
/// width * height * bpp bytes that the caller must release with delete[].
///
/// @param image    Receives bpp, width, height and data. On failure `data` is
///                 NULL and the other fields are left untouched.
/// @param img_data Pointer to the compressed JPEG bytes.
/// @param img_size Number of bytes at `img_data`.
/// @return true if the image was decoded, false on any error.
bool Image_LoadJpeg(Image* image, unsigned char* img_data, unsigned int img_size)
{
    jpeg_decompress_struct jinfo;
    jerror_mgr jerr;

    image->data = NULL;

    jinfo.err = jpeg_std_error(&jerr.base);
    jerr.base.error_exit = jerror_exit;
    jerr.base.output_message = joutput_message;

    // Arm the recovery point before the first libjpeg call that can report an
    // error (jpeg_create_decompress included); otherwise jerror_exit would
    // longjmp through an uninitialised jmp_buf.
    if (setjmp(jerr.jmp)) goto bail;

    jpeg_create_decompress(&jinfo);
    jpeg_mem_src(&jinfo, img_data, img_size);
    if (jpeg_read_header(&jinfo, TRUE) != JPEG_HEADER_OK) goto bail;

    jinfo.dct_method = JDCT_FLOAT; // change this to JDCT_ISLOW on Android/iOS
    if (!jpeg_start_decompress(&jinfo)) goto bail;
    if (jinfo.num_components != 1 && jinfo.num_components != 3) goto bail;

    // The size is computed in size_t so that large images do not wrap around
    // in 32-bit arithmetic. No named local is introduced here because the
    // gotos above must not jump over an initialisation.
    image->data = new (std::nothrow) unsigned char [static_cast<size_t>(jinfo.output_width) * jinfo.output_height * jinfo.output_components];
    if (!image->data) goto bail;

    {
        // Read one scanline at a time, advancing by one row of output each time.
        JSAMPROW ptr = image->data;
        while (jinfo.output_scanline < jinfo.output_height)
        {
            if (jpeg_read_scanlines(&jinfo, &ptr, 1) != 1) goto bail;
            ptr += jinfo.output_width * jinfo.output_components;
        }
    }

    if (!jpeg_finish_decompress(&jinfo)) goto bail;

    image->bpp = jinfo.output_components;
    image->width = jinfo.output_width;
    image->height = jinfo.output_height;

    jpeg_destroy_decompress(&jinfo);
    return true;

bail:
    jpeg_destroy_decompress(&jinfo);
    // Release any partially filled buffer (delete[] on NULL is a no-op) and
    // clear the pointer so the caller never sees freed memory.
    delete [] image->data;
    image->data = NULL;
    return false;
}