create video from images with animation - c++

I have ten images to create a video with animation , like that
https://youtu.be/iu9uazolcC8
it use freeImage and FFmpeg.
MovieWriter writer(fileName, IMG_WIDTH, IMG_HEIGHT);
vector<char*> ls = {
"D:/createVideo/src/c/1.jpg",
"D:/createVideo/src/c/2.jpg",
"D:/createVideo/src/c/3.jpg",
"D:/createVideo/src/c/4.jpg",
"D:/createVideo/src/c/5.jpg",
"D:/createVideo/src/c/6.jpg",
"D:/createVideo/src/c/7.jpg",
"D:/createVideo/src/c/8.jpg",
"D:/createVideo/src/c/9.jpg",
"D:/createVideo/src/c/10.jpg",
};
FREE_IMAGE_FORMAT pic_type = FIF_JPEG;
FIBITMAP *fiCopy = NULL;
FIBITMAP *corpImg = NULL;
uint8_t *ut = NULL;
for (int i=0;i<10;i++)
{
char *src_pic_path = ls[i];
FIBITMAP *fi = FreeImage_Load(pic_type, src_pic_path);
if(fi==NULL){
continue;
}
int w = IMG_WIDTH;
int h = IMG_HEIGHT;
unsigned int index = 0;
while (w > IMG_MIN_W_SCALE || h > IMG_MIN_H_SCALE)
{
fiCopy = FreeImage_RescaleRect(fi, IMG_WIDTH, IMG_HEIGHT,0, 0, w * 2, h * 2);
if(fiCopy == NULL){
break;
}
corpImg = FreeImage_Rescale(fiCopy, IMG_WIDTH, IMG_HEIGHT);
if(corpImg == NULL){
break;
}
ut = FreeImage_GetBits(corpImg);
if(ut == NULL){
break;
}
w -= ONE_W_PIECE;
h -= ONE_H_PIECE;
writer.addFrame(ut);
FreeImage_Unload(corpImg);
FreeImage_Unload(fiCopy);
index++;
}
}
but it so slowly, how to optimization that code? or how to faster to create that video?
would you know about an option to do that?

Related

Encoding AAC with ffmpeg (c++)

I'm working on video encoding that will be used in a Unity plugin. I have made image encoding work, but now I'm at the audio. So trying only with the audio in to a mp4 file with AAC encoding. And I'm stuck. The resulting file does not contain anything. Also, from what I understand, AAC in ffmpeg only supports AV_SAMPLE_FMT_FLTP, that's why I use it. Here's my code:
Setup:
int initialize_encoding_audio(const char *filename)
{
int ret;
AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
avcodec_register_all();
av_register_all();
aud_codec = avcodec_find_encoder(aud_codec_id);
avcodec_register(aud_codec);
if (!aud_codec)
return COULD_NOT_FIND_AUD_CODEC;
aud_codec_context = avcodec_alloc_context3(aud_codec);
if (!aud_codec_context)
return CONTEXT_CREATION_ERROR;
aud_codec_context->bit_rate = 192000;
aud_codec_context->sample_rate = select_sample_rate(aud_codec);
aud_codec_context->sample_fmt = sample_fmt;
aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);
aud_codec_context->codec = aud_codec;
aud_codec_context->codec_id = aud_codec_id;
ret = avcodec_open2(aud_codec_context, aud_codec, NULL);
if (ret < 0)
return COULD_NOT_OPEN_AUD_CODEC;
outctx = avformat_alloc_context();
ret = avformat_alloc_output_context2(&outctx, NULL, "mp4", filename);
outctx->audio_codec = aud_codec;
outctx->audio_codec_id = aud_codec_id;
audio_st = avformat_new_stream(outctx, aud_codec);
audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
audio_st->codecpar->channels = aud_codec_context->channels;
audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
audio_st->codecpar->codec_id = aud_codec_id;
audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
audio_st->codecpar->format = sample_fmt;
audio_st->codecpar->frame_size = aud_codec_context->frame_size;
audio_st->codecpar->block_align = aud_codec_context->block_align;
audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;
outctx->streams = new AVStream*[1];
outctx->streams[0] = audio_st;
av_dump_format(outctx, 0, filename, 1);
if (!(outctx->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&outctx->pb, filename, AVIO_FLAG_WRITE) < 0)
return COULD_NOT_OPEN_FILE;
}
ret = avformat_write_header(outctx, NULL);
aud_frame = av_frame_alloc();
aud_frame->nb_samples = aud_codec_context->frame_size;
aud_frame->format = aud_codec_context->sample_fmt;
aud_frame->channel_layout = aud_codec_context->channel_layout;
int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
aud_codec_context->sample_fmt, 0);
av_frame_get_buffer(aud_frame, buffer_size / aud_codec_context->channels);
if (!aud_frame)
return COULD_NOT_ALLOCATE_FRAME;
aud_frame_counter = 0;
return 0;
}
Encoding:
int encode_audio_samples(uint8_t **aud_samples)
{
int ret;
int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
aud_codec_context->sample_fmt, 0);
for (size_t i = 0; i < buffer_size / aud_codec_context->channels; i++)
{
aud_frame->data[0][i] = aud_samples[0][i];
aud_frame->data[1][i] = aud_samples[1][i];
}
aud_frame->pts = aud_frame_counter++;
ret = avcodec_send_frame(aud_codec_context, aud_frame);
if (ret < 0)
return ERROR_ENCODING_SAMPLES_SEND;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);
pkt.stream_index = audio_st->index;
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == AVERROR(EAGAIN))
break;
else if (ret < 0)
return ERROR_ENCODING_SAMPLES_RECEIVE;
else
break;
}
return 0;
}
Finish encoding:
int finish_audio_encoding()
{
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
int ret = avcodec_send_frame(aud_codec_context, NULL);
if (ret < 0)
return ERROR_ENCODING_FRAME_SEND;
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
if (pkt.pts != AV_NOPTS_VALUE)
pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
if (pkt.dts != AV_NOPTS_VALUE)
pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == -AVERROR(AVERROR_EOF))
break;
else if (ret < 0)
return ERROR_ENCODING_FRAME_RECEIVE;
}
av_write_trailer(outctx);
}
Main:
void get_audio_frame(float_t *left_samples, float_t *right_samples, int frame_size, float* t, float* tincr, float* tincr2)
{
int j, i;
float v;
for (j = 0; j < frame_size; j++)
{
v = sin(*t);
*left_samples = v;
*right_samples = v;
left_samples++;
right_samples++;
*t += *tincr;
*tincr += *tincr2;
}
}
int main()
{
int frame_rate = 30; // this should be like 96000 / 1024 or somthing i guess?
float t, tincr, tincr2;
initialize_encoding_audio("audio.mp4");
int sec = 50;
float_t** aud_samples;
int src_samples_linesize;
int src_nb_samples = 1024;
int src_channels = 2;
int ret = av_samples_alloc_array_and_samples((uint8_t***)&aud_samples, &src_samples_linesize, src_channels,
src_nb_samples, AV_SAMPLE_FMT_FLTP, 0);
t = 0;
tincr = 0;
tincr2 = 0;
for (size_t i = 0; i < frame_rate * sec; i++)
{
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
encode_audio_samples((uint8_t **)aud_samples);
}
finish_audio_encoding();
//cleanup();
return 0;
}
I guess the first thing that I would want to make sure I got right is the synthetic sound generation and how I transfer that to the AVFrame. Are my conversions correct? But feel free to point out anything that might be wrong.
Thanks in advance!
Edit: the whole source: http://pastebin.com/jYtmkhek
Edit2: Added initialization of tincr & tincr2
Unless I'm missing something from the pastebin, you forgot to initialize a few variables. You're using garbage to generate your samples.
float t, tincr, tincr2;
[...]
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
You probably want to start with t=0 and increment by 2 * PI * frequency / sample rate for a sine wave.
Also, avformat_new_stream() creates the stream for you, don't do it with new.
Update:
I removed all the c++ stuff to test this. Here's the code that works: pastebin
And here's the resulting file: audio.mp4
ffmpeg -i audio.mp4 -filter_complex "showwaves=s=640x120:mode=line:colors=white" -frames:v 1 wave.jpg
Diff:
1,6d0
< #include "encoder.h"
< #include <algorithm>
< #include <iterator>
<
< extern "C"
< {
14a9
> #include <math.h>
40,41c35,36
< SwsContext *sws_ctx;
< SwrContext *swr_ctx = NULL;
---
> struct SwsContext *sws_ctx;
> struct SwrContext *swr_ctx = NULL;
76,77c71,72
< AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
< AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
---
> enum AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
> enum AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
125,126c120,121
< outctx->streams = new AVStream*[1];
< outctx->streams[0] = audio_st;
---
> //outctx->streams = new AVStream*[1];
> //outctx->streams[0] = audio_st;
182c177
< while (true)
---
> while (1)
216c211
< while (true)
---
> while (1)
291c286
< float t, tincr, tincr2;
---
> float t = 0, tincr = 2 * M_PI * 440.0 / 96000, tincr2 = 0;
317d311
< }

How to create grayscale video with avconv?

I've been looking for some time for a quick and reliable way of creating grayscale videos with avconv library from frames that are captured/created with OpenCV, in a C++ application.
I know that OpenCV have it's internal way of creating videos, however, it has some encoding performance and options limitations.
So, in this way, I would like to know which are the options for accomplishing this task?
For accomplishing this task, one of the options that I've found, which complies with my needs is the following class:
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/mathematics.h>
}
class AVConvVideoMaker
{
AVCodec* codec;
AVCodecContext* context;
AVFrame* picture;
int imgWidth, imgHeight, imgBufferSize;
FILE* outputFile;
int outputBufferSize;
uint8_t* outputBuffer;
int pictureBufferSize;
uint8_t* pictureBuffer;
int outputSize;
public:
AVConvVideoMaker(std::string outputFilePath, int imgWidth, int imgHeight)
: codec(NULL)
, context(NULL)
, picture(NULL)
, imgWidth(imgWidth)
, imgHeight(imgHeight)
, imgBufferSize(imgWidth*imgHeight)
, outputFile(fopen(outputFilePath.c_str(), "wb"))
, outputBufferSize(100000)
, outputBuffer(new uint8_t[outputBufferSize])
, pictureBufferSize((imgBufferSize*3)/2)
, pictureBuffer(new uint8_t[pictureBufferSize])
, outputSize(0)
{
avcodec_register_all();
this->codec = avcodec_find_encoder(CODEC_ID_MPEG1VIDEO);
if (!this->codec)
{
throw std::runtime_error("Codec not found");
}
this->context = avcodec_alloc_context3(codec);
this->picture = avcodec_alloc_frame();
this->context->bit_rate = 400000;
this->context->width = this->imgWidth;
this->context->height = this->imgHeight;
this->context->time_base = (AVRational){1, 25};
this->context->gop_size = 10;
this->context->max_b_frames = 1;
this->context->pix_fmt = PIX_FMT_YUV420P;
if(avcodec_open2(this->context, this->codec, NULL) < 0)
{
throw std::runtime_error("Could not open codec");
}
if(!this->outputFile)
{
throw std::runtime_error("Could not open video output file");
}
this->picture->data[0] = this->pictureBuffer;
this->picture->data[1] = this->picture->data[0] + imgBufferSize;
this->picture->data[2] = this->picture->data[1] + imgBufferSize / 4;
this->picture->linesize[0] = this->imgWidth;
this->picture->linesize[1] = this->imgWidth / 2;
this->picture->linesize[2] = this->imgWidth / 2;
}
void insertFrame(cv::Mat1b& img)
{
fflush(stdout);
/* Y */
for(int y=0; y < this->context->height; y++)
{
for(int x=0; x < this->context->width; x++)
{
this->picture->data[0][y * picture->linesize[0] + x] = img.at<uchar>(y,x);
}
}
/* Cb and Cr */
for(int y=0; y < this->context->height/2; y++)
{
for(int x=0; x < this->context->width/2; x++)
{
this->picture->data[1][y * this->picture->linesize[1] + x] = 128;
this->picture->data[2][y * this->picture->linesize[2] + x] = 128;
}
}
this->outputSize = avcodec_encode_video(this->context, this->outputBuffer, this->outputBufferSize, this->picture);
fwrite(this->outputBuffer, 1, outputSize, this->outputFile);
}
~AVConvVideoMaker()
{
this->outputBuffer[0] = 0x00;
this->outputBuffer[1] = 0x00;
this->outputBuffer[2] = 0x01;
this->outputBuffer[3] = 0xb7;
fwrite(this->outputBuffer, 1, 4, this->outputFile);
fclose(this->outputFile);
avcodec_close(this->context);
av_free(this->context);
av_free(this->picture);
delete outputBuffer;
delete pictureBuffer;
}
};
For compiling this in Ubuntu 16.04, you have to link with:
g++ --std=c++11 main.cpp -lopencv_core -lopencv_highgui -lavutil -lavcodec

SDL text box highlighting text

I am making a program in C++ that uses SDL to display text boxes. I've made the basic functions of my text boxes like typing text, limitations, copy/paste, etc. but I am stuck at make it able to highlight text.
Here's how I draw textboxes:
void draw_text_box(SDL_Surface *screan, char *message, struct textbox *text_box, int r, int g, int b)
{
int temp_width;
int temp_frame;
SDL_Surface *text;
SDL_Color textcolor = {0,0,0,0};
SDL_Rect temp_location, clr_box;
temp_width = text_box->location.w; //preserve the correct width
temp_location = text_box->location;
temp_location.y += 4;
temp_location.h = 12;
clr_box = text_box->location;
//colors!
textcolor.r = r;
textcolor.g = g;
textcolor.b = b;
//text = TTF_RenderText_Solid(ttf_font, text_box->prev_message, textcolor);
//SDL_BlitSurface( text, NULL, screan, &text_box->location);//<-changes "location" width to width of text
//clear
SDL_FillRect(screan, &clr_box, SDL_MapRGB(screen->format, 0, 0, 0));
//strcpy(text_box->prev_message, message);
//set the text
text = TTF_RenderText_Solid(ttf_font, message, textcolor);
if(!text) return;
//set the offset
temp_location.x = text_box->last_shift;
temp_location.y = 0;
temp_location.h = text->h;
if(text_box->cursor_loc - text_box->last_shift > temp_location.w)
temp_location.x = text_box->cursor_loc - temp_location.w;
if(text_box->cursor_loc - 3 < text_box->last_shift)
temp_location.x = text_box->cursor_loc - 3;
if(temp_location.x < 0) temp_location.x = 0;
text_box->last_shift = temp_location.x;
//draw
SDL_BlitSurface( text, &temp_location, screan, &text_box->location);//<-changes "location" width to width of text
text_box->location.w = temp_width;
}
Setting the text:
void set_text_box_text(SDL_Surface *screan, struct textbox *text_box, char *message)
{
if(text_box->message != message)
strcpy(text_box->message, message);
text_box->char_amt = strlen(message);
text_box->cursor = text_box->char_amt;
text_box->last_shift = 0;
if (screan) //if not NULL
{
if (text_box->selected)
select_text_box(screan,text_box);
else
unselect_text_box(screan,text_box);
}
else
{
;//text_box->prev_message[0] = '\0';
}
}
Select and deselect the textbox:
void select_text_box(SDL_Surface *screan, struct textbox *text_box)
{
char temp_message[405] = "";
char temp_left[405];
int i;
text_box->selected = 1;
if (text_box->pass_char == '\0')
{
if (text_box->cursor != 0)
{
left(temp_left, text_box->message, text_box->cursor);
strcat(temp_message, temp_left);
}
temp_message[text_box->cursor] = '|';
temp_message[text_box->cursor + 1] = '\0';
right(temp_left, text_box->message, text_box->cursor);
strcat(temp_message, temp_left);
}
else
{
for(i=0;i<text_box->cursor;i++)
temp_message[i] = text_box->pass_char;
temp_message[i] = '|';
for(i++;i<text_box->char_amt + 1;i++)
temp_message[i] = text_box->pass_char;
temp_message[i] = '\0';
}
draw_text_box(screan, temp_message, text_box, 250, 250, 250);
}
void unselect_text_box(SDL_Surface *screan, struct textbox *text_box)
{
char temp_message[405];
int i;
text_box->selected = 0;
if (text_box->pass_char == '\0')
draw_text_box(screan, text_box->message, text_box, 250, 250, 250);
else
{
for(i=0;i<text_box->char_amt;i++)
temp_message[i] = text_box->pass_char;
temp_message[i] = '\0';
draw_text_box(screan, temp_message, text_box, 250, 250, 250);
}
}

What is the fastest way to extract a thumbnail from an image?

I want to extract a thumbnail from an image, so I tried to use GDI+. What I did was to create a new small Bitmap to hold the thumbnail, and using Graphics::DrawImage() to draw the bitmap into it (and hence getting a thumbnail).
But when using this approach on a large number of images, it gets really slow. So is there a way to speed it up (by using only Windows API and not an external library).
Note: I know that some images store a thumbnail inside of them, but I am looking to extract a thumbnail from the ones that don't.
You can increase speed by loading thumbnail from EXIF (for JPEG and some other formats). But GetThumbnailImage's result isn't good. Here is another implementation:
Usage:
Gdiplus::Bitmap* thumb = GetThumbnail(filename, thumbWidth, thumbHeight);
Code:
PropertyItem* GetPropertyItemFromImage(Gdiplus::Image* bm, PROPID propId) {
UINT itemSize = bm->GetPropertyItemSize(propId);
if (!itemSize) {
return 0;
}
PropertyItem* item = reinterpret_cast<PropertyItem*>(malloc(itemSize));
if (bm->GetPropertyItem(propId, itemSize, item) != Ok) {
free(item);
return 0;
}
return item;
}
UINT VoidToInt(void* data, unsigned int size) {
switch (size) {
case 8:
return *reinterpret_cast<UINT*>(data);
case 4:
return *reinterpret_cast<DWORD*>(data);
case 2:
return *reinterpret_cast<WORD*>(data);
default:
return *reinterpret_cast<BYTE*>(data);
}
}
typedef IStream * (STDAPICALLTYPE *SHCreateMemStreamFuncType)(const BYTE *pInit, UINT cbInit);
SHCreateMemStreamFuncType SHCreateMemStreamFunc = 0;
bool IsVista() {
static int isVista = -1;
if (isVista == -1)
{
OSVERSIONINFO osver;
osver.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
isVista = (::GetVersionEx(&osver) &&
osver.dwPlatformId == VER_PLATFORM_WIN32_NT &&
(osver.dwMajorVersion >= 6));
}
return isVista != FALSE;
}
Gdiplus::Bitmap* BitmapFromMemory(BYTE* data, unsigned int imageSize) {
if (WinUtils::IsVista()) {
if (!SHCreateMemStreamFunc) {
HMODULE lib = LoadLibrary(_T("Shlwapi.dll"));
SHCreateMemStreamFunc = reinterpret_cast<SHCreateMemStreamFuncType>(GetProcAddress(lib, "SHCreateMemStream"));
if (!SHCreateMemStreamFunc) {
return 0;
}
}
Gdiplus::Bitmap * bitmap;
IStream* pStream = SHCreateMemStreamFunc(data, imageSize);
if (pStream) {
bitmap = Gdiplus::Bitmap::FromStream(pStream);
pStream->Release();
if (bitmap) {
if (bitmap->GetLastStatus() == Gdiplus::Ok) {
return bitmap;
}
delete bitmap;
}
}
} else {
HGLOBAL buffer = ::GlobalAlloc(GMEM_MOVEABLE, imageSize);
if (buffer) {
void* pBuffer = ::GlobalLock(buffer);
if (pBuffer) {
Gdiplus::Bitmap * bitmap;
CopyMemory(pBuffer, data, imageSize);
IStream* pStream = NULL;
if (::CreateStreamOnHGlobal(buffer, FALSE, &pStream) == S_OK) {
bitmap = Gdiplus::Bitmap::FromStream(pStream);
pStream->Release();
if (bitmap) {
if (bitmap->GetLastStatus() == Gdiplus::Ok) {
return bitmap;
}
delete bitmap;
}
}
::GlobalUnlock(buffer);
}
::GlobalFree(buffer);
}
}
return 0;
}
// Based on original method from http://danbystrom.se/2009/01/05/imagegetthumbnailimage-and-beyond/
Gdiplus::Bitmap* GetThumbnail(Gdiplus::Image* bm, int width, int height, Gdiplus::Size* realSize = 0) {
using namespace Gdiplus;
if (realSize) {
realSize->Width = bm->GetWidth();
realSize->Height = bm->GetHeight();
}
Size sz = AdaptProportionalSize(Size(width, height), Size(bm->GetWidth(), bm->GetHeight()));
Bitmap* res = new Bitmap(sz.Width, sz.Height);
Graphics gr(res);
gr.SetInterpolationMode(Gdiplus::InterpolationModeHighQualityBicubic);
UINT size = bm->GetPropertyItemSize(PropertyTagThumbnailData);
if (size) {
// Loading thumbnail from EXIF data (fast)
enum ThumbCompression { ThumbCompressionJPEG, ThumbCompressionRGB, ThumbCompressionYCbCr, ThumbCompressionUnknown }
compression = ThumbCompressionJPEG;
PropertyItem* thumbnailFormatItem = GetPropertyItemFromImage(bm, PropertyTagThumbnailFormat);
if (thumbnailFormatItem) {
UINT format = VoidToInt(thumbnailFormatItem->value, thumbnailFormatItem->length);
if (format == 0) {
compression = ThumbCompressionRGB;
} else if (format == 1) {
compression = ThumbCompressionJPEG;
} else {
compression = ThumbCompressionUnknown;
}
free(thumbnailFormatItem);
} else {
PropertyItem* compressionItem = GetPropertyItemFromImage(bm, PropertyTagThumbnailCompression);
if (compressionItem) {
WORD compressionTag = *reinterpret_cast<WORD*>(compressionItem->value);
if (compressionTag == 1) {
compression = ThumbCompressionRGB;
PropertyItem* photometricInterpretationItem = GetPropertyItemFromImage(bm, PropertyTagPhotometricInterp);
if (photometricInterpretationItem) {
WORD photoMetricInterpretationTag = VoidToInt(photometricInterpretationItem->value, photometricInterpretationItem->length);
free(photometricInterpretationItem);
if (photoMetricInterpretationTag == 6) {
compression = ThumbCompressionYCbCr;
}
}
} else if (compressionTag == 6) {
compression = ThumbCompressionJPEG;
}
free(compressionItem);
}
}
int originalThumbWidth = 0, originalThumbHeight = 0;
if (compression == ThumbCompressionJPEG || compression == ThumbCompressionRGB) {
PropertyItem* thumbDataItem = GetPropertyItemFromImage(bm, PropertyTagThumbnailData);
if (thumbDataItem) {
if (compression == ThumbCompressionJPEG) {
Bitmap* src = BitmapFromMemory(reinterpret_cast<BYTE*>(thumbDataItem->value), thumbDataItem->length);
if (src) {
gr.DrawImage(src, 0, 0, sz.Width, sz.Height);
delete src;
free(thumbDataItem);
return res;
}
} else if (compression == ThumbCompressionRGB) {
PropertyItem* widthItem = GetPropertyItemFromImage(bm, PropertyTagThumbnailImageWidth);
if (widthItem) {
originalThumbWidth = VoidToInt(widthItem->value, widthItem->length);
free(widthItem);
}
PropertyItem* heightItem = GetPropertyItemFromImage(bm, PropertyTagThumbnailImageHeight);
if (heightItem) {
originalThumbHeight = VoidToInt(heightItem->value, heightItem->length);
free(heightItem);
}
if (originalThumbWidth && originalThumbHeight) {
BITMAPINFOHEADER bih;
memset(&bih, 0, sizeof(bih));
bih.biSize = sizeof(bih);
bih.biWidth = originalThumbWidth;
bih.biHeight = -originalThumbHeight;
bih.biPlanes = 1;
bih.biBitCount = 24;
BITMAPINFO bi;
memset(&bi, 0, sizeof(bi));
bi.bmiHeader = bih;
BYTE* data = reinterpret_cast<BYTE*>(thumbDataItem->value);
BYTE temp;
// Convert RGB to BGR
for (unsigned int offset = 0; offset < thumbDataItem->length; offset += 3) {
temp = data[offset];
data[offset] = data[offset + 2];
data[offset + 2] = temp;
}
Bitmap src(&bi, thumbDataItem->value);
if (src.GetLastStatus() == Ok) {
gr.DrawImage(&src, 0, 0, sz.Width, sz.Height);
free(thumbDataItem);
return res;
}
}
} else {
// other type of compression not implemented
}
free(thumbDataItem);
}
}
}
// Fallback - Load full image and draw it (slow)
gr.DrawImage(bm, 0, 0, sz.Width, sz.Height);
return res;
}
Gdiplus::Bitmap* GetThumbnail(const CString& filename, int width, int height, Gdiplus::Size* realSize) {
using namespace Gdiplus;
Image bm(filename);
if (bm.GetLastStatus() != Ok) {
return 0;
}
return GetThumbnail(&bm, width, height, realSize);
}
Gdiplus::Size AdaptProportionalSize(const Gdiplus::Size& szMax, const Gdiplus::Size& szReal)
{
int nWidth;
int nHeight;
double sMaxRatio;
double sRealRatio;
if (szMax.Width < 1 || szMax.Height < 1 || szReal.Width < 1 || szReal.Height < 1)
return Size();
sMaxRatio = szMax.Width / static_cast<double>(szMax.Height);
sRealRatio = szReal.Width / static_cast<double>(szReal.Height);
if (sMaxRatio < sRealRatio) {
nWidth = min(szMax.Width, szReal.Width);
nHeight = static_cast<int>(round(nWidth / sRealRatio));
} else {
nHeight = min(szMax.Height, szReal.Height);
nWidth = static_cast<int>(round(nHeight * sRealRatio));
}
return Size(nWidth, nHeight);
}

Some new version of opencv_performance for opencv_traincascade?

I have noted that the cascades trained with the program opencv_traincascade does not run with the current version of opencv_performance. I've tried to convert the old performance cpp file to load the new types of cascades, but without success. The code is here:
#include "cv.h"
#include "highgui.h"
#include <cstdio>
#include <cmath>
#include <ctime>
#include <math.h>
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <iostream>
#include <stdio.h>
#ifndef PATH_MAX
#define PATH_MAX 512
#endif /* PATH_MAX */
/*typedef struct HidCascade {
int size;
int count;
} HidCascade;
*/
typedef struct ObjectPos {
float x;
float y;
float width;
int found; /* for reference */
int neghbors;
} ObjectPos;
using namespace std;
using namespace cv;
int main(int argc, char* argv[]) {
int i, j;
char* classifierdir = NULL;
//char* samplesdir = NULL;
int saveDetected = 1;
double scale_factor = 1.1;
float maxSizeDiff = 1.5F;
float maxPosDiff = 1.1F;
/* number of stages. if <=0 all stages are used */
//int nos = -1, nos0;
int width = 25;
int height = 15;
int rocsize;
FILE* info;
FILE* resultados;
char* infoname;
char fullname[PATH_MAX];
//char detfilename[PATH_MAX];
char* filename;
//char detname[] = "det-";
CascadeClassifier cascade;
double totaltime;
if (!(resultados = fopen("resultados.txt", "w"))) {
printf("Cannot create results file.\n");
exit(-1);
}
infoname = (char*) "";
rocsize = 20;
if (argc == 1) {
printf("Usage: %s\n -data <classifier_directory_name>\n"
" -info <collection_file_name>\n"
" [-maxSizeDiff <max_size_difference = %f>]\n"
" [-maxPosDiff <max_position_difference = %f>]\n"
" [-sf <scale_factor = %f>]\n"
" [-ni]\n"
" [-rs <roc_size = %d>]\n"
" [-w <sample_width = %d>]\n"
" [-h <sample_height = %d>]\n", argv[0], maxSizeDiff,
maxPosDiff, scale_factor, rocsize, width, height);
return 0;
}
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-data")) {
classifierdir = argv[++i];
} else if (!strcmp(argv[i], "-info")) {
infoname = argv[++i];
} else if (!strcmp(argv[i], "-maxSizeDiff")) {
maxSizeDiff = (float) atof(argv[++i]);
} else if (!strcmp(argv[i], "-maxPosDiff")) {
maxPosDiff = (float) atof(argv[++i]);
} else if (!strcmp(argv[i], "-sf")) {
scale_factor = atof(argv[++i]);
} else if (!strcmp(argv[i], "-ni")) {
saveDetected = 0;
} else if (!strcmp(argv[i], "-rs")) {
rocsize = atoi(argv[++i]);
} else if (!strcmp(argv[i], "-w")) {
width = atoi(argv[++i]);
} else if (!strcmp(argv[i], "-h")) {
height = atoi(argv[++i]);
}
}
if (!cascade.load(classifierdir)) {
printf("Unable to load classifier from %s\n", classifierdir);
return 1;
}
strcpy(fullname, infoname);
filename = strrchr(fullname, '\\');
if (filename == NULL) {
filename = strrchr(fullname, '/');
}
if (filename == NULL) {
filename = fullname;
} else {
filename++;
}
info = fopen(infoname, "r");
totaltime = 0.0;
if (info != NULL) {
int x, y, width, height;
Mat img;
int hits, missed, falseAlarms;
int totalHits, totalMissed, totalFalseAlarms;
int found;
float distance;
int refcount;
ObjectPos* ref;
int detcount;
ObjectPos* det;
int error = 0;
int* pos;
int* neg;
pos = (int*) cvAlloc(rocsize * sizeof(*pos));
neg = (int*) cvAlloc(rocsize * sizeof(*neg));
for (i = 0; i < rocsize; i++) {
pos[i] = neg[i] = 0;
}
printf("+================================+======+======+======+\n");
printf("| File Name | Hits |Missed| False|\n");
printf("+================================+======+======+======+\n");
fprintf(resultados,
"+================================+======+======+======+\n");
fprintf(resultados,
"| File Name | Hits |Missed| False|\n");
fprintf(resultados,
"+================================+======+======+======+\n");
//fprintf (resultados, "%d\n",framesCnt);
totalHits = totalMissed = totalFalseAlarms = 0;
while (!feof(info)) {
fscanf(info, "%s %d", filename, &refcount);
img = imread(fullname);
if (!img.data) {
cout << "ow" << endl;
return -1;
}
ref = (ObjectPos*) cvAlloc(refcount * sizeof(*ref));
for (i = 0; i < refcount; i++) {
error = (fscanf(info, "%d %d %d %d", &x, &y, &width, &height)
!= 4);
if (error)
break;
ref[i].x = 0.5F * width + x;
ref[i].y = 0.5F * height + y;
ref[i].width = sqrt(0.5F * (width * width + height * height));
ref[i].found = 0;
ref[i].neghbors = 0; //in the new cascade, where to get the neighbors?
}
vector<Rect> obj_detectados;
Rect retang;
if (!error) {
totaltime -= time(0);
cascade.detectMultiScale(img, obj_detectados, scale_factor, 4, 0
//|CV_HAAR_FIND_BIGGEST_OBJECT
// |CV_HAAR_DO_ROUGH_SEARCH
| CV_HAAR_SCALE_IMAGE, Size(25, 15));
totaltime += time(0);
if (obj_detectados.size() == 0) {
detcount = 0;
} else {
detcount = obj_detectados.size();
}
det = (detcount > 0) ?
((ObjectPos*) cvAlloc(detcount * sizeof(*det))) : NULL;
hits = missed = falseAlarms = 0;
for (vector<Rect>::const_iterator r = obj_detectados.begin();
r != obj_detectados.end(); r++, i++) {
Point r1, r2;
r1.x = (r->x);
r1.y = (r->y);
r2.x = (r->x + r->width);
r2.y = (r->y + r->height);
retang.x = r1.x;
retang.y = r1.y;
retang.width = abs(r2.x - r1.x);
retang.height = abs(r2.y - r1.y);
if (saveDetected) {
rectangle(img, retang, Scalar(0, 0, 255), 3, CV_AA);
}
det[i].x = 0.5F*r->width + r->x;
det[i].y = 0.5F*r->height + r->y;
det[i].width = sqrt(0.5F * (r->width * r->width
+ r->height * r->height));
det[i].neghbors = 1; // i don't know if it will work...
// det[i].neghbors = r.neighbors; --- how to do it in the new version??
found = 0;
for (j = 0; j < refcount; j++) {
distance = sqrtf( (det[i].x - ref[j].x) * (det[i].x - ref[j].x) +
(det[i].y - ref[j].y) * (det[i].y - ref[j].y) );
//cout << distance << endl;
if( (distance < ref[j].width * maxPosDiff) &&
(det[i].width > ref[j].width / maxSizeDiff) &&
(det[i].width < ref[j].width * maxSizeDiff) )
{
ref[j].found = 1;
ref[j].neghbors = MAX( ref[j].neghbors, det[i].neghbors );
found = 1;
}
}
if (!found) {
falseAlarms++;
neg[MIN(det[i].neghbors, rocsize - 1)]++;
//neg[MIN(0, rocsize - 1)]++;
}
}
//imshow("teste", img);
if (saveDetected) {
//strcpy(detfilename, detname);
//strcat(detfilename, filename);
//strcpy(filename, detfilename);
imwrite(fullname, img);
//cvvSaveImage(fullname, img);
}
for (j = 0; j < refcount; j++) {
if (ref[j].found) {
hits++;
//pos[MIN(0, rocsize - 1)]++;
pos[MIN(ref[j].neghbors, rocsize - 1)]++;
} else {
missed++;
}
}
totalHits += hits;
totalMissed += missed;
totalFalseAlarms += falseAlarms;
printf("|%32.64s|%6d|%6d|%6d|\n", filename, hits, missed,
falseAlarms);
//printf("+--------------------------------+------+------+------+\n");
fprintf(resultados, "|%32.64s|%6d|%6d|%6d|\n", filename, hits,
missed, falseAlarms);
//fprintf(resultados,
// "+--------------------------------+------+------+------+\n");
fflush(stdout);
if (det) {
cvFree( &det);
det = NULL;
}
} /* if( !error ) */
//char c = (char) waitKey(10);
// if (c == 27)
// exit(0);
cvFree( &ref);
}
fclose(info);
printf("|%32.32s|%6d|%6d|%6d|\n", "Total", totalHits, totalMissed,
totalFalseAlarms);
fprintf(resultados, "|%32.32s|%6d|%6d|%6d|\n", "Total", totalHits,
totalMissed, totalFalseAlarms);
printf("+================================+======+======+======+\n");
fprintf(resultados,
"+================================+======+======+======+\n");
//printf("Number of stages: %d\n", nos);
//printf("Number of weak classifiers: %d\n", numclassifiers[nos - 1]);
printf("Total time: %f\n", totaltime);
fprintf(resultados, "Total time: %f\n", totaltime);
/* print ROC to stdout */
for (i = rocsize - 1; i > 0; i--) {
pos[i - 1] += pos[i];
neg[i - 1] += neg[i];
}
//fprintf(stderr, "%d\n", nos);
for (i = 0; i < rocsize; i++) {
fprintf(stderr, "\t%d\t%d\t%f\t%f\n", pos[i], neg[i],
((float) pos[i]) / (totalHits + totalMissed),
((float) neg[i]) / (totalHits + totalMissed));
}
cvFree( &pos);
cvFree( &neg);
}
return 0;
}
My doubt is about the det[i].neghbors = r.neighbors; in the old performance.cpp. How I retrieve the neighbors in this new version?
Anyone could help me to convert opencv_performance to run the new cascades from opencv_traincascade?
Many thanks!