I'm working on video encoding for a Unity plugin. I have image encoding working, but now I'm at the audio. So I'm trying to encode only audio into an mp4 file with AAC encoding, and I'm stuck: the resulting file does not contain anything. Also, from what I understand, the AAC encoder in ffmpeg only supports AV_SAMPLE_FMT_FLTP, which is why I use that format.
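(Side note: that claim can be checked at run time by listing the encoder's supported sample formats from its sample_fmts array — a quick standalone sketch, not part of the plugin itself; on my build the only entry printed is fltp:)

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/samplefmt.h>
}
#include <cstdio>

int main()
{
    avcodec_register_all();
    // Print every sample format the native AAC encoder advertises.
    const AVCodec *aac = avcodec_find_encoder(AV_CODEC_ID_AAC);
    for (const AVSampleFormat *p = aac->sample_fmts; *p != AV_SAMPLE_FMT_NONE; ++p)
        std::printf("%s\n", av_get_sample_fmt_name(*p));
    return 0;
}

Anyway, here's my code: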
Setup:
int initialize_encoding_audio(const char *filename)
{
    int ret;
    AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
    AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
    avcodec_register_all();
    av_register_all();
    aud_codec = avcodec_find_encoder(aud_codec_id);
    avcodec_register(aud_codec);
    if (!aud_codec)
        return COULD_NOT_FIND_AUD_CODEC;
    aud_codec_context = avcodec_alloc_context3(aud_codec);
    if (!aud_codec_context)
        return CONTEXT_CREATION_ERROR;
    aud_codec_context->bit_rate = 192000;
    aud_codec_context->sample_rate = select_sample_rate(aud_codec);
    aud_codec_context->sample_fmt = sample_fmt;
    aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
    aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);
    aud_codec_context->codec = aud_codec;
    aud_codec_context->codec_id = aud_codec_id;
    ret = avcodec_open2(aud_codec_context, aud_codec, NULL);
    if (ret < 0)
        return COULD_NOT_OPEN_AUD_CODEC;
    outctx = avformat_alloc_context();
    ret = avformat_alloc_output_context2(&outctx, NULL, "mp4", filename);
    outctx->audio_codec = aud_codec;
    outctx->audio_codec_id = aud_codec_id;
    audio_st = avformat_new_stream(outctx, aud_codec);
    audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
    audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
    audio_st->codecpar->channels = aud_codec_context->channels;
    audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
    audio_st->codecpar->codec_id = aud_codec_id;
    audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
    audio_st->codecpar->format = sample_fmt;
    audio_st->codecpar->frame_size = aud_codec_context->frame_size;
    audio_st->codecpar->block_align = aud_codec_context->block_align;
    audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;
    outctx->streams = new AVStream*[1];
    outctx->streams[0] = audio_st;
    av_dump_format(outctx, 0, filename, 1);
    if (!(outctx->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&outctx->pb, filename, AVIO_FLAG_WRITE) < 0)
            return COULD_NOT_OPEN_FILE;
    }
    ret = avformat_write_header(outctx, NULL);
    aud_frame = av_frame_alloc();
    aud_frame->nb_samples = aud_codec_context->frame_size;
    aud_frame->format = aud_codec_context->sample_fmt;
    aud_frame->channel_layout = aud_codec_context->channel_layout;
    int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
                                                 aud_codec_context->sample_fmt, 0);
    av_frame_get_buffer(aud_frame, buffer_size / aud_codec_context->channels);
    if (!aud_frame)
        return COULD_NOT_ALLOCATE_FRAME;
    aud_frame_counter = 0;
    return 0;
}
Encoding:
int encode_audio_samples(uint8_t **aud_samples)
{
    int ret;
    int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
                                                 aud_codec_context->sample_fmt, 0);
    for (size_t i = 0; i < buffer_size / aud_codec_context->channels; i++)
    {
        aud_frame->data[0][i] = aud_samples[0][i];
        aud_frame->data[1][i] = aud_samples[1][i];
    }
    aud_frame->pts = aud_frame_counter++;
    ret = avcodec_send_frame(aud_codec_context, aud_frame);
    if (ret < 0)
        return ERROR_ENCODING_SAMPLES_SEND;
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;
    fflush(stdout);
    while (true)
    {
        ret = avcodec_receive_packet(aud_codec_context, &pkt);
        if (!ret)
        {
            av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);
            pkt.stream_index = audio_st->index;
            av_write_frame(outctx, &pkt);
            av_packet_unref(&pkt);
        }
        if (ret == AVERROR(EAGAIN))
            break;
        else if (ret < 0)
            return ERROR_ENCODING_SAMPLES_RECEIVE;
        else
            break;
    }
    return 0;
}
Finish encoding:
int finish_audio_encoding()
{
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;
    fflush(stdout);
    int ret = avcodec_send_frame(aud_codec_context, NULL);
    if (ret < 0)
        return ERROR_ENCODING_FRAME_SEND;
    while (true)
    {
        ret = avcodec_receive_packet(aud_codec_context, &pkt);
        if (!ret)
        {
            if (pkt.pts != AV_NOPTS_VALUE)
                pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
            if (pkt.dts != AV_NOPTS_VALUE)
                pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);
            av_write_frame(outctx, &pkt);
            av_packet_unref(&pkt);
        }
        if (ret == -AVERROR(AVERROR_EOF))
            break;
        else if (ret < 0)
            return ERROR_ENCODING_FRAME_RECEIVE;
    }
    av_write_trailer(outctx);
}
Main:
void get_audio_frame(float_t *left_samples, float_t *right_samples, int frame_size, float* t, float* tincr, float* tincr2)
{
    int j, i;
    float v;
    for (j = 0; j < frame_size; j++)
    {
        v = sin(*t);
        *left_samples = v;
        *right_samples = v;
        left_samples++;
        right_samples++;
        *t += *tincr;
        *tincr += *tincr2;
    }
}
int main()
{
    int frame_rate = 30; // this should be like 96000 / 1024 or something, I guess?
    float t, tincr, tincr2;
    initialize_encoding_audio("audio.mp4");
    int sec = 50;
    float_t** aud_samples;
    int src_samples_linesize;
    int src_nb_samples = 1024;
    int src_channels = 2;
    int ret = av_samples_alloc_array_and_samples((uint8_t***)&aud_samples, &src_samples_linesize, src_channels,
                                                 src_nb_samples, AV_SAMPLE_FMT_FLTP, 0);
    t = 0;
    tincr = 0;
    tincr2 = 0;
    for (size_t i = 0; i < frame_rate * sec; i++)
    {
        get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
        encode_audio_samples((uint8_t **)aud_samples);
    }
    finish_audio_encoding();
    //cleanup();
    return 0;
}
I guess the first thing I'd want to make sure I got right is the synthetic sound generation and how I transfer it to the AVFrame. Are my conversions correct? But feel free to point out anything else that might be wrong.
Thanks in advance!
Edit: the whole source: http://pastebin.com/jYtmkhek
Edit2: Added initialization of tincr & tincr2
Unless I'm missing something from the pastebin, you forgot to initialize a few variables. You're using garbage to generate your samples.
float t, tincr, tincr2;
[...]
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
You probably want to start with t=0 and increment by 2 * PI * frequency / sample rate for a sine wave.
Also, avformat_new_stream() creates the stream for you; don't do it with new.
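In isolation, the two fixes look like this (a fragment against your code, not a complete program; the 440 Hz value is just the tone I used, and M_PI needs math.h):

// Phase starts at 0 and advances by 2*pi*frequency/sample_rate per sample;
// tincr2 of 0 means the frequency stays constant.
float t = 0;
float tincr = 2 * M_PI * 440.0 / 96000;  // 440 Hz tone at a 96 kHz sample rate
float tincr2 = 0;

// avformat_new_stream() already attaches the stream to outctx->streams,
// so the manual new AVStream*[1] block should simply be removed:
audio_st = avformat_new_stream(outctx, aud_codec);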
Update:
I removed all the c++ stuff to test this. Here's the code that works: pastebin
And here's the resulting file: audio.mp4
ffmpeg -i audio.mp4 -filter_complex "showwaves=s=640x120:mode=line:colors=white" -frames:v 1 wave.jpg
Diff:
1,6d0
< #include "encoder.h"
< #include <algorithm>
< #include <iterator>
<
< extern "C"
< {
14a9
> #include <math.h>
40,41c35,36
< SwsContext *sws_ctx;
< SwrContext *swr_ctx = NULL;
---
> struct SwsContext *sws_ctx;
> struct SwrContext *swr_ctx = NULL;
76,77c71,72
< AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
< AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
---
> enum AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
> enum AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
125,126c120,121
< outctx->streams = new AVStream*[1];
< outctx->streams[0] = audio_st;
---
> //outctx->streams = new AVStream*[1];
> //outctx->streams[0] = audio_st;
182c177
< while (true)
---
> while (1)
216c211
< while (true)
---
> while (1)
291c286
< float t, tincr, tincr2;
---
> float t = 0, tincr = 2 * M_PI * 440.0 / 96000, tincr2 = 0;
317d311
< }
Environment:
Ubuntu 16.04 (x64)
C++
ffmpeg
Use-case
Multiple MPEG-TS fragments are rapidly decoded (several every second)
The format of the TS fragments is dynamic and can't be known ahead of time
The first A/V frames of each fragment need to be extracted
Problem statement
The code below successfully decodes A/V, BUT it has a huge memory leak (MBytes/sec)
According to the docs, all memory is freed as it should be (is it...?)
Why do I get this huge memory leak? What am I missing in the following code snippet?
struct MEDIA_TYPE {
ffmpeg::AVMediaType eType;
union {
struct {
ffmpeg::AVPixelFormat colorspace;
int width, height;
float fFPS;
} video;
struct : WAVEFORMATEX {
short sSampleFormat;
} audio;
} format;
};
struct FRAME {
enum { MAX_PLANES = 3 + 1 };
int iStrmId;
int64_t pts; // Duration in 90Khz clock resolution
uint8_t** ppData; // Null terminated
int32_t* pStride;// Zero terminated
};
HRESULT ProcessTS(IN Operation op, IN uint8_t* pTS, IN uint32_t uiBytes, bool(*cb)(IN const MEDIA_TYPE& mt, IN FRAME& frame, IN PVOID pCtx), IN PVOID pCbCtx)
{
uiBytes -= uiBytes % 188;// align to 188 packet size
struct CONTEXT {
uint8_t* pTS;
uint32_t uiBytes;
int32_t iPos;
} ctx = { pTS, uiBytes, 0 };
LOGTRACE(TSDecoder, "ProcessTS(%d, 0x%.8x, %d, 0x%.8x, 0x%.8x), this=0x%.8x\r\n", (int)op, pTS, uiBytes, cb, pCbCtx, this);
ffmpeg::AVFormatContext* pFmtCtx = 0;
if (0 == (pFmtCtx = ffmpeg::avformat_alloc_context()))
return E_OUTOFMEMORY;
ffmpeg::AVIOContext* pIoCtx = ffmpeg::avio_alloc_context(pTS, uiBytes, 0, &ctx
, [](void *opaque, uint8_t *buf, int buf_size)->int {
auto pCtx = (CONTEXT*)opaque;
int size = pCtx->uiBytes;
if (pCtx->uiBytes - pCtx->iPos < buf_size)
size = pCtx->uiBytes - pCtx->iPos;
if (size > 0) {
memcpy(buf, pCtx->pTS + pCtx->iPos, size);
pCtx->iPos += size;
}
return size;
}
, 0
, [](void* opaque, int64_t offset, int whence)->int64_t {
auto pCtx = (CONTEXT*)opaque;
switch (whence)
{
case SEEK_SET:
pCtx->iPos = offset;
break;
case SEEK_CUR:
pCtx->iPos += offset;
break;
case SEEK_END:
pCtx->iPos = pCtx->uiBytes - offset;
break;
case AVSEEK_SIZE:
return pCtx->uiBytes;
}
return pCtx->iPos;
});
pFmtCtx->pb = pIoCtx;
int iRet = ffmpeg::avformat_open_input(&pFmtCtx, "fakevideo.ts", m_pInputFmt, 0);
if (ERROR_SUCCESS != iRet) {
assert(false);
pFmtCtx = 0;// a user-supplied AVFormatContext will be freed on failure.
return E_FAIL;
}
struct DecodeContext {
ffmpeg::AVStream* pStream;
ffmpeg::AVCodec* pDecoder;
int iFramesProcessed;
};
HRESULT hr = S_OK;
int iStreamsProcessed = 0;
bool bVideoFound = false;
int64_t ptsLast = 0;
int64_t dtsLast = 0;
auto pContext = (DecodeContext*)alloca(sizeof(DecodeContext) * pFmtCtx->nb_streams);
for (unsigned int i = 0; i < pFmtCtx->nb_streams; i++) {
assert(pFmtCtx->streams[i]->index == i);
pContext[i].pStream = pFmtCtx->streams[i];
pContext[i].pDecoder = ffmpeg::avcodec_find_decoder(pFmtCtx->streams[i]->codec->codec_id);
pContext[i].iFramesProcessed= 0;
if (0 == pContext[i].pDecoder)
continue;
if ((iRet = ffmpeg::avcodec_open2(pFmtCtx->streams[i]->codec, pContext[i].pDecoder, NULL)) < 0) {
_ASSERT(FALSE);
hr = E_FAIL;
goto ErrExit;
}
}
while (S_OK == hr) {
ffmpeg::AVFrame* pFrame = 0;
ffmpeg::AVPacket pkt;
ffmpeg::av_init_packet(&pkt);
if (ERROR_SUCCESS != (iRet = ffmpeg::av_read_frame(pFmtCtx, &pkt))) {
hr = E_FAIL;
break;
}
if ((0 == dtsLast) && (0 != pkt.dts))
dtsLast = pkt.dts;
if ((0 == ptsLast) && (0 != pkt.pts))
ptsLast = pkt.pts;
DecodeContext& ctx = pContext[pkt.stream_index];
if (Operation::DECODE_FIRST_FRAME_OF_EACH_STREAM == op) {
if (iStreamsProcessed == pFmtCtx->nb_streams) {
hr = S_FALSE;
goto Next;
}
if (ctx.iFramesProcessed > 0)
goto Next;
iStreamsProcessed++;
}
if (0 == ctx.pDecoder)
goto Next;
if (0 == (pFrame = ffmpeg::av_frame_alloc())) {
hr = E_OUTOFMEMORY;
goto Next;
}
LOGTRACE(TSDecoder, "ProcessTS(%d, 0x%.8x, %d, 0x%.8x, 0x%.8x), this=0x%.8x, decode, S:%d, T:%d\r\n", (int)op, pTS, uiBytes, cb, pCbCtx, this, pkt.stream_index, ctx.pStream->codec->codec_type);
int bGotFrame = false;
int iBytesUsed = 0;
MEDIA_TYPE mt;
memset(&mt, 0, sizeof(mt));
mt.eType = ctx.pStream->codec->codec_type;
switch (mt.eType) {
case ffmpeg::AVMediaType::AVMEDIA_TYPE_AUDIO:
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
if((iRet = ffmpeg::avcodec_decode_audio4(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
hr = E_FAIL;
goto Next;
}
_ASSERT(pkt.size == iRet);
// FFMPEG AAC decoder oddity: the first call to 'avcodec_decode_audio4' produces muted audio, while the second produces the expected audio
bGotFrame = false;
if ((iRet = ffmpeg::avcodec_decode_audio4(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
hr = E_FAIL;
goto Next;
}
_ASSERT(pkt.size == iRet);
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
if (false == bGotFrame)
goto Next;
iBytesUsed = ctx.pStream->codec->frame_size;
mt.format.audio.nChannels = ctx.pStream->codec->channels;
mt.format.audio.nSamplesPerSec = ctx.pStream->codec->sample_rate;
mt.format.audio.wBitsPerSample = ffmpeg::av_get_bytes_per_sample(ctx.pStream->codec->sample_fmt) * 8;
mt.format.audio.nBlockAlign = mt.format.audio.nChannels * mt.format.audio.wBitsPerSample / 8;
mt.format.audio.sSampleFormat = (short)pFrame->format;
break;
case ffmpeg::AVMediaType::AVMEDIA_TYPE_VIDEO:
if ((iRet = ffmpeg::avcodec_decode_video2(ctx.pStream->codec, pFrame, &bGotFrame, &pkt)) < 0) {
hr = E_FAIL;
break;
}
if (false == bGotFrame)
goto Next;
assert(ffmpeg::AVPixelFormat::AV_PIX_FMT_YUV420P == ctx.pStream->codec->pix_fmt);// That is the only color space currently supported
iBytesUsed = (ctx.pStream->codec->width * ctx.pStream->codec->height * 3) / 2;
mt.format.video.width = ctx.pStream->codec->width;
mt.format.video.height = ctx.pStream->codec->height;
mt.format.video.colorspace = ctx.pStream->codec->pix_fmt;
mt.format.video.fFPS = (float)ctx.pStream->codec->framerate.num / ctx.pStream->codec->framerate.den;
bVideoFound = true;
break;
default:
goto Next;
}
ctx.iFramesProcessed++;
{
FRAME f = { ctx.pStream->index, ((0 == ptsLast) ? dtsLast : ptsLast), (uint8_t**)pFrame->data, (int32_t*)pFrame->linesize };
if ((iRet > 0) && (false == cb(mt, f, pCbCtx)))
hr = S_FALSE;// Breaks the loop
}
Next:
ffmpeg::av_free_packet(&pkt);
if (0 != pFrame) {
//ffmpeg::av_frame_unref(pFrame);
ffmpeg::av_frame_free(&pFrame);
pFrame = 0;
}
}
ErrExit:
for (unsigned int i = 0; i < pFmtCtx->nb_streams; i++)
ffmpeg::avcodec_close(pFmtCtx->streams[i]->codec);
pIoCtx->buffer = 0;// We allocated the buffer ourselves, so ffmpeg must not free it for us
pFmtCtx->pb = 0;
ffmpeg::av_free(pIoCtx);
ffmpeg::avformat_close_input(&pFmtCtx);
ffmpeg::avformat_free_context(pFmtCtx);
return hr;
}
You need to unref the packets before reusing them, and there's no need to allocate and deallocate them all the time.
Here's how I do it, which might help you:
// Initialise a packet queue: ff->packets is a std::list<AVPacket *> member of my context struct
...
for (int c = 0; c < MAX_PACKETS; c++) {
    ff->packets.push_back(av_packet_alloc());
}
while (!quit) {
    // ... take a packet from the ff->packets queue ...
    int err = av_read_frame(ff->context, packet);
    // ... process packet (audio, video, etc.) ...
    av_packet_unref(packet); // clears the payload; the packet goes back to the queue for reuse
}
// Release packets
while (ff->packets.size()) { // free packets
    AVPacket *packet = ff->packets.front();
    av_packet_free(&packet);
    ff->packets.pop_front();
}
In your code you've freed a packet which wasn't allocated in the first place.
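For clarity, here is a minimal sketch of the two packet lifetimes side by side (new-style packet API; the function names are mine, only the lifetimes matter):

extern "C" {
#include <libavcodec/avcodec.h>
}

void stack_packet()                  // the packet struct itself lives on the stack
{
    AVPacket pkt;
    av_init_packet(&pkt);            // initialises fields, allocates nothing
    pkt.data = NULL;
    pkt.size = 0;
    // ... av_read_frame() / avcodec_receive_packet() fill pkt ...
    av_packet_unref(&pkt);           // frees the payload only; do NOT av_packet_free() here
}

void heap_packet()                   // the packet struct itself is heap-allocated
{
    AVPacket *pkt = av_packet_alloc();
    // ... fill and use pkt, calling av_packet_unref(pkt) between iterations ...
    av_packet_free(&pkt);            // frees payload and struct, sets pkt to NULL
}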
Hi, I am trying to record from a board, and I have successfully recorded 4 seconds. The problem is that when I try to record for more time, I get an error telling me there is not enough memory. My target is to record a 5-minute file. Until now I have created a buffer named snIn[256] that holds the samples. I send it to a big buffer of [16K * 4 sec], and when that is full, I create the wav file.
#include "SAI_InOut.hpp"
#include "F746_GUI.hpp"
#include "Delay.hpp"
#include "WaveformDisplay.hpp"
#include "SDFileSystem.h"
#include "wavfile.h"
using namespace Mikami;
#define RES_STR_SIZE 0x20
#define WAVFILE_SAMPLES_PER_SECOND 16000
#define REC_TIME 4
//Create an SDFileSystem object
SDFileSystem sd("sd");
bool flag = 1;
int count = 0;
char *res_buf;
int rp = 0;
const int NUM_SAMPLES = WAVFILE_SAMPLES_PER_SECOND * REC_TIME;
Array<int16_t> my_buffer(NUM_SAMPLES);
int j = 0;
static const char *target_filename = "/sd/rectest.wav";
const int SEG_SIZE = 256;
int sent_array = 0;
int rec(const char *filename, Array<int16_t> my_buffer)
{
j = 0;
flag = 0;
sent_array = 0;
WavFileResult result;
wavfile_info_t info;
wavfile_data_t data;
WAVFILE_INFO_AUDIO_FORMAT(&info) = 1;
WAVFILE_INFO_NUM_CHANNELS(&info) = 1;
WAVFILE_INFO_SAMPLE_RATE(&info) = WAVFILE_SAMPLES_PER_SECOND;
WAVFILE_INFO_BITS_PER_SAMPLE(&info) = 16;
WAVFILE_INFO_BYTE_RATE(&info) = WAVFILE_INFO_NUM_CHANNELS(&info) * WAVFILE_INFO_SAMPLE_RATE(&info) * (WAVFILE_INFO_BITS_PER_SAMPLE(&info) / 8);
WAVFILE_INFO_BLOCK_ALIGN(&info) = 2;
WAVFILE *wf = wavfile_open(filename, WavFileModeWrite, &result);
if (result != WavFileResultOK) {
wavfile_result_string(result, res_buf, RES_STR_SIZE);
printf("%s", res_buf);
return result;
} else printf ("Open file success \r\n");
rp = 0;
WAVFILE_DATA_NUM_CHANNELS(&data) = 1;
result = wavfile_write_info(wf, &info);
if (result != WavFileResultOK) {
wavfile_result_string(result, res_buf, RES_STR_SIZE);
printf("%s", res_buf);
return result; } else printf ("Write info success \r\n");
while ( rp < NUM_SAMPLES ) {
WAVFILE_DATA_CHANNEL_DATA(&data, 0) = my_buffer[rp];
result = wavfile_write_data(wf, &data);
rp += 1;
}
if (result != WavFileResultOK) {
wavfile_result_string(result, res_buf, RES_STR_SIZE);
printf("%s", res_buf);
return result; } else printf ("Write Data file success \r\n");
result = wavfile_close(wf);
if (result != WavFileResultOK) {
wavfile_result_string(result, res_buf , RES_STR_SIZE);
printf("%s", res_buf);
return result; } else printf ("Close file success \r\n");
//UnMount the filesystem
sd.unmount();
printf("Success rec !\r\n");
return 0;
}
int main()
{
//Mount the filesystem
sd.mount();
const float MAX_DELAY = 0.5f; // maximum delay, in seconds
const int FS = I2S_AUDIOFREQ_16K; // sampling frequency: 16 kHz
const uint32_t MAX_ARRAY_SIZE = (uint32_t)(MAX_DELAY*FS);
SaiIO mySai(SaiIO::BOTH, 256, FS, INPUT_DEVICE_DIGITAL_MICROPHONE_2);
Label myLabel(185, 10, "Delay System", Label::CENTER, Font16);
// ButtonGroup: "ON", "OFF"
const uint16_t BG_LEFT = 370;
const uint16_t BG_WIDTH = 100;
const uint16_t BG_HEIGHT = 45;
ButtonGroup onOff(BG_LEFT, 40, BG_WIDTH/2, BG_HEIGHT,
2, (string[]){"ON", "OFF"}, 0, 0, 2, 1);
const uint16_t SB_LEFT = BG_LEFT - 320;
const uint16_t SB_WIDTH = 270;
const uint16_t SB_Y0 = 240;
char str[20];
sprintf(str, " %3.1f [s]", MAX_DELAY);
SeekBar barDelay(SB_LEFT, SB_Y0, SB_WIDTH,
0, MAX_ARRAY_SIZE, 0, "0", "", str);
NumericLabel<float> labelDelay(SB_LEFT+SB_WIDTH/2, SB_Y0-40, "DELAY: %4.2f", 0, Label::CENTER);
DelaySystem delaySystem(MAX_ARRAY_SIZE);
WaveformDisplay displayIn(*GuiBase::GetLcdPtr(), SB_LEFT+7, 70, 256, 9,LCD_COLOR_WHITE, LCD_COLOR_CYAN,GuiBase::ENUM_BACK);
Label inLabel(SB_LEFT-30, 65, "IN");
WaveformDisplay displayOut(*GuiBase::GetLcdPtr(), SB_LEFT+7, 130, 256, 9,LCD_COLOR_WHITE, LCD_COLOR_CYAN,GuiBase::ENUM_BACK);
Label outLabel(SB_LEFT-30, 125, "OUT");
int runStop = 1;
Array<int16_t> snIn(mySai.GetLength());
Array<int16_t> snOut(mySai.GetLength());
mySai.RecordIn();
mySai.PlayOut();
mySai.PauseOut();
while (true)
{
// On/OFF
int num;
if (onOff.GetTouchedNumber(num))
if (runStop != num)
{
if (num == 0) mySai.ResumeOut();
else mySai.PauseOut();
runStop = num;
}
if (mySai.IsCompleted())
{
for (int n=0; n<mySai.GetLength() ; n++)
{
int16_t xL, xR;
mySai.Input(xL,xR);
int16_t xn = xL + xR;
snIn[n] = xn;
my_buffer[j] = xn;
j++;
if (j == NUM_SAMPLES && flag == 1) {
rec (target_filename , my_buffer); }
int16_t yn = delaySystem.Execute(xn);
mySai.Output(yn, yn);
snOut[n] = yn;
}
mySai.Reset();
displayIn.Execute(snIn);
}
}
}
I have thought about a possible solution: fill the wav file's data field directly with the snIn[256] buffer (instead of using my_buffer) again and again, and close the wav file at the end. Please let me know what you think about that and about other solutions.
Things to note: while a write operation is being performed, more data is still coming in.
At the very least I would double-buffer that data, so you can be writing one buffer while the other one fills.
Usually this means using an interrupt to collect the samples (into whichever buffer is currently being filled).
The foreground program waits for the current buffer to be 'full', then initiates the write operation, then waits again for a buffer to be 'full'.
The interrupt function tracks which buffer is being filled and the current index into that buffer. When a buffer is full, it sets a 'global' status to let the foreground program know which buffer is ready to be written.
The foreground program writes the buffer, then resets the status for that buffer. A minimal sketch of this scheme is shown below.
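Sketched out (all names and the writer function are illustrative, not from your project; the sample interrupt stands in for whatever your SAI callback is):

#include <stdint.h>

const int BUF_SAMPLES = 256;
volatile int16_t bufA[BUF_SAMPLES];
volatile int16_t bufB[BUF_SAMPLES];
volatile int16_t *fillBuf = bufA;   // buffer the interrupt is currently filling
volatile int fillIndex = 0;
volatile int readyBuf = -1;         // -1: none ready, 0: bufA full, 1: bufB full

// Interrupt handler: called once per incoming sample.
void on_sample_irq(int16_t sample)
{
    fillBuf[fillIndex++] = sample;
    if (fillIndex == BUF_SAMPLES) {
        readyBuf = (fillBuf == bufA) ? 0 : 1;       // hand the full buffer over
        fillBuf  = (fillBuf == bufA) ? bufB : bufA; // keep filling the other one
        fillIndex = 0;
    }
}

// Foreground loop: write out whichever buffer is full, then clear the flag.
void foreground_loop()
{
    while (true) {
        if (readyBuf >= 0) {
            volatile int16_t *full = (readyBuf == 0) ? bufA : bufB;
            // write_block_to_wav(full, BUF_SAMPLES); // hypothetical writer built on wavfile_write_data()
            readyBuf = -1;
        }
    }
}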
I've been looking for some time for a quick and reliable way of creating grayscale videos with the avconv library, from frames that are captured/created with OpenCV, in a C++ application.
I know that OpenCV has its own internal way of creating videos; however, it has some encoding performance and option limitations.
So, with that in mind, what are the options for accomplishing this task?
For accomplishing this task, one of the options that I've found, and which complies with my needs, is the following class:
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/mathematics.h>
}
class AVConvVideoMaker
{
AVCodec* codec;
AVCodecContext* context;
AVFrame* picture;
int imgWidth, imgHeight, imgBufferSize;
FILE* outputFile;
int outputBufferSize;
uint8_t* outputBuffer;
int pictureBufferSize;
uint8_t* pictureBuffer;
int outputSize;
public:
AVConvVideoMaker(std::string outputFilePath, int imgWidth, int imgHeight)
: codec(NULL)
, context(NULL)
, picture(NULL)
, imgWidth(imgWidth)
, imgHeight(imgHeight)
, imgBufferSize(imgWidth*imgHeight)
, outputFile(fopen(outputFilePath.c_str(), "wb"))
, outputBufferSize(100000)
, outputBuffer(new uint8_t[outputBufferSize])
, pictureBufferSize((imgBufferSize*3)/2)
, pictureBuffer(new uint8_t[pictureBufferSize])
, outputSize(0)
{
avcodec_register_all();
this->codec = avcodec_find_encoder(CODEC_ID_MPEG1VIDEO);
if (!this->codec)
{
throw std::runtime_error("Codec not found");
}
this->context = avcodec_alloc_context3(codec);
this->picture = avcodec_alloc_frame();
this->context->bit_rate = 400000;
this->context->width = this->imgWidth;
this->context->height = this->imgHeight;
this->context->time_base = (AVRational){1, 25};
this->context->gop_size = 10;
this->context->max_b_frames = 1;
this->context->pix_fmt = PIX_FMT_YUV420P;
if(avcodec_open2(this->context, this->codec, NULL) < 0)
{
throw std::runtime_error("Could not open codec");
}
if(!this->outputFile)
{
throw std::runtime_error("Could not open video output file");
}
this->picture->data[0] = this->pictureBuffer;
this->picture->data[1] = this->picture->data[0] + imgBufferSize;
this->picture->data[2] = this->picture->data[1] + imgBufferSize / 4;
this->picture->linesize[0] = this->imgWidth;
this->picture->linesize[1] = this->imgWidth / 2;
this->picture->linesize[2] = this->imgWidth / 2;
}
void insertFrame(cv::Mat1b& img)
{
fflush(stdout);
/* Y */
for(int y=0; y < this->context->height; y++)
{
for(int x=0; x < this->context->width; x++)
{
this->picture->data[0][y * picture->linesize[0] + x] = img.at<uchar>(y,x);
}
}
/* Cb and Cr */
for(int y=0; y < this->context->height/2; y++)
{
for(int x=0; x < this->context->width/2; x++)
{
this->picture->data[1][y * this->picture->linesize[1] + x] = 128;
this->picture->data[2][y * this->picture->linesize[2] + x] = 128;
}
}
this->outputSize = avcodec_encode_video(this->context, this->outputBuffer, this->outputBufferSize, this->picture);
fwrite(this->outputBuffer, 1, outputSize, this->outputFile);
}
~AVConvVideoMaker()
{
this->outputBuffer[0] = 0x00;
this->outputBuffer[1] = 0x00;
this->outputBuffer[2] = 0x01;
this->outputBuffer[3] = 0xb7;
fwrite(this->outputBuffer, 1, 4, this->outputFile);
fclose(this->outputFile);
avcodec_close(this->context);
av_free(this->context);
av_free(this->picture);
delete outputBuffer;
delete pictureBuffer;
}
};
For compiling this in Ubuntu 16.04, you have to link with:
g++ --std=c++11 main.cpp -lopencv_core -lopencv_highgui -lavutil -lavcodec
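A hypothetical usage sketch (the file name and frame count are placeholders; since the class writes a raw MPEG-1 elementary stream rather than a container, a .mpg extension fits best):

#include <opencv2/core/core.hpp>

int main()
{
    AVConvVideoMaker maker("output.mpg", 640, 480);
    cv::Mat1b frame(480, 640, uchar(0));   // 8-bit grayscale frame buffer
    for (int i = 0; i < 250; ++i)          // 10 seconds at 25 fps
    {
        // ... fill 'frame' from a capture device or drawing code ...
        maker.insertFrame(frame);
    }
    return 0; // destructor writes the MPEG end-of-sequence code and closes the file
}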
When I decode frames from an avi file, re-encode them with x264 and save them to an mp4 file, the fps of the output file is always 12,800, so the file plays far too fast. But when I save the h264-encoded frames in avi format instead of mp4, the fps is 25, as I wanted.
What could be the problem?
Here the code I wrote in VS2010:
#include "stdafx.h"
#include "inttypes.h"
extern "C" {
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
#include <iostream>
using namespace std;
int main(int argc, char* argv[])
{
const char* inFileName = "C:\\000227_C1_GAME.avi";
const char* outFileName = "c:\\test.avi";
const char* outFileType = "avi";
av_register_all();
AVFormatContext* inContainer = NULL;
if(avformat_open_input(&inContainer, inFileName, NULL, NULL) < 0)
exit(1);
if(avformat_find_stream_info(inContainer, NULL) < 0)
exit(1);
// Find video stream
int videoStreamIndex = -1;
for (unsigned int i = 0; i < inContainer->nb_streams; ++i)
{
if (inContainer->streams[i] && inContainer->streams[i]->codec &&
inContainer->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoStreamIndex = i;
break;
}
}
if (videoStreamIndex == -1) exit(1);
AVFormatContext* outContainer = NULL;
if(avformat_alloc_output_context2(&outContainer, NULL, outFileType, outFileName) < 0)
exit(1);
// ----------------------------
// Decoder
// ----------------------------
AVStream const *const inStream = inContainer->streams[videoStreamIndex];
AVCodec *const decoder = avcodec_find_decoder(inStream->codec->codec_id);
if(!decoder)
exit(1);
if(avcodec_open2(inStream->codec, decoder, NULL) < 0)
exit(1);
// ----------------------------
// Encoder
// ----------------------------
AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
if(!encoder)
exit(1);
AVStream *outStream = avformat_new_stream(outContainer, encoder);
if(!outStream)
exit(1);
avcodec_get_context_defaults3(outStream->codec, encoder);
// Construct encoder
if(outContainer->oformat->flags & AVFMT_GLOBALHEADER)
outStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
outStream->codec->coder_type = AVMEDIA_TYPE_VIDEO;
outStream->codec->pix_fmt = AV_PIX_FMT_YUV420P;
outStream->codec->width = inStream->codec->width;
outStream->codec->height = inStream->codec->height;
outStream->codec->codec_id = encoder->id;
outStream->codec->bit_rate = 500000;
//outStream->codec->rc_min_rate = 600000;
//outStream->codec->rc_max_rate = 800000;
outStream->codec->time_base.den = 25;
outStream->codec->time_base.num = 1;
outStream->codec->gop_size = 250; // Keyframe interval (=GOP length). Determines maximum distance between I-frames
outStream->codec->keyint_min = 25; // minimum GOP size
outStream->codec->max_b_frames = 3;//16; // maximum number of B-frames between non-B-frames
outStream->codec->b_frame_strategy = 1; // decides the best number of B-frames to use. Default mode in x264.
outStream->codec->scenechange_threshold = 40;
outStream->codec->refs = 6; // ability to reference frames other than the one immediately prior to the current frame; specify how many references can be used.
outStream->codec->qmin = 0;//10;
outStream->codec->qmax = 69;//51;
outStream->codec->qcompress = 0.6;
outStream->codec->max_qdiff = 4;
outStream->codec->i_quant_factor = 1.4;//0.71;
outStream->codec->refs=1;//3;
outStream->codec->chromaoffset = -2;
outStream->codec->thread_count = 1;
outStream->codec->trellis = 1;
outStream->codec->me_range = 16;
outStream->codec->me_method = ME_HEX; //hex
outStream->codec->flags2 |= CODEC_FLAG2_FAST;
outStream->codec->coder_type = 1;
if(outStream->codec->codec_id == AV_CODEC_ID_H264)
{
av_opt_set(outStream->codec->priv_data, "preset", "slow", 0);
}
// Open encoder
if(avcodec_open2(outStream->codec, encoder, NULL) < 0)
exit(1);
// Open output container
if(avio_open(&outContainer->pb, outFileName, AVIO_FLAG_WRITE) < 0)
exit(1);
//close_o
AVFrame *decodedFrame = avcodec_alloc_frame();
if(!decodedFrame)
exit(1);
AVFrame *encodeFrame = avcodec_alloc_frame();
if(!encodeFrame)
exit(1);
encodeFrame->format = outStream->codec->pix_fmt;
encodeFrame->width = outStream->codec->width;
encodeFrame->height = outStream->codec->height;
if(av_image_alloc(encodeFrame->data, encodeFrame->linesize,
outStream->codec->width, outStream->codec->height,
outStream->codec->pix_fmt, 1) < 0)
exit(1);
av_dump_format(inContainer, 0, inFileName,0);
//Write header to ouput container
avformat_write_header(outContainer, NULL);
AVPacket decodePacket, encodedPacket;
int got_frame, len;
while(av_read_frame(inContainer, &decodePacket)>=0)
{
if (decodePacket.stream_index == videoStreamIndex)
{
len = avcodec_decode_video2(inStream->codec, decodedFrame, &got_frame, &decodePacket);
if(len < 0)
exit(1);
if(got_frame)
{
av_init_packet(&encodedPacket);
encodedPacket.data = NULL;
encodedPacket.size = 0;
if(avcodec_encode_video2(outStream->codec, &encodedPacket, decodedFrame, &got_frame) < 0)
exit(1);
if(got_frame)
{
if (outStream->codec->coded_frame->key_frame)
encodedPacket.flags |= AV_PKT_FLAG_KEY;
encodedPacket.stream_index = outStream->index;
if(av_interleaved_write_frame(outContainer, &encodedPacket) < 0)
exit(1);
av_free_packet(&encodedPacket);
}
}
}
av_free_packet(&decodePacket);
}
av_write_trailer(outContainer);
avio_close(outContainer->pb);
avcodec_free_frame(&encodeFrame);
avcodec_free_frame(&decodedFrame);
avformat_free_context(outContainer);
av_close_input_file(inContainer);
return 0;
}
The problem was with the PTS and DTS of the packet. Before writing the packet to the output (before the av_interleaved_write_frame call), set the PTS and DTS like this:
if (encodedPacket.pts != AV_NOPTS_VALUE)
encodedPacket.pts = av_rescale_q(encodedPacket.pts, outStream->codec->time_base, outStream->time_base);
if (encodedPacket.dts != AV_NOPTS_VALUE)
encodedPacket.dts = av_rescale_q(encodedPacket.dts, outStream->codec->time_base, outStream->time_base);
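For what it's worth, newer FFmpeg versions also provide av_packet_rescale_ts(), which rescales pts, dts and duration in one call (and skips AV_NOPTS_VALUE fields itself), so the snippet above should be equivalent to:

// One-call form; drops into the same spot before av_interleaved_write_frame.
av_packet_rescale_ts(&encodedPacket, outStream->codec->time_base, outStream->time_base);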
I have noticed that cascades trained with the opencv_traincascade program do not run with the current version of opencv_performance. I've tried to convert the old performance cpp file to load the new types of cascades, but without success. The code is here:
#include "cv.h"
#include "highgui.h"
#include <cstdio>
#include <cmath>
#include <ctime>
#include <math.h>
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <iostream>
#include <stdio.h>
#ifndef PATH_MAX
#define PATH_MAX 512
#endif /* PATH_MAX */
/*typedef struct HidCascade {
int size;
int count;
} HidCascade;
*/
typedef struct ObjectPos {
float x;
float y;
float width;
int found; /* for reference */
int neghbors;
} ObjectPos;
using namespace std;
using namespace cv;
int main(int argc, char* argv[]) {
int i, j;
char* classifierdir = NULL;
//char* samplesdir = NULL;
int saveDetected = 1;
double scale_factor = 1.1;
float maxSizeDiff = 1.5F;
float maxPosDiff = 1.1F;
/* number of stages. if <=0 all stages are used */
//int nos = -1, nos0;
int width = 25;
int height = 15;
int rocsize;
FILE* info;
FILE* resultados;
char* infoname;
char fullname[PATH_MAX];
//char detfilename[PATH_MAX];
char* filename;
//char detname[] = "det-";
CascadeClassifier cascade;
double totaltime;
if (!(resultados = fopen("resultados.txt", "w"))) {
printf("Cannot create results file.\n");
exit(-1);
}
infoname = (char*) "";
rocsize = 20;
if (argc == 1) {
printf("Usage: %s\n -data <classifier_directory_name>\n"
" -info <collection_file_name>\n"
" [-maxSizeDiff <max_size_difference = %f>]\n"
" [-maxPosDiff <max_position_difference = %f>]\n"
" [-sf <scale_factor = %f>]\n"
" [-ni]\n"
" [-rs <roc_size = %d>]\n"
" [-w <sample_width = %d>]\n"
" [-h <sample_height = %d>]\n", argv[0], maxSizeDiff,
maxPosDiff, scale_factor, rocsize, width, height);
return 0;
}
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-data")) {
classifierdir = argv[++i];
} else if (!strcmp(argv[i], "-info")) {
infoname = argv[++i];
} else if (!strcmp(argv[i], "-maxSizeDiff")) {
maxSizeDiff = (float) atof(argv[++i]);
} else if (!strcmp(argv[i], "-maxPosDiff")) {
maxPosDiff = (float) atof(argv[++i]);
} else if (!strcmp(argv[i], "-sf")) {
scale_factor = atof(argv[++i]);
} else if (!strcmp(argv[i], "-ni")) {
saveDetected = 0;
} else if (!strcmp(argv[i], "-rs")) {
rocsize = atoi(argv[++i]);
} else if (!strcmp(argv[i], "-w")) {
width = atoi(argv[++i]);
} else if (!strcmp(argv[i], "-h")) {
height = atoi(argv[++i]);
}
}
if (!cascade.load(classifierdir)) {
printf("Unable to load classifier from %s\n", classifierdir);
return 1;
}
strcpy(fullname, infoname);
filename = strrchr(fullname, '\\');
if (filename == NULL) {
filename = strrchr(fullname, '/');
}
if (filename == NULL) {
filename = fullname;
} else {
filename++;
}
info = fopen(infoname, "r");
totaltime = 0.0;
if (info != NULL) {
int x, y, width, height;
Mat img;
int hits, missed, falseAlarms;
int totalHits, totalMissed, totalFalseAlarms;
int found;
float distance;
int refcount;
ObjectPos* ref;
int detcount;
ObjectPos* det;
int error = 0;
int* pos;
int* neg;
pos = (int*) cvAlloc(rocsize * sizeof(*pos));
neg = (int*) cvAlloc(rocsize * sizeof(*neg));
for (i = 0; i < rocsize; i++) {
pos[i] = neg[i] = 0;
}
printf("+================================+======+======+======+\n");
printf("| File Name | Hits |Missed| False|\n");
printf("+================================+======+======+======+\n");
fprintf(resultados,
"+================================+======+======+======+\n");
fprintf(resultados,
"| File Name | Hits |Missed| False|\n");
fprintf(resultados,
"+================================+======+======+======+\n");
//fprintf (resultados, "%d\n",framesCnt);
totalHits = totalMissed = totalFalseAlarms = 0;
while (!feof(info)) {
fscanf(info, "%s %d", filename, &refcount);
img = imread(fullname);
if (!img.data) {
cout << "ow" << endl;
return -1;
}
ref = (ObjectPos*) cvAlloc(refcount * sizeof(*ref));
for (i = 0; i < refcount; i++) {
error = (fscanf(info, "%d %d %d %d", &x, &y, &width, &height)
!= 4);
if (error)
break;
ref[i].x = 0.5F * width + x;
ref[i].y = 0.5F * height + y;
ref[i].width = sqrt(0.5F * (width * width + height * height));
ref[i].found = 0;
ref[i].neghbors = 0; //in the new cascade, where to get the neighbors?
}
vector<Rect> obj_detectados;
Rect retang;
if (!error) {
totaltime -= time(0);
cascade.detectMultiScale(img, obj_detectados, scale_factor, 4, 0
//|CV_HAAR_FIND_BIGGEST_OBJECT
// |CV_HAAR_DO_ROUGH_SEARCH
| CV_HAAR_SCALE_IMAGE, Size(25, 15));
totaltime += time(0);
if (obj_detectados.size() == 0) {
detcount = 0;
} else {
detcount = obj_detectados.size();
}
det = (detcount > 0) ?
((ObjectPos*) cvAlloc(detcount * sizeof(*det))) : NULL;
hits = missed = falseAlarms = 0;
for (vector<Rect>::const_iterator r = obj_detectados.begin();
r != obj_detectados.end(); r++, i++) {
Point r1, r2;
r1.x = (r->x);
r1.y = (r->y);
r2.x = (r->x + r->width);
r2.y = (r->y + r->height);
retang.x = r1.x;
retang.y = r1.y;
retang.width = abs(r2.x - r1.x);
retang.height = abs(r2.y - r1.y);
if (saveDetected) {
rectangle(img, retang, Scalar(0, 0, 255), 3, CV_AA);
}
det[i].x = 0.5F*r->width + r->x;
det[i].y = 0.5F*r->height + r->y;
det[i].width = sqrt(0.5F * (r->width * r->width
+ r->height * r->height));
det[i].neghbors = 1; // i don't know if it will work...
// det[i].neghbors = r.neighbors; --- how to do it in the new version??
found = 0;
for (j = 0; j < refcount; j++) {
distance = sqrtf( (det[i].x - ref[j].x) * (det[i].x - ref[j].x) +
(det[i].y - ref[j].y) * (det[i].y - ref[j].y) );
//cout << distance << endl;
if( (distance < ref[j].width * maxPosDiff) &&
(det[i].width > ref[j].width / maxSizeDiff) &&
(det[i].width < ref[j].width * maxSizeDiff) )
{
ref[j].found = 1;
ref[j].neghbors = MAX( ref[j].neghbors, det[i].neghbors );
found = 1;
}
}
if (!found) {
falseAlarms++;
neg[MIN(det[i].neghbors, rocsize - 1)]++;
//neg[MIN(0, rocsize - 1)]++;
}
}
//imshow("teste", img);
if (saveDetected) {
//strcpy(detfilename, detname);
//strcat(detfilename, filename);
//strcpy(filename, detfilename);
imwrite(fullname, img);
//cvvSaveImage(fullname, img);
}
for (j = 0; j < refcount; j++) {
if (ref[j].found) {
hits++;
//pos[MIN(0, rocsize - 1)]++;
pos[MIN(ref[j].neghbors, rocsize - 1)]++;
} else {
missed++;
}
}
totalHits += hits;
totalMissed += missed;
totalFalseAlarms += falseAlarms;
printf("|%32.64s|%6d|%6d|%6d|\n", filename, hits, missed,
falseAlarms);
//printf("+--------------------------------+------+------+------+\n");
fprintf(resultados, "|%32.64s|%6d|%6d|%6d|\n", filename, hits,
missed, falseAlarms);
//fprintf(resultados,
// "+--------------------------------+------+------+------+\n");
fflush(stdout);
if (det) {
cvFree( &det);
det = NULL;
}
} /* if( !error ) */
//char c = (char) waitKey(10);
// if (c == 27)
// exit(0);
cvFree( &ref);
}
fclose(info);
printf("|%32.32s|%6d|%6d|%6d|\n", "Total", totalHits, totalMissed,
totalFalseAlarms);
fprintf(resultados, "|%32.32s|%6d|%6d|%6d|\n", "Total", totalHits,
totalMissed, totalFalseAlarms);
printf("+================================+======+======+======+\n");
fprintf(resultados,
"+================================+======+======+======+\n");
//printf("Number of stages: %d\n", nos);
//printf("Number of weak classifiers: %d\n", numclassifiers[nos - 1]);
printf("Total time: %f\n", totaltime);
fprintf(resultados, "Total time: %f\n", totaltime);
/* print ROC to stdout */
for (i = rocsize - 1; i > 0; i--) {
pos[i - 1] += pos[i];
neg[i - 1] += neg[i];
}
//fprintf(stderr, "%d\n", nos);
for (i = 0; i < rocsize; i++) {
fprintf(stderr, "\t%d\t%d\t%f\t%f\n", pos[i], neg[i],
((float) pos[i]) / (totalHits + totalMissed),
((float) neg[i]) / (totalHits + totalMissed));
}
cvFree( &pos);
cvFree( &neg);
}
return 0;
}
My doubt is about det[i].neghbors = r.neighbors; in the old performance.cpp. How do I retrieve the neighbors in this new version?
Could anyone help me convert opencv_performance to run the new cascades from opencv_traincascade?
Many thanks!
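Edit: one possible lead, based on the newer C++ API (an assumption on my part, untested): recent OpenCV releases add a detectMultiScale overload that reports, per rectangle, how many raw detections were merged into it, which is essentially the old neighbors value:

// Sketch against the code above: numDetections[k] is the merged-detection
// count for objects[k], standing in for the old r.neighbors.
std::vector<cv::Rect> objects;
std::vector<int> numDetections;
cascade.detectMultiScale(img, objects, numDetections, scale_factor, 4,
                         0 | CV_HAAR_SCALE_IMAGE, cv::Size(25, 15));
for (size_t k = 0; k < objects.size(); ++k)
    det[k].neghbors = numDetections[k];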