Im trying to convert a char* to float* - c++

Im trying to record audio using ALSA and pass it to be processed. The audio sample is returned from this which is char* to a float*
Ive tried so many solutions I think I understand that it's not really a char buffer but a byte buffer but how I get it a float.
This returns the buffer:
const unsigned char* arBuffer(void)
{
return buffer;
}
I need to consume the output of the microphone as a float
int32_t O_DecodeAudioBuffer(float *audioBuffer, int size, void *oxyingObject)
{
Core *oxying = (COxyCore*)oxyingObject;
//Decode audioBuffer to check if begin token is found, we should keep previous buffer to check if token was started in previous
//var mDecoding > 0 when token has been found, once decoding is finished, mDecoding = 0
return oxying->mDecoder->DecodeAudioBuffer(audioBuffer, size);
}
Im writing a program to consume the the above as api:
void* mOxyCore; is declared
I then try and pass the arBuffer() which wouldn't work as expected.
while(arIsRunning())
{
int ret = DecodeAudioBuffer(arBuffer(), arBufferSize(), mCore);
}
The Alsa:
/* Use the newer ALSA API */
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <stdlib.h>
#include <alsa/asoundlib.h>
#include <pthread.h>
#include "settings.h"
#include "audiorecorder.h"
pthread_t thr;
pthread_mutex_t mutex;
snd_pcm_t *handle;
snd_pcm_uframes_t frames;
unsigned char* buffer;
BOOL running;
size_t buffersize;
BOOL arIsRunning(void)
{
return running;
}
void arAcquireBuffer(void)
{
//printf("Acquired buffer\n");
pthread_mutex_lock(&mutex);
}
void arReleaseBuffer(void)
{
//printf("Released buffer\n");
pthread_mutex_unlock(&mutex);
}
const unsigned char* arBuffer(void)
{
return buffer;
}
const size_t arBufferSize(void)
{
return buffersize;
}
void* entry_point(void *arg)
{
int rc;
fprintf(stderr, "Listening...\n");
while (running)
{
arAcquireBuffer();
rc = snd_pcm_readi(handle, buffer, frames);
//stream to stdout - useful for testing/debugging
//write(1, buffer, buffersize);
arReleaseBuffer();
if (rc == -EPIPE) {
/* EPIPE means overrun */
fprintf(stderr, "overrun occurred\n");
snd_pcm_prepare(handle);
}
else if (rc < 0) {
fprintf(stderr, "error from read: %s\n", snd_strerror(rc));
running = FALSE;
}
else if (rc != (int)frames) {
fprintf(stderr, "short read, read %d frames\n", rc);
}
}
return NULL;
}
int arInitialise(void)
{
snd_pcm_hw_params_t *params;
unsigned int val;
int rc, dir;
running = FALSE;
/* Open PCM device for recording (capture). */
rc = snd_pcm_open(&handle, RECORDER_DEVICE, SND_PCM_STREAM_CAPTURE, 0);
if (rc < 0) {
fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
return rc;
}
else
{
fprintf(stderr, "Successfully opened default capture device.\n");
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
fprintf(stderr, "Format set to PCM Signed 16bit Little Endian.\n");
/* Channels */
snd_pcm_hw_params_set_channels(handle, params, NUM_CHANNELS);
fprintf(stderr, "Channels set to %d.\n", NUM_CHANNELS);
/* sampling rate */
val = SAMPLE_RATE;
snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);
fprintf(stderr, "Samplerate set to %d.\n", val);
/* Set period to FRAMES_PER_BUFFER frames. */
frames = FRAMES_PER_BUFFER;
snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0) {
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
return rc;
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
buffersize = frames * 2 * NUM_CHANNELS; /* 2 bytes/sample * channels */
buffer = (unsigned char*) malloc(buffersize);
/* We want to loop forever */
//snd_pcm_hw_params_get_period_time(params, &val, &dir);
return 0;
}
int arStartRecording(void)
{
if(running) return 1;
if(pthread_mutex_init(&mutex, NULL))
{
printf("Unable to initialize mutex\n");
return -1;
}
if(pthread_create(&thr, NULL, &entry_point, NULL))
{
fprintf(stderr, "Could not create recorder thread!\n");
running = FALSE;
return -1;
}
running = TRUE;
return 0;
}
void arStopRecording(void)
{
running = FALSE;
}
void arFree(void)
{
running = FALSE;
sleep(500);
snd_pcm_drain(handle);
snd_pcm_close(handle);
pthread_mutex_destroy(&mutex);
free(buffer);
}

The problem here isn't a cast, but a representation issue.
Audio is generally represented as a series of samples. There are quite a few ways to represent each sample: on a scale from -1.0f to +1.0f, or -32767 to +32767, or many others.
Alsa supports in fact many formats, and you chose SND_PCM_FORMAT_S16_LE so that's -32767 to +32767. You could cast that to std::int16_t*, assuming your C++ environment is Little-Endian (almost certain). You can't cast it to float*, for that you'd need to ask for SND_PCM_FORMAT_FLOAT_LE

Related

How do I use the FFmpeg libraries to extract every nth frame from a video and save it as a small image file in C++?

After experimenting with the examples on the FFmpeg documentation, I was finally able to create a short program that extracts every nth frame from a video. However, the output files that it produces are huge at over 15mb for each image. How can I change this to produce lower quality images?
The result I am trying to get is done easily on the command line with:
ffmpeg -i [input video] -vf "select=not(mod(n\,10))" -fps_mode vfr img_%03d.jpg
For a video with about 500 frames, this creates 50 images that are only about 800kb each; how am would I be able to mimic this in my program?
My code consists of opening the input file, decoding the packets, then saving the frames:
#include <cstdio>
#include <cstdlib>
#include <iostream>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}
static AVFormatContext *fmt_ctx;
static AVCodecContext *dec_ctx;
static int video_stream_index = -1;
// OPEN THE INPUT FILE
static int open_input_file(const char *filename) {
// INIT VARS AND FFMPEG OBJECTS
int ret;
const AVCodec *dec;
// OPEN INPUT FILE
if((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
printf("ERROR: failed to open input file\n");
return ret;
}
// FIND STREAM INFO BASED ON INPUT FILE
if((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
printf("ERROR: failed to find stream information\n");
return ret;
}
// FIND THE BEST VIDEO STREAM FOR THE INPUT FILE
ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
if(ret < 0) {
printf("ERROR: failed to find a video stream in the input file\n");
return ret;
}
video_stream_index = ret;
// ALLOCATE THE DECODING CONTEXT FOR THE INPUT FILE
dec_ctx = avcodec_alloc_context3(dec);
if(!dec_ctx) {
printf("ERROR: failed to allocate decoding context\n");
// CAN NOT ALLOCATE MEMORY ERROR
return AVERROR(ENOMEM);
}
avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
// INIT THE VIDEO DECODER
if((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
printf("ERROR: failed to open video decoder\n");
return ret;
}
return 0;
}
// SAVE THE FILE
static void save(unsigned char *buf, int wrap, int x_size, int y_size, char *file_name) {
// INIT THE EMPTY FILE
FILE *file;
// OPEN AND WRITE THE IMAGE FILE
file = fopen(file_name, "wb");
fprintf(file, "P6\n%d %d\n%d\n", x_size, y_size, 255);
for(int i = 0; i < y_size; i++) {
fwrite(buf + i * wrap, 1, x_size * 3, file);
}
fclose(file);
}
// DECODE FRAME AND CONVERT IT TO AN RGB IMAGE
static void decode(AVCodecContext *cxt, AVFrame *frame, AVPacket *pkt,
const char *out_file_name, const char *file_ext, int mod=1) {
// INIT A BLANK CHAR TO HOLD THE FILE NAME AND AN EMPTY INT TO HOLD FUNCTION RETURN VALUES
char buf[1024];
int ret;
// SEND PACKET TO DECODER
ret = avcodec_send_packet(cxt, pkt);
if(ret < 0) {
printf("ERROR: error sending packet for decoding\n");
exit(1);
}
// CREATE A SCALAR CONTEXT FOR CONVERSION
SwsContext *sws_ctx = sws_getContext(dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt, dec_ctx->width,
dec_ctx->height, AV_PIX_FMT_RGB24, SWS_BICUBIC, NULL, NULL, NULL);
// CREATE A NEW RGB FRAME FOR CONVERSION
AVFrame* rgb_frame = av_frame_alloc();
rgb_frame->format = AV_PIX_FMT_RGB24;
rgb_frame->width = dec_ctx->width;
rgb_frame->height = dec_ctx->height;
// ALLOCATE A NEW BUFFER FOR THE RGB CONVERSION FRAME
av_frame_get_buffer(rgb_frame, 0);
// WHILE RETURN COMES BACK OKAY (FUNCTION RETURNS >= 0)...
while(ret >= 0) {
// GET FRAME BACK FROM DECODER
ret = avcodec_receive_frame(cxt, frame);
// IF "RESOURCE TEMP NOT AVAILABLE" OR "END OF FILE" ERROR...
if(ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
return;
} else if(ret < 0) {
printf("ERROR: error during decoding\n");
exit(1);
}
// IF FRAME NUMBER IF THE (MOD)TH FRAME...
if(cxt->frame_number % mod == 0){
// OUTPUT WHICH FRAME IS BEING SAVED
printf("saving frame %03d\n", cxt->frame_number);
// REMOVES TEMPORARY BUFFERED DATA
fflush(stdout);
// SCALE (CONVERT) THE OLD FRAME TO THE NEW RGB FRAME
sws_scale(sws_ctx, frame->data, frame->linesize, 0, frame->height,
rgb_frame->data, rgb_frame->linesize);
// SET "BUF" TO THE OUTPUT FILE PATH (SAVES TO "out_file_name_###.file_ext")
snprintf(buf, sizeof(buf), "%s_%03d.%s", out_file_name, cxt->frame_number, file_ext);
// SAVE THE FRAME
save(rgb_frame->data[0], rgb_frame->linesize[0], rgb_frame->width, rgb_frame->height, buf);
}
}
}
int main() {
// SIMULATE COMMAND LINE ARGUMENTS
char argv0[] = "test";
char argv1[] = "/User/Desktop/frames/test_video.mov";
char *argv[] = {argv0, argv1, nullptr};
// INIT VARS AND FFMPEG OBJECTS
int ret;
AVPacket *packet;
AVFrame *frame;
// ALLOCATE FRAME AND PACKET
frame = av_frame_alloc();
packet = av_packet_alloc();
if (!frame || !packet) {
fprintf(stderr, "Could not allocate frame or packet\n");
exit(1);
}
// IF FILE DOESN'T OPEN, GO TO THE END
if((ret = open_input_file(argv[1])) < 0) {
goto end;
}
// READ ALL THE PACKETS - simple
while(av_read_frame(fmt_ctx, packet) >= 0) {
// IF PACKET INDEX MATCHES VIDEO INDEX...
if (packet->stream_index == video_stream_index) {
// SEND PACKET TO THE DECODER and SAVE
std::string name = "/User/Desktop/frames/img";
std::string ext = "bmp";
decode(dec_ctx, frame, packet, name.c_str(), ext.c_str(), 5);
}
// UNREFERENCE THE PACKET
av_packet_unref(packet);
}
// END MARKER
end:
avcodec_free_context(&dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
av_packet_free(&packet);
// FINAL ERROR CATCH
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
exit(1);
}
exit(0);
}
I am not sure how to go about producing images that are much smaller in size like the ones produced on the command line. I have a feeling that this is possible somehow during the conversion to RGB or the saving of the file but I can't seem to figure out how.
Also, is there any way that I could go about this much more efficiently? On the command line, this finishes very quickly (no more than a second or two for a 9 sec. movie at ~60 fps).
The command line version compresses the frame into jpeg file hence the size is very small. On the other hand, your code writes the rgb values directly into a file (regardless of the file extension). The size of the image is then Height x Width x 3 bytes, which is very big.
Solution: Adjust your save function to also compress the image.
Code example from Github - save_frame_as_jpeg.c:
int save_frame_as_jpeg(AVCodecContext *pCodecCtx, AVFrame *pFrame, int FrameNo)
{
AVCodec *jpegCodec = avcodec_find_encoder(AV_CODEC_ID_JPEG2000);
if (!jpegCodec) { return -1; }
AVCodecContext *jpegContext = avcodec_alloc_context3(jpegCodec);
if (!jpegContext) { return -1; }
jpegContext->pix_fmt = pCodecCtx->pix_fmt;
jpegContext->height = pFrame->height;
jpegContext->width = pFrame->width;
if (avcodec_open2(jpegContext, jpegCodec, NULL) < 0)
{ return -1; }
FILE *JPEGFile;
char JPEGFName[256];
AVPacket packet = {.data = NULL, .size = 0};
av_init_packet(&packet);
int gotFrame;
if (avcodec_encode_video2(jpegContext, &packet, pFrame, &gotFrame) < 0)
{ return -1; }
sprintf(JPEGFName, "dvr-%06d.jpg", FrameNo);
JPEGFile = fopen(JPEGFName, "wb");
fwrite(packet.data, 1, packet.size, JPEGFile);
fclose(JPEGFile);
av_free_packet(&packet);
avcodec_close(jpegContext);
return 0;
}

C++ ffmpeg Specified pixel format is invalid or not supported

So I have a program that reads an opengl window and encodes the read data as a video. Now through a series of experimentation I have learned that the bit format of my glfw window is 8:8:8 as returned by glfwGetVideoMode(monitor). So I use this function to read the window:
glReadPixels(0, 0,gl_width, gl_height,GL_RGBA, GL_UNSIGNED_BYTE, (GLvoid*) Buffer);
and I simply encode it in the AV_PIX_FMT_YUV420P format.
Under normal circumstances this method works just fine. However, when I actually run the program, the output I get, as opposed to what I can see in the glfw window, is really low resolution and a bit pixelated.
Here is what my GLFW window looks like:
Now this is what I want it to look like. It looks just fine on the opengl window, and I encode it directly without altering Buffer.
And here is what the encoded result, test.mp4 looks like when I run it using mplayer or similar software:
It's a lot more blurry and pixelated compare to the GLFW window. With some experimentation and following an answer to another question I asked, I us avcodec_find_best_pix_fmt_of_list((*codec)->pix_fmts, AV_PIX_FMT_RGBA, 1, &ret) and it returned 13. Which led me to believe using AV_PIX_FMT_YUVJ422P is the best option for this convertion to not have a blurry/pixelated result. However, no matter which function I pass, every single format gives off an error except AV_PIX_FMT_YUV420P. The error is:
[mpeg4 # 0x558e74f47900] Specified pixel format yuvj422p is invalid or not supported
I have no idea why this is happening, as the format is bound to a define and it is changed throughout the entire program when I change the define.
Here is my encoder so far (I have trimmed some parts):
video_encoder.cpp:
int video_encoder::write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
AVStream *st, AVFrame *frame, AVPacket *pkt)
{
int ret;
// Conditional jump or move depends on uninitialised value
// Use of uninitialised value of size 8
// send the frame to the encoder
// Error is about c.
ret = avcodec_send_frame(c, frame);
if (ret < 0) {
std::cout << "Error sending a frame to the encoder: " << ret << std::endl;
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(c, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
else if (ret < 0) {
std::cout << "Error encoding a frame: " << ret << std::endl;
exit(1);
}
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, c->time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
//log_packet(fmt_ctx, pkt);
//std::cout << "Packet: " << pkt << std::endl;
ret = av_interleaved_write_frame(fmt_ctx, pkt);
/* pkt is now blank (av_interleaved_write_frame() takes ownership of
* its contents and resets pkt), so that no unreferencing is necessary.
* This would be different if one used av_write_frame(). */
if (ret < 0) {
std::cout << "Error while writing output packet: " << ret << std::endl;
exit(1);
}
}
return ret == AVERROR_EOF ? 1 : 0;
}
/* Add an output stream. */
void video_encoder::add_stream(OutputStream *ost, AVFormatContext *oc,
const AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->tmp_pkt = av_packet_alloc();
if (!ost->tmp_pkt) {
fprintf(stderr, "Could not allocate AVPacket\n");
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type) {
case AVMEDIA_TYPE_AUDIO:
...
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 10000;
/* Resolution must be a multiple of two. */
c->width = width;
c->height = height;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE }; // *frame_rate
c->time_base = ost->st->time_base;
c->gop_size = 7; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
//if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO)
// c->max_b_frames = 2;
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
if ((*codec)->pix_fmts){
//c->pix_fmt = (*codec)->pix_fmts[0];
std::cout << "NEW FORMAT : " << c->pix_fmt << std::endl;
}
int ret;
avcodec_find_best_pix_fmt_of_list((*codec)->pix_fmts, AV_PIX_FMT_RGBA, 1, &ret);
std::cout << "Desired format is: " << ret << std::endl;
break;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* video output */
AVFrame* video_encoder::alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
void video_encoder::open_video(AVFormatContext *oc, const AVCodec *codec,
OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: %s\n", ret);
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
void video_encoder::set_frame_yuv_from_rgb(AVFrame *frame, struct SwsContext *sws_context) {
const int in_linesize[1] = { 4 * width };
//uint8_t* dest[4] = { rgb_data, NULL, NULL, NULL };
sws_context = sws_getContext(
width, height, AV_PIX_FMT_RGBA,
width, height, STREAM_PIX_FMT,
SCALE_FLAGS, 0, 0, 0);
sws_scale(sws_context, (const uint8_t * const *)&rgb_data, in_linesize, 0,
height, frame->data, frame->linesize);
}
AVFrame* video_encoder::get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
(float) STREAM_DURATION / 1000, (AVRational){ 1, 1 }) > 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
set_frame_yuv_from_rgb(ost->frame, ost->sws_ctx);
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
int video_encoder::write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);
}
void video_encoder::close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
av_packet_free(&ost->tmp_pkt);
//sws_freeContext(ost->sws_ctx);
//swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
void video_encoder::set_encode_framebuffer(uint8_t* data, bool audio_only)
{
rgb_data = data;
}
video_encoder::~video_encoder()
{
av_write_trailer(enc_inf.oc);
/* Close each codec. */
if (enc_inf.have_video)
close_stream(enc_inf.oc, &enc_inf.video_st);
if (!(enc_inf.fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&enc_inf.oc->pb);
/* free the stream */
avformat_free_context(enc_inf.oc);
std::cout << "Done, closing." << std::endl;
}
bool video_encoder::encode_one_frame()
{
if (enc_inf.encode_video || enc_inf.encode_audio) {
/* select the stream to encode */
if (enc_inf.encode_video &&
(!enc_inf.encode_audio || av_compare_ts(enc_inf.video_st.next_pts, enc_inf.video_st.enc->time_base,
enc_inf.audio_st.next_pts, enc_inf.audio_st.enc->time_base) <= 0)) {
enc_inf.encode_video = !write_video_frame(enc_inf.oc, &enc_inf.video_st);
return true;
}
}
return false;
}
video_encoder::video_encoder(int w, int h, float fps, unsigned int duration)
:width(w), height(h), STREAM_FRAME_RATE(fps), STREAM_DURATION(duration)
{
//std::filesystem::create_directory("media");
//std::string as_str = "./output/" + std::string(getenv ("OUTPUT_UUID")) + ".mp4";
std::string as_str = "./output/video.mp4";
char* filename = const_cast<char*>(as_str.c_str());
enc_inf.video_st, enc_inf.audio_st = (struct OutputStream) { 0 };
enc_inf.video_st.next_pts = 1;
enc_inf.audio_st.next_pts = 1;
enc_inf.encode_audio, enc_inf.encode_video = 0;
int ret;
int i;
//rgb_data = (uint8_t*)malloc( 48 * sizeof(uint8_t) );
/* allocate the output media context */
avformat_alloc_output_context2(&enc_inf.oc, NULL, NULL, filename);
if (!enc_inf.oc) {
//VI_ERROR("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&enc_inf.oc, NULL, "mpeg", filename);
}
if (!enc_inf.oc)
std::cout << "FAILED" << std::endl;
//return 1;
enc_inf.fmt = enc_inf.oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (enc_inf.fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&enc_inf.video_st, enc_inf.oc, &video_codec, enc_inf.fmt->video_codec);
enc_inf.have_video = 1;
enc_inf.encode_video = 1;
}
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (enc_inf.have_video)
open_video(enc_inf.oc, video_codec, &enc_inf.video_st, opt);
/* open the output file, if needed */
if (!(enc_inf.fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&enc_inf.oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
//VI_ERROR("Could not open '%s': %s\n", filename, ret);
//return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(enc_inf.oc, &opt);
if (ret < 0) {
VI_ERROR("Error occurred when opening output file:");
//return 1;
}
//return 0;
}
video_encoder.h:
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_SPLINE
/* The output bit rate in bit/s */
#define OUTPUT_BIT_RATE 96000
/* The number of output channels */
#define OUTPUT_CHANNELS 2
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
AVPacket *tmp_pkt;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
typedef struct {
OutputStream video_st, audio_st;
const AVOutputFormat *fmt;
AVFormatContext *oc;
int have_video, have_audio, encode_video, encode_audio;
std::string name;
} encode_info;
Again, changing STREAM_PIX_FMT anything other than AV_PIX_FMT_YUV420P causes the program to give the error.
What is the cause of this and how can I fix this? Also am I on the right track for fixing the pixelation problem? I'm using ubuntu.

Reading mp3 file using ffmpeg caues memory leaks, even after freeing it in main

i am continuously reading mp3 files and processing them, but the memory keeps getting build up even though i freed it.
At the bottom read_audio_mp3(), they are already freeing some variable.
why do i still face a memory build up and how do i deal with it ?
following this code : https://rodic.fr/blog/libavcodec-tutorial-decode-audio-file/, i read mp3 using this function
int read_audio_mp3(string filePath_str, const int sample_rate,
double** output_buffer, int &AUDIO_DURATION){
const char* path = filePath_str.c_str();
/* Reads the file header and stores information about the file format. */
AVFormatContext* format = avformat_alloc_context();
if (avformat_open_input(&format, path, NULL, NULL) != 0) {
fprintf(stderr, "Could not open file '%s'\n", path);
return -1;
}
/* Check out the stream information in the file. */
if (avformat_find_stream_info(format, NULL) < 0) {
fprintf(stderr, "Could not retrieve stream info from file '%s'\n", path);
return -1;
}
/* find an audio stream. */
int stream_index =- 1;
for (unsigned i=0; i<format->nb_streams; i++) {
if (format->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
stream_index = i;
break;
}
}
if (stream_index == -1) {
fprintf(stderr, "Could not retrieve audio stream from file '%s'\n", path);
return -1;
}
AVStream* stream = format->streams[stream_index];
// find & open codec
AVCodecContext* codec = stream->codec;
if (avcodec_open2(codec, avcodec_find_decoder(codec->codec_id), NULL) < 0) {
fprintf(stderr, "Failed to open decoder for stream #%u in file '%s'\n", stream_index, path);
return -1;
}
// prepare resampler
struct SwrContext* swr = swr_alloc();
av_opt_set_int(swr, "in_channel_count", codec->channels, 0);
av_opt_set_int(swr, "out_channel_count", 1, 0);
av_opt_set_int(swr, "in_channel_layout", codec->channel_layout, 0);
av_opt_set_int(swr, "out_channel_layout", AV_CH_LAYOUT_MONO, 0);
av_opt_set_int(swr, "in_sample_rate", codec->sample_rate, 0);
av_opt_set_int(swr, "out_sample_rate", sample_rate, 0);
av_opt_set_sample_fmt(swr, "in_sample_fmt", codec->sample_fmt, 0);
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_DBL, 0);
swr_init(swr);
if (!swr_is_initialized(swr)) {
fprintf(stderr, "Resampler has not been properly initialized\n");
return -1;
}
/* Allocate an audio frame. */
AVPacket packet;
av_init_packet(&packet);
AVFrame* frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Error allocating the frame\n");
return -1;
}
// iterate through frames
*output_buffer = NULL;
AUDIO_DURATION = 0;
while (av_read_frame(format, &packet) >= 0) {
// decode one frame
int gotFrame;
if (avcodec_decode_audio4(codec, frame, &gotFrame, &packet) < 0) {
// free packet
av_free_packet(&packet);
break;
}
if (!gotFrame) {
// free packet
av_free_packet(&packet);
continue;
}
// resample frames
double* buffer;
av_samples_alloc((uint8_t**) &buffer, NULL, 1, frame->nb_samples, AV_SAMPLE_FMT_DBL, 0);
int frame_count = swr_convert(swr, (uint8_t**) &buffer, frame->nb_samples, (const uint8_t**) frame->data, frame->nb_samples);
// append resampled frames to output_buffer
*output_buffer = (double*) realloc(*output_buffer,
(AUDIO_DURATION + frame->nb_samples) * sizeof(double));
memcpy(*output_buffer + AUDIO_DURATION, buffer, frame_count * sizeof(double));
AUDIO_DURATION += frame_count;
// free buffer & packet
av_free_packet(&packet);
av_free( buffer );
}
// clean up
av_frame_free(&frame);
swr_free(&swr);
avcodec_close(codec);
avformat_free_context(format);
return 0;
}
Main Script : MemoryLeak.cpp
// imports
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <iostream>
#include <sstream>
#include <vector>
#include <sys/time.h>
extern "C"
{
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
}
using namespace std;
int main (int argc, char ** argv) {
string wavpath = argv[1];
printf("wavpath=%s\n", wavpath.c_str());
printf("\n==== Params =====\n");
// Init
int AUDIO_DURATION;
int sample_rate = 8000;
av_register_all();
printf("\n==== Reading MP3 =====\n");
while (true) {
// Read mp3
double* buffer;
if (read_audio_mp3(wavpath, sample_rate, &buffer, AUDIO_DURATION) != 0) {
printf("Cannot read %s\n", wavpath.c_str());
continue;
}
/*
Process the buffer for down stream tasks.
*/
// Freeing the buffer
free(buffer);
}
return 0 ;
}
Compiling
g++ -o ./MemoryLeak.out -Ofast -Wall -Wextra \
-std=c++11 "./MemoryLeak.cpp" \
-lavformat -lavcodec -lavutil -lswresample
Running, by right my input an argument wav.scp that reads text file of all the mp3s.
But for easy to replicate purpose, i only read 1 file song.mp3 in and i keep re-reading it
./MemoryLeak.out song.mp3
Why do i know i have memory leaks?
I was running up 32 jobs in parallel for 14 million files, and when i wake up in the morning, they were abruptly killed.
I run htop and i monitor the progress when i re-run it, and i saw that the VIRT & RES & Mem are continuously increasing.
Edit 1:
My setup :
ffmpeg version 2.8.15-0ubuntu0.16.04.1
built with gcc 5.4.0

Cast void* to memcpy to get a float value

Here is an example of the problem I'm trying to solve I get a buffer from the microphone and try and process it content. as kindly guided from this question Im trying to convert a char* to float*
the logic I declare a vector to hold my desired float then resize it to that of ArBuffer() and then copy to the vector.
ArBuffer() is a void gonna have to cast this to memcpy?
#include "Lib_api.h"
#include <alsa/asoundlib.h>
#include <stdio.h>
#include "audiorecorder.h"
#include "Globals.h"
#include <iostream>
#include <inttypes.h>
#include <string.h>
#include <stdlib.h>
#include <vector>
#include <cstring>
using namespace std;
//Declare Creation
void* mCore;
int main(void)
{
// recorder
int rc;
int mode = 3;
const float sampleRate = 44100; //max 22Hz
int bufferSize = 1024; //Check this should be good 1024
//initialise
mCore = OXY_Create();
//initialise audio recorder
rc = arInitialise();
OXY_Configure(mode, sampleRate, bufferSize, mCore);
//initialise check hardware
if(rc)
{
std::cerr << "Fatal error: Audio could not be initialised" << rc << std::endl <<std::endl;
arFree();
exit(1);
}
//start recording
rc = arStartRecording();
//application loop
while(arIsRunning())
{
//declare vector
std::vector<float> values;
//resize values to size of arbuffersize
values.resize(arBufferSize(), sizeof(float));
//arBufferSize()/sizeof(float);
//need to cast this arBuffer() to memcpy?
std::memcpy(arBuffer(), &values[0], sizeof(values[0]));
// values[0] this will hold the latest data from the microphone?
int ret = OXY_DecodeAudioBuffer(&values[0], values.size(), mCore);
if (ret == -2)
{
std::cerr << "FOUND_TOKEN ---> -2 " << std::endl << std::endl;
}
else if(ret>=0)
{
std::cerr << "Decode started ---> -2 " << ret << std::endl << std::endl;
}
else if (ret == -3)
{
//int sizeStringDecoded = OXY_GetDecodedData(mStringDecoded, mCore);
std::cerr << "STRING DECODED ---> -2 " << std::endl << std::endl;
// ...
}
else
{
std::cerr << "No data found in this buffer" << std::endl << std::endl;
//no data found in this buffer
}
}
//Clean up
arFree();
return 0;
}
I change the format to SND_PCM_FORMAT_FLOAT_LE from SND_PCM_FORMAT_S16_LE as kindly suggested from another SO question.
* Use the newer ALSA API */
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <stdlib.h>
#include <alsa/asoundlib.h>
#include <pthread.h>
#include "settings.h"
#include "audiorecorder.h"
pthread_t thr;
pthread_mutex_t mutex;
snd_pcm_t *handle;
snd_pcm_uframes_t frames;
unsigned char* buffer;
BOOL running;
size_t buffersize;
BOOL arIsRunning(void)
{
return running;
}
void arAcquireBuffer(void)
{
//printf("Acquired buffer\n");
pthread_mutex_lock(&mutex);
}
void arReleaseBuffer(void)
{
//printf("Released buffer\n");
pthread_mutex_unlock(&mutex);
}
const unsigned char* arBuffer(void)
{
return buffer;
}
const size_t arBufferSize(void)
{
return buffersize;
}
void* entry_point(void *arg)
{
int rc;
fprintf(stderr, "Listening...\n");
while (running)
{
arAcquireBuffer();
rc = snd_pcm_readi(handle, buffer, frames);
//stream to stdout - useful for testing/debugging
//write(1, buffer, buffersize);
arReleaseBuffer();
if (rc == -EPIPE) {
/* EPIPE means overrun */
fprintf(stderr, "overrun occurred\n");
snd_pcm_prepare(handle);
}
else if (rc < 0) {
fprintf(stderr, "error from read: %s\n", snd_strerror(rc));
running = FALSE;
}
else if (rc != (int)frames) {
fprintf(stderr, "short read, read %d frames\n", rc);
}
}
return NULL;
}
int arInitialise(void)
{
snd_pcm_hw_params_t *params;
unsigned int val;
int rc, dir;
running = FALSE;
/* Open PCM device for recording (capture). */
rc = snd_pcm_open(&handle, RECORDER_DEVICE, SND_PCM_STREAM_CAPTURE, 0);
if (rc < 0) {
fprintf(stderr, "unable to open pcm device: %s\n", snd_strerror(rc));
return rc;
}
else
{
fprintf(stderr, "Successfully opened default capture device.\n");
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_FLOAT_LE)
/* Channels */
snd_pcm_hw_params_set_channels(handle, params, NUM_CHANNELS);
fprintf(stderr, "Channels set to %d.\n", NUM_CHANNELS);
/* sampling rate */
val = SAMPLE_RATE;
snd_pcm_hw_params_set_rate_near(handle, params, &val, &dir);
fprintf(stderr, "Samplerate set to %d.\n", val);
/* Set period to FRAMES_PER_BUFFER frames. */
frames = FRAMES_PER_BUFFER;
snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0) {
fprintf(stderr, "unable to set hw parameters: %s\n", snd_strerror(rc));
return rc;
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
buffersize = frames * 2 * NUM_CHANNELS; /* 2 bytes/sample * channels */
buffer = (unsigned char*) malloc(buffersize);
/* We want to loop forever */
//snd_pcm_hw_params_get_period_time(params, &val, &dir);
return 0;
}
int arStartRecording(void)
{
if(running) return 1;
if(pthread_mutex_init(&mutex, NULL))
{
printf("Unable to initialize mutex\n");
return -1;
}
if(pthread_create(&thr, NULL, &entry_point, NULL))
{
fprintf(stderr, "Could not create recorder thread!\n");
running = FALSE;
return -1;
}
running = TRUE;
return 0;
}
void arStopRecording(void)
{
running = FALSE;
}
void arFree(void)
{
running = FALSE;
sleep(500);
snd_pcm_drain(handle);
snd_pcm_close(handle);
pthread_mutex_destroy(&mutex);
free(buffer);
}
values.resize(arBufferSize(), sizeof(float))
Well, that wasn't what I wrote in the other comment. You need to divide the buffersize (in bytes) by the number of bytes per float to get the number of floats: arBufferSize() / sizeof(float)
std::memcpy(arBuffer(), &values[0], sizeof(values[0]));
memcpy for historical reasons has its destination and source reversed. The const* error is because you're asking memcpy to write to arBuffer.
Also, sizeof(values[0]) is the size of one float, in bytes. You already have arBufferSize(), which is exactly the size that memcpy needs.

ALSA card not outputting any data

I'm running the following program from this website:
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
int main() {
long loops;
int rc;
int size;
snd_pcm_t *handle;
snd_pcm_hw_params_t *params;
unsigned int val;
int dir;
snd_pcm_uframes_t frames;
char *buffer;
/* Open PCM device for recording (capture). */
rc = snd_pcm_open(&handle, "default",
SND_PCM_STREAM_CAPTURE, 0);
if (rc < 0) {
fprintf(stderr,
"unable to open pcm device: %s\n",
snd_strerror(rc));
exit(1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle, params,
SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle, params,
SND_PCM_FORMAT_S16_LE);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle, params, 2);
/* 44100 bits/second sampling rate (CD quality) */
val = 44100;
snd_pcm_hw_params_set_rate_near(handle, params,
&val, &dir);
/* Set period size to 32 frames. */
frames = 32;
snd_pcm_hw_params_set_period_size_near(handle,
params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0) {
fprintf(stderr,
"unable to set hw parameters: %s\n",
snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params,
&frames, &dir);
size = frames * 4; /* 2 bytes/sample, 2 channels */
buffer = (char *) malloc(size);
/* We want to loop for 5 seconds */
snd_pcm_hw_params_get_period_time(params,
&val, &dir);
loops = 5000000 / val;
while (loops > 0) {
loops--;
rc = snd_pcm_readi(handle, buffer, frames);
if (rc == -EPIPE) {
/* EPIPE means overrun */
fprintf(stderr, "overrun occurred\n");
snd_pcm_prepare(handle);
} else if (rc < 0) {
fprintf(stderr,
"error from read: %s\n",
snd_strerror(rc));
} else if (rc != (int)frames) {
fprintf(stderr, "short read, read %d frames\n", rc);
}
rc = write(1, buffer, size);
if (rc != size)
fprintf(stderr,
"short write: wrote %d bytes\n", rc);
}
snd_pcm_drain(handle);
snd_pcm_close(handle);
free(buffer);
return 0;
}
I have tested this program on a few computers and each time the program output some "stuff" to the screen. However when I moved the program to a desktop (which does have a sound card, just no output device plugged into it) it no longer displays anything. I thought it might be the fault of the missing audio output device so I instantiated "snd-aloop" a virtual sound card but when I ran it again specifying that sound card this time there was no output.
Any help would be greatly appreciated! Thank you very much for your time.