I am trying to decode a video stream from the browser using the ffmpeg API. The stream is produced by the webcam and recorded with MediaRecorder as webm format. What I ultimately need is a vector of opencv cv::Mat objects for further processing.
I have written a C++ webserver using the uWebsocket library. The video stream is sent via websocket from the browser to the server once per second. On the server, I append the received data to my custom buffer and decode it with the ffmpeg API.
If I just save the data on the disk and later I play it with a media player, it works fine. So, whatever the browser sends is a valid video.
I do not think that I correctly understand how should the custom IO behave with network streaming as nothing seems to be working.
The custom buffer:
struct Buffer
{
std::vector<uint8_t> data;
int currentPos = 0;
};
The readAVBuffer method for custom IO
int MediaDecoder::readAVBuffer(void* opaque, uint8_t* buf, int buf_size)
{
MediaDecoder::Buffer* mbuf = (MediaDecoder::Buffer*)opaque;
int count = 0;
for(int i=0;i<buf_size;i++)
{
int index = i + mbuf->currentPos;
if(index >= (int)mbuf->data.size())
{
break;
}
count++;
buf[i] = mbuf->data.at(index);
}
if(count > 0) mbuf->currentPos+=count;
std::cout << "read : "<<count<<" "<<mbuf->currentPos<<", buff size:"<<mbuf->data.size() << std::endl;
if(count <= 0) return AVERROR(EAGAIN); //is this error that should be returned? It cannot be EOF since we're not done yet, most likely
return count;
}
The big decode method, that's supposed to return whatever frames it could read
std::vector<cv::Mat> MediaDecoder::decode(const char* data, size_t length)
{
std::vector<cv::Mat> frames;
//add data to the buffer
for(size_t i=0;i<length;i++) {
buf.data.push_back(data[i]);
}
//do not invoke the decoders until we have 1MB of data
if(((buf.data.size() - buf.currentPos) < 1*1024*1024) && !initializedCodecs) return frames;
std::cout << "decoding data length "<<length<<std::endl;
if(!initializedCodecs) //initialize ffmpeg objects. Custom I/O, format, decoder, etc.
{
//these are just members of the class
avioCtxPtr = std::unique_ptr<AVIOContext,avio_context_deleter>(
avio_alloc_context((uint8_t*)av_malloc(4096),4096,0,&buf,&readAVBuffer,nullptr,nullptr),
avio_context_deleter());
if(!avioCtxPtr)
{
std::cerr << "Could not create IO buffer" << std::endl;
return frames;
}
fmt_ctx = std::unique_ptr<AVFormatContext,avformat_context_deleter>(avformat_alloc_context(),
avformat_context_deleter());
fmt_ctx->pb = avioCtxPtr.get();
fmt_ctx->flags |= AVFMT_FLAG_CUSTOM_IO ;
//fmt_ctx->max_analyze_duration = 2 * AV_TIME_BASE; // read 2 seconds of data
{
AVFormatContext *fmtCtxRaw = fmt_ctx.get();
if (avformat_open_input(&fmtCtxRaw, "", nullptr, nullptr) < 0) {
std::cerr << "Could not open movie" << std::endl;
return frames;
}
}
if (avformat_find_stream_info(fmt_ctx.get(), nullptr) < 0) {
std::cerr << "Could not find stream information" << std::endl;
return frames;
}
if((video_stream_idx = av_find_best_stream(fmt_ctx.get(), AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0)) < 0)
{
std::cerr << "Could not find video stream" << std::endl;
return frames;
}
AVStream *video_stream = fmt_ctx->streams[video_stream_idx];
AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id);
video_dec_ctx = std::unique_ptr<AVCodecContext,avcodec_context_deleter> (avcodec_alloc_context3(dec),
avcodec_context_deleter());
if (!video_dec_ctx)
{
std::cerr << "Failed to allocate the video codec context" << std::endl;
return frames;
}
avcodec_parameters_to_context(video_dec_ctx.get(),video_stream->codecpar);
video_dec_ctx->thread_count = 1;
/* video_dec_ctx->max_b_frames = 0;
video_dec_ctx->frame_skip_threshold = 10;*/
AVDictionary *opts = nullptr;
av_dict_set(&opts, "refcounted_frames", "1", 0);
av_dict_set(&opts, "deadline", "1", 0);
av_dict_set(&opts, "auto-alt-ref", "0", 0);
av_dict_set(&opts, "lag-in-frames", "1", 0);
av_dict_set(&opts, "rc_lookahead", "1", 0);
av_dict_set(&opts, "drop_frame", "1", 0);
av_dict_set(&opts, "error-resilient", "1", 0);
int width = video_dec_ctx->width;
videoHeight = video_dec_ctx->height;
if(avcodec_open2(video_dec_ctx.get(), dec, &opts) < 0)
{
std::cerr << "Failed to open the video codec context" << std::endl;
return frames;
}
AVPixelFormat pFormat = AV_PIX_FMT_BGR24;
img_convert_ctx = std::unique_ptr<SwsContext,swscontext_deleter>(sws_getContext(width, videoHeight,
video_dec_ctx->pix_fmt, width, videoHeight, pFormat,
SWS_BICUBIC, nullptr, nullptr,nullptr),swscontext_deleter());
frame = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
frameRGB = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(),avframe_deleter());
int numBytes = av_image_get_buffer_size(pFormat, width, videoHeight,32 /*https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning*/);
std::unique_ptr<uint8_t,avbuffer_deleter> imageBuffer((uint8_t *) av_malloc(numBytes*sizeof(uint8_t)),avbuffer_deleter());
av_image_fill_arrays(frameRGB->data,frameRGB->linesize,imageBuffer.get(),pFormat,width,videoHeight,32);
frameRGB->width = width;
frameRGB->height = videoHeight;
initializedCodecs = true;
}
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = nullptr;
pkt.size = 0;
int read_frame_return = 0;
while ( (read_frame_return=av_read_frame(fmt_ctx.get(), &pkt)) >= 0)
{
readFrame(&frames,&pkt,video_dec_ctx.get(),frame.get(),img_convert_ctx.get(),
videoHeight,frameRGB.get());
//if(cancelled) break;
}
avioCtxPtr->eof_reached = 0;
avioCtxPtr->error = 0;
//flush
// readFrame(frames.get(),nullptr,video_dec_ctx.get(),frame.get(),
// img_convert_ctx.get(),videoHeight,frameRGB.get());
avioCtxPtr->eof_reached = 0;
avioCtxPtr->error = 0;
if(frames->size() <= 0)
{
std::cout << "buffer pos: "<<buf.currentPos<<", buff size:"<<buf.data.size()
<<",read_frame_return:"<<read_frame_return<< std::endl;
}
return frames;
}
What I would expect to happen would be for a continuous extraction of cv::Mat frames as I feed it more and more data. What actually happens is that after the the buffer is fully read I see:
[matroska,webm # 0x507b450] Read error at pos. 1278266 (0x13813a)
[matroska,webm # 0x507b450] Seek to desired resync point failed. Seeking to earliest point available instead.
And then no more bytes are read from the buffer even if later I increase the size of it.
There is something terribly wrong I'm doing here and I don't understand what.
What I ended up doing was to do the reading of the incoming data and actual decoding in a different thread. The read method, however, will just block if there are no more bytes available, waiting until anything is coming.
When new bytes are arriving, they're added to the buffer and the conditional_variable signals the waiting thread to wake up and start reading data again from the buffer.
It works well enough.
Related
Imagine that in my project, I receive RTP packets with the payload type-8, for later saving this load as the Nth part of the audio track. I extract this load from the RTP packet and save it to a temporary buffer:
...
while ((rtp = receiveRtpPackets()).withoutErrors()) {
payloadData.push(rtp.getPayloadData());
}
audioGenerator.setPayloadData(payloadData);
audioGenerator.recordToFile();
...
After filling a temporary buffer of a certain size with this payload, I process this buffer, namely, extract the entire payload and encode it using ffmpeg for further saving to an audio file in Matroska format. But I have a problem. Since the payload of the RTP packet is type 8, I have to save the raw audio data of the pcm_alaw format to mka audio format. But when saving raw data pcm_alaw to an audio file, I get these messages from the library:
...
[libopus # 0x18eff60] Queue input is backward in time
[libopus # 0x18eff60] Queue input is backward in time
[libopus # 0x18eff60] Queue input is backward in time
[libopus # 0x18eff60] Queue input is backward in time
...
When you open an audio file in vlc, nothing is played (the audio track timestamp is missing).
The task of my project is to simply take pcm_alaw data and pack it in a container, in mka format. The best way to determine the codec is to use the av_guess_codec() function, which in turn automatically selects the desired codec ID. But how do I pack the raw data into the container correctly, I do not know.
It is important to note that I can get as raw data any format of this data (audio formats only) defined by the RTP packet type (All types of RTP packet payload). All I know is that in any case, I have to pack the audio data in an mka container.
I also attach the code (borrowed from this resource) that I use:
audiogenerater.h
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
}
class AudioGenerater
{
public:
AudioGenerater();
~AudioGenerater() = default;
void generateAudioFileWithOptions(
QString fileName,
QByteArray pcmData,
int channel,
int bitRate,
int sampleRate,
AVSampleFormat format);
private:
// init Format
bool initFormat(QString audioFileName);
private:
AVCodec *m_AudioCodec = nullptr;
AVCodecContext *m_AudioCodecContext = nullptr;
AVFormatContext *m_FormatContext = nullptr;
AVOutputFormat *m_OutputFormat = nullptr;
};
audiogenerater.cpp
AudioGenerater::AudioGenerater()
{
av_register_all();
avcodec_register_all();
}
AudioGenerater::~AudioGenerater()
{
// ...
}
bool AudioGenerater::initFormat(QString audioFileName)
{
// Create an output Format context
int result = avformat_alloc_output_context2(&m_FormatContext, nullptr, nullptr, audioFileName.toLocal8Bit().data());
if (result < 0) {
return false;
}
m_OutputFormat = m_FormatContext->oformat;
// Create an audio stream
AVStream* audioStream = avformat_new_stream(m_FormatContext, m_AudioCodec);
if (audioStream == nullptr) {
avformat_free_context(m_FormatContext);
return false;
}
// Set the parameters in the stream
audioStream->id = m_FormatContext->nb_streams - 1;
audioStream->time_base = { 1, 8000 };
result = avcodec_parameters_from_context(audioStream->codecpar, m_AudioCodecContext);
if (result < 0) {
avformat_free_context(m_FormatContext);
return false;
}
// Print FormatContext information
av_dump_format(m_FormatContext, 0, audioFileName.toLocal8Bit().data(), 1);
// Open file IO
if (!(m_OutputFormat->flags & AVFMT_NOFILE)) {
result = avio_open(&m_FormatContext->pb, audioFileName.toLocal8Bit().data(), AVIO_FLAG_WRITE);
if (result < 0) {
avformat_free_context(m_FormatContext);
return false;
}
}
return true;
}
void AudioGenerater::generateAudioFileWithOptions(
QString _fileName,
QByteArray _pcmData,
int _channel,
int _bitRate,
int _sampleRate,
AVSampleFormat _format)
{
AVFormatContext* oc;
if (avformat_alloc_output_context2(
&oc, nullptr, nullptr, _fileName.toStdString().c_str())
< 0) {
qDebug() << "Error in line: " << __LINE__;
return;
}
if (!oc) {
printf("Could not deduce output format from file extension: using mka.\n");
avformat_alloc_output_context2(
&oc, nullptr, "mka", _fileName.toStdString().c_str());
}
if (!oc) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVOutputFormat* fmt = oc->oformat;
if (fmt->audio_codec == AV_CODEC_ID_NONE) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVCodecID codecID = av_guess_codec(
fmt, nullptr, _fileName.toStdString().c_str(), nullptr, AVMEDIA_TYPE_AUDIO);
// Find Codec
m_AudioCodec = avcodec_find_encoder(codecID);
if (m_AudioCodec == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Create an encoder context
m_AudioCodecContext = avcodec_alloc_context3(m_AudioCodec);
if (m_AudioCodecContext == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Setting parameters
m_AudioCodecContext->bit_rate = _bitRate;
m_AudioCodecContext->sample_rate = _sampleRate;
m_AudioCodecContext->sample_fmt = _format;
m_AudioCodecContext->channels = _channel;
m_AudioCodecContext->channel_layout = av_get_default_channel_layout(_channel);
m_AudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
// Turn on the encoder
int result = avcodec_open2(m_AudioCodecContext, m_AudioCodec, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create a package
if (!initFormat(_fileName)) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// write to the file header
result = avformat_write_header(m_FormatContext, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create Frame
AVFrame* frame = av_frame_alloc();
if (frame == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
int nb_samples = 0;
if (m_AudioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
nb_samples = 10000;
}
else {
nb_samples = m_AudioCodecContext->frame_size;
}
// Set the parameters of the Frame
frame->nb_samples = nb_samples;
frame->format = m_AudioCodecContext->sample_fmt;
frame->channel_layout = m_AudioCodecContext->channel_layout;
// Apply for data memory
result = av_frame_get_buffer(frame, 0);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
// Set the Frame to be writable
result = av_frame_make_writable(frame);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
int perFrameDataSize = frame->linesize[0];
int count = _pcmData.size() / perFrameDataSize;
bool needAddOne = false;
if (_pcmData.size() % perFrameDataSize != 0) {
count++;
needAddOne = true;
}
int frameCount = 0;
for (int i = 0; i < count; ++i) {
// Create a Packet
AVPacket* pkt = av_packet_alloc();
if (pkt == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
av_init_packet(pkt);
if (i == count - 1)
perFrameDataSize = _pcmData.size() % perFrameDataSize;
// Synthesize WAV files
memset(frame->data[0], 0, perFrameDataSize);
memcpy(frame->data[0], &(_pcmData.data()[perFrameDataSize * i]), perFrameDataSize);
frame->pts = frameCount++;
// send Frame
result = avcodec_send_frame(m_AudioCodecContext, frame);
if (result < 0)
continue;
// Receive the encoded Packet
result = avcodec_receive_packet(m_AudioCodecContext, pkt);
if (result < 0) {
av_packet_free(&pkt);
continue;
}
// write to file
av_packet_rescale_ts(pkt, m_AudioCodecContext->time_base, m_FormatContext->streams[0]->time_base);
pkt->stream_index = 0;
result = av_interleaved_write_frame(m_FormatContext, pkt);
if (result < 0)
continue;
av_packet_free(&pkt);
}
// write to the end of the file
av_write_trailer(m_FormatContext);
// Close file IO
avio_closep(&m_FormatContext->pb);
// Release Frame memory
av_frame_free(&frame);
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
}
main.cpp
int main(int argc, char **argv)
{
av_log_set_level(AV_LOG_TRACE);
QFile file("rawDataOfPcmAlawType.bin");
if (!file.open(QIODevice::ReadOnly)) {
return EXIT_FAILURE;
}
QByteArray rawData(file.readAll());
AudioGenerater generator;
generator.generateAudioFileWithOptions(
"test.mka",
rawData,
1,
64000,
8000,
AV_SAMPLE_FMT_S16);
return 0;
}
It is IMPORTANT you help me find the most appropriate way to record pcm_alaw or a different data format in an MKA audio file.
I ask everyone who knows anything to help (there is too little time left to implement this project)
These useful links will help you:
A good overview of the data processing sequence in libav: ffmpeg-libav-tutorial
Examples from the ffmpeg developers themselves: avio_reading, resampling_audio, transcode_aac
I am using libx264 compiled from source. It was configured to get both .dll and .lib by this command
./configure --disable-cli --enable-shared --extra-ldflags=-Wl,--output-def=libx264.def`
I am using the libx264 API in my screen-sharing program with the preset - "veryfast", tune - "zerolatency", profile - "high" and also the following settings.
param.i_csp = X264_CSP_BGRA;
param.i_threads = 1;
param.i_width = width;
param.i_height = height;
param.i_fps_num = fps;
param.i_fps_den = 1;
param.rc.i_bitrate = bitrate;
param.rc.i_rc_method = X264_RC_ABR;
param.rc.b_filler = true;
param.rc.f_rf_constant = (float)0;
param.rc.i_vbv_max_bitrate = param.rc.i_bitrate;
param.rc.i_vbv_buffer_size = param.rc.i_bitrate;
param.b_repeat_headers = 0;
param.b_annexb = 1;
For these settings the program works fine. I specified it as single threaded by setting param.i_threads = 1.
If this is removed, x264 defaults to using multiple threads and sets param.i_threads as 1.5x of number of cores in the CPU automatically. This will give faster performance than running in single thread.
But when I remove the param.i_threads = 1 to make it multi-threaded, the generated output is fully grey. I cannot see any output when I view the live stream with VLC or some times I can view a weird output.
I am using this bitmap image as an example (https://imgur.com/a/l8LCd1l). Only this same image is being encoded multiple times. When it is saved into .h264 video, it is viewable clearly. But when the encoded payload is sent through rtmp, the live stream produces very bad and weird output (or sometimes no output). This is the weird output which im seeing most of the time for this image: https://imgur.com/a/VdyX1Zm
This is the full example code in which I am both streaming and writing video file of the same picture. This is using the srs librtmp library. There is no error but the stream has weird output.
In this code if you set add param.i_threads = 1; then only the output stream will be viewable. The problem is that it should be viewable in both single-threaded and multi-threaded encoding.
#include <iostream>
#include <stdio.h>
#include <sstream>
#include <x264.h>
#include "srs_librtmp.h"
#pragma comment(lib, "C:/Softwares/x264/libx264.lib")
using namespace std;
int check_ret(int ret);
int main()
{
int dts = 0;
x264_param_t param;
x264_t* h;
x264_nal_t* nals;
int i_nal;
int pts = 0;
int i_frame_size;
x264_picture_t picIn;
x264_picture_t picOut;
x264_param_default_preset(¶m, "veryfast", "zerolatency");
//x264 settings
param.i_csp = X264_CSP_BGRA;
param.i_width = 1920;
param.i_height = 1080;
param.i_fps_num = 30;
param.i_fps_den = 1;
param.rc.i_bitrate = 2500;
param.rc.i_rc_method = X264_RC_ABR;
param.rc.b_filler = true;
param.rc.f_rf_constant = (float)0;
param.rc.i_vbv_max_bitrate = param.rc.i_bitrate;
param.rc.i_vbv_buffer_size = param.rc.i_bitrate;
param.b_repeat_headers = 0;
param.b_annexb = 1;
x264_param_apply_profile(¶m, "high");
h = x264_encoder_open(¶m);
//allocate picture
x264_picture_alloc(&picIn, param.i_csp, param.i_width, param.i_height);
//picture settings
picIn.img.i_plane = 1;
picIn.img.i_stride[0] = 4 * param.i_width;
picIn.i_type = X264_TYPE_AUTO;
int header_size = x264_encoder_headers(h, &nals, &i_nal);
FILE* fptr;
fopen_s(&fptr, "example1.h264", "wb");
// write sps and pps in the video file
fwrite(nals->p_payload, header_size, 1, fptr);
int size = 1920 * 1080 * 4;
char* bmp = new char[size];
FILE* bitptr;
errno_t err = fopen_s(&bitptr, "flower.bmp", "rb");
fseek(bitptr, 54, SEEK_SET);
fread(bmp, size, 1, bitptr);
fclose(bitptr);
srs_rtmp_t rtmp = srs_rtmp_create("127.0.0.1:1935/live/test");
if (srs_rtmp_handshake(rtmp) != 0)
{
std::cout << "Simple handshake failed.";
return -1;
}
std::cout << "Handshake completed successfully.\n";
if (srs_rtmp_connect_app(rtmp) != 0) {
std::cout << "Connecting to host failed.";
return -1;
}
std::cout << "Connected to host successfully.\n";
if (srs_rtmp_publish_stream(rtmp) != 0) {
std::cout << "Publish signal failed.";
}
std::cout << "Publish signal success\n";
// write sps and pps in the live stream
int ret = srs_h264_write_raw_frames(rtmp, reinterpret_cast<char*>(nals->p_payload), header_size, 0, 0);
ret = check_ret(ret);
if (!ret)
return -1;
std::cout << "SPS and PPS sent.\n";
// main loop
std::cout << "Now streaming and encoding\n";
int i = 1800;
while (i--)
{
picIn.img.plane[0] = reinterpret_cast<uint8_t*>(bmp);
picIn.i_pts = pts++;
i_frame_size = x264_encoder_encode(h, &nals, &i_nal, &picIn, &picOut);
if (i_frame_size)
{
for (int j = 0; j < i_nal; j++)
{
x264_nal_t* nal = nals + j;
// write data in the video file
fwrite(nal->p_payload, nal->i_payload, 1, fptr);
// write data in the live stream
ret = srs_h264_write_raw_frames(rtmp, reinterpret_cast<char*>(nal->p_payload), nal->i_payload, dts, dts);
ret = check_ret(ret);
if (!ret)
{
return -1;
}
}
}
else
{
std::cout << "i_frame_size = 0 (encoder failed)\n";
}
dts += 33;
}
while (x264_encoder_delayed_frames(h))
{
i_frame_size = x264_encoder_encode(h, &nals, &i_nal, NULL, &picOut);
if (i_frame_size)
{
fwrite(nals->p_payload, i_frame_size, 1, fptr);
}
}
std::cout << "\nAll done\n";
std::cout << "Output video is example1.h264 and it is viewable in VLC";
return 0;
}
int check_ret(int ret)
{
if (ret != 0) {
if (srs_h264_is_dvbsp_error(ret)) {
srs_human_trace("ignoring drop video error, code=%d", ret);
}
else if (srs_h264_is_duplicated_sps_error(ret)) {
srs_human_trace("ignoring duplicated sps, code=%d", ret);
}
else if (srs_h264_is_duplicated_pps_error(ret)) {
srs_human_trace("ignoring duplicated pps, code=%d", ret);
}
else {
srs_human_trace("sending h264 raw data failed. ret=%d", ret);
return 0;
}
}
return 1;
}
If you would like to download the original flower.bmp file, here is the link: https://gofile.io/d/w2kX56
This error can be reproduced in any other bmp file also.
Please tell me what is causing this problem when multi-threading is enabled. Am I setting wrong values? Is the code in which I am streaming the encoded data wrong?
I'm trying to create a C++ program with Waveform Audio library that would be playing AudioFrames (raw audio data, each frame consists of about 1920 bytes) provided by another program (right now I'm just simulating that by reading file as AudioFrames). Modifying code from this thread I was able to make SoundPlayer class that does the job, but the output I get is extremely choppy. It's gets better with bigger frame sizes, but even with frames as big as 96000 bytes the audio still glitches every second or so (and I need the frames too be much smaller than that).
How can I fix this issue?
Here is the test file I'm using. And here is the code itself:
#include <windows.h>
#include <iostream>
#pragma comment(lib, "Winmm.lib")
constexpr int FRAME_SIZE_IN_BYTES = 1920;
struct AudioFrame
{
char *Data;
int DataSize;
};
class SoundPlayer
{
public:
SoundPlayer()
{
// Initialize the sound format we will request from sound card
m_waveFormat.wFormatTag = WAVE_FORMAT_PCM; // Uncompressed sound format
m_waveFormat.nChannels = 1; // 1 = Mono, 2 = Stereo
m_waveFormat.wBitsPerSample = 16; // Bits per sample per channel
m_waveFormat.nSamplesPerSec = 48000; // Sample Per Second
m_waveFormat.nBlockAlign = m_waveFormat.nChannels * m_waveFormat.wBitsPerSample / 8;
m_waveFormat.nAvgBytesPerSec = m_waveFormat.nSamplesPerSec * m_waveFormat.nBlockAlign;
m_waveFormat.cbSize = 0;
}
void Play(AudioFrame* af)
{
// Create our "Sound is Done" event
m_done = CreateEvent(0, FALSE, FALSE, 0);
// Open the audio device
if (waveOutOpen(&m_waveOut, 0, &m_waveFormat, (DWORD)m_done, 0, CALLBACK_EVENT) != MMSYSERR_NOERROR)
{
std::cout << "Sound card cannot be opened." << std::endl;
return;
}
// Create the wave header for our sound buffer
m_waveHeader.lpData = af->Data;
m_waveHeader.dwBufferLength = af->DataSize;
m_waveHeader.dwFlags = 0;
m_waveHeader.dwLoops = 0;
// Prepare the header for playback on sound card
if (waveOutPrepareHeader(m_waveOut, &m_waveHeader, sizeof(m_waveHeader)) != MMSYSERR_NOERROR)
{
std::cout << "Error preparing Header!" << std::endl;
return;
}
ResetEvent(m_done); // Reset our Event so it is non-signaled, it will be signaled again with buffer finished
// Play the sound!
if (waveOutWrite(m_waveOut, &m_waveHeader, sizeof(m_waveHeader)) != MMSYSERR_NOERROR)
{
std::cout << "Error writing to sound card!" << std::endl;
return;
}
// Wait until sound finishes playing
if (WaitForSingleObject(m_done, INFINITE) != WAIT_OBJECT_0)
{
std::cout << "Error waiting for sound to finish" << std::endl;
return;
}
// Unprepare our wav header
if (waveOutUnprepareHeader(m_waveOut, &m_waveHeader, sizeof(m_waveHeader)) != MMSYSERR_NOERROR)
{
std::cout << "Error unpreparing header!" << std::endl;
return;
}
// Close the wav device
if (waveOutClose(m_waveOut) != MMSYSERR_NOERROR)
{
std::cout << "Sound card cannot be closed!" << std::endl;
return;
}
// Release our event handle
CloseHandle(m_done);
}
private:
HWAVEOUT m_waveOut; // Handle to sound card output
WAVEFORMATEX m_waveFormat; // The sound format
WAVEHDR m_waveHeader; // WAVE header for our sound data
HANDLE m_done; // Event Handle that tells us the sound has finished being played.
// This is a very efficient way to put the program to sleep
// while the sound card is processing the sound buffer
};
int main()
{
FILE * fileDes;
fopen_s(&fileDes, "Ducksauce.raw", "rb");
if (fileDes == nullptr)
std::cout << "File opening failed.\n";
int bufferSize = FRAME_SIZE_IN_BYTES;
char *buffer = new char[bufferSize];
SoundPlayer sp;
while (fread(buffer, sizeof(char), bufferSize, fileDes) > 0)
{
AudioFrame af;
af.Data = buffer;
af.DataSize = bufferSize;
sp.Play(&af);
}
fclose(fileDes);
delete[] buffer;
return 0;
}
Edit: Version number 2. Still doesn't work as intended.
#include <windows.h>
#include <iostream>
#pragma comment(lib, "Winmm.lib")
constexpr int FRAME_SIZE_IN_BYTES = 1920;
struct AudioFrame
{
char *Data;
int DataSize;
};
class SoundPlayer
{
public:
SoundPlayer()
{
// Initialize the sound format we will request from sound card
m_waveFormat.wFormatTag = WAVE_FORMAT_PCM; // Uncompressed sound format
m_waveFormat.nChannels = 1; // 1 = Mono, 2 = Stereo
m_waveFormat.wBitsPerSample = 16; // Bits per sample per channel
m_waveFormat.nSamplesPerSec = 48000; // Sample Per Second
m_waveFormat.nBlockAlign = m_waveFormat.nChannels * m_waveFormat.wBitsPerSample / 8;
m_waveFormat.nAvgBytesPerSec = m_waveFormat.nSamplesPerSec * m_waveFormat.nBlockAlign;
m_waveFormat.cbSize = 0;
// Create our "Sound is Done" event
m_done = CreateEvent(0, FALSE, FALSE, 0);
// Open the audio device
if (waveOutOpen(&m_waveOut, 0, &m_waveFormat, (DWORD)m_done, 0, CALLBACK_EVENT) != MMSYSERR_NOERROR)
{
std::cout << "Sound card cannot be opened." << std::endl;
return;
}
}
~SoundPlayer()
{
// Close the wav device
if (waveOutClose(m_waveOut) != MMSYSERR_NOERROR)
{
std::cout << "Sound card cannot be closed!" << std::endl;
return;
}
// Release our event handle
CloseHandle(m_done);
}
void StartPlaying(AudioFrame* af)
{
// Create the wave header for our sound buffer
m_waveHeader.lpData = af->Data;
m_waveHeader.dwBufferLength = af->DataSize;
m_waveHeader.dwFlags = 0;
m_waveHeader.dwLoops = 0;
// Prepare the header for playback on sound card
if (waveOutPrepareHeader(m_waveOut, &m_waveHeader, sizeof(m_waveHeader)) != MMSYSERR_NOERROR)
{
std::cout << "Error preparing Header!" << std::endl;
return;
}
ResetEvent(m_done); // Reset our Event so it is non-signaled, it will be signaled again with buffer finished
// Play the sound!
if (waveOutWrite(m_waveOut, &m_waveHeader, sizeof(m_waveHeader)) != MMSYSERR_NOERROR)
{
std::cout << "Error writing to sound card!" << std::endl;
return;
}
}
void WaitUntilFrameFinishes()
{
// Wait until sound finishes playing
if (WaitForSingleObject(m_done, INFINITE) != WAIT_OBJECT_0)
{
std::cout << "Error waiting for sound to finish" << std::endl;
return;
}
// Unprepare our wav header
if (waveOutUnprepareHeader(m_waveOut, &m_waveHeader, sizeof(m_waveHeader)) != MMSYSERR_NOERROR)
{
std::cout << "Error unpreparing header!" << std::endl;
return;
}
}
private:
HWAVEOUT m_waveOut; // Handle to sound card output
WAVEFORMATEX m_waveFormat; // The sound format
WAVEHDR m_waveHeader; // WAVE header for our sound data
HANDLE m_done; // Event Handle that tells us the sound has finished being played.
// This is a very efficient way to put the program to sleep
// while the sound card is processing the sound buffer
};
int main()
{
FILE * fileDes;
fopen_s(&fileDes, "Ducksauce.raw", "rb");
if (fileDes == nullptr)
std::cout << "File opening failed.\n";
int bufferSize = FRAME_SIZE_IN_BYTES;
char *buffer = new char[bufferSize];
SoundPlayer sp;
// Read first time
fread(buffer, sizeof(char), bufferSize, fileDes);
while (true)
{
AudioFrame af;
af.Data = buffer;
af.DataSize = bufferSize;
// Start playing, but don't block
sp.StartPlaying(&af);
// Prepare the next chunk
if (fread(buffer, sizeof(char), bufferSize, fileDes) <= 0)
break;
// Now block the code, waiting with next chunk already loaded
// and ready to be played in the next iteration.
sp.WaitUntilFrameFinishes();
}
fclose(fileDes);
delete[] buffer;
return 0;
}
Edit 2: It works if I add this before while:
for (int i = 0; i < 3; i++ )
{
fread(buffer, sizeof(char), bufferSize, fileDes);
af.Data = buffer;
af.DataSize = bufferSize;
sp.StartPlaying(&af);
}
Also I modified while a bit too:
while (true)
{
// Prepare the next chunk
if (fread(buffer, sizeof(char), bufferSize, fileDes) <= 0)
break;
// Now block the code, waiting with next chunk already loaded
// and ready to be played in the next iteration.
sp.WaitUntilFrameFinishes();
af.Data = buffer;
af.DataSize = bufferSize;
sp.StartPlaying(&af);
}
You should read the data from disk while the sound plays, not in between buffers!
If you can't read the whole file at once, you should change your Play function so that it doesn't just call WaitForSingleObject. Using it makes your code block and wait until the sound stops playing.
What you need instead is to start playing, then go back to your reading loop, prepare the next buffer, and then wait for the music to end, like so (in SoundPlayer):
void WaitUntilFrameFinishes() {
// Wait until sound finishes playing
if (WaitForSingleObject(m_done, INFINITE) != WAIT_OBJECT_0)
// ... move all the code from Play till the end here
}
Then back in the main loop:
// Read first frame
fread(buffer, sizeof(char), bufferSize, fileDes);
while (true)
{
AudioFrame af;
af.Data = buffer;
af.DataSize = bufferSize;
// Start playing, but don't block
sp.Play(&af);
// Prepare the next chunk
if (fread(buffer, sizeof(char), bufferSize, fileDes) <= 0) {
break;
// Now block the code, waiting with next chunk already loaded
// and ready to be played in the next iteration.
sp.WaitUntilFrameFinishes();
}
Ideally you'd also wrap the fread calls into something that can provide chunks in a nicer way.
After day of trying to figure out how to go about audio playback based on the documentation alone I found this excellent tutorial. If anyone found this thread while trying to create audio playback using Waveform audio it's a very good point of reference (surely much better than my buggy code above).
About my code I suspect it doesn't work correctly because one is supposed to keep the AudioFrames queue using waveOutWrite() with at least several frames at all time to prevent situation where sound card would have to wait for another AudioFrame.
I want to read and open a video in encoded domain without decoding. I have written the code up to now and it works without errors. But the output of the method av_read_frame() just gives number of zeros and same negative integer value is repeating.
I'm not sure whether I passed the parameters correctly to the method. Please help.
void CFfmpegmethods::VideoRead(){
av_register_all();
const char *url = "H:\\Sanduni_projects\\ad_1.mp4";
AVDictionary *options = NULL;
AVFormatContext *s = avformat_alloc_context(); //NULL;
//AVFormatContext *avfmt = NULL;
//avformat_alloc_context();
AVPacket pkt;
//AVFormatContext *avformat_alloc_context();
//AVIOContext *avio_alloc_context();
//open an input stream and read the header
int ret = avformat_open_input(&s, url, NULL, NULL);
//avformat_find_stream_info(s, &options); //finding the missing information
if (ret < 0)
abort();
av_dict_set(&options, "video_size", "640x480", 0);
av_dict_set(&options, "pixel_format", "rgb24", 0);
if (avformat_open_input(&s, url, NULL, &options) < 0){
abort();
}
av_dict_free(&options);
AVDictionaryEntry *e;
if (e = av_dict_get(options, "", NULL, AV_DICT_IGNORE_SUFFIX)) {
fprintf(stderr, "Option %s not recognized by the demuxer.\n", e->key);
abort();
}
//int i = 0;
while (1){
//Split what is stored in the file into frames and return one for each call
//returns the next frame of the stream
int frame = av_read_frame(s, &pkt);
//cout <<i << " " << frame << endl;
waitKey(30);
//i++;
}
//make the packet free
av_packet_unref(&pkt);
//Close the file after reading
avformat_close_input(&s);
}
Method av_read_frame() output zeros while reading the packets and after that gives negative values. In my code loop runs infinitely. Therefore gives infinite number of negative values.
This is the modified code
while (1){
//Split what is stored in the file into frames and return one for each call
//returns the next frame of the stream
int frame = av_read_frame(s, pkt);
duration = pkt->duration;
size = pkt->size;
total_size = total_size + size;
total_duration = total_duration + duration;
i++;
if (frame < 0) break;
cout << "frame" << i << " " << size << " "<< duration << endl;
}
im reading a udp-mjpeg-stream with the ffmpeg-API. When i read and display the Stream with an ARM-Processor i have 2 Problems:
1- The Applikation is too slow and there is a big delay between network cam and displayed video.
2- the memory usage increases every time when i call the function av_read_frame().
The Source code
const char *cam1_url = "udp://192.168.1.1:1234";
AVCodec *pCodec;
AVFrame *pFrame, *pFrameRGB;
AVCodecContext *pCodecCon;
AVDictionary *pUdpStreamOptions = NULL;
AVInputFormat *pMjpegFormat = av_find_input_format("mjpeg");
av_dict_set(&pUdpStreamOptions, "fifo_size", "5000000", 0);
av_register_all();
avdevice_register_all();
avcodec_register_all();
avformat_network_init();
AVFormatContext *pFormatCont = avformat_alloc_context();
if(avformat_open_input(&pFormatCont,cam1_url,pMjpegFormat,&pUdpStreamOptions) < 0)
{
cout << "!! Error !! - avformat_open_input(): failed to open input URL" << endl;
}
if(avformat_find_stream_info(pFormatCont,NULL) < 0)
{
cout << "!! Error !! - avformat_find_stream_info(), Failed to retrieve stream info" << endl;
}
av_dump_format(pFormatCont, 0, cam1_url, 0);
int videoStream;
for(int i=0; i< pFormatCont->nb_streams; i++)
{
if(pFormatCont->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
{
videoStream=i;
cout << " videoStream = " << videoStream << endl;
}
}
pCodecCon = pFormatCont->streams[videoStream]->codec;
pCodec = avcodec_find_decoder(pCodecCon->codec_id);
if(NULL == pCodec)
{
cout << "couldnt find codec" << endl;
return EXIT_FAILURE;
}
if(avcodec_open2(pCodecCon,pCodec,NULL) < 0)
{
cout << "!! Error !! - in avcodec_open2()" << endl;
return EXIT_FAILURE;
}
uint8_t *frameBuffer;
int numRxBytes = 0;
AVPixelFormat pFormat =AV_PIX_FMT_BGR24;
int width_rgb = (int)((float)pCodecCon->width);
int height_rgb = (int)((float)pCodecCon->height);
numRxBytes = avpicture_get_size(pFormat,width_rgb,height_rgb);
frameBuffer = (uint8_t *) av_malloc(numRxBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *) pFrameRGB, frameBuffer, pFormat,width_rgb,height_rgb);
AVPacket rx_pkt; // received packet
int frameFinished = 0;
struct SwsContext *imgConvertCtx;
av_init_packet(&rx_pkt);
while(av_read_frame(pFormatCont, &rx_pkt) >= 0)
{
if(rx_pkt.stream_index == videoStream)
{
av_frame_free(&pFrame);
pFrame = av_frame_alloc();
av_frame_free(&pFrameRGB);
pFrameRGB = av_frame_alloc();
avcodec_decode_video2(pCodecCon, pFrame, &frameFinished,&rx_pkt);
if(frameFinished)
{
imgConvertCtx = sws_getCachedContext(NULL, pFrame->width,pFrame->height, AV_PIX_FMT_YUVJ420P,width_rgb,height_rgb,AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL,NULL);
sws_scale(imgConvertCtx, ((AVPicture*)pFrame)->data, ((AVPicture*)pFrame)->linesize, 0, pCodecCon->height, ((AVPicture *)pFrameRGB)->data, ((AVPicture *)pFrameRGB)->linesize);
av_frame_unref(pFrame);
av_frame_unref(pFrameRGB);
}
}
av_free_packet(&rx_pkt);
av_packet_unref(&rx_pkt);
}
//cvDestroyWindow("Cam1Video");
av_free_packet(&rx_pkt);
avcodec_close(pCodecCon);
av_free(pFrame);
av_free(pFrameRGB);
avformat_close_input(&pFormatCont);
I have read, the reason could be that the ffmpeg-Libs saves the incomming frames in the cache but the arm-processor isnt fast enough to process them. After like 4 minutes the system craches.
How could i solve the Problem.
one option could be to tell ffmpeg to act as frame grabber, also to read frames in real time, with the flag "-re". How can i set this Flag in the c++ source code. Or can anybody help me to solve that Problem.
Thank you very much