ALSA driver's buffer is TOO large - c++

I'm using a USB sound card to build a real-time Linux player. The app is developed on top of the ALSA API.
I found that the driver's buffer is too large, and I don't know how to reduce the buffer size from userspace.
For example, when I initialize ALSA with 16-bit stereo at 44100 Hz, the total buffer size ends up being 256 frames (snd_pcm_hw_params_get_period_size) * 1024 periods (snd_pcm_hw_params_get_periods), so the total buffer time is 256*1024/44100 = 5.94 seconds. That's far too large for my REAL-TIME use case!
My experiment 01: I've been trying to reduce the period count with snd_pcm_hw_params_test_periods, but the driver's buffer size (frames * periods) stays exactly the same!
Here's my sample code:
bool init(const std::string &PCM_DEVICE, const int channels, const int rate) {
    int pcm;
    unsigned int tmp;
    snd_pcm_hw_params_t* params = nullptr;
    /* Open the PCM device in playback mode */
    if ((pcm = snd_pcm_open(&pcm_handle_, PCM_DEVICE.c_str(), SND_PCM_STREAM_PLAYBACK, 0)) < 0) {
        printf("ERROR: Can't open \"%s\" PCM device. %s\n", PCM_DEVICE.c_str(), snd_strerror(pcm));
        return false;
    }
    /* Allocate parameters object and fill it with default values */
    snd_pcm_hw_params_alloca(&params);
    snd_pcm_hw_params_any(pcm_handle_, params);
    /* Set parameters */
    if ((pcm = snd_pcm_hw_params_set_access(pcm_handle_, params,
                                            SND_PCM_ACCESS_RW_INTERLEAVED)) < 0)
        printf("ERROR: Can't set interleaved mode. %s\n", snd_strerror(pcm));
    if ((pcm = snd_pcm_hw_params_set_format(pcm_handle_, params,
                                            SND_PCM_FORMAT_S16_LE)) < 0)
        printf("ERROR: Can't set format. %s\n", snd_strerror(pcm));
    if ((pcm = snd_pcm_hw_params_set_channels(pcm_handle_, params, channels)) < 0)
        printf("ERROR: Can't set channels number. %s\n", snd_strerror(pcm));
    if ((pcm = snd_pcm_hw_params_set_rate_near(pcm_handle_, params, (unsigned int*)&rate, 0)) < 0)
        printf("ERROR: Can't set rate. %s\n", snd_strerror(pcm));
    /* Write parameters */
    if ((pcm = snd_pcm_hw_params(pcm_handle_, params)) < 0)
        printf("ERROR: Can't set hardware parameters. %s\n", snd_strerror(pcm));
    /* Resume information */
    printf("PCM name: '%s'\n", snd_pcm_name(pcm_handle_));
    printf("PCM state: %s\n", snd_pcm_state_name(snd_pcm_state(pcm_handle_)));
    snd_pcm_hw_params_get_channels(params, &tmp);
    printf("channels: %i ", tmp);
    if (tmp == 1)
        printf("(mono)\n");
    else if (tmp == 2)
        printf("(stereo)\n");
    snd_pcm_hw_params_get_rate(params, &tmp, 0);
    printf("rate: %d bps\n", tmp);
    /* Allocate buffer to hold a single period */
    snd_pcm_hw_params_get_period_size(params, &frames_, 0);
    LOGI << "frames_ = " << frames_;
    buffSize_ = frames_ * channels * 2 /* 2 -> sample size */;
    buff_.resize(buffSize_);
    snd_pcm_hw_params_get_period_time(params, &tmp, NULL);
    snd_pcm_hw_params_get_periods(params, &tmp, NULL); // https://www.alsa-project.org/wiki/FramesPeriods
    LOGI << "periods_ = " << tmp;
    return true;
}
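For reference, this is the kind of call sequence I understand should let userspace ask for a smaller buffer before the parameters are committed. The values below are just examples; the *_near() calls let the driver adjust them, and the hardware may still refuse:
/* Sketch: request roughly 20 ms of total buffering (values are assumptions). */
snd_pcm_uframes_t period_size = 256;   /* frames per period */
unsigned int periods = 4;              /* periods per buffer */
snd_pcm_uframes_t buffer_size = period_size * periods;
snd_pcm_hw_params_set_period_size_near(pcm_handle_, params, &period_size, 0);
snd_pcm_hw_params_set_periods_near(pcm_handle_, params, &periods, 0);
snd_pcm_hw_params_set_buffer_size_near(pcm_handle_, params, &buffer_size);
/* ...then commit with snd_pcm_hw_params(pcm_handle_, params) and read back
   snd_pcm_hw_params_get_period_size()/get_periods() to see what was granted. */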

Related

ESP32 i2s_read returns empty buffer after calling this function

I am trying to record audio from an INMP441 connected to an ESP32, but returning the buffer containing the bytes the microphone read always leads to something that is NULL.
The code for setting up i2s and the microphone is this:
// i2s config
const i2s_config_t i2s_config = {
.mode = i2s_mode_t(I2S_MODE_MASTER | I2S_MODE_RX), // receive
.sample_rate = SAMPLE_RATE, // 44100 (44,1KHz)
.bits_per_sample = I2S_BITS_PER_SAMPLE_32BIT, // 32 bits per sample
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, // use left channel
.communication_format = i2s_comm_format_t(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, // interrupt level 1
.dma_buf_count = 64, // number of buffers
.dma_buf_len = SAMPLES_PER_BUFFER}; // 512
// pin config
const i2s_pin_config_t pin_config = {
.bck_io_num = gpio_sck, // serial clock, sck (gpio 33)
.ws_io_num = gpio_ws, // word select, ws (gpio 32)
.data_out_num = I2S_PIN_NO_CHANGE, // only used for speakers
.data_in_num = gpio_sd // serial data, sd (gpio 34)
};
// config i2s driver and pins
// fct must be called before any read/write
esp_err_t err = i2s_driver_install(I2S_PORT, &i2s_config, 0, NULL);
if (err != ESP_OK)
{
Serial.printf("Failed installing the driver: %d\n", err);
}
err = i2s_set_pin(I2S_PORT, &pin_config);
if (err != ESP_OK)
{
Serial.printf("Failed setting pin: %d\n", err);
}
Serial.println("I2S driver installed! :-)");
Setting up the i2s stuff is no problem at all. The tricky part for me is reading from the i2s:
// 44KHz * Byte per sample * time in seconds = total size in bytes
const size_t recordSize = (SAMPLE_RATE * I2S_BITS_PER_SAMPLE_32BIT / 8) * recordTime; //recordTime = 5s
// size in bytes
size_t totalReadSize = 0;
// 32 bits per sample set in config * 1024 samples per buffers = total bits per buffer
char *samples = (char *)calloc(totalBitsPerBuffer, sizeof(char));
// number of bytes read
size_t bytesRead;
Serial.println("Start recording...");
// read until wanted size is reached
while (totalReadSize < recordSize)
{
// read to buffer
esp_err_t err = i2s_read(I2S_PORT, (void *)samples, totalBitsPerBuffer, &bytesRead, portMAX_DELAY);
// check if an error occurred; if so, stop recording
if (err != ESP_OK)
{
Serial.println("Error while recording!");
break;
}
// check if bytes read works → yes
/*
for (int i = 0; i < bytesRead; i++)
{
uint8_t sample = (uint8_t) samples[i];
Serial.print(sample);
} */
// add read size to total read size
totalReadSize += bytesRead;
// Serial.printf("Currently recorded %d%% \n", totalReadSize * 100 / recordSize);
}
// convert bytes to mb
double_t totalReadSizeMB = (double_t)totalReadSize / 1e+6;
Serial.printf("Total read size: %fMb\n", totalReadSizeMB);
Serial.println("Samples deref");
Serial.println(*samples);
Serial.println("Samples");
Serial.println(samples);
return samples;
Using this code leads to the following output:
I2S driver installed! :-)
Start recording...
Total read size: 0.884736Mb
Samples deref
␀
Samples
When I uncomment the part where I iterate over the bytes read part I get something like this:
200224231255255224210022418725525522493000902552550238002241392542552241520020425225508050021624525501286700194120022461104022421711102242271030018010402242510000188970224141930022291022410185022487830021679001127500967200666902241776600246610224895902244757022418353002224802242274302249741022419339009435001223102242432602243322022412120001241402245911022418580084402248325525522461252255044249255224312452552242212372552241272352550342302552241212262552242112212550252216255014621325501682092550112205255224161202255224237198255224235194255224231922552248518725501141832550421812552241951762550144172255018168255034164255224173157255018215525522455152255028148255021014425505214025522487137255014613225522412112825502361252550180120255018011725522451172550252113255224133111255061082550248105255224891042552249910125522439972550138942552242279225503287255224101832552242478125522410178255224231732552244970255224336525501766225501426125502325625522424553255224109492550186[...]
This shows that the microphone is able to record, but I cant return the actual value of the buffer.
While writing this code I looked at the official documentation and at some code that seems to work elsewhere.
I am also new to C++ and not used to working with pointers.
Does anyone know what the problem could be?
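One thing worth noting about the output above: Serial.println(samples) treats the char* as a NUL-terminated C string, so if the very first recorded byte happens to be 0 (which the ␀ printed under "Samples deref" suggests), nothing is printed even though the buffer holds data. Dumping raw PCM bytes needs an explicit length; a hypothetical snippet using the Arduino Serial API:
// Hypothetical: dump the raw bytes instead of treating them as a C string.
// bytesRead is the length reported by i2s_read() for this buffer.
Serial.write((const uint8_t *)samples, bytesRead);
// A caller that needs the data back also needs that length, e.g. return the
// pointer together with totalReadSize instead of the bare char*.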

Why does adding audio stream to ffmpeg's libavcodec output container cause a crash?

As it stands, my project correctly uses libavcodec to decode a video, where each frame is manipulated (it doesn't matter how) and output to a new video. I've cobbled this together from examples found online, and it works. The result is a perfect .mp4 of the manipulated frames, minus the audio.
My problem is, when I try to add an audio stream to the output container, I get a crash in mux.c that I can't explain. It's in static int compute_muxer_pkt_fields(AVFormatContext *s, AVStream *st, AVPacket *pkt). Where st->internal->priv_pts->val = pkt->dts; is attempted, priv_pts is nullptr.
I don't recall the version number, but this is from a November 4, 2020 ffmpeg build from git.
My MediaContainerMgr is much bigger than what I have here. I'm stripping out everything to do with the frame manipulation, so if I'm missing anything, please let me know and I'll edit.
The code that, when added, triggers the nullptr exception is called out inline.
The .h:
#ifndef _API_EXAMPLE_H
#define _API_EXAMPLE_H
#include <glad/glad.h>
#include <GLFW/glfw3.h>
#include "glm/glm.hpp"
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
}
#include "shader_s.h"
class MediaContainerMgr {
public:
MediaContainerMgr(const std::string& infile, const std::string& vert, const std::string& frag,
const glm::vec3* extents);
~MediaContainerMgr();
void render();
bool recording() { return m_recording; }
// Major thanks to "shi-yan" who helped make this possible:
// https://github.com/shi-yan/videosamples/blob/master/libavmp4encoding/main.cpp
bool init_video_output(const std::string& video_file_name, unsigned int width, unsigned int height);
bool output_video_frame(uint8_t* buf);
bool finalize_output();
private:
AVFormatContext* m_format_context;
AVCodec* m_video_codec;
AVCodec* m_audio_codec;
AVCodecParameters* m_video_codec_parameters;
AVCodecParameters* m_audio_codec_parameters;
AVCodecContext* m_codec_context;
AVFrame* m_frame;
AVPacket* m_packet;
uint32_t m_video_stream_index;
uint32_t m_audio_stream_index;
void init_rendering(const glm::vec3* extents);
int decode_packet();
// For writing the output video:
void free_output_assets();
bool m_recording;
AVOutputFormat* m_output_format;
AVFormatContext* m_output_format_context;
AVCodec* m_output_video_codec;
AVCodecContext* m_output_video_codec_context;
AVFrame* m_output_video_frame;
SwsContext* m_output_scale_context;
AVStream* m_output_video_stream;
AVCodec* m_output_audio_codec;
AVStream* m_output_audio_stream;
AVCodecContext* m_output_audio_codec_context;
};
#endif
And, the hellish .cpp:
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "media_container_manager.h"
MediaContainerMgr::MediaContainerMgr(const std::string& infile, const std::string& vert, const std::string& frag,
const glm::vec3* extents) :
m_video_stream_index(-1),
m_audio_stream_index(-1),
m_recording(false),
m_output_format(nullptr),
m_output_format_context(nullptr),
m_output_video_codec(nullptr),
m_output_video_codec_context(nullptr),
m_output_video_frame(nullptr),
m_output_scale_context(nullptr),
m_output_video_stream(nullptr)
{
// AVFormatContext holds header info from the format specified in the container:
m_format_context = avformat_alloc_context();
if (!m_format_context) {
throw "ERROR could not allocate memory for Format Context";
}
// open the file and read its header. Codecs are not opened here.
if (avformat_open_input(&m_format_context, infile.c_str(), NULL, NULL) != 0) {
throw "ERROR could not open input file for reading";
}
printf("format %s, duration %lldus, bit_rate %lld\n", m_format_context->iformat->name, m_format_context->duration, m_format_context->bit_rate);
//read avPackets (?) from the avFormat (?) to get stream info. This populates format_context->streams.
if (avformat_find_stream_info(m_format_context, NULL) < 0) {
throw "ERROR could not get stream info";
}
for (unsigned int i = 0; i < m_format_context->nb_streams; i++) {
AVCodecParameters* local_codec_parameters = NULL;
local_codec_parameters = m_format_context->streams[i]->codecpar;
printf("AVStream->time base before open coded %d/%d\n", m_format_context->streams[i]->time_base.num, m_format_context->streams[i]->time_base.den);
printf("AVStream->r_frame_rate before open coded %d/%d\n", m_format_context->streams[i]->r_frame_rate.num, m_format_context->streams[i]->r_frame_rate.den);
printf("AVStream->start_time %" PRId64 "\n", m_format_context->streams[i]->start_time);
printf("AVStream->duration %" PRId64 "\n", m_format_context->streams[i]->duration);
printf("duration(s): %lf\n", (float)m_format_context->streams[i]->duration / m_format_context->streams[i]->time_base.den * m_format_context->streams[i]->time_base.num);
AVCodec* local_codec = NULL;
local_codec = avcodec_find_decoder(local_codec_parameters->codec_id);
if (local_codec == NULL) {
throw "ERROR unsupported codec!";
}
if (local_codec_parameters->codec_type == AVMEDIA_TYPE_VIDEO) {
if (m_video_stream_index == -1) {
m_video_stream_index = i;
m_video_codec = local_codec;
m_video_codec_parameters = local_codec_parameters;
}
m_height = local_codec_parameters->height;
m_width = local_codec_parameters->width;
printf("Video Codec: resolution %dx%d\n", m_width, m_height);
}
else if (local_codec_parameters->codec_type == AVMEDIA_TYPE_AUDIO) {
if (m_audio_stream_index == -1) {
m_audio_stream_index = i;
m_audio_codec = local_codec;
m_audio_codec_parameters = local_codec_parameters;
}
printf("Audio Codec: %d channels, sample rate %d\n", local_codec_parameters->channels, local_codec_parameters->sample_rate);
}
printf("\tCodec %s ID %d bit_rate %lld\n", local_codec->name, local_codec->id, local_codec_parameters->bit_rate);
}
m_codec_context = avcodec_alloc_context3(m_video_codec);
if (!m_codec_context) {
throw "ERROR failed to allocate memory for AVCodecContext";
}
if (avcodec_parameters_to_context(m_codec_context, m_video_codec_parameters) < 0) {
throw "ERROR failed to copy codec params to codec context";
}
if (avcodec_open2(m_codec_context, m_video_codec, NULL) < 0) {
throw "ERROR avcodec_open2 failed to open codec";
}
m_frame = av_frame_alloc();
if (!m_frame) {
throw "ERROR failed to allocate AVFrame memory";
}
m_packet = av_packet_alloc();
if (!m_packet) {
throw "ERROR failed to allocate AVPacket memory";
}
}
MediaContainerMgr::~MediaContainerMgr() {
avformat_close_input(&m_format_context);
av_packet_free(&m_packet);
av_frame_free(&m_frame);
avcodec_free_context(&m_codec_context);
glDeleteVertexArrays(1, &m_VAO);
glDeleteBuffers(1, &m_VBO);
}
bool MediaContainerMgr::advance_frame() {
while (true) {
if (av_read_frame(m_format_context, m_packet) < 0) {
// Do we actually need to unref the packet if it failed?
av_packet_unref(m_packet);
continue;
//return false;
}
else {
if (m_packet->stream_index == m_video_stream_index) {
//printf("AVPacket->pts %" PRId64 "\n", m_packet->pts);
int response = decode_packet();
av_packet_unref(m_packet);
if (response != 0) {
continue;
//return false;
}
return true;
}
else {
printf("m_packet->stream_index: %d\n", m_packet->stream_index);
printf(" m_packet->pts: %lld\n", m_packet->pts);
printf(" mpacket->size: %d\n", m_packet->size);
if (m_recording) {
int err = 0;
//err = avcodec_send_packet(m_output_video_codec_context, m_packet);
printf(" encoding error: %d\n", err);
}
}
}
// We're done with the packet (it's been unpacked to a frame), so deallocate & reset to defaults:
/*
if (m_frame == NULL)
return false;
if (m_frame->data[0] == NULL || m_frame->data[1] == NULL || m_frame->data[2] == NULL) {
printf("WARNING: null frame data");
continue;
}
*/
}
}
int MediaContainerMgr::decode_packet() {
// Supply raw packet data as input to a decoder
// https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga58bc4bf1e0ac59e27362597e467efff3
int response = avcodec_send_packet(m_codec_context, m_packet);
if (response < 0) {
char buf[256];
av_strerror(response, buf, 256);
printf("Error while receiving a frame from the decoder: %s\n", buf);
return response;
}
// Return decoded output data (into a frame) from a decoder
// https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html#ga11e6542c4e66d3028668788a1a74217c
response = avcodec_receive_frame(m_codec_context, m_frame);
if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
return response;
} else if (response < 0) {
char buf[256];
av_strerror(response, buf, 256);
printf("Error while receiving a frame from the decoder: %s\n", buf);
return response;
} else {
printf(
"Frame %d (type=%c, size=%d bytes) pts %lld key_frame %d [DTS %d]\n",
m_codec_context->frame_number,
av_get_picture_type_char(m_frame->pict_type),
m_frame->pkt_size,
m_frame->pts,
m_frame->key_frame,
m_frame->coded_picture_number
);
}
return 0;
}
bool MediaContainerMgr::init_video_output(const std::string& video_file_name, unsigned int width, unsigned int height) {
if (m_recording)
return true;
m_recording = true;
advance_to(0L); // I've deleted the implementation. Just seeks to beginning of vid. Works fine.
if (!(m_output_format = av_guess_format(nullptr, video_file_name.c_str(), nullptr))) {
printf("Cannot guess output format.\n");
return false;
}
int err = avformat_alloc_output_context2(&m_output_format_context, m_output_format, nullptr, video_file_name.c_str());
if (err < 0) {
printf("Failed to allocate output context.\n");
return false;
}
//TODO(P0): Break out the video and audio inits into their own methods.
m_output_video_codec = avcodec_find_encoder(m_output_format->video_codec);
if (!m_output_video_codec) {
printf("Failed to create video codec.\n");
return false;
}
m_output_video_stream = avformat_new_stream(m_output_format_context, m_output_video_codec);
if (!m_output_video_stream) {
printf("Failed to find video format.\n");
return false;
}
m_output_video_codec_context = avcodec_alloc_context3(m_output_video_codec);
if (!m_output_video_codec_context) {
printf("Failed to create video codec context.\n");
return(false);
}
m_output_video_stream->codecpar->codec_id = m_output_format->video_codec;
m_output_video_stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
m_output_video_stream->codecpar->width = width;
m_output_video_stream->codecpar->height = height;
m_output_video_stream->codecpar->format = AV_PIX_FMT_YUV420P;
// Use the same bit rate as the input stream.
m_output_video_stream->codecpar->bit_rate = m_format_context->streams[m_video_stream_index]->codecpar->bit_rate;
m_output_video_stream->avg_frame_rate = m_format_context->streams[m_video_stream_index]->avg_frame_rate;
avcodec_parameters_to_context(m_output_video_codec_context, m_output_video_stream->codecpar);
m_output_video_codec_context->time_base = m_format_context->streams[m_video_stream_index]->time_base;
//TODO(P1): Set these to match the input stream?
m_output_video_codec_context->max_b_frames = 2;
m_output_video_codec_context->gop_size = 12;
m_output_video_codec_context->framerate = m_format_context->streams[m_video_stream_index]->r_frame_rate;
//m_output_codec_context->refcounted_frames = 0;
if (m_output_video_stream->codecpar->codec_id == AV_CODEC_ID_H264) {
av_opt_set(m_output_video_codec_context, "preset", "ultrafast", 0);
} else if (m_output_video_stream->codecpar->codec_id == AV_CODEC_ID_H265) {
av_opt_set(m_output_video_codec_context, "preset", "ultrafast", 0);
} else {
av_opt_set_int(m_output_video_codec_context, "lossless", 1, 0);
}
avcodec_parameters_from_context(m_output_video_stream->codecpar, m_output_video_codec_context);
m_output_audio_codec = avcodec_find_encoder(m_output_format->audio_codec);
if (!m_output_audio_codec) {
printf("Failed to create audio codec.\n");
return false;
}
I've commented out all of the audio stream init beyond this next line, because this is where
the trouble begins. Creating this output stream causes the null reference I mentioned. If I
uncomment everything below here, I still get the null deref. If I comment out this line, the
deref exception vanishes. (IOW, I commented out more and more code until I found that this
was the trigger that caused the problem.)
I assume that there's something I'm doing wrong in the rest of the commented out code, that,
when fixed, will fix the nullptr and give me a working audio stream.
m_output_audio_stream = avformat_new_stream(m_output_format_context, m_output_audio_codec);
if (!m_output_audio_stream) {
printf("Failed to find audio format.\n");
return false;
}
/*
m_output_audio_codec_context = avcodec_alloc_context3(m_output_audio_codec);
if (!m_output_audio_codec_context) {
printf("Failed to create audio codec context.\n");
return(false);
}
m_output_audio_stream->codecpar->codec_id = m_output_format->audio_codec;
m_output_audio_stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
m_output_audio_stream->codecpar->format = m_format_context->streams[m_audio_stream_index]->codecpar->format;
m_output_audio_stream->codecpar->bit_rate = m_format_context->streams[m_audio_stream_index]->codecpar->bit_rate;
m_output_audio_stream->avg_frame_rate = m_format_context->streams[m_audio_stream_index]->avg_frame_rate;
avcodec_parameters_to_context(m_output_audio_codec_context, m_output_audio_stream->codecpar);
m_output_audio_codec_context->time_base = m_format_context->streams[m_audio_stream_index]->time_base;
*/
//TODO(P2): Free assets that have been allocated.
err = avcodec_open2(m_output_video_codec_context, m_output_video_codec, nullptr);
if (err < 0) {
printf("Failed to open codec.\n");
return false;
}
if (!(m_output_format->flags & AVFMT_NOFILE)) {
err = avio_open(&m_output_format_context->pb, video_file_name.c_str(), AVIO_FLAG_WRITE);
if (err < 0) {
printf("Failed to open output file.");
return false;
}
}
err = avformat_write_header(m_output_format_context, NULL);
if (err < 0) {
printf("Failed to write header.\n");
return false;
}
av_dump_format(m_output_format_context, 0, video_file_name.c_str(), 1);
return true;
}
//TODO(P2): make this a member. (Thanks to https://emvlo.wordpress.com/2016/03/10/sws_scale/)
void PrepareFlipFrameJ420(AVFrame* pFrame) {
for (int i = 0; i < 4; i++) {
if (i)
pFrame->data[i] += pFrame->linesize[i] * ((pFrame->height >> 1) - 1);
else
pFrame->data[i] += pFrame->linesize[i] * (pFrame->height - 1);
pFrame->linesize[i] = -pFrame->linesize[i];
}
}
This is where we take an altered frame and write it to the output container. This works fine
as long as we haven't set up an audio stream in the output container.
bool MediaContainerMgr::output_video_frame(uint8_t* buf) {
int err;
if (!m_output_video_frame) {
m_output_video_frame = av_frame_alloc();
m_output_video_frame->format = AV_PIX_FMT_YUV420P;
m_output_video_frame->width = m_output_video_codec_context->width;
m_output_video_frame->height = m_output_video_codec_context->height;
err = av_frame_get_buffer(m_output_video_frame, 32);
if (err < 0) {
printf("Failed to allocate output frame.\n");
return false;
}
}
if (!m_output_scale_context) {
m_output_scale_context = sws_getContext(m_output_video_codec_context->width, m_output_video_codec_context->height,
AV_PIX_FMT_RGB24,
m_output_video_codec_context->width, m_output_video_codec_context->height,
AV_PIX_FMT_YUV420P, SWS_BICUBIC, nullptr, nullptr, nullptr);
}
int inLinesize[1] = { 3 * m_output_video_codec_context->width };
sws_scale(m_output_scale_context, (const uint8_t* const*)&buf, inLinesize, 0, m_output_video_codec_context->height,
m_output_video_frame->data, m_output_video_frame->linesize);
PrepareFlipFrameJ420(m_output_video_frame);
//TODO(P0): Switch m_frame to be m_input_video_frame so I don't end up using the presentation timestamp from
// an audio frame if I threadify the frame reading.
m_output_video_frame->pts = m_frame->pts;
printf("Output PTS: %d, time_base: %d/%d\n", m_output_video_frame->pts,
m_output_video_codec_context->time_base.num, m_output_video_codec_context->time_base.den);
err = avcodec_send_frame(m_output_video_codec_context, m_output_video_frame);
if (err < 0) {
printf(" ERROR sending new video frame output: ");
switch (err) {
case AVERROR(EAGAIN):
printf("AVERROR(EAGAIN): %d\n", err);
break;
case AVERROR_EOF:
printf("AVERROR_EOF: %d\n", err);
break;
case AVERROR(EINVAL):
printf("AVERROR(EINVAL): %d\n", err);
break;
case AVERROR(ENOMEM):
printf("AVERROR(ENOMEM): %d\n", err);
break;
}
return false;
}
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = nullptr;
pkt.size = 0;
pkt.flags |= AV_PKT_FLAG_KEY;
int ret = 0;
if ((ret = avcodec_receive_packet(m_output_video_codec_context, &pkt)) == 0) {
static int counter = 0;
printf("pkt.key: 0x%08x, pkt.size: %d, counter:\n", pkt.flags & AV_PKT_FLAG_KEY, pkt.size, counter++);
uint8_t* size = ((uint8_t*)pkt.data);
printf("sizes: %d %d %d %d %d %d %d %d %d\n", size[0], size[1], size[2], size[2], size[3], size[4], size[5], size[6], size[7]);
av_interleaved_write_frame(m_output_format_context, &pkt);
}
printf("push: %d\n", ret);
av_packet_unref(&pkt);
return true;
}
bool MediaContainerMgr::finalize_output() {
if (!m_recording)
return true;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = nullptr;
pkt.size = 0;
for (;;) {
avcodec_send_frame(m_output_video_codec_context, nullptr);
if (avcodec_receive_packet(m_output_video_codec_context, &pkt) == 0) {
av_interleaved_write_frame(m_output_format_context, &pkt);
printf("final push:\n");
} else {
break;
}
}
av_packet_unref(&pkt);
av_write_trailer(m_output_format_context);
if (!(m_output_format->flags & AVFMT_NOFILE)) {
int err = avio_close(m_output_format_context->pb);
if (err < 0) {
printf("Failed to close file. err: %d\n", err);
return false;
}
}
return true;
}
EDIT
The call stack on the crash (which I should have included in the original question):
avformat-58.dll!compute_muxer_pkt_fields(AVFormatContext * s, AVStream * st, AVPacket * pkt) Line 630 C
avformat-58.dll!write_packet_common(AVFormatContext * s, AVStream * st, AVPacket * pkt, int interleaved) Line 1122 C
avformat-58.dll!write_packets_common(AVFormatContext * s, AVPacket * pkt, int interleaved) Line 1186 C
avformat-58.dll!av_interleaved_write_frame(AVFormatContext * s, AVPacket * pkt) Line 1241 C
CamBot.exe!MediaContainerMgr::output_video_frame(unsigned char * buf) Line 553 C++
CamBot.exe!main() Line 240 C++
If I move the call to avformat_write_header so it's immediately before the audio stream initialization, I still get a crash, but in a different place. The crash happens on line 6459 of movenc.c, where we have:
/* Non-seekable output is ok if using fragmentation. If ism_lookahead
* is enabled, we don't support non-seekable output at all. */
if (!(s->pb->seekable & AVIO_SEEKABLE_NORMAL) && // CRASH IS HERE
(!(mov->flags & FF_MOV_FLAG_FRAGMENT) || mov->ism_lookahead)) {
av_log(s, AV_LOG_ERROR, "muxer does not support non seekable output\n");
return AVERROR(EINVAL);
}
The exception is a nullptr exception, where s->pb is NULL. The call stack is:
avformat-58.dll!mov_init(AVFormatContext * s) Line 6459 C
avformat-58.dll!init_muxer(AVFormatContext * s, AVDictionary * * options) Line 407 C
[Inline Frame] avformat-58.dll!avformat_init_output(AVFormatContext *) Line 489 C
avformat-58.dll!avformat_write_header(AVFormatContext * s, AVDictionary * * options) Line 512 C
CamBot.exe!MediaContainerMgr::init_video_output(const std::string & video_file_name, unsigned int width, unsigned int height) Line 424 C++
CamBot.exe!main() Line 183 C++
Please note that you should always try to provide a self-contained minimal working example to make it easier for others to help. With the actual code, the matching FFmpeg version, and an input video that triggers the segmentation fault (to be sure), the issue would be a matter of analyzing the control flow to identify why st->internal->priv_pts was not allocated. Without the full scenario, I have to resort to making assumptions that may or may not correspond to your actual code.
Based on your description, I attempted to reproduce the issue by cloning https://github.com/FFmpeg/FFmpeg.git and creating a new branch from commit b52e0d95 (November 4, 2020) to approximate your FFmpeg version.
I recreated your scenario using the provided code snippets by
including the avformat_new_stream() call for the audio stream
keeping the remaining audio initialization commented out
including the original avformat_write_header() call site (unchanged order)
With that scenario, the video write with MP4 video/audio input fails in avformat_write_header():
[mp4 @ 0x2b39f40] sample rate not set 0
The call stack of the error location:
#0 0x00007ffff75253d7 in raise () from /lib64/libc.so.6
#1 0x00007ffff7526ac8 in abort () from /lib64/libc.so.6
#2 0x000000000094feca in init_muxer (s=0x2b39f40, options=0x0) at libavformat/mux.c:309
#3 0x00000000009508f4 in avformat_init_output (s=0x2b39f40, options=0x0) at libavformat/mux.c:490
#4 0x0000000000950a10 in avformat_write_header (s=0x2b39f40, options=0x0) at libavformat/mux.c:514
[...]
In init_muxer(), the sample rate in the stream parameters is checked unconditionally:
case AVMEDIA_TYPE_AUDIO:
if (par->sample_rate <= 0) {
av_log(s, AV_LOG_ERROR, "sample rate not set %d\n", par->sample_rate); abort();
ret = AVERROR(EINVAL);
goto fail;
}
That condition has been in effect since 2014-06-18 at the very least (didn't go back any further) and still exists. With a version from November 2020, the check must be active and the parameter must be set accordingly.
If I uncomment the remaining audio initialization, the situation remains unchanged (as expected). So, to satisfy the condition, I added the missing parameter as follows:
m_output_audio_stream->codecpar->sample_rate =
m_format_context->streams[m_audio_stream_index]->codecpar->sample_rate;
With that, the check succeeds, avformat_write_header() succeeds, and the actual video write succeeds.
As you indicated in your question, the segmentation fault is caused by st->internal->priv_pts being NULL at this location:
#0 0x00000000009516db in compute_muxer_pkt_fields (s=0x2b39f40, st=0x2b3a580, pkt=0x7fffffffe2d0) at libavformat/mux.c:632
#1 0x0000000000953128 in write_packet_common (s=0x2b39f40, st=0x2b3a580, pkt=0x7fffffffe2d0, interleaved=1) at libavformat/mux.c:1125
#2 0x0000000000953473 in write_packets_common (s=0x2b39f40, pkt=0x7fffffffe2d0, interleaved=1) at libavformat/mux.c:1188
#3 0x0000000000953634 in av_interleaved_write_frame (s=0x2b39f40, pkt=0x7fffffffe2d0) at libavformat/mux.c:1243
[...]
In the FFmpeg code base, the allocation of priv_pts is handled by init_pts() for all streams referenced by the context. init_pts() has two call sites:
libavformat/mux.c:496:
if (s->oformat->init && ret) {
if ((ret = init_pts(s)) < 0)
return ret;
return AVSTREAM_INIT_IN_INIT_OUTPUT;
}
libavformat/mux.c:530:
if (!s->internal->streams_initialized) {
if ((ret = init_pts(s)) < 0)
goto fail;
}
In both cases, the calls are triggered by avformat_write_header() (indirectly via avformat_init_output() for the first, directly for the second). According to control flow analysis, there's no success case that would leave priv_pts unallocated.
Considering a high probability that our versions of FFmpeg are compatible in terms of behavior, I have to assume that 1) the sample rate must be provided for audio streams and 2) priv_pts is always allocated by avformat_write_header() in the absence of errors. Therefore, two possible root causes come to mind:
Your stream is not an audio stream (unlikely; the type is based on the codec, which in turn is based on the output file extension - assuming mp4)
You do not call avformat_write_header() (unlikely), or you do not handle the error in the caller of your C++ member function (the return value of avformat_write_header() is checked inside the member function, but I do not have the code for its caller; your actual code might differ significantly from the code provided, so this is possible, and it is the only plausible conclusion that can be drawn from the available data)
The solution: Ensure that processing does not continue if avformat_write_header() fails. By adding the audio stream, avformat_write_header() starts to fail unless you set the stream sample rate. If the error is ignored, av_interleaved_write_frame() triggers a segmentation fault by accessing the unallocated st->internal->priv_pts.
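A small sketch of that guard, reusing the names from the question (the error reporting is just an example):
err = avformat_write_header(m_output_format_context, NULL);
if (err < 0) {
    char msg[AV_ERROR_MAX_STRING_SIZE] = { 0 };
    av_strerror(err, msg, sizeof(msg));
    printf("Failed to write header: %s\n", msg); // e.g. "Invalid argument" when sample_rate is unset
    return false; // treat as fatal: av_interleaved_write_frame() must never be reached
}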
As mentioned initially, the scenario is incomplete. If you do call avformat_write_header() and stop processing in case of an error (meaning you do not call av_interleaved_write_frame()), more information is needed; as it stands now, that seems unlikely. For further analysis, the executable output (stdout, stderr) is required to see your traces and the FFmpeg log messages. If that does not reveal new information, a self-contained minimal working example and the input video are needed to get the full picture.

FFmpeg Opus choppy sound UPDATED DESCRIPTION

I'm using FFmpeg and trying to encode and decode raw PCM audio to Opus using the built-in FFmpeg "opus" codec. My input samples are raw PCM, 8000 Hz, 16-bit, mono, in AV_SAMPLE_FMT_S16 format. Since the Opus encoder requires the sample format AV_SAMPLE_FMT_FLTP and a sample rate of 48000 Hz only, I resample my samples before encoding them.
I have two instances of a ResamplerAudio class that does the work of resampling audio samples and holds an SwrContext member. I use the first instance of ResamplerAudio to resample the raw PCM input audio before encoding, and the second to resample the decoded audio so that its format and sample rate match the source values of the input raw audio.
ResamplerAudio class has a function that init it's SwrContext member like this:
void ResamplerAudio::init(AVCodecContext *codecContext, int inSampleRate, int outSampleRate, AVSampleFormat inSampleFmt, AVSampleFormat outSampleFmt)
{
swrContext = swr_alloc();
if (!swrContext)
{
LOGE(TAG, "[init] Couldn't allocate swr context");
return;
}
av_opt_set_int(swrContext, "in_channel_layout", (int64_t) codecContext->channel_layout, 0);
av_opt_set_int(swrContext, "out_channel_layout", (int64_t) codecContext->channel_layout, 0);
av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
av_opt_set_int(swrContext, "out_channel_count", codecContext->channels, 0);
av_opt_set_int(swrContext, "in_sample_rate", inSampleRate, 0);
av_opt_set_int(swrContext, "out_sample_rate", outSampleRate, 0);
av_opt_set_sample_fmt(swrContext, "in_sample_fmt", inSampleFmt, 0);
av_opt_set_sample_fmt(swrContext, "out_sample_fmt", outSampleFmt, 0);
int ret = swr_init(swrContext);
if (ret < 0)
{
LOGE(TAG, "[init] swr_init error: %s", av_err2str(ret));
return;
}
LOGD(TAG, "[init] success codecContext->channel_layout: %d; inSampleRate: %d; outSampleRate: %d; inSampleFmt: %d; outSampleFmt: %d", (int) codecContext->channel_layout, inSampleRate, outSampleRate, inSampleFmt, outSampleFmt);
}
And I call the ResamplerAudio::init function for the first instance of ResamplerAudio (this instance resamples the raw PCM input audio before encoding; I call it resamplerEncoder) with the following args:
resamplerEncoder->init(contextEncoder, 8000, 48000, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP);
The second instance of ResamplerAudio (this instance resamples the audio after decoding it from Opus; I call it resamplerDecoder) I init with the following args:
resamplerDecoder->init(contextDecoder, 48000, 8000, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16);
The function of ResamplerAudio that does resampling looks like this:
std::vector<uint8_t> ResamplerAudio::convert(uint8_t **inData, int inSamplesCount, int outChannels, int outFormat)
{
std::vector<uint8_t> result;
uint8_t *dstData = NULL;
const int dstNbSamples = swr_get_out_samples(swrContext, inSamplesCount);
av_samples_alloc(&dstData, NULL, outChannels, dstNbSamples, AVSampleFormat(outFormat), 1);
int resampledSize = swr_convert(swrContext, &dstData, dstNbSamples, (const uint8_t **)inData, inSamplesCount);
int dstBufSize = av_samples_get_buffer_size(NULL, outChannels, resampledSize, AVSampleFormat(outFormat), 1);
if (dstBufSize <= 0) return result;
std::copy(&dstData[0], &dstData[dstBufSize], std::back_inserter(result));
return result;
}
And I call ResamplerAudio::convert function before encoding with the following args:
// data - an array of raw pcm audio
// dataLength - the length of data array
// getSamplesCount() - function that calculates samples count
// frameEncode - AVFrame that using for encode audio
std::vector<uint8_t> resampledData = resamplerEncoder->convert(&data, getSamplesCount(dataLength, frameEncode->channels, AV_SAMPLE_FMT_S16), frameEncode->channels, frameEncode->format);
getSamplesCount() function looks like this:
getSamplesCount(int bytesCount, int channels, AVSampleFormat format)
{
return bytesCount / av_get_bytes_per_sample(format) / channels;
}
After that I fill my frameEncode with resampled samples:
memcpy(&frame->data[0][0], &resampledData[0], sizeof(uint8_t) * resampledDataLength);
And pass frameEncode to encoding like this encodeFrame(resampledDataLength):
void encodeFrame(int dataLength)
{
/* send the frame for encoding */
int ret = avcodec_send_frame(contextEncoder, frameEncode);
if (ret < 0)
{
LOGE(TAG, "[encodeFrame] avcodec_send_frame error: %s", av_err2str(ret));
return;
}
/* read all the available output packets (in general there may be any number of them) */
while (ret >= 0)
{
ret = avcodec_receive_packet(contextEncoder, packetEncode);
if (ret < 0 && ret != AVERROR(EAGAIN)) LOGE(TAG, "[encodeFrame] error in avcodec_receive_packet: %s", av_err2str(ret));
if (ret < 0) break;
// encodedData - std::vector<uint8_t> that stores encoded data
std::copy(&packetEncode->data[0], &packetEncode->data[dataLength], std::back_inserter(encodedData));
av_packet_unref(packetEncode);
}
}
Then I decode my encoded samples and resample them to get them back in the source sample format and sample rate, so I call the ResamplerAudio::convert function for resamplerDecoder with the following args:
// frameDecode - AVFrame that holds decoded audio
std::vector<uint8_t> resampledData = resamplerDecoder->convert(frameDecode->data, frameDecode->nb_samples, frameDecode->channels, AV_SAMPLE_FMT_S16);
And the resulting sound is choppy; I also noticed that the decoded array is bigger than the source array of raw PCM audio.
Any ideas what I'm doing wrong?
UPD 18.05.2020
I tested my resampling logic by resampling raw PCM sound without any encoding and decoding routines. First I converted the sample rate of the input sound from 8000 Hz to 48000 Hz, then took the resampled samples from the step above and converted their sample rate from 48000 Hz back to 8000 Hz, and the resulting sound is perfect and clean. I did the same steps converting not the sample rate but the sample format, from AV_SAMPLE_FMT_S16 to AV_SAMPLE_FMT_FLTP and vice versa, and again the result is perfect and clean; I got the same result when I converted both the sample rate and the sample format.
So I assume that the problem of distorted and choppy sound is in my encoding or decoding routine, most likely the decoding routine, because after decoding I ALWAYS get an AVFrame with 960 nb_samples regardless of the size of the input sound.
My decoding routine looks like this:
std::vector<uint8_t> decode(uint8_t *data, unsigned int dataLength)
{
decodedData.clear();
int dataSize = dataLength;
while (dataSize > 0)
{
if (!frameDecode)
{
frameDecode = av_frame_alloc();
if (!frameDecode)
{
LOGE(TAG, "[decode] Couldn't allocate the frame");
return EMPTY_DATA;
}
}
ret = av_parser_parse2(parser, contextDecoder, &packetDecode->data, &packetDecode->size, &data[0], dataSize, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0) {
LOGE(TAG, "[decode] av_parser_parse2 error: %s", av_err2str(ret));
return EMPTY_DATA;
}
data += ret;
dataSize -= ret;
doDecode();
}
return decodedData;
}
void doDecode()
{
if (packetDecode->size) {
/* send the packet with the compressed data to the decoder */
int ret = avcodec_send_packet(contextDecoder, packetDecode);
if (ret < 0) LOGE(TAG, "[decode] avcodec_send_packet error: %s", av_err2str(ret));
/* read all the output frames (in general there may be any number of them) */
while (ret >= 0)
{
ret = avcodec_receive_frame(contextDecoder, frameDecode);
if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) LOGE(TAG, "[decode] avcodec_receive_frame error: %s", av_err2str(ret));
if (ret < 0) break;
std::vector<uint8_t> resampledData = resamplerDecoder->convert(frameDecode->data, frameDecode->nb_samples, frameDecode->channels, AV_SAMPLE_FMT_S16);
if (!resampledData.size()) continue;
std::copy(&resampledData.data()[0], &resampledData.data()[resampledData.size()], std::back_inserter(decodedData));
}
}
}
UPD 30.05.2020
I decided to stop using FFmpeg in my project and use libopus 1.3.1 instead, so I made a wrapper around it and it works fine.
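For completeness, a minimal sketch of that kind of libopus round trip (8000 Hz mono, 20 ms frames; all values are only examples, not the actual wrapper):
#include <opus/opus.h>
#include <vector>
#include <cstdio>

// Encode and decode one 20 ms frame at 8000 Hz mono (160 samples).
int main() {
    const int kRate = 8000, kChannels = 1, kFrame = 160;   // 20 ms at 8 kHz
    int err = 0;
    OpusEncoder *enc = opus_encoder_create(kRate, kChannels, OPUS_APPLICATION_VOIP, &err);
    OpusDecoder *dec = opus_decoder_create(kRate, kChannels, &err);

    std::vector<opus_int16> in(kFrame, 0), out(kFrame, 0);
    unsigned char packet[4000];

    opus_int32 bytes = opus_encode(enc, in.data(), kFrame, packet, sizeof(packet));
    int decoded = opus_decode(dec, packet, bytes, out.data(), kFrame, 0);
    printf("encoded %d bytes, decoded %d samples\n", bytes, decoded);

    opus_encoder_destroy(enc);
    opus_decoder_destroy(dec);
    return 0;
}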

Signed 16-bit ALSA PCM data to U8 Conversion on Linux

I'm attempting to convert 16-bit ALSA PCM samples to unsigned 8-bit PCM samples for wireless transmission on Linux. The receiving machine is playing the transmitted data successfully and the recorded voice is there and recognizable, but the quality is terrible and noisy. I've tried ALSA mixer on both ends to tune the stream, but it doesn't seem to get much better with that. I believe there is something wrong with my conversion of the samples to 8-bit PCM, but it's just a simple shift so I'm not sure what the error could be. Does anyone have any suggestions or see anything wrong with my conversion code? Thanks.
Conversion Code:
// This byte array needs to be the packet size we wish to send
QByteArray prepareToSend;
prepareToSend.clear();
// Keep reading from ALSA until we fill one full frame
int frames = 1;
while ( prepareToSend.size() < TARGET_TX_BUFFER_SIZE ) {
// Create a ByteArray
QByteArray readBytes;
readBytes.resize(size);
// Read with ALSA
short sample[1]; // Data is signed 16-bit
int rc = snd_pcm_readi(m_PlaybackHandle, sample, frames);
if (rc == -EPIPE) {
/* EPIPE means overrun */
fprintf(stderr, "Overrun occurred\n");
snd_pcm_prepare(m_PlaybackHandle);
} else if (rc < 0) {
fprintf(stderr,
"Error from read: %s\n",
snd_strerror(rc));
} else if (rc != (int)frames) {
fprintf(stderr, "Short read, read %d frames\n", rc);
}
else {
// Copy bytes to the prepare to send buffer
//qDebug() << "Bytes for sample buffer: " << sizeof(sample);
prepareToSend.append((qint16)(sample[0]) >> 8); // signed 16-bit becomes u8
}
}
ALSA Configuration:
// Setup parameters
int size;
snd_pcm_t *m_PlaybackHandle;
snd_pcm_hw_params_t *m_HwParams;
char *buffer;
qDebug() << "Desire to Transmit Data - Setting up ALSA Now....";
// Error handling
int err;
// Device to Write to
const char *snd_device_in = "hw:1,0";
if ((err = snd_pcm_open (&m_PlaybackHandle, snd_device_in, SND_PCM_STREAM_CAPTURE, 0)) < 0) {
fprintf (stderr, "Cannot open audio device %s (%s)\n",
snd_device_in,
snd_strerror (err));
exit (1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&m_HwParams);
if ((err = snd_pcm_hw_params_malloc (&m_HwParams)) < 0) {
fprintf (stderr, "Cannot allocate hardware parameter structure (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_any (m_PlaybackHandle, m_HwParams)) < 0) {
fprintf (stderr, "Cannot initialize hardware parameter structure (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_set_access (m_PlaybackHandle, m_HwParams, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0) {
fprintf (stderr, "Cannot set access type (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_set_format(m_PlaybackHandle, m_HwParams, SND_PCM_FORMAT_S16)) < 0) { // Has to be 16 bit
fprintf (stderr, "Cannot set sample format (%s)\n",
snd_strerror (err));
exit (1);
}
uint sample_rate = 8000;
if ((err = snd_pcm_hw_params_set_rate (m_PlaybackHandle, m_HwParams, sample_rate, 0)) < 0) { // 8 KHz
fprintf (stderr, "Cannot set sample rate (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_set_channels (m_PlaybackHandle, m_HwParams, 1)) < 0) { // 1 Channel Mono
fprintf (stderr, "Cannot set channel count (%s)\n",
snd_strerror (err));
exit (1);
}
/*
Frames: samples x channels (i.e: stereo frames are composed of two samples, mono frames are composed of 1 sample,...)
Period: Number of samples transferred after which the device acknowledges the transfer to the application (usually via an interrupt).
*/
/* Submit params to device */
if ((err = snd_pcm_hw_params(m_PlaybackHandle, m_HwParams)) < 0) {
fprintf (stderr, "Cannot set parameters (%s)\n",
snd_strerror (err));
exit (1);
}
/* Free the Struct */
snd_pcm_hw_params_free(m_HwParams);
// Flush handle prepare for record
snd_pcm_drop(m_PlaybackHandle);
if ((err = snd_pcm_prepare (m_PlaybackHandle)) < 0) {
fprintf (stderr, "cannot prepare audio interface for use (%s)\n",
snd_strerror (err));
exit (1);
}
qDebug() << "Done Setting up ALSA....";
// Prepare the device
if ((err = snd_pcm_prepare (m_PlaybackHandle)) < 0) {
fprintf (stderr, "cannot prepare audio interface for use (%s)\n",
snd_strerror (err));
exit (1);
}
(qint16)(sample[0]) >> 8 will convert signed linear 16-bit PCM to signed linear 8-bit PCM. If you want unsigned linear 8-bit then it would be ((quint16)sample[0] ^ 0x8000) >> 8.
Although 16-bit PCM is almost always on a linear scale, 8-bit PCM is more commonly on a log scale (either µ-law or A-law), and a lookup table is usually used for conversion. If you really do want linear 8-bit then you may want to first adjust the gain so that the peak is at 0 dBFS and use audio compression to reduce the dynamic range so that it will fit in 8 bits.
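For illustration, a small sketch of that unsigned conversion applied to a block of samples (the function and buffer names are hypothetical):
// Convert a block of signed 16-bit samples to unsigned 8-bit by flipping the
// sign bit (offset binary) and keeping the top byte.
void s16_to_u8(const qint16 *in, quint8 *out, int count) {
    for (int i = 0; i < count; ++i)
        out[i] = (quint8)(((quint16)in[i] ^ 0x8000) >> 8);
}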
If you use plughw:1,0 instead of hw:1,0, you can just tell the device that you want SND_PCM_FORMAT_U8, and the samples will be converted automatically.
(This works also for µ-Law and A-Law.)
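A minimal sketch of that approach (device name as in the question; error handling omitted for brevity):
/* Let the "plug" plugin do the S16 -> U8 conversion inside ALSA itself. */
snd_pcm_t *handle;
snd_pcm_hw_params_t *hw;
snd_pcm_open(&handle, "plughw:1,0", SND_PCM_STREAM_CAPTURE, 0);
snd_pcm_hw_params_alloca(&hw);
snd_pcm_hw_params_any(handle, hw);
snd_pcm_hw_params_set_access(handle, hw, SND_PCM_ACCESS_RW_INTERLEAVED);
snd_pcm_hw_params_set_format(handle, hw, SND_PCM_FORMAT_U8);   /* or SND_PCM_FORMAT_MU_LAW / SND_PCM_FORMAT_A_LAW */
snd_pcm_hw_params_set_rate(handle, hw, 8000, 0);
snd_pcm_hw_params_set_channels(handle, hw, 1);
snd_pcm_hw_params(handle, hw);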

Convert raw PCM to FLAC?

EDIT: I've updated the code below to reflect the progress I have made. I'm trying to write the .wav header myself. The code does not work properly as of now; the audio is not being written to the file correctly. The code does not contain any attempts to convert it to a .flac file yet.
I am using a Raspberry Pi (Debian Linux) to record audio with the ALSA library. The recording works fine, but I need to encode the input audio into the FLAC codec.
This is where I get lost. I have spent a considerable amount of time trying to figure out how to convert this raw data into FLAC, but I keep coming up with examples of how to convert .wav files into .flac files.
Here is the current (updated) code I have for recording audio with ALSA (it may be a bit rough, I'm still picking up C++):
// Use the newer ALSA API
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct Riff
{
char chunkId[4]; // "RIFF" (assuming char is 8 bits)
int chunkSize; // (assuming int is 32 bits)
char format[4]; // "WAVE"
};
struct Format
{
char chunkId[4]; // "fmt "
int chunkSize;
short format; // assuming short is 16 bits
short numChannels;
int sampleRate;
int byteRate;
short align;
short bitsPerSample;
};
struct Data
{
char chunkId[4]; // "data"
int chunkSize; // length of data
char* data;
};
struct Wave // Actual structure of a PCM WAVE file
{
Riff riffHeader;
Format formatHeader;
Data dataHeader;
};
int main(int argc, char *argv[])
{
void saveWaveFile(struct Wave *waveFile);
long loops;
int rc;
int size;
snd_pcm_t *handle;
snd_pcm_hw_params_t *params;
unsigned int sampleRate = 44100;
int dir;
snd_pcm_uframes_t frames;
char *buffer;
char *device = (char*) "plughw:1,0";
//char *device = (char*) "default";
printf("Capture device is %s\n", device);
/* Open PCM device for recording (capture). */
rc = snd_pcm_open(&handle, device, SND_PCM_STREAM_CAPTURE, 0);
if (rc < 0)
{
fprintf(stderr, "Unable to open PCM device: %s\n", snd_strerror(rc));
exit(1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle, params, 2);
/* 44100 bits/second sampling rate (CD quality) */
snd_pcm_hw_params_set_rate_near(handle, params, &sampleRate, &dir);
/* Set period size to 32 frames. */
frames = 32;
snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0)
{
fprintf(stderr, "Unable to set HW parameters: %s\n", snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
size = frames * 4; /* 2 bytes/sample, 2 channels */
buffer = (char *) malloc(size);
/* We want to loop for 5 seconds */
snd_pcm_hw_params_get_period_time(params, &sampleRate, &dir);
loops = 5000000 / sampleRate;
while (loops > 0)
{
loops--;
rc = snd_pcm_readi(handle, buffer, frames);
if (rc == -EPIPE)
{
/* EPIPE means overrun */
fprintf(stderr, "Overrun occurred.\n");
snd_pcm_prepare(handle);
} else if (rc < 0)
{
fprintf(stderr, "Error from read: %s\n", snd_strerror(rc));
} else if (rc != (int)frames)
{
fprintf(stderr, "Short read, read %d frames.\n", rc);
}
if (rc != size) fprintf(stderr, "Short write: wrote %d bytes.\n", rc);
}
Wave wave;
strcpy(wave.riffHeader.chunkId, "RIFF");
wave.riffHeader.chunkSize = 36 + size;
strcpy(wave.riffHeader.format, "WAVE");
strcpy(wave.formatHeader.chunkId, "fmt");
wave.formatHeader.chunkSize = 16;
wave.formatHeader.format = 1; // PCM, other value indicates compression
wave.formatHeader.numChannels = 2; // Stereo
wave.formatHeader.sampleRate = sampleRate;
wave.formatHeader.byteRate = sampleRate * 2 * 2;
wave.formatHeader.align = 2 * 2;
wave.formatHeader.bitsPerSample = 16;
strcpy(wave.dataHeader.chunkId, "data");
wave.dataHeader.chunkSize = size;
wave.dataHeader.data = buffer;
saveWaveFile(&wave);
snd_pcm_drain(handle);
snd_pcm_close(handle);
free(buffer);
return 0;
}
void saveWaveFile(struct Wave *waveFile)
{
FILE *file = fopen("test.wav", "wb");
size_t written;
if (file == NULL)
{
fprintf(stderr, "Cannot open file for writing.\n");
exit(1);
}
written = fwrite(waveFile, sizeof waveFile[0], 1, file);
fclose(file);
if (written < 1)
{
fprintf(stderr, "Writing to file failed, error %zu.\n", written);
exit(1);
}
}
How would I go about converting the PCM data into the FLAC and save it to disk for later use? I have downloaded libflac-dev already and just need an example to go off of.
The way I am doing it right now:
./capture > test.raw // or ./capture > test.flac
The way it should be (program does everything for me):
./capture
If I understand the FLAC::Encoder::File documentation, you can do something like
#include <FLAC++/encoder.h>
FLAC::Encoder::File encoder;
encoder.init("outfile.flac");
encoder.process(buffer, samples);
encoder.finish();
where buffer is an array of per-channel pointers to 32-bit integer sample buffers (one pointer per channel, each pointing to samples samples).
Unfortunately, I know next to nothing about audio encoding so I can't speak for any other options. Good luck!
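A slightly fuller sketch of that idea, using process_interleaved() so an interleaved S16 capture buffer can be fed directly after widening to 32 bits (the setup values are assumptions matching the capture code above):
// Sketch: widen interleaved S16 samples to FLAC__int32 and hand them to the
// C++ encoder. `buffer` and `frames` are assumed to come from snd_pcm_readi().
#include <FLAC++/encoder.h>
#include <vector>

bool encode_period(FLAC::Encoder::File &encoder, const short *buffer,
                   unsigned frames, unsigned channels) {
    std::vector<FLAC__int32> widened(frames * channels);
    for (size_t i = 0; i < widened.size(); ++i)
        widened[i] = buffer[i];                       // sign-extend 16 -> 32 bit
    return encoder.process_interleaved(widened.data(), frames);
}

// One-time setup (example values):
//   encoder.set_channels(2);
//   encoder.set_bits_per_sample(16);
//   encoder.set_sample_rate(44100);
//   encoder.init("outfile.flac");
// ...call encode_period() for each captured period, then encoder.finish();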
Please refer to the code below:
FLAC Encoder Test Code
This example is using a wav file as an input and then encodes it into FLAC.
As I understand it, there is no major difference between a WAV file and your raw data, so I think you can modify this code to read the "buffer" directly and convert it. You already have all the related information (channels/bitrate etc.), so it should not be much of a problem to remove the WAV header reading code.
Please note: this is a modified version of the Flac Encoder sample from their git repo.
It includes some comments and hints on how to change it to OP's requirements, entire source for this will be a little bit long.
And do note that this is the C API, which tends to be a bit more complex than the C++ one. But it is fairly easy to convert between the two once you get the idea.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "share/compat.h"
#include "FLAC/metadata.h"
#include "FLAC/stream_encoder.h"
/* this call back is what tells your program the progress that the encoder has made */
static void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data);
#define READSIZE 1024
static unsigned total_samples = 0; /* can use a 32-bit number due to WAVE size limitations */
/* buffer is where we record to, in your case what ALSA writes to */
/* Note the calculation here to take the total bytes that the buffer takes */
static FLAC__byte buffer[READSIZE/*samples*/ * 2/*bytes_per_sample*/ * 2/*channels*/];
/* pcm is input to FLAC encoder */
/* the PCM data should be here, bps is 4 here...but we are allocating ints! */
static FLAC__int32 pcm[READSIZE/*samples*/ * 2/*channels*/];
int main(int argc, char *argv[])
{
FLAC__bool ok = true;
FLAC__StreamEncoder *encoder = 0;
FLAC__StreamEncoderInitStatus init_status;
FLAC__StreamMetadata *metadata[2];
FLAC__StreamMetadata_VorbisComment_Entry entry;
FILE *fin;
unsigned sample_rate = 0;
unsigned channels = 0;
unsigned bps = 0;
if((fin = fopen(argv[1], "rb")) == NULL) {
fprintf(stderr, "ERROR: opening %s for output\n", argv[1]);
return 1;
}
/* set sample rate, bps, total samples to encode here, these are dummy values */
sample_rate = 44100;
channels = 2;
bps = 16;
total_samples = 5000;
/* allocate the encoder */
if((encoder = FLAC__stream_encoder_new()) == NULL) {
fprintf(stderr, "ERROR: allocating encoder\n");
fclose(fin);
return 1;
}
ok &= FLAC__stream_encoder_set_verify(encoder, true);
ok &= FLAC__stream_encoder_set_compression_level(encoder, 5);
ok &= FLAC__stream_encoder_set_channels(encoder, channels);
ok &= FLAC__stream_encoder_set_bits_per_sample(encoder, bps);
ok &= FLAC__stream_encoder_set_sample_rate(encoder, sample_rate);
ok &= FLAC__stream_encoder_set_total_samples_estimate(encoder, total_samples);
/* sample adds meta data here I've removed it for clarity */
/* initialize encoder */
if(ok) {
/* client data is whats the progress_callback is called with, any objects you need to update on callback can be passed thru this pointer */
init_status = FLAC__stream_encoder_init_file(encoder, argv[2], progress_callback, /*client_data=*/NULL);
if(init_status != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
fprintf(stderr, "ERROR: initializing encoder: %s\n", FLAC__StreamEncoderInitStatusString[init_status]);
ok = false;
}
}
/* read blocks of samples from WAVE file and feed to encoder */
if(ok) {
size_t left = (size_t)total_samples;
while(ok && left) {
/* record using ALSA and set SAMPLES_IN_BUFFER */
/* convert the packed little-endian 16-bit PCM samples from WAVE into an interleaved FLAC__int32 buffer for libFLAC */
/* why? because bps=2 means that we are dealing with short int(16 bit) samples these are usually signed if you do not explicitly say that they are unsigned */
size_t i;
for(i = 0; i < SAMPLES_IN_BUFFER*channels; i++) {
/* THIS isn't the only way to convert between formats, and I don't condone it: at first glance the code looks like it's processing two channels here, but it's not; it's just copying 16-bit data into an int array. I prefer proper type casting; nonetheless, this works, so... */
pcm[i] = (FLAC__int32)(((FLAC__int16)(FLAC__int8)buffer[2*i+1] << 8) | (FLAC__int16)buffer[2*i]);
}
/* feed samples to encoder */
ok = FLAC__stream_encoder_process_interleaved(encoder, pcm, SAMPLES_IN_BUFFER);
left-=SAMPLES_IN_BUFFER;
}
}
ok &= FLAC__stream_encoder_finish(encoder);
fprintf(stderr, "encoding: %s\n", ok? "succeeded" : "FAILED");
fprintf(stderr, " state: %s\n", FLAC__StreamEncoderStateString[FLAC__stream_encoder_get_state(encoder)]);
FLAC__stream_encoder_delete(encoder);
fclose(fin);
return 0;
}
/* the updates from FLAC's encoder system comes here */
void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data)
{
(void)encoder, (void)client_data;
fprintf(stderr, "wrote %" PRIu64 " bytes, %" PRIu64 "/%u samples, %u/%u frames\n", bytes_written, samples_written, total_samples, frames_written, total_frames_estimate);
}