Opus audio, opus_decode() always fills output buffer with zeros - c++

I slightly modified the opus provide sample file "trivial_example.c"
I modified it so instead of loading a pcm file from disk, I just create some noise using random numbers. Maybe that is the problem, but I dont see how.
I am using Visual Studio 2019. Windows SDK 10. Windows7 x64.
none of the opus functions return an error.
opus_encode() seems to work. It fills cbits, as expected.
But opus_decode() only fills the output buffer with zeros.
This is opus provided sample code, but it doesnt work. Is there some computer setup I needed to do before using opus.
Ive been trying to get opus to work for days. Please help. Thank you.
#define FRAME_SIZE 960
#define SAMPLE_RATE 48000
#define CHANNELS 2
#define APPLICATION OPUS_APPLICATION_AUDIO
#define BITRATE 64000
#define MAX_FRAME_SIZE 6*960
#define MAX_PACKET_SIZE (3*1276)
int main(int argc, char **argv)
{
char *inFile;
FILE *fin;
char *outFile;
FILE *fout;
opus_int16 in[FRAME_SIZE*CHANNELS];
opus_int16 out[MAX_FRAME_SIZE*CHANNELS];
unsigned char cbits[MAX_PACKET_SIZE];
int nbBytes;
/*Holds the state of the encoder and decoder */
OpusEncoder *encoder;
OpusDecoder *decoder;
int err;
/*Create a new encoder state */
encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, APPLICATION, &err);
if (err<0)
{
fprintf(stderr, "failed to create an encoder: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
/* Set the desired bit-rate. You can also set other parameters if needed.
The Opus library is designed to have good defaults, so only set
parameters you know you need. Doing otherwise is likely to result
in worse quality, but better. */
err = opus_encoder_ctl(encoder, OPUS_SET_BITRATE(BITRATE));
if (err<0)
{
fprintf(stderr, "failed to set bitrate: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
/* Create a new decoder state. */
decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
if (err<0)
{
fprintf(stderr, "failed to create decoder: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
////////////////////////////////////////////////////////////////////// create audio noise
#define RandZeroToOne ((float)rand() / RAND_MAX)
#define RandNegOneToOne (((float)rand() / RAND_MAX)*2.0-1.0)
opus_int16* pcmdata = (opus_int16*)calloc(65536, sizeof(opus_int16));
opus_int16* pDstData = (opus_int16*)calloc(65536, sizeof(opus_int16));
for(short i=0;i<6553;i++)
{
pcmdata[i] = (opus_int16)(RandZeroToOne*32767.0);
if((i&0x1)==0)
pcmdata[i] = -pcmdata[i];
int T=0;
}
//////////////////////////////////////////////////////////////////////
//while (1)
{
int i;
int frame_size;
/* Convert from little-endian ordering. */
for (i=0;i<CHANNELS*FRAME_SIZE;i++)
in[i]=pcmdata[2*i+1]<<8|pcmdata[2*i];
/* Encode the frame. */
nbBytes = opus_encode(encoder, in, FRAME_SIZE, cbits, MAX_PACKET_SIZE);;//<---------------
if (nbBytes<0)
{
fprintf(stderr, "encode failed: %s\n", opus_strerror(nbBytes));
return EXIT_FAILURE;
}
/* Decode the data. In this example, frame_size will be constant because
the encoder is using a constant frame size. However, that may not
be the case for all encoders, so the decoder must always check
the frame size returned. */
opus_decoder_ctl(decoder, OPUS_RESET_STATE);
frame_size = opus_decode(decoder, cbits, nbBytes, out, MAX_FRAME_SIZE, 0);//<---------------
if (frame_size<0)
{
fprintf(stderr, "decoder failed: %s\n", opus_strerror(frame_size));
return EXIT_FAILURE;
}
/* Convert to little-endian ordering. */
for(i=0;i<CHANNELS*frame_size;i++)
{
pcmdata[2*i]=out[i]&0xFF;
pcmdata[2*i+1]=(out[i]>>8)&0xFF;
}
}
opus_encoder_destroy(encoder);
opus_decoder_destroy(decoder);
return EXIT_SUCCESS;
}

Related

FFmpeg Opus choppy sound UPDATED DESCRIPTION

I'm using FFmpeg and try to encode and decode a raw PCM sound to Opus using a built-in FFmpeg "opus" codec. My input samples are raw PCM 8000 Hz 16 bit mono, in AV_SAMPLE_FMT_S16 format. Since Opus requires sample format AV_SAMPLE_FMT_FLTP and sample rate 48000 Hz only, so I resample my samples before encode them.
I have two instances of ResamplerAudio class that does the work of resampling audio samples and has a member of SwrContext, I use the first instance of ResamplerAudio for resampling a raw PCM input audio before encoding and the second for resampling decoded audio to get it's format and sample rate the same as source values of input raw audio.
ResamplerAudio class has a function that init it's SwrContext member like this:
void ResamplerAudio::init(AVCodecContext *codecContext, int inSampleRate, int outSampleRate, AVSampleFormat inSampleFmt, AVSampleFormat outSampleFmt)
{
swrContext = swr_alloc();
if (!swrContext)
{
LOGE(TAG, "[init] Couldn't allocate swr context");
return;
}
av_opt_set_int(swrContext, "in_channel_layout", (int64_t) codecContext->channel_layout, 0);
av_opt_set_int(swrContext, "out_channel_layout", (int64_t) codecContext->channel_layout, 0);
av_opt_set_int(swrContext, "in_channel_count", codecContext->channels, 0);
av_opt_set_int(swrContext, "out_channel_count", codecContext->channels, 0);
av_opt_set_int(swrContext, "in_sample_rate", inSampleRate, 0);
av_opt_set_int(swrContext, "out_sample_rate", outSampleRate, 0);
av_opt_set_sample_fmt(swrContext, "in_sample_fmt", inSampleFmt, 0);
av_opt_set_sample_fmt(swrContext, "out_sample_fmt", outSampleFmt, 0);
int ret = swr_init(swrContext);
if (ret < 0)
{
LOGE(TAG, "[init] swr_init error: %s", av_err2str(ret));
return;
}
LOGD(TAG, "[init] success codecContext->channel_layout: %d; inSampleRate: %d; outSampleRate: %d; inSampleFmt: %d; outSampleFmt: %d", (int) codecContext->channel_layout, inSampleRate, outSampleRate, inSampleFmt, outSampleFmt);
}
And I call ResamplerAudio::init function for the first instance of ResamplerAudio (this instance do resamping a raw PCM input audio before encoding and I called it resamplerEncoder) with the following args:
resamplerEncoder->init(contextEncoder, 8000, 48000, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP);
The second instance of ResamplerAudio (this instance do resamping after decoding audio from Opus and I called it resamplerDecoder) I init with the following args:
resamplerDecoder->init(contextDecoder, 48000, 8000, AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_S16);
The function of ResamplerAudio that does resampling looks like this:
std::vector<uint8_t> ResamplerAudio::convert(uint8_t **inData, int inSamplesCount, int outChannels, int outFormat)
{
std::vector<uint8_t> result;
uint8_t *dstData = NULL;
const int dstNbSamples = swr_get_out_samples(swrContext, inSamplesCount);
av_samples_alloc(&dstData, NULL, outChannels, dstNbSamples, AVSampleFormat(outFormat), 1);
int resampledSize = swr_convert(swrContext, &dstData, dstNbSamples, (const uint8_t **)inData, inSamplesCount);
int dstBufSize = av_samples_get_buffer_size(NULL, outChannels, resampledSize, AVSampleFormat(outFormat), 1);
if (dstBufSize <= 0) return result;
std::copy(&dstData[0], &dstData[dstBufSize], std::back_inserter(result));
return result;
}
And I call ResamplerAudio::convert function before encoding with the following args:
// data - an array of raw pcm audio
// dataLength - the length of data array
// getSamplesCount() - function that calculates samples count
// frameEncode - AVFrame that using for encode audio
std::vector<uint8_t> resampledData = resamplerEncoder->convert(&data, getSamplesCount(dataLength, frameEncode->channels, AV_SAMPLE_FMT_S16), frameEncode->channels, frameEncode->format);
getSamplesCount() function looks like this:
getSamplesCount(int bytesCount, int channels, AVSampleFormat format)
{
return bytesCount / av_get_bytes_per_sample(format) / channels;
}
After that I fill my frameEncode with resampled samples:
memcpy(&frame->data[0][0], &resampledData[0], sizeof(uint8_t) * resampledDataLength);
And pass frameEncode to encoding like this encodeFrame(resampledDataLength):
void encodeFrame(int dataLength)
{
/* send the frame for encoding */
int ret = avcodec_send_frame(contextEncoder, frameEncode);
if (ret < 0)
{
LOGE(TAG, "[encodeFrame] avcodec_send_frame error: %s", av_err2str(ret));
return;
}
/* read all the available output packets (in general there may be any number of them */
while (ret >= 0)
{
ret = avcodec_receive_packet(contextEncoder, packetEncode);
if (ret < 0 && ret != AVERROR(EAGAIN)) LOGE(TAG, "[encodeFrame] error in avcodec_receive_packet: %s", av_err2str(ret));
if (ret < 0) break;
// encodedData - std::vector<uint8_t> that stores encoded data
std::copy(&packetEncode->data[0], &packetEncode->data[dataLength], std::back_inserter(encodedData));
av_packet_unref(packetEncode);
}
}
Then I decode my encoded samples and do resampling to get back them in source sample format and sample rate so I call ResamplerAudio::convert function for resamplerDecoder with the following args:
// frameDecode - AVFrame that holds decoded audio
std::vector<uint8_t> resampledData = resamplerDecoder->convert(frameDecode->data, frameDecode->nb_samples, frameDecode->channels, AV_SAMPLE_FMT_S16);
And result sound is choppy and I also noticed that the decoded array size is bigger than the source array size with raw pcm audio.
Please any ideas what I'm doing wrong?
UPD 18.05.2020
I tested my resampling logic, I did resampling of raw pcm sound without any encoding and decoding routines. First I tried to convert the sample rate of input sound from 8000 Hz to 48000 Hz than I took resampled samples from step above and convert it's sample rate from 48000 Hz to 8000 Hz and the result sound is perfect and clean, also I did the same steps but I converted not a sample rate but a sample format from AV_SAMPLE_FMT_S16 to AV_SAMPLE_FMT_FLTP and vice versa and again the result sound is perfect and clean, also I got the same result when I coverted both a sample rate and a sample format.
So I assume that the problem of distorted and choppy sound is in my encoding or decoding routine, I think most likely in decoding routine because after decoding I ALWAYS get AVFrame with 960 nb_samples despite what was the size of input sound.
My decoding routine looks like this:
std::vector<uint8_t> decode(uint8_t *data, unsigned int dataLength)
{
decodedData.clear();
int dataSize = dataLength;
while (dataSize > 0)
{
if (!frameDecode)
{
frameDecode = av_frame_alloc();
if (!frameDecode)
{
LOGE(TAG, "[decode] Couldn't allocate the frame");
return EMPTY_DATA;
}
}
ret = av_parser_parse2(parser, contextDecoder, &packetDecode->data, &packetDecode->size, &data[0], dataSize, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0) {
LOGE(TAG, "[decode] av_parser_parse2 error: %s", av_err2str(ret));
return EMPTY_DATA;
}
data += ret;
dataSize -= ret;
doDecode();
}
return decodedData;
}
void doDecode()
{
if (packetDecode->size) {
/* send the packet with the compressed data to the decoder */
int ret = avcodec_send_packet(contextDecoder, packetDecode);
if (ret < 0) LOGE(TAG, "[decode] avcodec_send_packet error: %s", av_err2str(ret));
/* read all the output frames (in general there may be any number of them */
while (ret >= 0)
{
ret = avcodec_receive_frame(contextDecoder, frameDecode);
if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) LOGE(TAG, "[decode] avcodec_receive_frame error: %s", av_err2str(ret));
if (ret < 0) break;
std::vector<uint8_t> resampledData = resamplerDecoder->convert(frameDecode->data, frameDecode->nb_samples, frameDecode->channels, AV_SAMPLE_FMT_S16);
if (!resampledData.size()) continue;
std::copy(&resampledData.data()[0], &resampledData.data()[resampledData.size()], std::back_inserter(decodedData));
}
}
}
UPD 30.05.2020
I decided to refuse to use FFmpeg in my project and use libopus 1.3.1 instead, so I made a wrapper around it and it works fine.

How to get a well formed opus file with libopus?

I have .wav files and I would to encode them using opus, write everything in an .opus file then read it with vlc for example. I have done some code using the opus trivial example but the quality is very poor. In fact, there is a problem, I never write any header, is that normal ? What do I have forgotten ?
#define FRAME_SIZE 960
#define SAMPLE_RATE 48000 //frequence
#define CHANNELS 1 // up to 255
#define APPLICATION OPUS_APPLICATION_AUDIO
#define MAX_FRAME_SIZE 6*960
#define MAX_PACKET_SIZE (3*1276)
#define BUFFER_LEN 1024
int main(int argc, char **argv)
{
char *inFile;
FILE *fin;
char *outFile;
FILE *fout;
opus_int16 in[FRAME_SIZE*CHANNELS];
unsigned char cbits[MAX_PACKET_SIZE];
OpusEncoder *encoder;
int err;
/*Create a new encoder state */
encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, APPLICATION, &err);
if (err<0)
{
fprintf(stderr, "failed to create an encoder: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
opus_encoder_ctl(encoder, OPUS_SET_BITRATE(512000)); //500 to 512 000 // 350 000 pas trop mal
if (err<0)
{
fprintf(stderr, "failed to set bitrate: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
inFile = "toencode.wav";
fin = fopen(inFile, "r");
if (fin==NULL)
{
fprintf(stderr, "in: failed to open file: %s\n", strerror(errno));
return EXIT_FAILURE;
}
if (err<0)
{
fprintf(stderr, "failed to create decoder: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
outFile = "encoded.opus";
fout = fopen(outFile, "w");
if (fout==NULL)
{
fprintf(stderr, "failed to open file: %s\n", strerror(errno));
return EXIT_FAILURE;
}
while (1)
{
int i;
unsigned char pcm_bytes[MAX_FRAME_SIZE*CHANNELS*2];
/* Read a 16 bits/sample audio frame. */
fread(pcm_bytes, sizeof(short)*CHANNELS, FRAME_SIZE, fin);
if (feof(fin))
break;
/* Convert from little-endian ordering. */
for (i=0;i<CHANNELS*FRAME_SIZE;i++)
in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
/* Encode the frame. */
if (opus_encode(encoder, in, FRAME_SIZE, cbits, MAX_PACKET_SIZE)<0)
{
// fprintf(stderr, "encode failed: %s\n", opus_strerror(nbBytes));
return EXIT_FAILURE;
}
fwrite(in,sizeof(short),sizeof(in),fout);
}
/*Destroy the encoder state*/
opus_encoder_destroy(encoder);
fclose(fin);
fclose(fout);
return EXIT_SUCCESS;
}
I think there is a real problem of how I write the file but I dont't where it comes from, can you please help me ?
To make a playable .opus file you need to construct headers and encapsulate them in a sequence of Ogg pages before writing the stream out. See https://git.xiph.org/?p=opus-tools.git;a=blob;f=src/opusenc.c for an open source implementation.
Note you'll get better results if you use 960 samples for MAX_FRAME_SIZE. Bumping the bitrate to the maximum won't make much of a audible difference either.

Link / use external library in QtCreator

With mingw's Msys tool I've successfully build the opus-codec from source 1.1.tar.gz. The build produced some files, amongst which are libopus.a and libopus-0.dll. Now I want to try the trivial-example.c in QtCreator. I added the lib to my .pro-file and included opus.h in my main file. The compiler complains that it can't find the headers that are included in opus.h Shouldn't these be included within the lib? How do I need to setup my application to run the "trivial-example"?
My folder structure is:
main.cpp
opus_lib_test.pro
opus_lib_test.pro.user
include [folder]
opus.h (from the source include folder)
libs [folder]
libopus.a
libopus-0.dll
My .pro-file looks like
QT += core
QT -= gui
TARGET = opus_lib_test
CONFIG += console
CONFIG -= app_bundle
TEMPLATE = app
INCLUDEPATH += $$PWD/include
LIBS += -L"C:/Qt/Qt5.2.1/Tools/QtCreator/bin/opus_lib_test/libs/" -llibopus
SOURCES += main.cpp
HEADERS += include/opus.h
and my main.cpp is here:
//#include <QCoreApplication>
#include "opus.h"
int main(int argc, char *argv[])
{
// QCoreApplication a(argc, argv);
// return a.exec();
// ----------------------------- trivial_example.c
char *inFile;
FILE *fin;
char *outFile;
FILE *fout;
opus_int16 in[FRAME_SIZE*CHANNELS];
opus_int16 out[MAX_FRAME_SIZE*CHANNELS];
unsigned char cbits[MAX_PACKET_SIZE];
int nbBytes;
/*Holds the state of the encoder and decoder */
OpusEncoder *encoder;
OpusDecoder *decoder;
int err;
if (argc != 3)
{
fprintf(stderr, "usage: trivial_example input.pcm output.pcm\n");
fprintf(stderr, "input and output are 16-bit little-endian raw files\n");
return EXIT_FAILURE;
}
/*Create a new encoder state */
encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, APPLICATION, &err);
if (err<0)
{
fprintf(stderr, "failed to create an encoder: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
/* Set the desired bit-rate. You can also set other parameters if needed.
The Opus library is designed to have good defaults, so only set
parameters you know you need. Doing otherwise is likely to result
in worse quality, but better. */
err = opus_encoder_ctl(encoder, OPUS_SET_BITRATE(BITRATE));
if (err<0)
{
fprintf(stderr, "failed to set bitrate: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
inFile = argv[1];
fin = fopen(inFile, "r");
if (fin==NULL)
{
fprintf(stderr, "failed to open file: %s\n", strerror(errno));
return EXIT_FAILURE;
}
/* Create a new decoder state. */
decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
if (err<0)
{
fprintf(stderr, "failed to create decoder: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
outFile = argv[2];
fout = fopen(outFile, "w");
if (fout==NULL)
{
fprintf(stderr, "failed to open file: %s\n", strerror(errno));
return EXIT_FAILURE;
}
while (1)
{
int i;
unsigned char pcm_bytes[MAX_FRAME_SIZE*CHANNELS*2];
int frame_size;
/* Read a 16 bits/sample audio frame. */
fread(pcm_bytes, sizeof(short)*CHANNELS, FRAME_SIZE, fin);
if (feof(fin))
break;
/* Convert from little-endian ordering. */
for (i=0;i<CHANNELS*FRAME_SIZE;i++)
in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
/* Encode the frame. */
nbBytes = opus_encode(encoder, in, FRAME_SIZE, cbits, MAX_PACKET_SIZE);
if (nbBytes<0)
{
fprintf(stderr, "encode failed: %s\n", opus_strerror(nbBytes));
return EXIT_FAILURE;
}
/* Decode the data. In this example, frame_size will be constant because
the encoder is using a constant frame size. However, that may not
be the case for all encoders, so the decoder must always check
the frame size returned. */
frame_size = opus_decode(decoder, cbits, nbBytes, out, MAX_FRAME_SIZE, 0);
if (frame_size<0)
{
fprintf(stderr, "decoder failed: %s\n", opus_strerror(err));
return EXIT_FAILURE;
}
/* Convert to little-endian ordering. */
for(i=0;i<CHANNELS*frame_size;i++)
{
pcm_bytes[2*i]=out[i]&0xFF;
pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
}
/* Write the decoded audio to file. */
fwrite(pcm_bytes, sizeof(short), frame_size*CHANNELS, fout);
}
/*Destroy the encoder state*/
opus_encoder_destroy(encoder);
opus_decoder_destroy(decoder);
fclose(fin);
fclose(fout);
return EXIT_SUCCESS;
}
The header files referenced in opus.h:
#include "opus_types.h"
#include "opus_defines.h"
They are both from source include folder, the same as opus.h. I think your problem will be solved if you copy all .h files (5 including opus.h) from source include folder into include [folder] in your folder structure.
Header files are not included in lib, only cpp files are included. You need to specify header files separately.

Signed 16-bit ALSA PCM data to U8 Conversion on Linux

I'm attempting to convert 16-bit ALSA PCM Samples to Unsigned 8-bit PCM samples for wireless transmission on Linux. The receiving machine is playing the transmitted data successfully and the recorded voice is there and recognizable, but the quality is terrible and noisy. I've tried ALSA mixer on both ends to tune the stream but it doesn't seem to get much better with that. I believe there is something wrong with my conversion of the samples to 8-bit PCM but its just a simple shift so I'm not sure what could be the error. Does anyone have any suggestions or see anything wrong with my conversion code? Thanks.
Conversion Code:
// This byte array needs to be the packet size we wish to send
QByteArray prepareToSend;
prepareToSend.clear();
// Keep reading from ALSA until we fill one full frame
int frames = 1;
while ( prepareToSend.size() < TARGET_TX_BUFFER_SIZE ) {
// Create a ByteArray
QByteArray readBytes;
readBytes.resize(size);
// Read with ALSA
short sample[1]; // Data is signed 16-bit
int rc = snd_pcm_readi(m_PlaybackHandle, sample, frames);
if (rc == -EPIPE) {
/* EPIPE means overrun */
fprintf(stderr, "Overrun occurred\n");
snd_pcm_prepare(m_PlaybackHandle);
} else if (rc < 0) {
fprintf(stderr,
"Error from read: %s\n",
snd_strerror(rc));
} else if (rc != (int)frames) {
fprintf(stderr, "Short read, read %d frames\n", rc);
}
else {
// Copy bytes to the prepare to send buffer
//qDebug() << "Bytes for sample buffer: " << sizeof(sample);
prepareToSend.append((qint16)(sample[0]) >> 8); // signed 16-bit becomes u8
}
}
ALSA Configuration:
// Setup parameters
int size;
snd_pcm_t *m_PlaybackHandle;
snd_pcm_hw_params_t *m_HwParams;
char *buffer;
qDebug() << "Desire to Transmit Data - Setting up ALSA Now....";
// Error handling
int err;
// Device to Write to
const char *snd_device_in = "hw:1,0";
if ((err = snd_pcm_open (&m_PlaybackHandle, snd_device_in, SND_PCM_STREAM_CAPTURE, 0)) < 0) {
fprintf (stderr, "Cannot open audio device %s (%s)\n",
snd_device_in,
snd_strerror (err));
exit (1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&m_HwParams);
if ((err = snd_pcm_hw_params_malloc (&m_HwParams)) < 0) {
fprintf (stderr, "Cannot allocate hardware parameter structure (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_any (m_PlaybackHandle, m_HwParams)) < 0) {
fprintf (stderr, "Cannot initialize hardware parameter structure (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_set_access (m_PlaybackHandle, m_HwParams, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0) {
fprintf (stderr, "Cannot set access type (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_set_format(m_PlaybackHandle, m_HwParams, SND_PCM_FORMAT_S16)) < 0) { // Has to be 16 bit
fprintf (stderr, "Cannot set sample format (%s)\n",
snd_strerror (err));
exit (1);
}
uint sample_rate = 8000;
if ((err = snd_pcm_hw_params_set_rate (m_PlaybackHandle, m_HwParams, sample_rate, 0)) < 0) { // 8 KHz
fprintf (stderr, "Cannot set sample rate (%s)\n",
snd_strerror (err));
exit (1);
}
if ((err = snd_pcm_hw_params_set_channels (m_PlaybackHandle, m_HwParams, 1)) < 0) { // 1 Channel Mono
fprintf (stderr, "Cannot set channel count (%s)\n",
snd_strerror (err));
exit (1);
}
/*
Frames: samples x channels (i.e: stereo frames are composed of two samples, mono frames are composed of 1 sample,...)
Period: Number of samples tranferred after which the device acknowledges the transfer to the apllication (usually via an interrupt).
*/
/* Submit params to device */
if ((err = snd_pcm_hw_params(m_PlaybackHandle, m_HwParams)) < 0) {
fprintf (stderr, "Cannot set parameters (%s)\n",
snd_strerror (err));
exit (1);
}
/* Free the Struct */
snd_pcm_hw_params_free(m_HwParams);
// Flush handle prepare for record
snd_pcm_drop(m_PlaybackHandle);
if ((err = snd_pcm_prepare (m_PlaybackHandle)) < 0) {
fprintf (stderr, "cannot prepare audio interface for use (%s)\n",
snd_strerror (err));
exit (1);
}
qDebug() << "Done Setting up ALSA....";
// Prepare the device
if ((err = snd_pcm_prepare (m_PlaybackHandle)) < 0) {
fprintf (stderr, "cannot prepare audio interface for use (%s)\n",
snd_strerror (err));
exit (1);
}
(qint16)(sample[0]) >> 8 will convert signed linear 16-bit PCM to signed linear 8-bit PCM. If you want unsigned linear 8-bit then it would be ((quint16)sample[0] ^ 0x8000) >> 8.
Although 16-bit PCM is almost always on a linear scale, 8-bit PCM is more commonly on a log scale (either µ-law or A-law), and a lookup table is usually used for conversion. If you really do want linear 8-bit then you may want to first adjust the gain so that the peak is at 0 dBFS and use audio compression to reduce the dynamic range so that it will fit in 8 bits.
If you use plughw:1,0 instead of hw:1,0, you can just tell the device that you want SND_PCM_FORMAT_U8, and the samples will be converted automatically.
(This works also for µ-Law and A-Law.)

Convert raw PCM to FLAC?

EDIT: I've updated the code below to resemble the progress I have made. I'm trying to write the .wav header myself. The code does not work properly as of now, the audio is not being written to the file properly. The code does not contain any attempts to convert it to a .flac file yet.
I am using a Raspberry Pi (Debian Linux) to record audio with the ALSA library. The recording works fine, but I need to encode the input audio into the FLAC codec.
This is where I get lost. I have spent a considerable amount of time trying to figure out how to convert this raw data into FLAC, but I keep coming up with examples of how to convert .wav files into .flac files.
Here is the current (updated) code I have for recording audio with ALSA (it may be a bit rough, I'm still picking up C++):
// Use the newer ALSA API
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct Riff
{
char chunkId[4]; // "RIFF" (assuming char is 8 bits)
int chunkSize; // (assuming int is 32 bits)
char format[4]; // "WAVE"
};
struct Format
{
char chunkId[4]; // "fmt "
int chunkSize;
short format; // assuming short is 16 bits
short numChannels;
int sampleRate;
int byteRate;
short align;
short bitsPerSample;
};
struct Data
{
char chunkId[4]; // "data"
int chunkSize; // length of data
char* data;
};
struct Wave // Actual structure of a PCM WAVE file
{
Riff riffHeader;
Format formatHeader;
Data dataHeader;
};
int main(int argc, char *argv[])
{
void saveWaveFile(struct Wave *waveFile);
long loops;
int rc;
int size;
snd_pcm_t *handle;
snd_pcm_hw_params_t *params;
unsigned int sampleRate = 44100;
int dir;
snd_pcm_uframes_t frames;
char *buffer;
char *device = (char*) "plughw:1,0";
//char *device = (char*) "default";
printf("Capture device is %s\n", device);
/* Open PCM device for recording (capture). */
rc = snd_pcm_open(&handle, device, SND_PCM_STREAM_CAPTURE, 0);
if (rc < 0)
{
fprintf(stderr, "Unable to open PCM device: %s\n", snd_strerror(rc));
exit(1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle, params, 2);
/* 44100 bits/second sampling rate (CD quality) */
snd_pcm_hw_params_set_rate_near(handle, params, &sampleRate, &dir);
/* Set period size to 32 frames. */
frames = 32;
snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0)
{
fprintf(stderr, "Unable to set HW parameters: %s\n", snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
size = frames * 4; /* 2 bytes/sample, 2 channels */
buffer = (char *) malloc(size);
/* We want to loop for 5 seconds */
snd_pcm_hw_params_get_period_time(params, &sampleRate, &dir);
loops = 5000000 / sampleRate;
while (loops > 0)
{
loops--;
rc = snd_pcm_readi(handle, buffer, frames);
if (rc == -EPIPE)
{
/* EPIPE means overrun */
fprintf(stderr, "Overrun occurred.\n");
snd_pcm_prepare(handle);
} else if (rc < 0)
{
fprintf(stderr, "Error from read: %s\n", snd_strerror(rc));
} else if (rc != (int)frames)
{
fprintf(stderr, "Short read, read %d frames.\n", rc);
}
if (rc != size) fprintf(stderr, "Short write: wrote %d bytes.\n", rc);
}
Wave wave;
strcpy(wave.riffHeader.chunkId, "RIFF");
wave.riffHeader.chunkSize = 36 + size;
strcpy(wave.riffHeader.format, "WAVE");
strcpy(wave.formatHeader.chunkId, "fmt");
wave.formatHeader.chunkSize = 16;
wave.formatHeader.format = 1; // PCM, other value indicates compression
wave.formatHeader.numChannels = 2; // Stereo
wave.formatHeader.sampleRate = sampleRate;
wave.formatHeader.byteRate = sampleRate * 2 * 2;
wave.formatHeader.align = 2 * 2;
wave.formatHeader.bitsPerSample = 16;
strcpy(wave.dataHeader.chunkId, "data");
wave.dataHeader.chunkSize = size;
wave.dataHeader.data = buffer;
saveWaveFile(&wave);
snd_pcm_drain(handle);
snd_pcm_close(handle);
free(buffer);
return 0;
}
void saveWaveFile(struct Wave *waveFile)
{
FILE *file = fopen("test.wav", "wb");
size_t written;
if (file == NULL)
{
fprintf(stderr, "Cannot open file for writing.\n");
exit(1);
}
written = fwrite(waveFile, sizeof waveFile[0], 1, file);
fclose(file);
if (written < 1);
{
fprintf(stderr, "Writing to file failed, error %d.\n", written);
exit(1);
}
}
How would I go about converting the PCM data into the FLAC and save it to disk for later use? I have downloaded libflac-dev already and just need an example to go off of.
The way I am doing it right now:
./capture > test.raw // or ./capture > test.flac
The way it should be (program does everything for me):
./capture
If I understand the FLAC::Encoder::File documentation, you can do something like
#include <FLAC++/encoder.h>
FLAC::Encoder::File encoder;
encoder.init("outfile.flac");
encoder.process(buffer, samples);
encoder.finish();
where buffer is an array (of size samples) of 32-bit integer pointers.
Unfortunately, I know next to nothing about audio encoding so I can't speak for any other options. Good luck!
Please refer to the below code :
FLAC Encoder Test Code
This example is using a wav file as an input and then encodes it into FLAC.
As I understand, there is no major difference b/w WAV file and your RAW data, I think you can modify this code to directly read the "buffer" and convert it. You already have all the related information (Channel/Bitrate etc) so it should not be much of a problem to remove the WAV header reading code.
Please note: this is a modified version of the Flac Encoder sample from their git repo.
It includes some comments and hints on how to change it to OP's requirements, entire source for this will be a little bit long.
And do note that this is the C API, which tends to be a bit more complex than the C++ one. But it is fairly easy to convert between the two once you get the idea.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "share/compat.h"
#include "FLAC/metadata.h"
#include "FLAC/stream_encoder.h"
/* this call back is what tells your program the progress that the encoder has made */
static void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data);
#define READSIZE 1024
static unsigned total_samples = 0; /* can use a 32-bit number due to WAVE size limitations */
/* buffer is where we record to, in your case what ALSA writes to */
/* Note the calculation here to take the total bytes that the buffer takes */
static FLAC__byte buffer[READSIZE/*samples*/ * 2/*bytes_per_sample*/ * 2/*channels*/];
/* pcm is input to FLAC encoder */
/* the PCM data should be here, bps is 4 here...but we are allocating ints! */
static FLAC__int32 pcm[READSIZE/*samples*/ * 2/*channels*/];
int main(int argc, char *argv[])
{
FLAC__bool ok = true;
FLAC__StreamEncoder *encoder = 0;
FLAC__StreamEncoderInitStatus init_status;
FLAC__StreamMetadata *metadata[2];
FLAC__StreamMetadata_VorbisComment_Entry entry;
FILE *fin;
unsigned sample_rate = 0;
unsigned channels = 0;
unsigned bps = 0;
if((fin = fopen(argv[1], "rb")) == NULL) {
fprintf(stderr, "ERROR: opening %s for output\n", argv[1]);
return 1;
}
/* set sample rate, bps, total samples to encode here, these are dummy values */
sample_rate = 44100;
channels = 2;
bps = 16;
total_samples = 5000;
/* allocate the encoder */
if((encoder = FLAC__stream_encoder_new()) == NULL) {
fprintf(stderr, "ERROR: allocating encoder\n");
fclose(fin);
return 1;
}
ok &= FLAC__stream_encoder_set_verify(encoder, true);
ok &= FLAC__stream_encoder_set_compression_level(encoder, 5);
ok &= FLAC__stream_encoder_set_channels(encoder, channels);
ok &= FLAC__stream_encoder_set_bits_per_sample(encoder, bps);
ok &= FLAC__stream_encoder_set_sample_rate(encoder, sample_rate);
ok &= FLAC__stream_encoder_set_total_samples_estimate(encoder, total_samples);
/* sample adds meta data here I've removed it for clarity */
/* initialize encoder */
if(ok) {
/* client data is whats the progress_callback is called with, any objects you need to update on callback can be passed thru this pointer */
init_status = FLAC__stream_encoder_init_file(encoder, argv[2], progress_callback, /*client_data=*/NULL);
if(init_status != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
fprintf(stderr, "ERROR: initializing encoder: %s\n", FLAC__StreamEncoderInitStatusString[init_status]);
ok = false;
}
}
/* read blocks of samples from WAVE file and feed to encoder */
if(ok) {
size_t left = (size_t)total_samples;
while(ok && left) {
/* record using ALSA and set SAMPLES_IN_BUFFER */
/* convert the packed little-endian 16-bit PCM samples from WAVE into an interleaved FLAC__int32 buffer for libFLAC */
/* why? because bps=2 means that we are dealing with short int(16 bit) samples these are usually signed if you do not explicitly say that they are unsigned */
size_t i;
for(i = 0; i < SAMPLES_IN_BUFFER*channels; i++) {
/* THIS. this isn't the only way to convert between formats, I do not condone this because at first the glance the code seems like it's processing two channels here, but it's not it's just copying 16bit data to an int array, I prefer to use proper type casting, none the less this works so... */
pcm[i] = (FLAC__int32)(((FLAC__int16)(FLAC__int8)buffer[2*i+1] << 8) | (FLAC__int16)buffer[2*i]);
}
/* feed samples to encoder */
ok = FLAC__stream_encoder_process_interleaved(encoder, pcm, SAMPLES_IN_BUFFER);
left-=SAMPLES_IN_BUFFER;
}
}
ok &= FLAC__stream_encoder_finish(encoder);
fprintf(stderr, "encoding: %s\n", ok? "succeeded" : "FAILED");
fprintf(stderr, " state: %s\n", FLAC__StreamEncoderStateString[FLAC__stream_encoder_get_state(encoder)]);
FLAC__stream_encoder_delete(encoder);
fclose(fin);
return 0;
}
/* the updates from FLAC's encoder system comes here */
void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data)
{
(void)encoder, (void)client_data;
fprintf(stderr, "wrote %" PRIu64 " bytes, %" PRIu64 "/%u samples, %u/%u frames\n", bytes_written, samples_written, total_samples, frames_written, total_frames_estimate);
}