nvJPEG: encode packed BGR - c++

Well, my goal is simple -- trying to create a JPEG encoded image from buffer with packed/interleaved BGR data (could be RGB as well).
The NVidia docs contain an example, the proper image input is essentially described here.
So I tried the following:
#include <nvjpeg.h>
// very simple
typedef struct {
int width;
int height;
unsigned char *buffer;
unsigned long data_size;
} my_bitmap_type;
std::vector<unsigned char> BitmapToJpegCUDA(const my_bitmap_type *image)
{
nvjpegHandle_t nv_handle;
nvjpegEncoderState_t nv_enc_state;
nvjpegEncoderParams_t nv_enc_params;
cudaStream_t stream = NULL;
nvjpegStatus_t er;
nvjpegCreateSimple(&nv_handle);
nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream);
nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream);
nvjpegImage_t nv_image;
nv_image.channel[0] = image->buffer;
nv_image.pitch[0] = 3 * image->width;
// Nope, that's for planar images!
// nv_image.channel[0] = image->buffer;
// nv_image.channel[1] = image->buffer + image->width * image->height;
// nv_image.channel[2] = image->buffer + 2 * image->width * image->height;
// nv_image.pitch[0] = image->width;
// nv_image.pitch[1] = image->width;
// nv_image.pitch[2] = image->width;
er = nvjpegEncodeImage(nv_handle, nv_enc_state, nv_enc_params, &nv_image,
NVJPEG_INPUT_BGRI, image->width, image->height, stream);
LOG(ERROR) << "enc " << er;
size_t length = 0;
nvjpegEncodeRetrieveBitstream(nv_handle, nv_enc_state, NULL, &length, stream);
cudaStreamSynchronize(stream);
std::vector<unsigned char> jpeg(length);
nvjpegEncodeRetrieveBitstream(nv_handle, nv_enc_state, jpeg.data(), &length, 0);
nvjpegEncoderParamsDestroy(nv_enc_params);
nvjpegEncoderStateDestroy(nv_enc_state);
nvjpegDestroy(nv_handle);
return jpeg;
}
The logger says that nvjpegEncodeImage just returns NVJPEG_STATUS_INVALID_PARAMETER, meaning nothing works. In case you suspect my_bitmap_type to be filled wrong, here's the similar turbojpeg-powered encoding:
#include <turbojpeg.h>
std::vector<unsigned char> BitmapToJpegBuffer(const my_bitmap_type *image)
{
std::vector<unsigned char> out_data(3 * image->width * image->height);
cudaError_t err = cudaMemcpy(out_data.data(), image->buffer, image->data_size, cudaMemcpyDeviceToHost);
if (cudaSuccess != err) {
LOG(ERROR) << "failed to copy CUDA memory: " << err;
}
tjhandle jpeg = tjInitCompress();
unsigned char *encoded_buf = nullptr;
long unsigned int encoded_sz = 0;
int tjres = tjCompress2(jpeg,
out_data.data(),
image->width,
image->width * 3,
image->height,
TJPF_BGR,
&encoded_buf,
&encoded_sz,
TJSAMP_444,
95,
TJFLAG_FASTDCT);
if (tjres != 0) {
LOG(ERROR) << "jpeg compession failed!";
return {};
}
std::vector<unsigned char> result(encoded_buf, encoded_buf + encoded_sz);
tjFree(encoded_buf);
tjDestroy(jpeg);
return result;
}
... aaand it works pretty fine.
I'm desperate trying to figure out, what's missing in the code. Would gratefully appreciate any help or advice.
UPD:
Using CentOS 7 / libnvjpeg-11-1.x86_64 (CUDA 11.1) / gcc 4.8.5

Okaaay, that's strange a bit, but after some time spent on trial and error it occured that NVidia docs lack an essential detail:
nvjpegCreateSimple(&nv_handle);
nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream);
nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream);
// This has to be done, default params are not sufficient
nvjpegEncoderParamsSetSamplingFactors(nv_enc_params, NVJPEG_CSS_444, stream);
Although the docs clearly state that the default subsampling for JPEG compression is 4:4:4, encoding's not working with the default encoder params, subsampling has to be explicitly set.
So, that one line of code fixes everything.

Related

Opus, can't encode / decode all data

I'm working using Opus to encode and decode some Portaudio recorded data, so i'm trying to encode a record of around 10 seconds, i've wrote the registered data to a log file and i get around 480000 lines which represent the NUM_SECONDS * SAMPLE_rate, so the time * the frequency.
Problem is when i try to use opus with it, i can encode and decode, but the size of the data returned is not the same at all, it is the FRAMES_PER_BUFFER size, and the data itself does not correspond to anything from the previous data. Here is my code :
my encode and decode methods :
typedef struct {
int size;
std::vector<unsigned char> *sound;
} AudioEnc;
typedef struct {
int size;
std::vector<float> sound;
} AudioData;
#define SAMPLE_RATE (48000)
#define FRAMES_PER_BUFFER (1920)
#define NUM_SECONDS (10)
#define NUM_CHANNELS (1)
AudioEnc &Opus::Encode(AudioData &data)
{
AudioEnc enc;
enc.sound = new std::vector<unsigned char>;
const unsigned int max_size = NUM_SECONDS * SAMPLE_RATE * sizeof(float);
float sound[max_size];
enc.sound->resize(NUM_SECONDS * SAMPLE_RATE);
if (data.size == 0)
enc.sound->resize(opus_encode_float(_encode, sound, FRAMES_PER_BUFFER, enc.sound->data(), max_size));
else
enc.sound->resize(opus_encode_float(_encode, data.sound.data(), FRAMES_PER_BUFFER, enc.sound->data(), max_size));
return (enc);
}
AudioData Opus::Decode(AudioEnc &data)
{
AudioData dec;
dec.sound.resize(NUM_SECONDS * SAMPLE_RATE);
dec.sound.resize(opus_decode_float(_decode, data.sound->data(), data.sound->size(), dec.sound.data(), FRAMES_PER_BUFFER, 0));
return (dec);
}
Record data is to big for pastebin, do you guys have any idea on what i'm missing ?

FFmpeg - resampled audio with much noise

I'm not familiar with auido resampling. I tried to resample auido streams from two videos. The first one's output was close to the original but with noise, the other one was almost full of noise.
Information for the first one
128 kb/s, 48.0kHz, 2 channels, AACLC
Information for the second one
384 kb/s, 48.0 kHz, 6channels, AACLC
I found that, when I set the sample size 16, the frist one worked quit good but still with noise. The other one worked too bad but still had sound. What and how to determine the output sample size? Although I used channels * av_get_bytes_per_sample((AVSampleFormat)output_fmt) as the output sample size because I wanted it to be the same as the original, it had no sound at all.
MyResampling.cpp
bool MyResample::open(AVCodecParameters* par) {
if (!par) {
std::cout << "par is null" << std::endl;
return false;
}
audio_context = swr_alloc_set_opts(
audio_context, av_get_default_channel_layout(2), (AVSampleFormat)output_fmt,
par->sample_rate, av_get_default_channel_layout(par->channels), (AVSampleFormat)par->format, par->sample_rate,
0, 0);
avcodec_parameters_free(&par);
int ret = swr_init(audio_context);
if (ret != 0) {
std::cout << "failed to open audio codec" << std::endl;
}
return true;
}
int MyResample::resample(AVFrame* frame, unsigned char* output)
{
if (!frame)
return 0;
if (!output)
av_frame_free(&frame);
uint8_t* data[2] = { 0 };
data[0] = output;
int ret = swr_convert(audio_context, data, frame->nb_samples, (const uint8_t**)frame->data, frame->nb_samples);
//int size = ret * frame->channels * av_get_bytes_per_sample((AVSampleFormat)output_fmt);
int size = av_samples_get_buffer_size(nullptr, frame->channels, frame->nb_samples, (AVSampleFormat)output_fmt, 1);
if (ret < 0)
return ret;
return size;
}
MyAudioPlayer.cpp
bool open()
{
close();
QAudioFormat fmt;
fmt.setSampleRate(sample_rate); // from audioStream->codecpar->sample_rate
fmt.setSampleSize(16); //
fmt.setChannelCount(channels); // from audioStream->codecpar->channels
fmt.setCodec("audio/pcm");
fmt.setByteOrder(QAudioFormat::LittleEndian);
fmt.setSampleType(QAudioFormat::UnSignedInt);
output = new QAudioOutput(fmt);
io = output->start();
if (io)
return true;
return false;
}
bool write(const unsigned char* data, int data_size)
{
if (!data || data_size <= 0)
return false;
if (!output || !io)
{
return false;
}
int size = io->write((char*)data, data_size);
if (data_size != size)
return false;
return true;
}
main.cpp
MyAudioPlayer::open();
unsigned char* pcm = new unsigned char[1024 * 1024];
if (demux.get_media_type() == 1) { // audio
audio_decode.sendPacket(pkt);
AVFrame* frame = audio_decode.receiveFrame();
int len = resample.resample(frame, pcm);
while (len > 0) {
if (MyAudioPlayer::check_space() >= len) {
MyAudioPlayer::write(pcm, len);
break;
}
msleep(1);
}
}
If you have troubles with the final quality and noise probably you are misunderstanding the proper way to perform a resampling or there is a bug in your configuration.
Take a look into this example: libswresample-example.
I am not familiar with the FFmpeg API because to do resampling I tend to use libsamplerate.
Regarding old example, those are the steps to perform a basic resample with FFMPEG:
Start by configuring your resampling context:
//Set up resampling context
SwrContext *swr = swr_alloc();
av_opt_set_channel_layout(swr, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
av_opt_set_int(swr, "in_sample_rate", 44100, 0);
av_opt_set_int(swr, "out_sample_rate", 22050, 0);
av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
swr_init(swr);
Depending on your input data types and the format you expect as an output, you will need to specify the right format. This is the equivalence in C++ standard:
----------------------------------------
| *AV_SAMPLE_FMT_S16* | `std::int16_t` |
| *AV_SAMPLE_FMT_S32* | `std::int32_t` |
| *AV_SAMPLE_FMT_FLT* | `float` |
| *AV_SAMPLE_FMT_DBL | `double` |
| *AV_SAMPLE_FMT_U8P* | `std::uint8_t` |
| ... | |
Get your data from whatever place in the right format and estimate your sampling count.
After that, you can perform the resampling in few steps:
Estimate the number of output samples
uint8_t* out_samples;
int out_num_samples = av_rescale_rnd(swr_get_delay(swr, in_samplerate) + in_num_samples, out_samplerate, in_samplerate, AV_ROUND_UP);
Allocate the memory for the output file
av_samples_alloc(&out_samples, NULL, out_num_channels, out_num_samples, AV_SAMPLE_FMT_FLT, 0);
Convert the input data into the expected output format
out_num_samples = swr_convert(swr, &out_samples, out_num_samples, &in_samples, in_num_samples);
Do not forget to free your memory
av_freep(&out_samples);
swr_free(&swr);
If you have noise, probably the input formats and output formats are not the proper one or the resampling quality is low.
For instance, do not panic if you get fewer samples than what you expected. It is the common behavior because of the way the filtering works. To get the remaining trailing you can perform the step 5 with NULL as input, which will flush the internal data.

RtAudio - Playing samples from wav file

I am currently trying to learn audio programming. My goal is to open a wav file, extract everything and play the samples with RtAudio.
I made a WaveLoader class which let's me extract the samples and meta data. I used this guide to do that and I checked that everything is correct with 010 editor. Here is a snapshot of 010 editor showing the structure and data.
And this is how i store the raw samples inside WaveLoader class:
data = new short[wave_data.payloadSize]; // - Allocates memory size of chunk size
if (!fread(data, 1, wave_data.payloadSize, sound_file))
{
throw ("Could not read wav data");
}
If i print out each sample I get : 1, -3, 4, -5 ... which seems ok.
The problem is that I am not sure how I can play them. This is what I've done:
/*
* Using PortAudio to play samples
*/
bool Player::Play()
{
ShowDevices();
rt.showWarnings(true);
RtAudio::StreamParameters oParameters; //, iParameters;
oParameters.deviceId = rt.getDefaultOutputDevice();
oParameters.firstChannel = 0;
oParameters.nChannels = mAudio.channels;
//iParameters.deviceId = rt.getDefaultInputDevice();
//iParameters.nChannels = 2;
unsigned int sampleRate = mAudio.sampleRate;
// Use a buffer of 512, we need to feed callback with 512 bytes everytime!
unsigned int nBufferFrames = 512;
RtAudio::StreamOptions options;
options.flags = RTAUDIO_SCHEDULE_REALTIME;
options.flags = RTAUDIO_NONINTERLEAVED;
//&parameters, NULL, RTAUDIO_FLOAT64,sampleRate, &bufferFrames, &mCallback, (void *)&rawData
try {
rt.openStream(&oParameters, NULL, RTAUDIO_SINT16, sampleRate, &nBufferFrames, &mCallback, (void*) &mAudio);
rt.startStream();
}
catch (RtAudioError& e) {
std::cout << e.getMessage() << std::endl;
return false;
}
return true;
}
/*
* RtAudio Callback
*
*/
int mCallback(void * outputBuffer, void * inputBuffer, unsigned int nBufferFrames, double streamTime, RtAudioStreamStatus status, void * userData)
{
unsigned int i = 0;
short *out = static_cast<short*>(outputBuffer);
auto *data = static_cast<Player::AUDIO_DATA*>(userData);
// if i is more than our data size, we are done!
if (i > data->dataSize) return 1;
// First time callback is called data->ptr is 0, this means that the offset is 0
// Second time data->ptr is 1, this means offset = nBufferFrames (512) * 1 = 512
unsigned int offset = nBufferFrames * data->ptr++;
printf("Offset: %i\n", offset);
// First time callback is called offset is 0, we are starting from 0 and looping nBufferFrames (512) times, this gives us 512 bytes
// Second time, the offset is 1, we are starting from 512 bytes and looping to 512 + 512 = 1024
for (i = offset; i < offset + nBufferFrames; ++i)
{
short sample = data->rawData[i]; // Get raw sample from our struct
*out++ = sample; // Pass to output buffer for playback
printf("Current sample value: %i\n", sample); // this is showing 1, -3, 4, -5 check 010 editor
}
printf("Current time: %f\n", streamTime);
return 0;
}
Inside callback function, when I print out sample values I get exactly like 010 editor? Why isnt rtaudio playing them. What is wrong here? Do I need to normalize sample values to between -1 and 1?
Edit:
The wav file I am trying to play:
Chunksize: 16
Format: 1
Channel: 1
SampleRate: 48000
ByteRate: 96000
BlockAlign: 2
BitPerSample: 16
Size of raw samples total: 2217044 bytes
For some reason it works when I pass input parameters to the openStream()
RtAudio::StreamParameters oParameters, iParameters;
oParameters.deviceId = rt.getDefaultOutputDevice();
oParameters.firstChannel = 0;
//oParameters.nChannels = mAudio.channels;
oParameters.nChannels = mAudio.channels;
iParameters.deviceId = rt.getDefaultInputDevice();
iParameters.nChannels = 1;
unsigned int sampleRate = mAudio.sampleRate;
// Use a buffer of 512, we need to feed callback with 512 bytes everytime!
unsigned int nBufferFrames = 512;
RtAudio::StreamOptions options;
options.flags = RTAUDIO_SCHEDULE_REALTIME;
options.flags = RTAUDIO_NONINTERLEAVED;
//&parameters, NULL, RTAUDIO_FLOAT64,sampleRate, &bufferFrames, &mCallback, (void *)&rawData
try {
rt.openStream(&oParameters, &iParameters, RTAUDIO_SINT16, sampleRate, &nBufferFrames, &mCallback, (void*) &mAudio);
rt.startStream();
}
catch (RtAudioError& e) {
std::cout << e.getMessage() << std::endl;
return false;
}
return true;
It was so random when I was trying to playback my mic. I left input parameters and my wav file was suddenly playing. Is this is a bug?

Converting a short[] from SoundTouch audio library for playback

I'm attempting to use the SoundTouch C++ library for audio speed and pitch changes in an Android app. I have successfully pushed a Java byte[] array (from a .wav) through JNI, returned it, and played it back with an AudioTrack.
The next step is attempting to push a sample byte[] through the SoundTouch pipeline. I have dissected the source of the SoundStretch console program included with the library and have attempted to adapt it. I am using a stereo, 16-bit source for testing purposes.
With my current temporary setup I am ignoring the RIFF header and converting it along with the .wav data because the Java AudioTrack object does not need to read the header, it just plays raw PCM. Playing the raw byte[] without sending through SoundTouch just results in a small click where the header is.
After sending through the SoundTouch pipeline, I am playing back white noise where the beginning of the audio is supposed to be. I assume I am having a problem at the end of my write() function, where I am casting short's to signed chars. Here, the console app is writing to a file, instead of pushing to a vector:
int res = (int)fwrite(temp, 1, numBytes, fptr);
I have read the documentation for fwrite but I don't know enough about bit twiddling or audio processing to know what to do here to correctly get this information in a char[] instead of writing to a file. I know I am loosing information with the cast, but I am unsure of how to correct it.
In case anyone is extra motivated, the SoundStretch source can be found here: http://www.surina.net/soundtouch/sourcecode.html
extern "C" DLL_PUBLIC jbyteArray
Java_net_surina_soundtouch_SoundTouch_getMutatedBytes
(JNIEnv *env, jobject thiz, jbyteArray input, jint length)
{
const int BUFF_SIZE = 2048000;
SoundTouch soundTouch;
jboolean isCopy;
jbyte* ar = env->GetByteArrayElements(input, &isCopy);
signed char* cBufferIn = (signed char*)ar;
SAMPLETYPE* fBufferIn = new SAMPLETYPE[length];
vector<signed char> fBufferOut;
//converts the chars to floats per the SoundTouch console app.
convertInput16(cBufferIn, fBufferIn, length);
//channels, sampling rate, speed, pitch change
setup(&soundTouch, 2, 44100, 1.0, 0);
//transform floats from fBufferIn to fBufferout
process(&soundTouch, fBufferIn, fBufferOut, BUFF_SIZE);
signed char* res = &fBufferOut[0];
jbyteArray result = env->NewByteArray(length);
env->SetByteArrayRegion(result, 0, fBufferOut.size(), res);
LOGV("fBufferOut Size: %d", fBufferOut.size());
delete[] fBufferIn;
return result;
}
process():
static void process(SoundTouch* soundTouch, SAMPLETYPE* fBufferIn, vector<signed char>& fBufferOut, int BUFF_SIZE)
{
int nSamples = BUFF_SIZE / 2; //2 bytes per sample, using 16 bit sample for testing
int buffSizeSamples = BUFF_SIZE / 2; //2 channel stereo
soundTouch->putSamples(fBufferIn, nSamples);
do
{
nSamples = soundTouch->receiveSamples(fBufferIn, buffSizeSamples);
write(fBufferIn, fBufferOut, nSamples / 2); //2 channels
} while (nSamples != 0);
soundTouch->flush();
do
{
nSamples = soundTouch->receiveSamples(fBufferIn, buffSizeSamples);
write(fBufferIn, fBufferOut, nSamples / 2);
LOGV("NUMBER OF SAMPLES: %d", nSamples);
} while (nSamples != 0);
}
write():
static void write(const float *bufferIn, vector<signed char>& bufferOut, int numElems)
{
int numBytes;
int bytesPerSample;
if (numElems == 0) return;
bytesPerSample = 16 / 8; //16 bit test sample / bits in a byte
numBytes = numElems * bytesPerSample;
short *temp = (short*)getConvBuffer(numBytes);
switch (bytesPerSample)
{
case 2: //16 bit encoding per the SoundStretch console app
{
short *temp2 = (short *)temp;
for (int i = 0; i < numElems; i++)
{
short value = (short)saturate(bufferIn[i] * 32768.0f, -32768.0f, 32767.0f); //magic to me
temp2[i] = value; //works for little endian only.
}
break;
}
default:
assert(false);
}
for (int i = 0; i < numElems; ++i)
{
bufferOut.push_back((signed char)temp[i]); //I think my problem is here.
}
delete[] temp;
//bytesWritten += numBytes;
}
I just needed to get all the bits in char[]:
for (int i = 0; i < numElems; ++i)
{
bufferOut.push_back(temp[i] & 0xff);
bufferOut.push_back((temp[i] >> 8) & 0xff);
}

Convert raw PCM to FLAC?

EDIT: I've updated the code below to resemble the progress I have made. I'm trying to write the .wav header myself. The code does not work properly as of now, the audio is not being written to the file properly. The code does not contain any attempts to convert it to a .flac file yet.
I am using a Raspberry Pi (Debian Linux) to record audio with the ALSA library. The recording works fine, but I need to encode the input audio into the FLAC codec.
This is where I get lost. I have spent a considerable amount of time trying to figure out how to convert this raw data into FLAC, but I keep coming up with examples of how to convert .wav files into .flac files.
Here is the current (updated) code I have for recording audio with ALSA (it may be a bit rough, I'm still picking up C++):
// Use the newer ALSA API
#define ALSA_PCM_NEW_HW_PARAMS_API
#include <alsa/asoundlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct Riff
{
char chunkId[4]; // "RIFF" (assuming char is 8 bits)
int chunkSize; // (assuming int is 32 bits)
char format[4]; // "WAVE"
};
struct Format
{
char chunkId[4]; // "fmt "
int chunkSize;
short format; // assuming short is 16 bits
short numChannels;
int sampleRate;
int byteRate;
short align;
short bitsPerSample;
};
struct Data
{
char chunkId[4]; // "data"
int chunkSize; // length of data
char* data;
};
struct Wave // Actual structure of a PCM WAVE file
{
Riff riffHeader;
Format formatHeader;
Data dataHeader;
};
int main(int argc, char *argv[])
{
void saveWaveFile(struct Wave *waveFile);
long loops;
int rc;
int size;
snd_pcm_t *handle;
snd_pcm_hw_params_t *params;
unsigned int sampleRate = 44100;
int dir;
snd_pcm_uframes_t frames;
char *buffer;
char *device = (char*) "plughw:1,0";
//char *device = (char*) "default";
printf("Capture device is %s\n", device);
/* Open PCM device for recording (capture). */
rc = snd_pcm_open(&handle, device, SND_PCM_STREAM_CAPTURE, 0);
if (rc < 0)
{
fprintf(stderr, "Unable to open PCM device: %s\n", snd_strerror(rc));
exit(1);
}
/* Allocate a hardware parameters object. */
snd_pcm_hw_params_alloca(&params);
/* Fill it in with default values. */
snd_pcm_hw_params_any(handle, params);
/* Set the desired hardware parameters. */
/* Interleaved mode */
snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
/* Signed 16-bit little-endian format */
snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
/* Two channels (stereo) */
snd_pcm_hw_params_set_channels(handle, params, 2);
/* 44100 bits/second sampling rate (CD quality) */
snd_pcm_hw_params_set_rate_near(handle, params, &sampleRate, &dir);
/* Set period size to 32 frames. */
frames = 32;
snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);
/* Write the parameters to the driver */
rc = snd_pcm_hw_params(handle, params);
if (rc < 0)
{
fprintf(stderr, "Unable to set HW parameters: %s\n", snd_strerror(rc));
exit(1);
}
/* Use a buffer large enough to hold one period */
snd_pcm_hw_params_get_period_size(params, &frames, &dir);
size = frames * 4; /* 2 bytes/sample, 2 channels */
buffer = (char *) malloc(size);
/* We want to loop for 5 seconds */
snd_pcm_hw_params_get_period_time(params, &sampleRate, &dir);
loops = 5000000 / sampleRate;
while (loops > 0)
{
loops--;
rc = snd_pcm_readi(handle, buffer, frames);
if (rc == -EPIPE)
{
/* EPIPE means overrun */
fprintf(stderr, "Overrun occurred.\n");
snd_pcm_prepare(handle);
} else if (rc < 0)
{
fprintf(stderr, "Error from read: %s\n", snd_strerror(rc));
} else if (rc != (int)frames)
{
fprintf(stderr, "Short read, read %d frames.\n", rc);
}
if (rc != size) fprintf(stderr, "Short write: wrote %d bytes.\n", rc);
}
Wave wave;
strcpy(wave.riffHeader.chunkId, "RIFF");
wave.riffHeader.chunkSize = 36 + size;
strcpy(wave.riffHeader.format, "WAVE");
strcpy(wave.formatHeader.chunkId, "fmt");
wave.formatHeader.chunkSize = 16;
wave.formatHeader.format = 1; // PCM, other value indicates compression
wave.formatHeader.numChannels = 2; // Stereo
wave.formatHeader.sampleRate = sampleRate;
wave.formatHeader.byteRate = sampleRate * 2 * 2;
wave.formatHeader.align = 2 * 2;
wave.formatHeader.bitsPerSample = 16;
strcpy(wave.dataHeader.chunkId, "data");
wave.dataHeader.chunkSize = size;
wave.dataHeader.data = buffer;
saveWaveFile(&wave);
snd_pcm_drain(handle);
snd_pcm_close(handle);
free(buffer);
return 0;
}
void saveWaveFile(struct Wave *waveFile)
{
FILE *file = fopen("test.wav", "wb");
size_t written;
if (file == NULL)
{
fprintf(stderr, "Cannot open file for writing.\n");
exit(1);
}
written = fwrite(waveFile, sizeof waveFile[0], 1, file);
fclose(file);
if (written < 1);
{
fprintf(stderr, "Writing to file failed, error %d.\n", written);
exit(1);
}
}
How would I go about converting the PCM data into the FLAC and save it to disk for later use? I have downloaded libflac-dev already and just need an example to go off of.
The way I am doing it right now:
./capture > test.raw // or ./capture > test.flac
The way it should be (program does everything for me):
./capture
If I understand the FLAC::Encoder::File documentation, you can do something like
#include <FLAC++/encoder.h>
FLAC::Encoder::File encoder;
encoder.init("outfile.flac");
encoder.process(buffer, samples);
encoder.finish();
where buffer is an array (of size samples) of 32-bit integer pointers.
Unfortunately, I know next to nothing about audio encoding so I can't speak for any other options. Good luck!
Please refer to the below code :
FLAC Encoder Test Code
This example is using a wav file as an input and then encodes it into FLAC.
As I understand, there is no major difference b/w WAV file and your RAW data, I think you can modify this code to directly read the "buffer" and convert it. You already have all the related information (Channel/Bitrate etc) so it should not be much of a problem to remove the WAV header reading code.
Please note: this is a modified version of the Flac Encoder sample from their git repo.
It includes some comments and hints on how to change it to OP's requirements, entire source for this will be a little bit long.
And do note that this is the C API, which tends to be a bit more complex than the C++ one. But it is fairly easy to convert between the two once you get the idea.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "share/compat.h"
#include "FLAC/metadata.h"
#include "FLAC/stream_encoder.h"
/* this call back is what tells your program the progress that the encoder has made */
static void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data);
#define READSIZE 1024
static unsigned total_samples = 0; /* can use a 32-bit number due to WAVE size limitations */
/* buffer is where we record to, in your case what ALSA writes to */
/* Note the calculation here to take the total bytes that the buffer takes */
static FLAC__byte buffer[READSIZE/*samples*/ * 2/*bytes_per_sample*/ * 2/*channels*/];
/* pcm is input to FLAC encoder */
/* the PCM data should be here, bps is 4 here...but we are allocating ints! */
static FLAC__int32 pcm[READSIZE/*samples*/ * 2/*channels*/];
int main(int argc, char *argv[])
{
FLAC__bool ok = true;
FLAC__StreamEncoder *encoder = 0;
FLAC__StreamEncoderInitStatus init_status;
FLAC__StreamMetadata *metadata[2];
FLAC__StreamMetadata_VorbisComment_Entry entry;
FILE *fin;
unsigned sample_rate = 0;
unsigned channels = 0;
unsigned bps = 0;
if((fin = fopen(argv[1], "rb")) == NULL) {
fprintf(stderr, "ERROR: opening %s for output\n", argv[1]);
return 1;
}
/* set sample rate, bps, total samples to encode here, these are dummy values */
sample_rate = 44100;
channels = 2;
bps = 16;
total_samples = 5000;
/* allocate the encoder */
if((encoder = FLAC__stream_encoder_new()) == NULL) {
fprintf(stderr, "ERROR: allocating encoder\n");
fclose(fin);
return 1;
}
ok &= FLAC__stream_encoder_set_verify(encoder, true);
ok &= FLAC__stream_encoder_set_compression_level(encoder, 5);
ok &= FLAC__stream_encoder_set_channels(encoder, channels);
ok &= FLAC__stream_encoder_set_bits_per_sample(encoder, bps);
ok &= FLAC__stream_encoder_set_sample_rate(encoder, sample_rate);
ok &= FLAC__stream_encoder_set_total_samples_estimate(encoder, total_samples);
/* sample adds meta data here I've removed it for clarity */
/* initialize encoder */
if(ok) {
/* client data is whats the progress_callback is called with, any objects you need to update on callback can be passed thru this pointer */
init_status = FLAC__stream_encoder_init_file(encoder, argv[2], progress_callback, /*client_data=*/NULL);
if(init_status != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
fprintf(stderr, "ERROR: initializing encoder: %s\n", FLAC__StreamEncoderInitStatusString[init_status]);
ok = false;
}
}
/* read blocks of samples from WAVE file and feed to encoder */
if(ok) {
size_t left = (size_t)total_samples;
while(ok && left) {
/* record using ALSA and set SAMPLES_IN_BUFFER */
/* convert the packed little-endian 16-bit PCM samples from WAVE into an interleaved FLAC__int32 buffer for libFLAC */
/* why? because bps=2 means that we are dealing with short int(16 bit) samples these are usually signed if you do not explicitly say that they are unsigned */
size_t i;
for(i = 0; i < SAMPLES_IN_BUFFER*channels; i++) {
/* THIS. this isn't the only way to convert between formats, I do not condone this because at first the glance the code seems like it's processing two channels here, but it's not it's just copying 16bit data to an int array, I prefer to use proper type casting, none the less this works so... */
pcm[i] = (FLAC__int32)(((FLAC__int16)(FLAC__int8)buffer[2*i+1] << 8) | (FLAC__int16)buffer[2*i]);
}
/* feed samples to encoder */
ok = FLAC__stream_encoder_process_interleaved(encoder, pcm, SAMPLES_IN_BUFFER);
left-=SAMPLES_IN_BUFFER;
}
}
ok &= FLAC__stream_encoder_finish(encoder);
fprintf(stderr, "encoding: %s\n", ok? "succeeded" : "FAILED");
fprintf(stderr, " state: %s\n", FLAC__StreamEncoderStateString[FLAC__stream_encoder_get_state(encoder)]);
FLAC__stream_encoder_delete(encoder);
fclose(fin);
return 0;
}
/* the updates from FLAC's encoder system comes here */
void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data)
{
(void)encoder, (void)client_data;
fprintf(stderr, "wrote %" PRIu64 " bytes, %" PRIu64 "/%u samples, %u/%u frames\n", bytes_written, samples_written, total_samples, frames_written, total_frames_estimate);
}