How to encode and decode audio data with opus?

How to encode and decode audio data with opus? - c++

I'm working on a voice chat and I need to compress my audio data. I record and play the audio data via the Qt Framework. If I record and play the audio data without compressing it everything is fine. If I compress,decompress and play the audio data I just hear a cracking sound.
Edit: I had a look at the demo code and I tried to use that code.
I can hear something, but it is very laggy. If I increase the size of pcm_bytes to e.g. 40000 it sounds better, but my voice still lags and there are still crackling sounds.
This is the line (audioinput.cpp at the bottom):
speaker->write((const char*)pcm_bytes,3840);
codecopus.cpp:
#include "codecopus.h"
/// Constructs a codec with no encoder or decoder yet.
/// Call initEncoder() / initDecoder() before encoding or decoding.
CodecOpus::CodecOpus()
{
    // The destructor hands both handles to the opus destroy functions, so they
    // must not hold indeterminate values if the init functions are never called.
    decoderState = nullptr;
    encoderState = nullptr;
}
/// Creates the Opus decoder state.
/// @param samplingRate  sampling rate passed to opus_decoder_create
/// @param channels      channel count passed to opus_decoder_create
/// On failure decoderState is set to nullptr and the error is reported on stderr.
void CodecOpus::initDecoder(opus_int32 samplingRate, int channels) //decoder
{
    int error = OPUS_OK;
    decoderState = opus_decoder_create(samplingRate, channels, &error);
    if (error == OPUS_OK && decoderState != nullptr) {
        std::cout << "Created Opus Decoder struct" << std::endl;
    } else {
        // Do not keep a handle that cannot be used.
        decoderState = nullptr;
        std::cerr << "Failed to create Opus Decoder: " << opus_strerror(error) << std::endl;
    }
}
/// Creates the Opus encoder state (VoIP application) and sets a 64 kbit/s bitrate.
/// @param samplingRate  sampling rate passed to opus_encoder_create
/// @param channels      channel count passed to opus_encoder_create
/// On creation failure encoderState is set to nullptr and the error is reported on stderr.
void CodecOpus::initEncoder(opus_int32 samplingRate, int channels) // Encoder
{
    int error = OPUS_OK;
    encoderState = opus_encoder_create(samplingRate, channels, OPUS_APPLICATION_VOIP, &error);
    // Check the creation result before it is overwritten by the ctl call below.
    if (error != OPUS_OK || encoderState == nullptr) {
        encoderState = nullptr;
        std::cerr << "Failed to create Opus Encoder: " << opus_strerror(error) << std::endl;
        return;
    }
    error = opus_encoder_ctl(encoderState, OPUS_SET_BITRATE(64000));
    if (error != OPUS_OK) {
        std::cerr << "Failed to set Opus bitrate: " << opus_strerror(error) << std::endl;
        return;
    }
    std::cout << "Created Opus Encoder struct" << std::endl;
}
/// Encodes one frame of 16-bit PCM by forwarding to opus_encode.
/// @param pcm           input samples
/// @param frameSize     frame size argument forwarded to opus_encode
/// @param data          output buffer for the compressed packet
/// @param maxDataBytes  capacity of @p data in bytes
/// @return whatever opus_encode returns
opus_int32 CodecOpus::encodeData(const opus_int16 *pcm, int frameSize, unsigned char *data, opus_int32 maxDataBytes) //Encoder
{
    return opus_encode(encoderState, pcm, frameSize, data, maxDataBytes);
}
/// Decodes one Opus packet by forwarding to opus_decode (FEC disabled).
/// @param data             compressed packet
/// @param numberOfBytes    length of @p data in bytes
/// @param pcm              output buffer for decoded 16-bit samples
/// @param frameSizeInSec   forwarded as opus_decode's frame_size argument
///                         (NOTE(review): the name suggests seconds, but the value is
///                         passed straight through as the frame size — confirm units)
/// @return whatever opus_decode returns
int CodecOpus::decodeData(const unsigned char *data, opus_int32 numberOfBytes,opus_int16* pcm,int frameSizeInSec) //Decoder
{
    return opus_decode(decoderState, data, numberOfBytes, pcm, frameSizeInSec, 0);
}
/// Releases the decoder and encoder states.
CodecOpus::~CodecOpus()
{
    opus_decoder_destroy(decoderState);
    opus_encoder_destroy(encoderState);
}
audioinput.h:
#ifndef AUDIOINPUT_H
#define AUDIOINPUT_H
#include <QAudioFormat>
#include <iostream>
#include <QAudioInput>
#include <QAudioOutput>
#include <thread>
#include "codecopus.h"
#include "QDebug"
/// Captures audio from the default input device, runs it through the Opus
/// encoder/decoder pair and plays the result on the default output device.
class AudioInput : public QObject
{
    Q_OBJECT
public:
    AudioInput();
    ~AudioInput();
    void startRecording();
    void CreateNewAudioThread();
private:
    CodecOpus opus;
    // Buffer for one compressed Opus packet.
    unsigned char cbits[4000] = {};
    // One input frame: 960 samples per channel, 2 channels. The element type is
    // already opus_int16, so the count must not be multiplied by sizeof again.
    opus_int16 in[960*2] = {};
    // Decoder output: up to 5760 samples per channel, 2 channels.
    opus_int16 out[5760*2] = {};
    unsigned char *pcm_bytes = nullptr;
    int MAX_FRAME_SIZE = 0;
    QAudioFormat audioFormat;
    QAudioInput *audioInput = nullptr;
    QIODevice *mic = nullptr;
    QByteArray data;
    int micFrameSize = 0;
    QAudioOutput *audioOutput = nullptr;
    QIODevice *speaker = nullptr;
    QAudioFormat speakerAudioFormat;
public slots:
    // Slot connected to the audio input's notify() signal.
    void OnAudioNotfiy();
};
#endif // AUDIOINPUT_H
audioinput.cpp:
#include "audioinput.h"
/// Configures 48 kHz / stereo / 16-bit signed little-endian PCM for both the
/// microphone and the speaker (falling back to each device's nearest supported
/// format), creates the Qt audio objects and initialises the Opus codec.
AudioInput::AudioInput() : pcm_bytes(new unsigned char[40000]), audioFormat()
{
    audioFormat.setSampleRate(48000);
    audioFormat.setChannelCount(2);
    audioFormat.setSampleSize(16);
    audioFormat.setSampleType(QAudioFormat::SignedInt);
    audioFormat.setByteOrder(QAudioFormat::LittleEndian);
    audioFormat.setCodec("audio/pcm");

    speakerAudioFormat.setSampleRate(48000);
    speakerAudioFormat.setChannelCount(2);
    speakerAudioFormat.setSampleSize(16);
    speakerAudioFormat.setSampleType(QAudioFormat::SignedInt);
    speakerAudioFormat.setByteOrder(QAudioFormat::LittleEndian);
    speakerAudioFormat.setCodec("audio/pcm");

    QAudioDeviceInfo info = QAudioDeviceInfo::defaultInputDevice();
    if(!info.isFormatSupported(audioFormat)){
        std::cout << "Mic Format not supported!" << std::endl;
        audioFormat = info.nearestFormat(audioFormat);
    }

    QAudioDeviceInfo speakerInfo = QAudioDeviceInfo::defaultOutputDevice();
    if(!speakerInfo.isFormatSupported(speakerAudioFormat)){
        std::cout << "Speaker Format is not supported!" << std::endl;
        // Ask the OUTPUT device for its nearest format, not the input device.
        speakerAudioFormat = speakerInfo.nearestFormat(speakerAudioFormat);
    }

    std::cout << speakerAudioFormat.sampleRate() << audioFormat.sampleRate() << speakerAudioFormat.channelCount() << audioFormat.channelCount() << std::endl;

    audioInput = new QAudioInput(audioFormat);
    audioOutput = new QAudioOutput(speakerAudioFormat);
    audioInput->setNotifyInterval(20);
    // Samples per channel in 20 ms of microphone audio.
    micFrameSize = (audioFormat.sampleRate()/1000)*20;

    opus.initEncoder(audioFormat.sampleRate(),audioFormat.channelCount());
    opus.initDecoder(speakerAudioFormat.sampleRate(),speakerAudioFormat.channelCount());
    MAX_FRAME_SIZE = 6*960;

    connect(audioInput,SIGNAL(notify()),this,SLOT(OnAudioNotfiy()));
}
/// Stops both audio streams and frees the Qt audio objects created in the constructor.
AudioInput::~AudioInput()
{
    if (audioInput) {
        audioInput->stop();
        delete audioInput;
        audioInput = nullptr;
    }
    if (audioOutput) {
        audioOutput->stop();
        delete audioOutput;
        audioOutput = nullptr;
    }
    // NOTE(review): pcm_bytes is deliberately not freed here — code elsewhere in
    // this class may repoint it at memory it does not own; confirm before adding
    // a delete[].
}
/// Starts the capture and playback streams and remembers their I/O devices.
void AudioInput::startRecording()
{
    mic = audioInput->start();       // device to read captured PCM from
    speaker = audioOutput->start();  // device to write PCM for playback to
    std::cout << "Recording started!" << std::endl;
}
void AudioInput::CreateNewAudioThread()
{
std::thread t1(&AudioInput::startRecording,this);
t1.detach();
}
void AudioInput::OnAudioNotfiy()
{
data = mic->readAll();
std::cout << "data size" <<data.size() << std::endl;
if(data.size() > 0){
pcm_bytes = reinterpret_cast<unsigned char*>(data.data());
//convert
for(int i=0;i<2*960;i++){ //TODO HARDCODED
in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
}
opus_int32 compressedBytes = opus.encodeData(in,960,cbits,4000);
opus_int32 decompressedBytes = opus.decodeData(cbits,compressedBytes,out,MAX_FRAME_SIZE);
for(int i = 0; i<2*decompressedBytes;i++) //TODO HARDCODED
{
pcm_bytes[2*i]=out[i]&0xFF;
pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
}
speaker->write((const char*)pcm_bytes,3840);
}
}

1)You encode only 960 bytes, while the buffer is much larger. You must split the buffer into several equal parts and pass them to the encoder. The size of the part is 120, 240, 480, 960, 1920, and 2880.
2)Use qFromLittleEndian()/qToLittleEndian() functions or type casting when converting from char array to opus_int16 array/from opus_int16 array to char array. This will prevent cracking and poor sound quality.
Example:
void voice::slot_read_audio_input()
{
// Audio settings:
// Sample Rate=48000
// Sample Size=16
// Channel Count=1
// Byte Order=Little Endian
// Sample Type= UnSignedInt
// Encoder settings:
// Sample Rate=48000
// Channel Count=1
// OPUS_APPLICATION_VOIP
// Decoder settings:
// Sample Rate=48000
// Channel Count=1
QByteArray audio_buffer;//mic
QByteArray output_audio_buffer;//speaker
int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
int const FRAME_SIZE=960;
int const MAX_FRAME_SIZE=1276;
int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;
opus_int16 input_frame[FRAME_SIZE] = {};
opus_int16 output_frame[FRAME_SIZE] = {};
unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};
audio_buffer.resize(voice_input->bytesReady());
output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);
input->read(audio_buffer.data(),audio_buffer.size());
for(int i=0;i<FRAME_COUNT;i++)
{
// convert from LittleEndian
for(int j=0;j<FRAME_SIZE;j++)
{
input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
}
opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);
// conver to LittleEndian
for(int j = 0; j<decompressedBytes;j++)
{
qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
// decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
}
audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
// or use this:
// output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
}
}

I had a long answer ready about how you are misinterpreting the return value of opus.decodeData as the number of bytes, where the correct interpretation is "number of decoded samples per channel". But it still looks like you account for that in the byte conversion routine later on. So I'm not precisely sure where the bug is.
In general I think you are making the conversion from unsigned char <-> int16 more complicated than it needs to be. You should be able to just pass the audio buffer directly to / from opus and reinterpret its pointer to the needed type inline, without having to manually do bit manipulations to convert and copy between different buffers. The audio device should give you little-endian data but if there is a mismatch you can do a basic byte swapping routine
for (int c = 0; c < numSamples; c++)
{
unsigned char tmp = data[2 * c];
data[2 * c] = data[2 * c + 1];
data[2 * c + 1] = tmp;
}
I don't see it here but I assume you also have code to only consume exactly 960 samples at a time from the mic and keep the rest in the buffer for the next frame, otherwise you'll drop data.
Not that it matters much, but you can also replace 4000 in cbits with 1275, which is the maximum opus packet size.

Related

Decreasing Latency of playing sound using Playsound in C++ (windows)

Currently, we are playing 5 sounds one after another using wave output, fetching them from a TCP socket. We are using playBuffer to play the sounds, but there is latency between the end of one sound and the start of the next. I don't want any latency between the 5 sounds; each should start immediately after the previous one. Is there any way to do that with PlaySound, or can I achieve it using another C++ library? I am currently on a Windows system. I would really appreciate some help; I have been searching for a solution for hours.
// AudioTask.cpp : Defines the entry point for the console application.
// Adapted from http://www.cplusplus.com/forum/beginner/88542/
#include "stdafx.h"
#define _WIN32_WINNT 0x0500
#include <windows.h>
#include <mmsystem.h>
#include <iostream>
#include <fstream>
#include <conio.h>
#include <math.h>
#include <stdint.h>
#define PI 3.14159265
using namespace std;
// 44-byte canonical PCM WAV header. Every field has a default so that a
// default-constructed header never contains indeterminate bytes; callers
// overwrite the format fields and the two size fields as needed.
typedef struct WAV_HEADER1 {
    uint8_t RIFF[4] = { 'R', 'I', 'F', 'F' };
    uint32_t ChunkSize = 0;               // file size minus the first 8 bytes
    uint8_t WAVE[4] = { 'W', 'A', 'V', 'E' };
    uint8_t fmt[4] = { 'f', 'm', 't', ' ' };
    uint32_t Subchunk1Size = 16;
    uint16_t AudioFormat = 1;
    uint16_t NumOfChan = 1;
    uint32_t SamplesPerSec = 16000;
    uint32_t bytesPerSec = 16000 * 2;
    uint16_t blockAlign = 2;
    uint16_t bitsPerSample = 16;
    uint8_t Subchunk2ID[4] = { 'd', 'a', 't', 'a' };
    uint32_t Subchunk2Size = 0;           // number of sample-data bytes
} wav_hdr1;
void playBuffer(short* audioSamplesData1, short* audioSamplesData2, int count)
{
static_assert(sizeof(wav_hdr1) == 44, "");
wav_hdr1 wav;
wav.NumOfChan = 2;
wav.SamplesPerSec = 44100;
wav.bytesPerSec = 176400;
wav.blockAlign = 4;
wav.bitsPerSample = 16;
// Fixed values
wav.RIFF[0] = 'R';
wav.RIFF[1] = 'I';
wav.RIFF[2] = 'F';
wav.RIFF[3] = 'F';
wav.WAVE[0] = 'W';
wav.WAVE[1] = 'A';
wav.WAVE[2] = 'V';
wav.WAVE[3] = 'E';
wav.fmt[0] = 'f';
wav.fmt[1] = 'm';
wav.fmt[2] = 't';
wav.fmt[3] = ' ';
wav.Subchunk2ID[0] = 'd';
wav.Subchunk2ID[1] = 'a';
wav.Subchunk2ID[2] = 't';
wav.Subchunk2ID[3] = 'a';
wav.ChunkSize = (count * 2 * 2) + sizeof(wav_hdr1) - 8;
wav.Subchunk2Size = wav.ChunkSize - 20;
char* data = new char[44 + (count * 2 * 2)];
memcpy(data, &wav, sizeof(wav));
int index = sizeof(wav);
//constexpr double max_amplitude = 32766;
for (int i = 0; i < count; i++)
{
short value = audioSamplesData1 ? audioSamplesData1[i] : 0;
memcpy(data + index, &value, sizeof(short));
index += sizeof(short);
value = audioSamplesData2 ? audioSamplesData2[i] : 0;
memcpy(data + index, &value, sizeof(short));
index += sizeof(short);
}
PlaySound((char*)data, GetModuleHandle(0), SND_MEMORY | SND_SYNC);
}
// Plays five clips in sequence: A on both channels, B on both channels,
// A on the first channel only, B on the second channel only, then A and B together.
void performAction(short audioSamplesData1[], short audioSamplesData2[], int count)
{
    short* const a = audioSamplesData1;
    short* const b = audioSamplesData2;

    playBuffer(a, a, count);
    playBuffer(b, b, count);
    playBuffer(a, NULL, count);
    playBuffer(NULL, b, count);
    playBuffer(a, b, count);
}
// Loads a WAV file into memory and owns the buffer holding it.
class Wave {
public:
    Wave(char * filename);
    ~Wave();
    // The destructor delete[]s `buffer`, so a copy would free it twice.
    Wave(const Wave&) = delete;
    Wave& operator=(const Wave&) = delete;
    void play(bool async = true);
    bool isok();
private:
    char * buffer;           // file contents; owned by this object
    bool ok;                 // true once the file was loaded
    HINSTANCE HInstance;
    int numberOfAudioBytes;  // size of `buffer` in bytes
};
// Reads the whole file into `buffer`. `ok` stays false when the file cannot be
// opened, is empty, or cannot be read completely.
Wave::Wave(char * filename)
{
    ok = false;
    buffer = 0;
    HInstance = GetModuleHandle(0);
    numberOfAudioBytes = 0;

    std::ifstream infile(filename, std::ios::binary);
    if (!infile)
    {
        std::cout << "Wave::file error: " << filename << std::endl;
        return;
    }

    infile.seekg(0, std::ios::end); // get length of file
    const std::streamoff length = infile.tellg();
    if (length <= 0)
    {
        // tellg() reports -1 on failure; an empty file has nothing to play.
        std::cout << "Wave::file error: " << filename << std::endl;
        return;
    }

    buffer = new char[static_cast<size_t>(length)]; // allocate memory
    infile.seekg(0, std::ios::beg); // position to start of file
    if (!infile.read(buffer, length)) // read entire file
    {
        std::cout << "Wave::file error: " << filename << std::endl;
        delete[] buffer;
        buffer = 0;
        return;
    }

    std::cout << "Number of elements in buffer : " << length << std::endl;
    numberOfAudioBytes = static_cast<int>(length);
    ok = true;
}
// Stops playback first so nothing is still reading the buffer, then frees it.
Wave::~Wave()
{
    PlaySound(NULL, 0, 0);
    delete[] buffer;
}
// Test driver: synthesises two 3-second sine tones (500 Hz and 250 Hz at
// 44.1 kHz) as 16-bit samples and hands them to performAction(), which plays
// them from memory. Does nothing if the file failed to load.
// The `async` argument is not used by this implementation.
void Wave::play(bool async)
{
    if (!ok)
        return;

    const int SAMPLE_RATE = 44100; // 44.1 kHz
    const int FILE_LENGTH_IN_SECONDS = 3;
    const int NUMBER_OF_SAMPLES = SAMPLE_RATE*FILE_LENGTH_IN_SECONDS; // 132300 samples per tone
    std::cout << "NUMBER_OF_SAMPLES : " << NUMBER_OF_SAMPLES << std::endl;

    short toneA[NUMBER_OF_SAMPLES];
    short toneB[NUMBER_OF_SAMPLES];
    const float peak = 32767.0; // largest 16-bit sample value
    const float hzA = 500.0;
    const float hzB = 250.0;

    for (int n = 0; n < NUMBER_OF_SAMPLES; n++)
    {
        // Unit-amplitude sine for each tone, scaled to the 16-bit range below.
        const float unitA = sin(n*hzA / SAMPLE_RATE * PI * 2);
        const float unitB = sin(n*hzB / SAMPLE_RATE * PI * 2);
        toneA[n] = (short)(peak * unitA);
        toneB[n] = (short)(peak * unitB);
    }

    // Play the generated samples; the WAV file loaded from disk is not used here.
    performAction(toneA, toneB, NUMBER_OF_SAMPLES);
}
// Reports whether the constructor managed to load the file.
bool Wave::isok() { return ok; }
int main(int argc, char *argv[]) {
std::cout << "Trying to play sound ...\n";
// Load the WAV files from them from the disk. These files are here only to help you understand what we need. In the end, we will no longer need them.
Wave outputA("outputA.WAV"); // Audio file equivalent to audioSamplesData_A curve generated in the loop above.
Wave outputB("outputB.WAV"); // Audio file equivalent to audioSamplesData_B curve generated in the loop above.
Wave outputALeftOnly("outputALeftOnly.WAV"); // Audio file that plays sound A on the left only, must be able to take audioSamplesData_A and somehow make it left only.
Wave outputBRightOnly("outputBRightOnly.WAV"); // Audio file that plays sound B on the right only, must be able to take audioSamplesData_B and somehow make it right only.
Wave outputALeftOutputBRight("outputALeftOutputBRight.WAV"); // Must be able to take both audioSamplesData_A and audioSamplesData_B and make it play different sounds in left and right.
// Play the WAV files from the disk, either all of them or a subset of them.
outputA.play(0);
//outputB.play(0);
//outputALeftOnly.play(0);
//outputBRightOnly.play(0);
//outputALeftOutputBRight.play(0);
std::cout << "press key to exit";
while (1) {} // Loop to prevent command line terminal from closing automatically.
return 0;
}

How do I generate a tone using SDL_audio?

I am trying to generate a simple, constant sine tone using SDL_audio. I have a small helper class that can be called to turn the tone on/off, change the frequency, and change the wave shape. I have followed some examples I could find on the web and got the following:
beeper.h
#pragma once
#include <SDL.h>
#include <SDL_audio.h>
#include <cmath>
#include "logger.h"
// Small tone generator driven by the SDL audio callback.
class Beeper {
    // --- mutable state -----------------------------------------------------
    bool soundOn = true;        // when false, silence is written
    int waveType = 0;           // selects the wave shape (0 and 1 are handled by generateSamples)
    float waveTone = 440;       // tone in Hz
    float samplingIndex = 0;    // running phase used when sampling

    // --- fixed settings (not changeable from outside) ------------------------
    const Sint16 amplitude = 32000;   // volume
    const int samplingRate = 44100;   // samples per second
    const int bufferSize = 1024;      // requested buffer size

    // Sample a sine wave at the given index.
    float sampleSine(float index);
    // Sample a square wave at the given index.
    float sampleSquare(float index);

public:
    // Sets up the audio spec and opens the audio device.
    void initializeAudio();
    // Invoked from the SDL audio callback to fill `stream`; `length` is the
    // length handed over by the callback.
    void generateSamples(short* stream, int length);
    // Switch the sound on or off.
    void setSoundOn(bool soundOnOrOff);
    // Choose the timbre of the tone.
    void setWaveType(int waveTypeID);
    // Choose the tone in Hz.
    void setWaveTone(int waveHz);
};
beeper.cpp
#include <beeper.h>
void fillBuffer(void* userdata, Uint8* _stream, int len) {
short * stream = reinterpret_cast<short*>(_stream);
int length = len;
Beeper* beeper = (Beeper*)userdata;
beeper->generateSamples(stream, length);
}
void Beeper::initializeAudio() {
SDL_AudioSpec desired, returned;
SDL_AudioDeviceID devID;
SDL_zero(desired);
desired.freq = samplingRate;
desired.format = AUDIO_S16SYS; //16-bit audio
desired.channels = 1;
desired.samples = bufferSize;
desired.callback = &fillBuffer;
desired.userdata = this;
devID = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(0,0), 0, &desired, &returned, SDL_AUDIO_ALLOW_FORMAT_CHANGE);
SDL_PauseAudioDevice(devID, 0);
}
void Beeper::generateSamples(short *stream, int length) {
int samplesToWrite = length / sizeof(short);
for (int i = 0; i < samplesToWrite; i++) {
if (soundOn) {
if (waveType == 0) {
stream[i] = (short)(amplitude * sampleSine(samplingIndex));
}
else if (waveType == 1) {
stream[i] = (short)(amplitude * 0.8 * sampleSquare(samplingIndex));
}
}
else {
stream[i] = 0;
}
//INFO << "Sampling index: " << samplingIndex;
samplingIndex += (waveTone * M_PI * 2) / samplingRate;
//INFO << "Stream input: " << stream[i];
if (samplingIndex >= (M_PI*2)) {
samplingIndex -= M_PI * 2;
}
}
}
void Beeper::setSoundOn(bool soundOnOrOff) {
soundOn = soundOnOrOff;
//if (soundOnOrOff) {
// samplingIndex = 0;
//}
}
void Beeper::setWaveType(int waveTypeID) {
waveType = waveTypeID;
//samplingIndex = 0;
}
void Beeper::setWaveTone(int waveHz) {
waveTone = waveHz;
//samplingIndex = 0;
}
// Returns the sine of `index`.
float Beeper::sampleSine(float index) {
    const double value = sin(index);
    return value;
}
// Returns a square wave derived from the sine of `index`:
// +1 when the sine is non-negative, -1 otherwise.
float Beeper::sampleSquare(float index)
{
    // Keep the sine as a float. Storing it in an int truncated every value in
    // (-1, 1) to 0, so the result was +1 nearly all the time.
    const float sine = sin(index);
    return (sine >= 0) ? 1.0f : -1.0f;
}
The callback function is being called and the generateSamples function is loading data into the stream, but I cannot hear anything but a very slight click at irregular periods. I have had a look at the data inside the stream and it follows a pattern that I would expect for a scaled sine wave with a 440 Hz frequency. Is there something obvious that I am missing? I did notice that the size of the stream is double what I put when declaring the SDL_AudioSpec and calling SDL_OpenAudioDevice. Why is that?

Answered my own question! When opening the audio device I used the flag SDL_AUDIO_ALLOW_FORMAT_CHANGE which meant that SDL was actually using a float buffer instead of the short buffer that I expected. This was causing issues in a couple of places that were hard to detect (the stream being double the amount of bytes I was expecting should have tipped me off). I changed that parameter in SDL_OpenAudioDevice() to 0 and it worked as expected!

OPUS decode raw PCM data

I am trying to compress and decompress raw PCM (16-Bit) audio, using OPUS.
Here below is my code for opus_encoder.c. If I remove my decoder.c, the buffer works just fine as in the microphone is able to take in raw PCM data. However, once I have implemented my decoder class, it gave me a lot of errors such as memory allocation, heap corruption and so on. Here are some of my errors:
std::bad_alloc at memory location 0x0031D4BC
Stack overflow (parameters: 0x00000000, 0x05122000)
Access violation reading location 0x04A40000.
Based on my understanding, I think my decoder size cannot allocate the memory properly. Can you take a look at my codes and see what went wrong?
Opus_encoder.c
#include "opusencoder.h"
#include <QtConcurrent/QtConcurrent>
// Nothing to set up here.
opusencoder::opusencoder() {}
// Nothing is released here.
opusencoder::~opusencoder() {}
// File-level state used by opusencoder::encodedata.
OpusEncoder *enc = 0;                   // encoder handle
int error = 0;                          // last status reported by encoder creation
unsigned char *compressedbuffer = 0;    // most recently allocated output packet buffer
opus_uint32 enc_final_range = 0;
short pcm = 0;                          // scratch sample
unsigned char *opusencoder::encodedata(const char *audiodata, const unsigned int& size) {
if (size == 0)
return false;
enc = (OpusEncoder *)malloc(opus_encoder_get_size(1));
enc = opus_encoder_create(8000, 1, OPUS_APPLICATION_VOIP, &error);
if (enc == NULL)
{
exit;
}
opus_int32 rate;
opus_encoder_ctl(enc, OPUS_GET_BANDWIDTH(&rate));
this->encoded_data_size = rate;
int len;
for (int i = 0; i < size / 2; i++)
{
//combine pairs of bytes in the original data into two-byte number
//convert const char to short
pcm= audiodata[2 * i] << 8 | audiodata[(2 * i) + 1];
}
qDebug() << "audiodata: " << pcm << endl;
compressedbuffer = new (unsigned char[this->encoded_data_size]);
len = opus_encode(enc, &pcm, 320, compressedbuffer, this->encoded_data_size);
len = opus_packet_unpad(compressedbuffer, len);
len++;
if (len < 0)
{
qDebug() << "Failure to compress";
return NULL;
}
qDebug() << "COmpressed buffer:" << compressedbuffer << endl;
qDebug() << "opus_encode() ................................ OK.\n" << endl;
}
Opus_decoder.c
##include "opusdecoder.h"
#include <QtConcurrent/QtConcurrent>
#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
int num_channels = 1;
// Nothing to set up here.
opusdecoder::opusdecoder() {}
// Nothing is released here.
opusdecoder::~opusdecoder() {}
// Decodes one Opus packet into 16-bit mono PCM at 8 kHz.
//   frame_size  number of samples the caller expects (also the output capacity)
//   data        compressed packet; its length is taken from decoded_data_size
// Returns a new[]-allocated buffer of frame_size samples that the caller must
// delete[], or NULL on failure.
opus_int16* opusdecoder::decodedata(int frame_size, const unsigned char *data)
{
    const int kChannels = 1; // matches the channel count the decoder is created with

    if (data == NULL || frame_size <= 0)
        return NULL;

    dec = opus_decoder_create(8000, kChannels, &err);
    if (dec == NULL || err != OPUS_OK)
    {
        qDebug() << "Failure to create decoder";
        dec = NULL;
        return NULL;
    }

    // Size the buffer AND the decode capacity from frame_size; decoding a fixed
    // 320 samples into a smaller buffer overran the heap.
    opus_int16 *decompress = new opus_int16[frame_size * kChannels]();
    const int decoded = opus_decode(dec, data, this->decoded_data_size, decompress, frame_size, 0);

    // The decoder is created per call, so release it on every path.
    opus_decoder_destroy(dec);
    dec = NULL;

    if (decoded < 0)
    {
        qDebug() << "Failure to decompress";
        delete[] decompress;
        return NULL;
    }
    qDebug() << "opus_decode() ................................ OK.\n" << decoded << endl;
    if (decoded != frame_size)
    {
        // The packet did not contain the number of samples the caller asked for.
        delete[] decompress;
        return NULL;
    }
    return decompress;
}

Decoding by libjpeg -> Encoding by x264, strange artefacts on frames

I have a collection of jpeg, which must be decoded by lib jpeg, and after it, encoded by x264 (after it encoded packets are streamed via rtmp).
Code I used for decoding:
struct my_error_mgr
{
struct jpeg_error_mgr pub;
jmp_buf setjmp_buffer;
};
typedef my_error_mgr *my_error_ptr;
// Error handler: emits the message through the output_message hook, then jumps
// back to the setjmp point stored in the error manager.
METHODDEF(void) my_error_exit (j_common_ptr cinfo)
{
    my_error_ptr handler = (my_error_ptr) cinfo->err;
    (*cinfo->err->output_message) (cinfo);
    longjmp(handler->setjmp_buffer, 1);
}
// Source-manager hook: no initialisation is done here.
void init_source(j_decompress_ptr ptr)
{
    Q_UNUSED(ptr)
}
// Source-manager hook called when the decoder has consumed the whole buffer.
// There is no more data to supply, so feed a fake end-of-image marker; this
// lets the decoder finish (with whatever it has) instead of asking again
// forever on truncated input.
boolean fill_input_buffer(j_decompress_ptr ptr)
{
    static const JOCTET fakeEoi[2] = { (JOCTET) 0xFF, (JOCTET) JPEG_EOI };
    ptr->src->next_input_byte = fakeEoi;
    ptr->src->bytes_in_buffer = 2;
    return TRUE;
}
// Source-manager hook: no cleanup is done here.
void term_source(j_decompress_ptr ptr)
{
    Q_UNUSED(ptr)
}
// Source-manager hook: skips num_bytes of input. The skip is clamped to the
// bytes actually available so bytes_in_buffer (a size_t) cannot wrap around and
// next_input_byte cannot run past the end of the buffer.
void skip_input_data(j_decompress_ptr ptr, long num_bytes)
{
    if(num_bytes<=0)
        return;
    size_t skip=(size_t)num_bytes;
    if(skip>ptr->src->bytes_in_buffer)
        skip=ptr->src->bytes_in_buffer;
    ptr->src->next_input_byte+=skip;
    ptr->src->bytes_in_buffer-=skip;
}
// Passes the parent through to QObject; no other setup.
EtherDecoder::EtherDecoder(QObject *parent)
    : QObject(parent)
{
}
// Decodes one in-memory JPEG and emits frameReady() with the decoded pixels.
// The emitted buffer holds the scanlines exactly as libjpeg produces them:
// output_components interleaved bytes per pixel, row after row.
// NOTE(review): this is interleaved data, not planar I420 — a consumer that
// expects planar YUV has to convert it first.
void EtherDecoder::dataBlockReady(QByteArray data)
{
    jpeg_decompress_struct decompressInfo;
    my_error_mgr err;
    // Declared before setjmp so it is released on the error path as well.
    QByteArray output;

    // The error manager must be installed BEFORE jpeg_create_decompress.
    decompressInfo.err = jpeg_std_error(&err.pub);
    err.pub.error_exit = my_error_exit;
    if (setjmp(err.setjmp_buffer))
    {
        // libjpeg reported a fatal error somewhere below.
        jpeg_destroy_decompress(&decompressInfo);
        return;
    }
    jpeg_create_decompress(&decompressInfo);

    // Memory source reading straight from `data`.
    decompressInfo.src = (jpeg_source_mgr *) (*decompressInfo.mem->alloc_small) ((j_common_ptr) &decompressInfo, JPOOL_PERMANENT, sizeof(jpeg_source_mgr));
    decompressInfo.src->init_source = init_source;
    decompressInfo.src->resync_to_restart = jpeg_resync_to_restart;
    decompressInfo.src->fill_input_buffer = fill_input_buffer;
    decompressInfo.src->skip_input_data = skip_input_data;
    decompressInfo.src->term_source = term_source;
    decompressInfo.src->next_input_byte = reinterpret_cast<const JOCTET*>(data.constData());
    decompressInfo.src->bytes_in_buffer = data.size();

    jpeg_read_header(&decompressInfo, TRUE);
    // jpeg_read_header resets the decompression parameters to their defaults,
    // so this has to be set after it and before jpeg_start_decompress.
    decompressInfo.do_fancy_upsampling = FALSE;
    jpeg_start_decompress(&decompressInfo);

    // Size the output from the real image dimensions instead of a fixed 5 MB.
    const int rowStride = decompressInfo.output_width * decompressInfo.output_components;
    output.resize(rowStride * decompressInfo.output_height);

    while (decompressInfo.output_scanline < decompressInfo.output_height)
    {
        JSAMPROW row = reinterpret_cast<JSAMPROW>(output.data()) + decompressInfo.output_scanline * rowStride;
        jpeg_read_scanlines(&decompressInfo, &row, 1);
    }
    jpeg_finish_decompress(&decompressInfo);

    emit frameReady(output, decompressInfo.output_width, decompressInfo.output_height);
    jpeg_destroy_decompress(&decompressInfo);
}
When I emit frameReady signal, I send data to Encoder, method, where I init Encedor looks like:
bool EtherEncoder::initEncoder(unsigned int width, unsigned int height)
{
x264_param_t param;
x264_param_default_preset(&param, "veryfast", "zerolatency");
param.i_width=width;
param.i_height=height;
param.i_frame_total=0;
param.i_csp=X264_CSP_I420;
param.i_timebase_num=1;
param.i_timebase_den=96000;
param.b_annexb=true;
param.b_repeat_headers=false;
x264_param_apply_fastfirstpass(&param);
x264_param_apply_profile(&param, "baseline");
_context=x264_encoder_open(&param);
if(!_context)
return false;
int nal_count;
x264_nal_t *nals;
if(x264_encoder_headers(_context, &nals, &nal_count)<0)
{
x264_encoder_close(_context);
_context=0;
return false;
}
_extradata=QByteArray();
_width=width;
_height=height;
if(nal_count>0)
{
_extradata=QByteArray(
(const char *)nals[0].p_payload,
nals[nal_count-1].p_payload+nals[nal_count-1].i_payload-nals[0].p_payload);
}
return true;
}
And encoding method:
// Encodes one frame and queues the resulting packet.
//   data    frame pixels; read as planar I420 (a full-size Y plane followed by
//           two quarter-size chroma planes)
//   width   frame width in pixels
//   height  frame height in pixels
// The frame is dropped when the encoder cannot be initialised, the buffer is
// too small for an I420 frame of this size, or x264 produces no output.
// NOTE(review): interleaved RGB input will be misread as I420 — confirm the
// format the producer emits.
void EtherEncoder::onFrameReady(QByteArray data, int width, int height)
{
    if(data.isEmpty() || width<=0 || height<=0)
        return;

    if(!_context)
    {
        // Give up on this frame instead of retrying forever when init fails.
        if(!initEncoder(width, height))
            return;
        _timestampDelta=realTimestamp();
    }

    const int planeSize = width*height;
    const int chromaSize = planeSize/4;
    // The plane pointers set up below must stay inside the buffer.
    if(data.size() < planeSize + 2*chromaSize)
        return;

    x264_picture_t pic;
    x264_picture_init(&pic);
    pic.i_type=X264_TYPE_AUTO;
    pic.i_pts=_timestampDelta*96000;
    pic.img.i_csp=X264_CSP_I420;
    pic.img.i_plane=3;
    uint8_t *p = (uint8_t*)data.data();
    pic.img.plane[0]=p;
    p+=planeSize;
    pic.img.plane[1]=p;
    p+=chromaSize;
    pic.img.plane[2]=p;
    pic.img.i_stride[0]=width;
    pic.img.i_stride[1]=width/2;
    pic.img.i_stride[2]=width/2;
    if(_forceKeyFrame)
    {
        pic.i_type=X264_TYPE_I;
        _forceKeyFrame=false;
    }

    int nal_count;
    x264_nal_t *nals;
    const int rc=x264_encoder_encode(_context, &nals, &nal_count, &pic, &pic);
    if(rc<=0)
        return; // error or no output: do not feed the same frame again

    _mutex.lock();
    _packets.push_back(
        Packet(
            QByteArray(
                (const char *)nals[0].p_payload, nals[nal_count- 1].p_payload+nals[nal_count-1].i_payload-nals[0].p_payload),
            _timestampDelta/96.0,
            _timestampDelta/96.0,
            pic.b_keyframe));
    _timestampDelta+=40;
    _mutex.unlock();
    emit onPacketReady();
}
Decoding and encoding proceed without errors and at the end I get a valid video stream, but it seems that in one of these steps I pass invalid data to the decoder or encoder. I get only a quarter of the image (the top-left part, as far as I can tell), and it has wrong colors and some color stripes. Maybe I set invalid strides and planes when encoding the frame, or maybe the way I set up the data for the libjpeg decoder is incorrect. Please ask questions about my code and I'll try to explain. This is driving me crazy. Thank you.

C++ - Play back a tone generated from a sinusoidal wave

Hey everyone, I'm currently trying to figure out how to play back a tone I have generated using a sinusoidal wave.
Here's my code:
#include <iostream>
#include <OpenAL/al.h>
#include <OpenAL/alc.h>
#include <Math.h>
using namespace std;
int main (int argc, char * const argv[]) {
int number = 0;
int i, size;
double const Pi=4*atan(1);
cout << "Enter number of seconds:" << endl;
scanf("%d", &number);
size = 44100*number;
unsigned char buffer [size]; //buffer array
for(i = 0; i < size; i++){
buffer[i] = (char)sin((2*Pi*440)/(44100*i))*127;
}
return 0;
}
Obviously it doesn't do anything at the moment, since I have no idea how to play the buffer.
I don't want to generate a wav file, nor do I want to load one in. I just want to play back the buffer I have generated.
I am currently working on Mac OS X and have tried using OpenAL; however, I have found that alut and alu are no longer part of it, and if I try to use them it turns out that they are deprecated anyway.
I have also tried to include QAudioOutput, but for some reason it does not appear to be anywhere on my Mac.
I just want a simple playback of the tone I've created. Does anyone have anything they can point me to?
Thanks heaps!!!

I've written an example exactly for this. Runs fine with OpenAL under MacOSX and plays smooth sines. Take a look here:
http://ioctl.eu/blog/2011/03/16/openal-sine-synth/
Code is quite short, i guess i can add it here as well for sake of completeness:
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <OpenAL/al.h>
#include <OpenAL/alc.h>
// Expands to a case label that returns the enumerator's name as a string.
// `#err` stringizes the argument; inside a string literal ("##err") nothing is
// substituted and every case would return the same literal text.
#define CASE_RETURN(err) case (err): return #err
// Maps an OpenAL error code to its symbolic name, or "unknown".
const char* al_err_str(ALenum err) {
    switch(err) {
        CASE_RETURN(AL_NO_ERROR);
        CASE_RETURN(AL_INVALID_NAME);
        CASE_RETURN(AL_INVALID_ENUM);
        CASE_RETURN(AL_INVALID_VALUE);
        CASE_RETURN(AL_INVALID_OPERATION);
        CASE_RETURN(AL_OUT_OF_MEMORY);
    }
    return "unknown";
}
#undef CASE_RETURN
// Drains the OpenAL error queue, printing each pending error together with the
// given source location.
#define __al_check_error(file,line) \
    do { \
        ALenum err = alGetError(); \
        while(err!=AL_NO_ERROR) { \
            std::cerr << "AL Error " << al_err_str(err) << " at " << file << ":" << line << std::endl; \
            err = alGetError(); \
        } \
    }while(0)
// Convenience wrapper that supplies the current file and line.
#define al_check_error() \
    __al_check_error(__FILE__, __LINE__)
void init_al() {
ALCdevice *dev = NULL;
ALCcontext *ctx = NULL;
const char *defname = alcGetString(NULL, ALC_DEFAULT_DEVICE_SPECIFIER);
std::cout << "Default device: " << defname << std::endl;
dev = alcOpenDevice(defname);
ctx = alcCreateContext(dev, NULL);
alcMakeContextCurrent(ctx);
}
void exit_al() {
ALCdevice *dev = NULL;
ALCcontext *ctx = NULL;
ctx = alcGetCurrentContext();
dev = alcGetContextsDevice(ctx);
alcMakeContextCurrent(NULL);
alcDestroyContext(ctx);
alcCloseDevice(dev);
}
int main(int argc, char* argv[]) {
/* initialize OpenAL */
init_al();
/* Create buffer to store samples */
ALuint buf;
alGenBuffers(1, &buf);
al_check_error();
/* Fill buffer with Sine-Wave */
float freq = 440.f;
int seconds = 4;
unsigned sample_rate = 22050;
size_t buf_size = seconds * sample_rate;
short *samples;
samples = new short[buf_size];
for(int i=0; i<buf_size; ++i) {
samples[i] = 32760 * sin( (2.f*float(M_PI)*freq)/sample_rate * i );
}
/* Download buffer to OpenAL */
alBufferData(buf, AL_FORMAT_MONO16, samples, buf_size, sample_rate);
al_check_error();
/* Set-up sound source and play buffer */
ALuint src = 0;
alGenSources(1, &src);
alSourcei(src, AL_BUFFER, buf);
alSourcePlay(src);
/* While sound is playing, sleep */
al_check_error();
sleep(seconds);
/* Dealloc OpenAL */
exit_al();
al_check_error();
return 0;
}
Update: I've found OpenAL a bit too limiting for my needs, like I have some problems with low-latency playback as this appears to be not the primary domain of OpenAL. Instead, I've found the very convincing PortAudio: http://www.portaudio.com/
It supports all major platforms (Mac,Win,Unix/ALSA) and looks very good. There is an example for sine playback which is far more sophisticated, yet quite simple. Just download the latest release and find the sine-playback sample at test/patest_sine.c

You will need to go through the OS to play back sounds. It's not as straightforward as you would think. In OSX, you will need to go through CoreAudio.
A better approach would be to use a wrapper library like PortAudio (http://www.portaudio.com/) which will make your code more portable and save you some of the boilerplate needed to get sound out of your program.

Try this (this program uses the Z-transform concept; a complete example that generates DTMF tones using ALSA and compiles on Linux is available here):
/*
* Cosine Samples Generator
*
* Autor: Volnei Klehm
* Data: 04/01/2014
*/
#include <math.h>
#include <stdio.h>
/* Sample frequency in Hz. It should be greater than twice the generated
   frequency and, when feeding an audio output, match the sample frequency
   used there. */
#define S_FREQ 8000
/* Output frequency in Hz. */
const float frequency_in_Hertz = 697;
/* Cosine and sine of the per-sample phase step 2*pi*f/S_FREQ. */
const float generatorContant1 = cosf(2*M_PI*(frequency_in_Hertz/S_FREQ));
const float generatorContant2 = sinf(2*M_PI*(frequency_in_Hertz/S_FREQ));
/* Returns the next sample of the tone. Each call advances a two-term
   recurrence kept in static storage: next = 2*cos(step)*r0 - r1. */
float GenerateSignal(){
    static float history[2]={1,0};
    const float next=2*generatorContant1*history[0]-history[1];
    history[1]=history[0];
    history[0]=next;
    return (generatorContant2*history[1]);
}
/* Prints 300 consecutive generator samples, one per line. */
int main(void) {
    const int total = 300;
    for (int produced = 0; produced < total; ++produced) {
        const float sample = GenerateSignal();
        printf("\n%f", sample);
    }
    return 0;
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

How to encode and decode audio data with opus? - c++

Related

Decreasing Latency of playing sound using Playsound in C++ (windows)

How do I generate a tone using SDL_audio?

OPUS decode raw PCM data

Decoding by libjpeg -> Encoding by x264, strange artefacts on frames

C++ - Play back a tone generated from a sinusoidal wave

Categories

Resources