I've been looking for some time for a quick and reliable way of creating grayscale videos with the avconv library from frames that are captured or created with OpenCV, in a C++ application.
I know that OpenCV has its own internal way of creating videos; however, it has some limitations in encoding performance and options.
So I would like to know: what are the options for accomplishing this task?
One of the options I've found that meets my needs is the following class:
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/mathematics.h>
}
class AVConvVideoMaker
{
AVCodec* codec;
AVCodecContext* context;
AVFrame* picture;
int imgWidth, imgHeight, imgBufferSize;
FILE* outputFile;
int outputBufferSize;
uint8_t* outputBuffer;
int pictureBufferSize;
uint8_t* pictureBuffer;
int outputSize;
public:
AVConvVideoMaker(std::string outputFilePath, int imgWidth, int imgHeight)
: codec(NULL)
, context(NULL)
, picture(NULL)
, imgWidth(imgWidth)
, imgHeight(imgHeight)
, imgBufferSize(imgWidth*imgHeight)
, outputFile(fopen(outputFilePath.c_str(), "wb"))
, outputBufferSize(100000)
, outputBuffer(new uint8_t[outputBufferSize])
, pictureBufferSize((imgBufferSize*3)/2)
, pictureBuffer(new uint8_t[pictureBufferSize])
, outputSize(0)
{
avcodec_register_all();
this->codec = avcodec_find_encoder(CODEC_ID_MPEG1VIDEO);
if (!this->codec)
{
throw std::runtime_error("Codec not found");
}
this->context = avcodec_alloc_context3(codec);
this->picture = avcodec_alloc_frame();
this->context->bit_rate = 400000;
this->context->width = this->imgWidth;
this->context->height = this->imgHeight;
this->context->time_base = (AVRational){1, 25};
this->context->gop_size = 10;
this->context->max_b_frames = 1;
this->context->pix_fmt = PIX_FMT_YUV420P;
if(avcodec_open2(this->context, this->codec, NULL) < 0)
{
throw std::runtime_error("Could not open codec");
}
if(!this->outputFile)
{
throw std::runtime_error("Could not open video output file");
}
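/* Lay out the single pictureBuffer allocation as planar YUV420p:
a full-resolution Y plane followed by two quarter-resolution chroma planes */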
this->picture->data[0] = this->pictureBuffer;
this->picture->data[1] = this->picture->data[0] + imgBufferSize;
this->picture->data[2] = this->picture->data[1] + imgBufferSize / 4;
this->picture->linesize[0] = this->imgWidth;
this->picture->linesize[1] = this->imgWidth / 2;
this->picture->linesize[2] = this->imgWidth / 2;
}
void insertFrame(cv::Mat1b& img)
{
fflush(stdout);
/* Y */
for(int y=0; y < this->context->height; y++)
{
for(int x=0; x < this->context->width; x++)
{
this->picture->data[0][y * picture->linesize[0] + x] = img.at<uchar>(y,x);
}
}
/* Cb and Cr */
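/* 128 is the neutral chroma value, so the encoded frames stay grayscale */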
for(int y=0; y < this->context->height/2; y++)
{
for(int x=0; x < this->context->width/2; x++)
{
this->picture->data[1][y * this->picture->linesize[1] + x] = 128;
this->picture->data[2][y * this->picture->linesize[2] + x] = 128;
}
}
this->outputSize = avcodec_encode_video(this->context, this->outputBuffer, this->outputBufferSize, this->picture);
fwrite(this->outputBuffer, 1, outputSize, this->outputFile);
}
~AVConvVideoMaker()
{
this->outputBuffer[0] = 0x00;
this->outputBuffer[1] = 0x00;
this->outputBuffer[2] = 0x01;
this->outputBuffer[3] = 0xb7;
fwrite(this->outputBuffer, 1, 4, this->outputFile);
fclose(this->outputFile);
avcodec_close(this->context);
av_free(this->context);
av_free(this->picture);
delete[] outputBuffer;
delete[] pictureBuffer;
}
};
To compile this on Ubuntu 16.04, you have to link with:
g++ --std=c++11 main.cpp -lopencv_core -lopencv_highgui -lavutil -lavcodec
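For reference, a minimal usage sketch (the frame size, frame count, and random test pattern are invented for illustration):
#include <opencv2/opencv.hpp>
int main()
{
AVConvVideoMaker maker("output.mpg", 640, 480);
for (int i = 0; i < 250; ++i) // 10 seconds at 25 fps
{
cv::Mat1b frame(480, 640);
cv::randu(frame, 0, 255); // stand-in for a real captured frame
maker.insertFrame(frame);
}
return 0; // the destructor writes the MPEG-1 end-of-sequence code and closes the file
}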
We have the following method implemented using the ijl15.lib API.
We want to use the libjpeg libraries instead of IJL. How should I implement WriteJPGBuffer using libjpeg? We are aware of LoadJPG and SaveJPG from a file; however, I want to write and read the JPEG image in a buffer using libjpeg. Any input will be very helpful. Thank you in advance.
unsigned char* WriteJPGBuffer(unsigned int &size)
{
size = 0;
int jErr;
JPEG_CORE_PROPERTIES jpgProps;
bool colorsSwapped;
if (!jpgSupported)
return NULL;
jErr = ijlInit(&jpgProps);
if (jErr != IJL_OK)
return NULL;
jpgProps.DIBWidth = m_width;
jpgProps.DIBHeight = -m_height;
jpgProps.DIBBytes = (unsigned char *)m_pData;
jpgProps.DIBPadBytes = 0 ;
jpgProps.DIBChannels = 4;
jpgProps.DIBColor = IJL_RGB;
jpgProps.JPGFile = NULL;
jpgProps.JPGWidth = m_width;
jpgProps.JPGHeight = m_height;
jpgProps.JPGChannels = 3;
jpgProps.JPGColor = IJL_YCBCR;
jpgProps.JPGSubsampling = IJL_411;
jpgProps.jquality = jpgQuality;
unsigned int iSize = m_width*m_height*3;
unsigned char * pBuffer = new unsigned char[iSize];
jpgProps.JPGSizeBytes = iSize;
jpgProps.JPGBytes = pBuffer;
jpgProps.jprops.jpeg_comment_size = (unsigned short)m_strCommentAdobe.length;
jpgProps.jprops.jpeg_comment = (char*)m_strCommentAdobe;
colorsSwapped = SetInternalFormat(RGB);
jErr = ijlWrite(&jpgProps, IJL_JBUFF_WRITEWHOLEIMAGE);
if (colorsSwapped)
SetInternalFormat(BGR);
if (jErr != IJL_OK)
{
ijlFree(&jpgProps);
return NULL;
}
size = jpgProps.JPGSizeBytes;
ijlFree(&jpgProps);
return jpgProps.JPGBytes;
}
Thanks for your inputs. I have implemented the solution below through R&D. In the implementation below, the image data is stored in a class member variable as RGBQUADs, which I first convert to an unsigned char* using the ConversionfromGLRGBQUADToUnsignedChar function and then write to the JPEG buffer.
void ConversionfromGLRGBQUADToUnsignedChar(unsigned char* dataInCharFromGLRGBQUAD)
{
int spot,spotDst;
for (int y = 0;y < m_height;y++)
{
for (int x = 0;x<m_width;x++)
{
spot = y * m_width + x;
spotDst = spot * 3;
dataInCharFromGLRGBQUAD[spotDst] = m_pData[spot].red;
dataInCharFromGLRGBQUAD[spotDst + 1] = m_pData[spot].green;
dataInCharFromGLRGBQUAD[spotDst + 2] = m_pData[spot].blue;
}
}
}
unsigned char * WriteJPGBuffer(unsigned int &size)
{
size = 0;
struct jpeg_compress_struct cinfo;
struct jpeg_error_mgr jerr;
JSAMPROW row_pointer[1];
unsigned char* dataInCharFromGLRGBQUAD;
bool colorsSwapped;
int row_stride;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_compress(&cinfo);
unsigned long sizeOfJPGBuffer = 0;
jpeg_mem_dest(&cinfo, &m_pDIBData, &sizeOfJPGBuffer);
cinfo.image_width = m_width;
cinfo.image_height = m_height;
cinfo.input_components = 3;
cinfo.in_color_space = JCS_RGB;
cinfo.jpeg_color_space = JCS_YCbCr;
jpeg_set_defaults(&cinfo);
jpeg_set_quality(&cinfo, jpgQuality, true);
jpeg_start_compress(&cinfo, true);
colorsSwapped = SetInternalFormat(RGB);
FlipVert();
dataInCharFromGLRGBQUAD = new unsigned char[m_width*m_height*3];
ConversionfromGLRGBQUADToUnsignedChar(dataInCharFromGLRGBQUAD);
row_stride = cinfo.image_width * cinfo.input_components;
while (cinfo.next_scanline < cinfo.image_height)
{
row_pointer[0] = &dataInCharFromGLRGBQUAD[cinfo.next_scanline * row_stride];
jpeg_write_scanlines(&cinfo, row_pointer, 1);
}
if (colorsSwapped)
SetInternalFormat(BGR);
jpeg_finish_compress(&cinfo);
jpeg_destroy_compress(&cinfo);
size = sizeOfJPGBuffer;
delete[] dataInCharFromGLRGBQUAD;
return m_pDIBData;
}
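For the read direction, libjpeg's memory source works the same way; a rough sketch with error handling elided (the buffer and size parameters are whatever holds your JPEG bytes):
unsigned char* ReadJPGBuffer(unsigned char* jpgBuffer, unsigned long jpgSize, int& width, int& height)
{
struct jpeg_decompress_struct cinfo;
struct jpeg_error_mgr jerr;
cinfo.err = jpeg_std_error(&jerr);
jpeg_create_decompress(&cinfo);
jpeg_mem_src(&cinfo, jpgBuffer, jpgSize); // read from memory instead of a FILE*
jpeg_read_header(&cinfo, TRUE);
jpeg_start_decompress(&cinfo);
width = cinfo.output_width;
height = cinfo.output_height;
int row_stride = cinfo.output_width * cinfo.output_components;
unsigned char* data = new unsigned char[row_stride * cinfo.output_height];
while (cinfo.output_scanline < cinfo.output_height)
{
unsigned char* row = data + cinfo.output_scanline * row_stride;
jpeg_read_scanlines(&cinfo, &row, 1);
}
jpeg_finish_decompress(&cinfo);
jpeg_destroy_decompress(&cinfo);
return data; // the caller owns this buffer and must delete[] it
}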
I'm working on video encoding that will be used in a Unity plugin. I have made image encoding work, but now I'm at the audio. So I'm trying audio only, into an MP4 file with AAC encoding. And I'm stuck: the resulting file does not contain anything. Also, from what I understand, AAC in ffmpeg only supports AV_SAMPLE_FMT_FLTP; that's why I use it. Here's my code:
Setup:
int initialize_encoding_audio(const char *filename)
{
int ret;
AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
avcodec_register_all();
av_register_all();
aud_codec = avcodec_find_encoder(aud_codec_id);
avcodec_register(aud_codec);
if (!aud_codec)
return COULD_NOT_FIND_AUD_CODEC;
aud_codec_context = avcodec_alloc_context3(aud_codec);
if (!aud_codec_context)
return CONTEXT_CREATION_ERROR;
aud_codec_context->bit_rate = 192000;
aud_codec_context->sample_rate = select_sample_rate(aud_codec);
aud_codec_context->sample_fmt = sample_fmt;
aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);
aud_codec_context->codec = aud_codec;
aud_codec_context->codec_id = aud_codec_id;
ret = avcodec_open2(aud_codec_context, aud_codec, NULL);
if (ret < 0)
return COULD_NOT_OPEN_AUD_CODEC;
outctx = avformat_alloc_context();
ret = avformat_alloc_output_context2(&outctx, NULL, "mp4", filename);
outctx->audio_codec = aud_codec;
outctx->audio_codec_id = aud_codec_id;
audio_st = avformat_new_stream(outctx, aud_codec);
audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
audio_st->codecpar->channels = aud_codec_context->channels;
audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
audio_st->codecpar->codec_id = aud_codec_id;
audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
audio_st->codecpar->format = sample_fmt;
audio_st->codecpar->frame_size = aud_codec_context->frame_size;
audio_st->codecpar->block_align = aud_codec_context->block_align;
audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;
outctx->streams = new AVStream*[1];
outctx->streams[0] = audio_st;
av_dump_format(outctx, 0, filename, 1);
if (!(outctx->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&outctx->pb, filename, AVIO_FLAG_WRITE) < 0)
return COULD_NOT_OPEN_FILE;
}
ret = avformat_write_header(outctx, NULL);
aud_frame = av_frame_alloc();
aud_frame->nb_samples = aud_codec_context->frame_size;
aud_frame->format = aud_codec_context->sample_fmt;
aud_frame->channel_layout = aud_codec_context->channel_layout;
int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
aud_codec_context->sample_fmt, 0);
av_frame_get_buffer(aud_frame, buffer_size / aud_codec_context->channels);
if (!aud_frame)
return COULD_NOT_ALLOCATE_FRAME;
aud_frame_counter = 0;
return 0;
}
Encoding:
int encode_audio_samples(uint8_t **aud_samples)
{
int ret;
int buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
aud_codec_context->sample_fmt, 0);
for (size_t i = 0; i < buffer_size / aud_codec_context->channels; i++)
{
aud_frame->data[0][i] = aud_samples[0][i];
aud_frame->data[1][i] = aud_samples[1][i];
}
aud_frame->pts = aud_frame_counter++;
ret = avcodec_send_frame(aud_codec_context, aud_frame);
if (ret < 0)
return ERROR_ENCODING_SAMPLES_SEND;
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);
pkt.stream_index = audio_st->index;
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == AVERROR(EAGAIN))
break;
else if (ret < 0)
return ERROR_ENCODING_SAMPLES_RECEIVE;
else
break;
}
return 0;
}
Finish encoding:
int finish_audio_encoding()
{
AVPacket pkt;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
fflush(stdout);
int ret = avcodec_send_frame(aud_codec_context, NULL);
if (ret < 0)
return ERROR_ENCODING_FRAME_SEND;
while (true)
{
ret = avcodec_receive_packet(aud_codec_context, &pkt);
if (!ret)
{
if (pkt.pts != AV_NOPTS_VALUE)
pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
if (pkt.dts != AV_NOPTS_VALUE)
pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);
av_write_frame(outctx, &pkt);
av_packet_unref(&pkt);
}
if (ret == -AVERROR(AVERROR_EOF))
break;
else if (ret < 0)
return ERROR_ENCODING_FRAME_RECEIVE;
}
av_write_trailer(outctx);
return 0;
}
Main:
void get_audio_frame(float_t *left_samples, float_t *right_samples, int frame_size, float* t, float* tincr, float* tincr2)
{
int j, i;
float v;
for (j = 0; j < frame_size; j++)
{
v = sin(*t);
*left_samples = v;
*right_samples = v;
left_samples++;
right_samples++;
*t += *tincr;
*tincr += *tincr2;
}
}
int main()
{
int frame_rate = 30; // this should be like 96000 / 1024 or something, I guess?
float t, tincr, tincr2;
initialize_encoding_audio("audio.mp4");
int sec = 50;
float_t** aud_samples;
int src_samples_linesize;
int src_nb_samples = 1024;
int src_channels = 2;
int ret = av_samples_alloc_array_and_samples((uint8_t***)&aud_samples, &src_samples_linesize, src_channels,
src_nb_samples, AV_SAMPLE_FMT_FLTP, 0);
t = 0;
tincr = 0;
tincr2 = 0;
for (size_t i = 0; i < frame_rate * sec; i++)
{
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
encode_audio_samples((uint8_t **)aud_samples);
}
finish_audio_encoding();
//cleanup();
return 0;
}
I guess the first thing that I would want to make sure I got right is the synthetic sound generation and how I transfer that to the AVFrame. Are my conversions correct? But feel free to point out anything that might be wrong.
Thanks in advance!
Edit: the whole source: http://pastebin.com/jYtmkhek
Edit2: Added initialization of tincr & tincr2
Unless I'm missing something from the pastebin, you forgot to initialize a few variables. You're using garbage to generate your samples.
float t, tincr, tincr2;
[...]
get_audio_frame(aud_samples[0], aud_samples[1], src_nb_samples, &t, &tincr, &tincr2);
You probably want to start with t=0 and increment by 2 * PI * frequency / sample rate for a sine wave.
Also, avformat_new_stream() creates the stream for you, don't do it with new.
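That is, drop the two outctx->streams lines entirely and copy the encoder parameters into the stream that avformat_new_stream() gives you. A sketch (COULD_NOT_ALLOCATE_STREAM is a made-up error code in the style of the question's other codes):
audio_st = avformat_new_stream(outctx, aud_codec);
if (!audio_st)
return COULD_NOT_ALLOCATE_STREAM; // made-up error code
avcodec_parameters_from_context(audio_st->codecpar, aud_codec_context); // replaces the field-by-field codecpar copying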
Update:
I removed all the C++ stuff to test this. Here's the code that works: pastebin
And here's the resulting file: audio.mp4
ffmpeg -i audio.mp4 -filter_complex "showwaves=s=640x120:mode=line:colors=white" -frames:v 1 wave.jpg
Diff:
1,6d0
< #include "encoder.h"
< #include <algorithm>
< #include <iterator>
<
< extern "C"
< {
14a9
> #include <math.h>
40,41c35,36
< SwsContext *sws_ctx;
< SwrContext *swr_ctx = NULL;
---
> struct SwsContext *sws_ctx;
> struct SwrContext *swr_ctx = NULL;
76,77c71,72
< AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
< AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
---
> enum AVCodecID aud_codec_id = AV_CODEC_ID_AAC;
> enum AVSampleFormat sample_fmt = AV_SAMPLE_FMT_FLTP;
125,126c120,121
< outctx->streams = new AVStream*[1];
< outctx->streams[0] = audio_st;
---
> //outctx->streams = new AVStream*[1];
> //outctx->streams[0] = audio_st;
182c177
< while (true)
---
> while (1)
216c211
< while (true)
---
> while (1)
291c286
< float t, tincr, tincr2;
---
> float t = 0, tincr = 2 * M_PI * 440.0 / 96000, tincr2 = 0;
317d311
< }
I'm developing a project where I need to convert 16-bit, 2-channel PCM sound into 32-bit, 2-channel IEEE float.
To do this I'm using the following code:
void CAudioConverter::ConvI16ToF32(BYTE* pcmFrom, BYTE* floatTo, int length)
{
short* src = reinterpret_cast<short*>(pcmFrom);
float* dst = reinterpret_cast<float*>(floatTo);
for (int n = 0; n < length; n++)
{
dst[n] = static_cast<float>(src[n]) / 32768.0f;
}
}
I have initialized the variable __pcm32_bytesPerFrame with:
WAVEFORMATEX* closestFormat;
ws->default_pb_dev->GetMixFormat(&closestFormat);
__pcm32_bytesPerFrame = closestFormat->nAvgBytesPerSec * (prm->samples_per_frame * 1000 / (prm->clock_rate * closestFormat->nChannels)) / 1000;
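As a sanity check with invented numbers: assuming a 48 kHz, stereo, 32-bit float mix format, nAvgBytesPerSec is 48000 * 2 * 4 = 384000; with prm->samples_per_frame = 960 and prm->clock_rate = 48000, the inner expression evaluates to a 10 ms frame, so __pcm32_bytesPerFrame comes out to 384000 * 10 / 1000 = 3840 bytes.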
strm->pb_max_frame_count is:
hr = ws->default_pb_dev->GetBufferSize(&ws->pb_max_frame_count);
I have a while loop in a dedicated thread that does something like:
hr = strm->default_pb_dev->GetCurrentPadding(&padding);
incoming_frame = __pcm32_bytesPerFrame / 4;
frame_to_render = strm->pb_max_frame_count - padding;
if (frame_to_render >= incoming_frame)
{
frame_to_render = incoming_frame;
} else {
/* Don't get new frame because there's no space */
frame_to_render = 0;
}
if (frame_to_render > 0)
{
pjmedia_frame frame;
hr = strm->pb_client->GetBuffer(frame_to_render, &cur_pb_buf);
if (FAILED(hr)) {
continue;
}
void* destBuffer = (void*)malloc(strm->bytes_per_frame*frame_to_render*sizeof(pj_uint16_t));
if (strm->fmt_id == PJMEDIA_FORMAT_L16) {
/* PCM mode */
frame.type = PJMEDIA_FRAME_TYPE_AUDIO;
frame.size = strm->bytes_per_frame;
frame.timestamp.u64 = strm->pb_timestamp.u64;
frame.bit_info = 0;
frame.buf = destBuffer;
}
status = (*strm->pb_cb)(strm->user_data, &frame);
CAudioConverter* conv = new CAudioConverter();
conv->ConvI16ToF32((BYTE*)destBuffer, cur_pb_buf, frame_to_render);
hr = strm->pb_client->ReleaseBuffer(frame_to_render, 0);
(...)
But, to send the sound to the WASAPI capture buffer I need a BYTE*.
How can I fill my 'floatTo' argument?
Any ideas?
Thanks
What about this:
void CAudioConverter::ConvI16ToF32(BYTE* pcmFrom, BYTE* floatTo, int length)
{
short* src = reinterpret_cast<short*>(pcmFrom);
float* dst = reinterpret_cast<float*>(floatTo);
for (int n = 0; n < length; n++)
{
dst[n] = static_cast<float>(src[n]) / 32768.0f;
}
}
Additionally, make sure length indicates the number of elements in pcmFrom and floatTo, not the number of bytes allocated. In your case, pcmFrom should have length*2 bytes allocated, and floatTo needs room for length*4 bytes.
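To make those units concrete, a usage sketch (the sample counts are invented for illustration):
const int frames = 480; // e.g. 10 ms at 48 kHz
const int samples = frames * 2; // two interleaved channels
BYTE* pcm = new BYTE[samples * sizeof(short)]; // 16-bit source: samples*2 bytes
BYTE* flt = new BYTE[samples * sizeof(float)]; // 32-bit destination: samples*4 bytes
// ... fill pcm with interleaved 16-bit samples ...
CAudioConverter conv;
conv.ConvI16ToF32(pcm, flt, samples); // pass the element count, not the byte count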
I am new to OpenNI and Kinect. As a beginner, I tried implementing the OpenNI SimpleViewer from the tutorial on OpenNI.org at this link: http://openni.org/docs2/Tutorial/smpl_simple_view.html
I have implemented the code as described in the tutorial, but I am still facing a problem: the program throws an exception and does not show any output.
I have linked all the libraries and installed the Kinect properly. I have also included the OpenCV library with OpenNI.
Here is the C++ code that I have implemented:
int main()
{
rc = context.InitFromXmlFile(XML_PATH,&errors);
if(rc == XN_STATUS_NO_NODE_PRESENT)
{
XnChar strError[1024];
errors.ToString(strError,1024);
printf("%s\n",strError);
return(rc);
}
else if(rc != XN_STATUS_OK)
{
printf("Open failed : %s\n", xnGetStatusString(rc));
return(rc);
}
rc = context.FindExistingNode(XN_NODE_TYPE_DEPTH,depth);
if(rc != XN_STATUS_OK)
{
printf("No Depth Node Found. Check XML!\n");
return -1;
}
depth.GetMetaData(depthMD);
image.GetMetaData(imageMD);
if(imageMD.FullXRes() != depthMD.FullXRes() || imageMD.FullYRes() != depthMD.FullYRes())
{
printf("The device depth and image resolution must be equal\n");
return -1;
}
if(imageMD.PixelFormat() != XN_PIXEL_FORMAT_RGB24)
{
printf("The Device Image Format must be RGB24\n");
return -1;
}
unsigned short textMapX = (((unsigned short)(depthMD.FullXRes() - 1) / 512) + 1) * 512;
unsigned short textMapY = (((unsigned short)(depthMD.FullYRes() - 1) / 512) + 1) * 512;
XnRGB24Pixel* pTextMap = (XnRGB24Pixel*) malloc(textMapX * textMapY * sizeof(XnRGB24Pixel));
rc = context.WaitOneUpdateAll(depth);
depth.GetMetaData(depthMD);
image.GetMetaData(imageMD);
const XnDepthPixel* pDepth = depthMD.Data();
const XnUInt8* pImage = imageMD.Data();
unsigned int imageScale = GL_WIN_SIZE_X/depthMD.FullXRes();
xnOSMemSet(depthHist,0,MAX * sizeof(float));
unsigned int numberOfPoints = 0;
for(XnUInt y = 0; y < depthMD.YRes(); ++ y)
{
for(XnUInt x = 0; x < depthMD.XRes(); ++x, ++pDepth)
{
if(*pDepth != 0)
{
depthHist[*pDepth]++;
numberOfPoints++;
}
}
}
for(int index = 1; index < MAX; index++)
{
depthHist[index] += depthHist[index - 1];
}
if(numberOfPoints)
{
for(int index = 1; index < MAX; index++)
{
depthHist[index] = (unsigned int)(256 * (1.0f - (depthHist[index]/numberOfPoints)));
}
}
return 0;
}
I am also having trouble understanding the later part of the code.
I am having problems understanding how the audio part of the SDL library works.
Now, I know that when you initialize it, you have to specify the frequency and a callback function, which I think is then called automatically at the given frequency.
Can anyone who has worked with the SDL library write a simple example that would use SDL_audio to generate a 440 Hz square wave (since it is the simplest waveform) at a sampling frequency of 44000 Hz?
The Introduction to SDL (2011 cached version) has a neat example of using the SDL Sound library that should get you started: http://www.libsdl.org/intro.en/usingsound.html
EDIT: Here is a working program that does what you asked for. I slightly modified the code found here: http://www.dgames.org/beep-sound-with-sdl/
#include <SDL/SDL.h>
#include <SDL/SDL_audio.h>
#include <algorithm>
#include <cmath>
#include <queue>
const int AMPLITUDE = 28000;
const int FREQUENCY = 44100;
struct BeepObject
{
double freq;
int samplesLeft;
};
class Beeper
{
private:
double v;
std::queue<BeepObject> beeps;
public:
Beeper();
~Beeper();
void beep(double freq, int duration);
void generateSamples(Sint16 *stream, int length);
void wait();
};
void audio_callback(void*, Uint8*, int);
Beeper::Beeper()
{
SDL_AudioSpec desiredSpec;
desiredSpec.freq = FREQUENCY;
desiredSpec.format = AUDIO_S16SYS;
desiredSpec.channels = 1;
desiredSpec.samples = 2048;
desiredSpec.callback = audio_callback;
desiredSpec.userdata = this;
SDL_AudioSpec obtainedSpec;
// you might want to look for errors here
SDL_OpenAudio(&desiredSpec, &obtainedSpec);
// start play audio
SDL_PauseAudio(0);
}
Beeper::~Beeper()
{
SDL_CloseAudio();
}
void Beeper::generateSamples(Sint16 *stream, int length)
{
int i = 0;
while (i < length) {
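// if no beeps are queued, fill the rest of the buffer with silence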
if (beeps.empty()) {
while (i < length) {
stream[i] = 0;
i++;
}
return;
}
BeepObject& bo = beeps.front();
int samplesToDo = std::min(i + bo.samplesLeft, length);
bo.samplesLeft -= samplesToDo - i;
while (i < samplesToDo) {
stream[i] = AMPLITUDE * std::sin(v * 2 * M_PI / FREQUENCY);
i++;
v += bo.freq;
}
if (bo.samplesLeft == 0) {
beeps.pop();
}
}
}
void Beeper::beep(double freq, int duration)
{
BeepObject bo;
bo.freq = freq;
bo.samplesLeft = duration * FREQUENCY / 1000;
SDL_LockAudio();
beeps.push(bo);
SDL_UnlockAudio();
}
void Beeper::wait()
{
int size;
do {
SDL_Delay(20);
SDL_LockAudio();
size = beeps.size();
SDL_UnlockAudio();
} while (size > 0);
}
void audio_callback(void *_beeper, Uint8 *_stream, int _length)
{
Sint16 *stream = (Sint16*) _stream;
int length = _length / 2;
Beeper* beeper = (Beeper*) _beeper;
beeper->generateSamples(stream, length);
}
int main(int argc, char* argv[])
{
SDL_Init(SDL_INIT_AUDIO);
int duration = 1000;
double Hz = 440;
Beeper b;
b.beep(Hz, duration);
b.wait();
return 0;
}
Good luck.
A boiled-down variant of the beeper-example, reduced to the bare minimum (with error-handling).
#include <math.h>
#include <SDL.h>
#include <SDL_audio.h>
const int AMPLITUDE = 28000;
const int SAMPLE_RATE = 44100;
void audio_callback(void *user_data, Uint8 *raw_buffer, int bytes)
{
Sint16 *buffer = (Sint16*)raw_buffer;
int length = bytes / 2; // 2 bytes per sample for AUDIO_S16SYS
int &sample_nr(*(int*)user_data);
for(int i = 0; i < length; i++, sample_nr++)
{
double time = (double)sample_nr / (double)SAMPLE_RATE;
buffer[i] = (Sint16)(AMPLITUDE * sin(2.0f * M_PI * 441.0f * time)); // render 441 HZ sine wave
}
}
int main(int argc, char *argv[])
{
if(SDL_Init(SDL_INIT_AUDIO) != 0) SDL_Log("Failed to initialize SDL: %s", SDL_GetError());
int sample_nr = 0;
SDL_AudioSpec want;
want.freq = SAMPLE_RATE; // number of samples per second
want.format = AUDIO_S16SYS; // sample type (here: signed short i.e. 16 bit)
want.channels = 1; // only one channel
want.samples = 2048; // buffer-size
want.callback = audio_callback; // function SDL calls periodically to refill the buffer
want.userdata = &sample_nr; // counter, keeping track of current sample number
SDL_AudioSpec have;
if(SDL_OpenAudio(&want, &have) != 0) SDL_LogError(SDL_LOG_CATEGORY_AUDIO, "Failed to open audio: %s", SDL_GetError());
if(want.format != have.format) SDL_LogError(SDL_LOG_CATEGORY_AUDIO, "Failed to get the desired AudioSpec");
SDL_PauseAudio(0); // start playing sound
SDL_Delay(1000); // wait while sound is playing
SDL_PauseAudio(1); // stop playing sound
SDL_CloseAudio();
return 0;
}
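To get the square wave the question actually asked for, replace the sine computation in the callback above with its sign, e.g. (440 Hz as requested):
buffer[i] = sin(2.0 * M_PI * 440.0 * time) >= 0 ? AMPLITUDE : -AMPLITUDE; // square wave: keep only the sign of the sine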
SDL 2 C example
The following code produces a sinusoidal sound; it is adapted from: https://codereview.stackexchange.com/questions/41086/play-some-sine-waves-with-sdl2
main.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <SDL2/SDL.h>
const double ChromaticRatio = 1.059463094359295264562;
const double Tao = 6.283185307179586476925;
Uint32 sampleRate = 48000;
Uint32 frameRate = 60;
Uint32 floatStreamLength = 1024;
Uint32 samplesPerFrame;
Uint32 msPerFrame;
double practicallySilent = 0.001;
Uint32 audioBufferLength = 48000;
float *audioBuffer;
SDL_atomic_t audioCallbackLeftOff;
Sint32 audioMainLeftOff;
Uint8 audioMainAccumulator;
SDL_AudioDeviceID AudioDevice;
SDL_AudioSpec audioSpec;
SDL_Event event;
SDL_bool running = SDL_TRUE;
typedef struct {
float *waveform;
Uint32 waveformLength;
double volume;
double pan;
double frequency;
double phase;
} voice;
void speak(voice *v) {
float sample;
Uint32 sourceIndex;
double phaseIncrement = v->frequency/sampleRate;
Uint32 i;
if (v->volume > practicallySilent) {
for (i = 0; (i + 1) < samplesPerFrame; i += 2) {
v->phase += phaseIncrement;
if (v->phase > 1)
v->phase -= 1;
sourceIndex = v->phase*v->waveformLength;
sample = v->waveform[sourceIndex]*v->volume;
audioBuffer[audioMainLeftOff+i] += sample*(1-v->pan);
audioBuffer[audioMainLeftOff+i+1] += sample*v->pan;
}
}
else {
for (i=0; i<samplesPerFrame; i+=1)
audioBuffer[audioMainLeftOff+i] = 0;
}
audioMainAccumulator++;
}
double getFrequency(double pitch) {
return pow(ChromaticRatio, pitch-57)*440;
}
int getWaveformLength(double pitch) {
return sampleRate / getFrequency(pitch)+0.5f;
}
void buildSineWave(float *data, Uint32 length) {
Uint32 i;
for (i=0; i < length; i++)
data[i] = sin(i*(Tao/length));
}
void logSpec(SDL_AudioSpec *as) {
printf(
" freq______%5d\n"
" format____%5d\n"
" channels__%5d\n"
" silence___%5d\n"
" samples___%5d\n"
" size______%5d\n\n",
(int) as->freq,
(int) as->format,
(int) as->channels,
(int) as->silence,
(int) as->samples,
(int) as->size
);
}
void logVoice(voice *v) {
printf(
" waveformLength__%d\n"
" volume__________%f\n"
" pan_____________%f\n"
" frequency_______%f\n"
" phase___________%f\n",
v->waveformLength,
v->volume,
v->pan,
v->frequency,
v->phase
);
}
void logWavedata(float *floatStream, Uint32 floatStreamLength, Uint32 increment) {
printf("\n\nwaveform data:\n\n");
Uint32 i=0;
for (i = 0; i < floatStreamLength; i += increment)
printf("%4d:%2.16f\n", i, floatStream[i]);
printf("\n\n");
}
void audioCallback(void *unused, Uint8 *byteStream, int byteStreamLength) {
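/* copy samples out of the shared audioBuffer ring; the main loop writes ahead of this read position */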
float* floatStream = (float*) byteStream;
Sint32 localAudioCallbackLeftOff = SDL_AtomicGet(&audioCallbackLeftOff);
Uint32 i;
for (i = 0; i < floatStreamLength; i++) {
floatStream[i] = audioBuffer[localAudioCallbackLeftOff];
localAudioCallbackLeftOff++;
if (localAudioCallbackLeftOff == audioBufferLength)
localAudioCallbackLeftOff = 0;
}
SDL_AtomicSet(&audioCallbackLeftOff, localAudioCallbackLeftOff);
}
int init(void) {
SDL_Init(SDL_INIT_AUDIO | SDL_INIT_TIMER);
SDL_AudioSpec want;
SDL_zero(want);
want.freq = sampleRate;
want.format = AUDIO_F32;
want.channels = 2;
want.samples = floatStreamLength;
want.callback = audioCallback;
AudioDevice = SDL_OpenAudioDevice(NULL, 0, &want, &audioSpec, SDL_AUDIO_ALLOW_FORMAT_CHANGE);
if (AudioDevice == 0) {
printf("\nFailed to open audio: %s\n", SDL_GetError());
return 1;
}
printf("want:\n");
logSpec(&want);
printf("audioSpec:\n");
logSpec(&audioSpec);
if (audioSpec.format != want.format) {
printf("\nCouldn't get Float32 audio format.\n");
return 2;
}
sampleRate = audioSpec.freq;
floatStreamLength = audioSpec.size / 4;
samplesPerFrame = sampleRate / frameRate;
msPerFrame = 1000 / frameRate;
audioMainLeftOff = samplesPerFrame * 8;
SDL_AtomicSet(&audioCallbackLeftOff, 0);
if (audioBufferLength % samplesPerFrame)
audioBufferLength += samplesPerFrame - (audioBufferLength % samplesPerFrame);
audioBuffer = malloc(sizeof(float) * audioBufferLength);
return 0;
}
int onExit(void) {
SDL_CloseAudioDevice(AudioDevice);
SDL_Quit();
return 0;
}
int main(int argc, char *argv[]) {
float syncCompensationFactor = 0.0016;
Sint32 mainAudioLead;
Uint32 i;
voice testVoiceA;
voice testVoiceB;
voice testVoiceC;
testVoiceA.volume = 1;
testVoiceB.volume = 1;
testVoiceC.volume = 1;
testVoiceA.pan = 0.5;
testVoiceB.pan = 0;
testVoiceC.pan = 1;
testVoiceA.phase = 0;
testVoiceB.phase = 0;
testVoiceC.phase = 0;
testVoiceA.frequency = getFrequency(45);
testVoiceB.frequency = getFrequency(49);
testVoiceC.frequency = getFrequency(52);
Uint16 C0waveformLength = getWaveformLength(0);
testVoiceA.waveformLength = C0waveformLength;
testVoiceB.waveformLength = C0waveformLength;
testVoiceC.waveformLength = C0waveformLength;
float sineWave[C0waveformLength];
buildSineWave(sineWave, C0waveformLength);
testVoiceA.waveform = sineWave;
testVoiceB.waveform = sineWave;
testVoiceC.waveform = sineWave;
if (init())
return 1;
SDL_Delay(42);
SDL_PauseAudioDevice(AudioDevice, 0);
while (running) {
while (SDL_PollEvent(&event) != 0) {
if (event.type == SDL_QUIT) {
running = SDL_FALSE;
}
}
for (i = 0; i < samplesPerFrame; i++)
audioBuffer[audioMainLeftOff+i] = 0;
speak(&testVoiceA);
speak(&testVoiceB);
speak(&testVoiceC);
if (audioMainAccumulator > 1) {
for (i=0; i<samplesPerFrame; i++) {
audioBuffer[audioMainLeftOff+i] /= audioMainAccumulator;
}
}
audioMainAccumulator = 0;
audioMainLeftOff += samplesPerFrame;
if (audioMainLeftOff == audioBufferLength)
audioMainLeftOff = 0;
mainAudioLead = audioMainLeftOff - SDL_AtomicGet(&audioCallbackLeftOff);
if (mainAudioLead < 0)
mainAudioLead += audioBufferLength;
if (mainAudioLead < floatStreamLength)
printf("An audio collision may have occured!\n");
SDL_Delay(mainAudioLead * syncCompensationFactor);
}
onExit();
return 0;
}
Compile and run:
gcc -ggdb3 -O3 -std=c99 -Wall -Wextra -pedantic -o main.out main.c -lSDL2 -lm
./main.out
It should be easy to turn this into a simple piano with: https://github.com/cirosantilli/cpp-cheat/blob/f734a2e76fbcfc67f707ae06be7a2a2ef5db47d1/c/interactive/audio_gen.c#L44
For wav manipulation, also check the official examples:
http://hg.libsdl.org/SDL/file/e12c38730512/test/testresample.c
http://hg.libsdl.org/SDL/file/e12c38730512/test/loopwave.c
Tested on Ubuntu 19.10, SDL 2.0.10.
This is a minimal example of how to play a sine wave in SDL2.
Make sure to call SDL_Init(SDL_INIT_AUDIO) before creating an instance of Sound.
Sound.h
#include <cstdint>
#include <SDL2/SDL.h>
class Sound
{
public:
Sound();
~Sound();
void play();
void stop();
const double m_sineFreq;
const double m_sampleFreq;
const double m_samplesPerSine;
uint32_t m_samplePos;
private:
static void SDLAudioCallback(void *data, Uint8 *buffer, int length);
SDL_AudioDeviceID m_device;
};
Sound.cpp
#include "Sound.h"
#include <cmath>
#include <iostream>
Sound::Sound()
: m_sineFreq(1000),
m_sampleFreq(44100),
m_samplesPerSine(m_sampleFreq / m_sineFreq),
m_samplePos(0)
{
SDL_AudioSpec wantSpec, haveSpec;
SDL_zero(wantSpec);
wantSpec.freq = m_sampleFreq;
wantSpec.format = AUDIO_U8;
wantSpec.channels = 1;
wantSpec.samples = 2048;
wantSpec.callback = SDLAudioCallback;
wantSpec.userdata = this;
m_device = SDL_OpenAudioDevice(NULL, 0, &wantSpec, &haveSpec, SDL_AUDIO_ALLOW_FORMAT_CHANGE);
if (m_device == 0)
{
std::cout << "Failed to open audio: " << SDL_GetError() << std::endl;
}
}
Sound::~Sound()
{
SDL_CloseAudioDevice(m_device);
}
void Sound::play()
{
SDL_PauseAudioDevice(m_device, 0);
}
void Sound::stop()
{
SDL_PauseAudioDevice(m_device, 1);
}
void Sound::SDLAudioCallback(void *data, Uint8 *buffer, int length)
{
Sound *sound = reinterpret_cast<Sound*>(data);
for(int i = 0; i < length; ++i)
{
buffer[i] = (std::sin(sound->m_samplePos / sound->m_samplesPerSine * M_PI * 2) + 1) * 127.5;
++sound->m_samplePos;
}
}
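A minimal usage sketch (the one-second duration is arbitrary; Sound.cpp above hard-codes a 1 kHz tone):
main.cpp
#include <SDL2/SDL.h>
#include "Sound.h"
int main(int argc, char* argv[])
{
SDL_Init(SDL_INIT_AUDIO); // must happen before Sound's constructor opens the device
Sound sound;
sound.play();
SDL_Delay(1000); // let the tone play for one second
sound.stop();
SDL_Quit();
return 0;
}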