Using C++, SDL2, SDL2_mixer and ffmpeg 2.
I initialized SDL2_mixer with a callback:
Mix_HookMusic(MusicPlayer, &g_audioPos);
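For context, a minimal sketch of that setup; the Mix_OpenAudio parameters are taken from the PS at the end (the 44100 Hz rate is an assumption), since the actual init code isn't shown:
// Open the mixer, then install the music hook so SDL_mixer pulls audio
// from our callback instead of playing music itself.
if (Mix_OpenAudio(44100, AUDIO_S16SYS, 2, 4096) < 0)
{
    // handle Mix_GetError()
}
Mix_HookMusic(MusicPlayer, &g_audioPos);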
I decode the audio from Ogg into an AVFrame* with this code:
while (av_read_frame(m_formatContext, &m_packet) >= 0)
{
    if (m_packet.stream_index == m_audioStream)
    {
        int audio_frame_finished;
        // Decode the packet; audio_frame_finished is non-zero once a complete frame is ready.
        avcodec_decode_audio4(m_audioCodecContext, m_audioFrame, &audio_frame_finished, &m_packet);
        if (!audio_frame_finished)
        {
            continue;
        }
    }
}
After this code I have a frame with samples in m_audioFrame->data[0] and m_audioFrame->linesize[0] == 4096.
Every now and then my callback gets called:
void MusicPlayer(void *_udata, Uint8 *_stream, int _len)
{
if(!g_audioData)
{
return;
}
AudioData* audio = reinterpret_cast<AudioData*>(g_audioData);
if(!audio || audio->Frame->linesize[0] == 0)
{
return;
}
SDL_memcpy((Uint8*)audio->Data + audio->Pos, (Uint8*)audio->Frame->data[0], audio->Frame->linesize[0]);
audio->Pos+= audio->Frame->linesize[0];
int rest = g_chunkSize - audio->Pos;//frame->linesize[0];
if(rest <= 0)
{
SDL_memcpy(_stream, audio->Data, _len);
audio->Pos = 0;
*(int*)_udata += _len;
}
}
_len == 8192, so I need to push two frames to fill the stream, but all I get is clicks from my speaker. What am I doing wrong?
PS: I tried reopening the mixer with Mix_OpenAudio(m_audioCodecContext->sample_rate, AUDIO_S16SYS, m_audioCodecContext->channels, 4096);. The interesting thing is that m_audioCodecContext->channels == 2, and when my callback is called, _len == 16384. I have no idea what to do. Please help!
You did not show how you perform the ffmpeg initialization; I'd speculate you forgot to specify the requested sample format:
aCodecCtx->request_sample_fmt = AV_SAMPLE_FMT_S16;
Take a look at the source code of my Karaoke lyrics editor which uses the SDL/ffmpeg audio player based on the latest ffmpeg (and hence decode_audio4): https://sourceforge.net/p/karlyriceditor/code/HEAD/tree/src/audioplayerprivate.cpp
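For illustration, a minimal sketch of where that request would go when opening the decoder; the variable names mirror the question's code, and m_audioCodec plus the rest of the sequence are assumptions, since the initialization wasn't shown:
// Hypothetical decoder setup: ask for interleaved signed 16-bit output so it
// matches the AUDIO_S16SYS format the mixer was opened with.
m_audioCodecContext->request_sample_fmt = AV_SAMPLE_FMT_S16;
if (avcodec_open2(m_audioCodecContext, m_audioCodec, NULL) < 0)
{
    // handle the error
}
// After decoding, check m_audioFrame->format: if it is still planar float
// (AV_SAMPLE_FMT_FLTP, common for Vorbis), convert with libswresample before
// copying samples into the SDL stream.
Note that request_sample_fmt is only a request; decoders are free to ignore it, which is why the post-decode format check matters.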
I have made a soft synthesizer in Visual Studio 2012 with C++, MFC and DirectX. Despite having added code to rapidly fade out the sound, I am experiencing popping/clicking when stopping playback (and also when starting).
I copied the DirectX code from this project: http://www.codeproject.com/Articles/7474/Sound-Generator-How-to-create-alien-sounds-using-m
I'm not sure if I'm allowed to cut and paste all the code from the Code Project. Basically I use the Player class from that project as is, the instance of this class is called m_player in my code. The Stop member function in that class calls the Stop function of LPDIRECTSOUNDBUFFER:
void Player::Stop()
{
DWORD status;
if (m_lpDSBuffer == NULL)
return;
HRESULT hres = m_lpDSBuffer->GetStatus(&status);
if (FAILED(hres))
EXCEP(DirectSoundErr::GetErrDesc(hres), "Player::Stop GetStatus");
if ((status & DSBSTATUS_PLAYING) == DSBSTATUS_PLAYING)
{
hres = m_lpDSBuffer->Stop();
if (FAILED(hres))
EXCEP(DirectSoundErr::GetErrDesc(hres), "Player::Stop Stop");
}
}
Here is the notification code (with some supporting code) in my project that fills the sound buffer. Note that the rend function always returns a double between -1 and 1, m_ev_smps = 441, m_n_evs = 3 and m_ev_sz = 882. subInit is called from OnInitDialog:
#define FD_STEP 0.0005
#define SC_NOT_PLYD 0
#define SC_PLYNG 1
#define SC_FD_OUT 2
#define SC_FD_IN 3
#define SC_STPNG 4
#define SC_STPD 5
bool CMainDlg::subInit()
// initialises various variables and the sound player
{
Player *pPlayer;
SOUNDFORMAT format;
std::vector<DWORD> events;
int t, buf_sz;
try
{
pPlayer = new Player();
pPlayer->SetHWnd(m_hWnd);
m_player = pPlayer;
m_player->Init();
format.NbBitsPerSample = 16;
format.NbChannels = 1;
format.SamplingRate = 44100;
m_ev_smps = 441;
m_n_evs = 3;
m_smps = new short[m_ev_smps];
m_smp_scale = (int)pow(2, format.NbBitsPerSample - 1);
m_max_tm = (int)((double)m_ev_smps * 1000.0 / format.SamplingRate); // ms of audio per event (441 samples at 44100 Hz = 10 ms)
m_ev_sz = m_ev_smps * format.NbBitsPerSample/8;
buf_sz = m_ev_sz * m_n_evs;
m_player->CreateSoundBuffer(format, buf_sz, 0);
m_player->SetSoundEventListener(this);
for(t = 0; t < m_n_evs; t++)
events.push_back((int)((t + 1)*m_ev_sz - m_ev_sz * 0.95));
m_player->CreateEventReadNotification(events);
m_status = SC_NOT_PLYD;
}
catch(MATExceptions &e)
{
MessageBox(e.getAllExceptionStr().c_str(), "Error initializing the sound player");
EndDialog(IDCANCEL);
return FALSE;
}
return TRUE;
}
void CMainDlg::Stop()
// stop playing
{
m_player->Stop();
m_status = SC_STPD;
}
void CMainDlg::OnBnClickedStop()
// causes fade out
{
m_status = SC_FD_OUT;
}
void CMainDlg::OnSoundPlayerNotify(int ev_num)
// render some sound samples and check for errors
{
ScopeGuardMutex guard(&m_mutex);
int s, end, begin, elapsed;
if (m_status != SC_STPNG)
{
begin = GetTickCount();
try
{
for(s = 0; s < m_ev_smps; s++)
{
m_smps[s] = (int)(m_synth->rend() * 32768 * m_fade);
if (m_status == SC_FD_IN)
{
m_fade += FD_STEP;
if (m_fade > 1)
{
m_fade = 1;
m_status = SC_PLYNG;
}
}
else if (m_status == SC_FD_OUT)
{
m_fade -= FD_STEP;
if (m_fade < 0)
{
m_fade = 0;
m_status = SC_STPNG;
}
}
}
}
catch(MATExceptions &e)
{
OutputDebugString(e.getAllExceptionStr().c_str());
}
try
{
m_player->Write(((ev_num + 1) % m_n_evs)*m_ev_sz, (unsigned char*)m_smps, m_ev_sz);
}
catch(MATExceptions &e)
{
OutputDebugString(e.getAllExceptionStr().c_str());
}
end = GetTickCount();
elapsed = end - begin;
if(elapsed > m_max_tm)
m_warn_msg.Format(_T("Warning! compute time: %dms"), elapsed);
else
m_warn_msg.Format(_T("compute time: %dms"), elapsed);
}
if (m_status == SC_STPNG)
Stop();
}
It seems like the buffer has not always finished sounding out when the stop button is clicked. I don't have any specific code that waits for the sound buffer to finish playing before the DirectX Stop is called. Other than that, sound playback works just fine, so at least I am initialising the player correctly, and the notification code is working in that respect.
Try replacing 32768 with 32767. Not by any means sure this is your issue, but it could overflow the positive short int range (assuming your audio is 16-bit) and cause a "pop".
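Not the poster's exact code, but a sketch of that conversion with explicit clamping, reusing the names from the question:
// Clamp before scaling so the result always fits in a signed 16-bit sample
// and can never wrap around to a loud pop.
double v = m_synth->rend() * m_fade;
if (v > 1.0)  v = 1.0;
if (v < -1.0) v = -1.0;
m_smps[s] = (short)(v * 32767.0);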
I got rid of the pops/clicks when stopping playback by filling the buffer with zeros after the fade out. However, I still get pops when re-starting playback, despite filling with zeros and then fading back in (it is frustrating).
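For anyone trying the same fix, a minimal sketch of the zero-fill idea, under the assumption that it runs in the notification handler once the fade has reached zero:
// Write one event's worth of silence so the buffer never replays stale
// samples before Stop() is finally called.
memset(m_smps, 0, m_ev_smps * sizeof(short));
m_player->Write(((ev_num + 1) % m_n_evs) * m_ev_sz,
                (unsigned char*)m_smps, m_ev_sz);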
I am trying to use DirectInput to manage mouse input, but when I try to get the X and Y coordinates of my mouse, the values are incorrect (negative, or seemingly random).
Here is the code I used:
bool System::frame()
{
    bool result;
    if (input->isButtonDown(BUTTON_L)) // if the left button is down
    {
        result = ReadMouse();
        if (!result)
            return false;
        ProcessInput();
    }
    return true;
}
bool System::ReadMouse()
{
HRESULT result;
//this->mouseState is a DIMOUSESTATE; this->mouse is an LPDIRECTINPUTDEVICE8
result = this->mouse->GetDeviceState(sizeof(DIMOUSESTATE), (LPVOID)&this->mouseState);
if(FAILED(result))
{
if((result == DIERR_INPUTLOST) || (result == DIERR_NOTACQUIRED))
this->mouse->Acquire();
else
return false;
}
return true;
}
void System::ProcessInput()
{
this->mouseX = this->mouseState.lX;
this->mouseY = this->mouseState.lY;
if(this->mouseX < 0)
this->mouseX = 0;
if(this->mouseY < 0)
this->mouseY = 0;
if(this->mouseX > this->ScreenWidth)
this->mouseX = this->ScreenWidth;
if(this->mouseY > this->ScreenHeight)
this->mouseY = this->ScreenHeight;
return;
}
My last test gives this->mouseX = -657 and this->mouseY = -36 instead of approximately 200 and 200. I checked the functions I use to initialize the mouse, and they seem to work (I followed a tutorial).
I think the reason is that DirectInput gives you relative data for the position of the mouse.
Please see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418272(v=vs.85).aspx for an explanation of how to interpret data from the mouse and how to switch to absolute mode.
It is recommended to use the Raw Input API instead of DirectInput.
(http://msdn.microsoft.com/en-us/library/windows/desktop/ms645536(v=vs.85).aspx)
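If you do stay with DirectInput, one option (a sketch assuming the device is left in its default relative axis mode) is to accumulate the deltas yourself in ProcessInput:
// In relative mode lX/lY are movement deltas, not positions, so keep a
// running position and clamp it to the screen bounds.
this->mouseX += this->mouseState.lX;
this->mouseY += this->mouseState.lY;
if (this->mouseX < 0) this->mouseX = 0;
if (this->mouseY < 0) this->mouseY = 0;
if (this->mouseX > this->ScreenWidth)  this->mouseX = this->ScreenWidth;
if (this->mouseY > this->ScreenHeight) this->mouseY = this->ScreenHeight;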
I am trying to make a little video player that has a seek bar (with ffmpeg, of course). For that I need a function that, using data from the frame and/or packet, gives me the current time in the video so it can be set on the seek slider.
It should work like this:
my_time = get_cur_time()
seek(my_time + 10)
assert(my_time+10 == get_cur_time())
seek(my_time - 10)
assert(my_time-10 == get_cur_time())
I do understand that ffmpeg does not support precise seeking, so equality here means "something reasonably close".
Here is the code I have used for this so far:
frame_time = frame->pts*av_q2d(video_dec_ctx->time_base) * 1000;
where frame is AVFrame and video_dec_ctx is AVCodecContext.
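One thing worth double-checking (an assumption on my part, since the stream setup isn't shown): the decoded frame's timestamp (frame->pts, or frame->pkt_pts on older ffmpeg builds) is normally expressed in the stream's time_base rather than the codec context's, so the calculation would be:
// Hypothetical variant using the stream's time_base for the pts units.
double frame_time_ms = frame->pts
    * av_q2d(fmt_ctx->streams[video_stream->index]->time_base) * 1000.0;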
And for seeking:
int fn = ffmpeg::av_rescale(tsms,fmt_ctx->streams[video_stream->index]->time_base.den,
fmt_ctx->streams[video_stream->index]->time_base.num);
int frame = fn/1000;
printf("\t avformat_seek_file to %d\n",frame);
int flags = AVSEEK_FLAG_FRAME;
if (frame < this->frame->pts)
flags |= AVSEEK_FLAG_BACKWARD;
if(ffmpeg::av_seek_frame(fmt_ctx,video_stream->index,frame,flags))
{
printf("\nFailed to seek for time %d",frame);
return false;
}
avcodec_flush_buffers(video_dec_ctx);
int got_frame = 0;
do
{
    if (av_read_frame(fmt_ctx, &pkt) >= 0)
    {
        decode_packet_ro(&got_frame, 0);
        av_free_packet(&pkt);
    }
    else
    {
        read_cache = true;
        pkt.data = NULL;
        pkt.size = 0;
        break;
    }
} while (!(got_frame && this->frame->pts >= frame));
The code does forward seeking passably, but after any attempt at backward seeking my second assertion fails. After seeking to a previous position, my method of getting the time never returns a position earlier than the one before the seek. That makes my seek slider behave grossly incorrectly.
I've been trying to write a class that derives from FramedSource in Live555 that will allow me to stream live data from my D3D9 application to an MP4 or similar.
What I do each frame is grab the backbuffer into system memory as a texture, then convert it from RGB -> YUV420P, then encode it using x264, then ideally pass the NAL packets on to Live555. I made a class called H264FramedSource that derived from FramedSource basically by copying the DeviceSource file. Instead of the input being an input file, I've made it a NAL packet which I update each frame.
I'm quite new to codecs and streaming, so I could be doing everything completely wrong. In each doGetNextFrame() should I be grabbing the NAL packet and doing something like
memcpy(fTo, nal->p_payload, nal->i_payload)
I assume that the payload is my frame data in bytes? If anybody has an example of a class derived from FramedSource that is at least close to what I'm trying to do, I would love to see it; this is all new to me and a little tricky to figure out. Live555's documentation is pretty much the code itself, which doesn't exactly make it easy to work out what's happening.
Ok, I finally got some time to spend on this and got it working! I'm sure there are others who will be begging to know how to do it so here it is.
You will need your own FramedSource to take each frame, encode it, and prepare it for streaming; I will provide some of the source code for that below.
Essentially throw your FramedSource into the H264VideoStreamDiscreteFramer, then throw that into the H264RTPSink. Something like this:
scheduler = BasicTaskScheduler::createNew();
env = BasicUsageEnvironment::createNew(*scheduler);
framedSource = H264FramedSource::createNew(*env, 0,0);
h264VideoStreamDiscreteFramer
= H264VideoStreamDiscreteFramer::createNew(*env, framedSource);
// initialise the RTP Sink stuff here, look at
// testH264VideoStreamer.cpp to find out how
videoSink->startPlaying(*h264VideoStreamDiscreteFramer, NULL, videoSink);
env->taskScheduler().doEventLoop();
Now, in your main render loop, hand the backbuffer you've saved to system memory over to your FramedSource so it can be encoded, etc. For more info on how to set up the encoding side, check out this answer: How does one encode a series of images into H264 using the x264 C API?
My implementation is very much in a hacky state and is yet to be optimised at all, my d3d application runs at around 15fps due to the encoding, ouch, so I will have to look into this. But for all intents and purposes this StackOverflow question is answered because I was mostly after how to stream it. I hope this helps other people.
As for my FramedSource it looks a little something like this
concurrent_queue<x264_nal_t> m_queue;
SwsContext* convertCtx;
x264_param_t param;
x264_t* encoder;
x264_picture_t pic_in, pic_out;
EventTriggerId H264FramedSource::eventTriggerId = 0;
unsigned H264FramedSource::FrameSize = 0;
unsigned H264FramedSource::referenceCount = 0;
int W = 720;
int H = 960;
H264FramedSource* H264FramedSource::createNew(UsageEnvironment& env,
unsigned preferredFrameSize,
unsigned playTimePerFrame)
{
return new H264FramedSource(env, preferredFrameSize, playTimePerFrame);
}
H264FramedSource::H264FramedSource(UsageEnvironment& env,
unsigned preferredFrameSize,
unsigned playTimePerFrame)
: FramedSource(env),
fPreferredFrameSize(fMaxSize),
fPlayTimePerFrame(playTimePerFrame),
fLastPlayTime(0),
fCurIndex(0)
{
if (referenceCount == 0)
{
}
++referenceCount;
x264_param_default_preset(&param, "veryfast", "zerolatency");
param.i_threads = 1;
param.i_width = 720;
param.i_height = 960;
param.i_fps_num = 60;
param.i_fps_den = 1;
// Intra refres:
param.i_keyint_max = 60;
param.b_intra_refresh = 1;
//Rate control:
param.rc.i_rc_method = X264_RC_CRF;
param.rc.f_rf_constant = 25;
param.rc.f_rf_constant_max = 35;
param.i_sps_id = 7;
//For streaming:
param.b_repeat_headers = 1;
param.b_annexb = 1;
x264_param_apply_profile(&param, "baseline");
encoder = x264_encoder_open(&param);
pic_in.i_type = X264_TYPE_AUTO;
pic_in.i_qpplus1 = 0;
pic_in.img.i_csp = X264_CSP_I420;
pic_in.img.i_plane = 3;
x264_picture_alloc(&pic_in, X264_CSP_I420, 720, 960);
convertCtx = sws_getContext(720, 960, PIX_FMT_RGB24, 720, 960, PIX_FMT_YUV420P, SWS_FAST_BILINEAR, NULL, NULL, NULL);
if (eventTriggerId == 0)
{
eventTriggerId = envir().taskScheduler().createEventTrigger(deliverFrame0);
}
}
H264FramedSource::~H264FramedSource()
{
--referenceCount;
if (referenceCount == 0)
{
// Reclaim our 'event trigger'
envir().taskScheduler().deleteEventTrigger(eventTriggerId);
eventTriggerId = 0;
}
}
void H264FramedSource::AddToBuffer(uint8_t* buf, int surfaceSizeInBytes)
{
uint8_t* surfaceData = (new uint8_t[surfaceSizeInBytes]);
memcpy(surfaceData, buf, surfaceSizeInBytes);
int srcstride = W*3;
sws_scale(convertCtx, &surfaceData, &srcstride,0, H, pic_in.img.plane, pic_in.img.i_stride);
x264_nal_t* nals = NULL;
int i_nals = 0;
int frame_size = -1;
frame_size = x264_encoder_encode(encoder, &nals, &i_nals, &pic_in, &pic_out);
static bool finished = false;
if (frame_size >= 0)
{
static bool alreadydone = false;
if(!alreadydone)
{
x264_encoder_headers(encoder, &nals, &i_nals);
alreadydone = true;
}
for(int i = 0; i < i_nals; ++i)
{
m_queue.push(nals[i]);
}
}
delete [] surfaceData;
surfaceData = NULL;
envir().taskScheduler().triggerEvent(eventTriggerId, this);
}
void H264FramedSource::doGetNextFrame()
{
deliverFrame();
}
void H264FramedSource::deliverFrame0(void* clientData)
{
((H264FramedSource*)clientData)->deliverFrame();
}
void H264FramedSource::deliverFrame()
{
x264_nal_t nalToDeliver;
if (fPlayTimePerFrame > 0 && fPreferredFrameSize > 0) {
if (fPresentationTime.tv_sec == 0 && fPresentationTime.tv_usec == 0) {
// This is the first frame, so use the current time:
gettimeofday(&fPresentationTime, NULL);
} else {
// Increment by the play time of the previous data:
unsigned uSeconds = fPresentationTime.tv_usec + fLastPlayTime;
fPresentationTime.tv_sec += uSeconds/1000000;
fPresentationTime.tv_usec = uSeconds%1000000;
}
// Remember the play time of this data:
fLastPlayTime = (fPlayTimePerFrame*fFrameSize)/fPreferredFrameSize;
fDurationInMicroseconds = fLastPlayTime;
} else {
// We don't know a specific play time duration for this data,
// so just record the current time as being the 'presentation time':
gettimeofday(&fPresentationTime, NULL);
}
if(!m_queue.empty())
{
m_queue.wait_and_pop(nalToDeliver);
uint8_t* newFrameDataStart = (uint8_t*)(nalToDeliver.p_payload);
unsigned newFrameSize = nalToDeliver.i_payload;
// Deliver the data here:
if (newFrameSize > fMaxSize) {
fFrameSize = fMaxSize;
fNumTruncatedBytes = newFrameSize - fMaxSize;
}
else {
fFrameSize = newFrameSize;
}
memcpy(fTo, newFrameDataStart, fFrameSize); // copy only the (possibly truncated) size computed above
FramedSource::afterGetting(this);
}
}
Oh and for those who want to know what my concurrent queue is, here it is, and it works brilliantly http://www.justsoftwaresolutions.co.uk/threading/implementing-a-thread-safe-queue-using-condition-variables.html
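If you'd rather not follow the link, here is a minimal C++11 sketch of such a queue in the spirit of that article (the article itself uses Boost), assuming only the push/empty/wait_and_pop operations used above are needed:
#include <queue>
#include <mutex>
#include <condition_variable>

// Minimal thread-safe queue: producers push, the consumer blocks until data
// is available.
template <typename T>
class concurrent_queue
{
public:
    void push(const T& value)
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_queue.push(value);
        }
        m_cond.notify_one();
    }
    bool empty() const
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        return m_queue.empty();
    }
    void wait_and_pop(T& value)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cond.wait(lock, [this] { return !m_queue.empty(); });
        value = m_queue.front();
        m_queue.pop();
    }
private:
    std::queue<T> m_queue;
    mutable std::mutex m_mutex;
    std::condition_variable m_cond;
};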
Enjoy and good luck!
The deliverFrame method lacks the following check at its start:
if (!isCurrentlyAwaitingData()) return;
See DeviceSource.cpp in the LIVE555 sources for reference.
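Applied to the code above, that would look something like:
void H264FramedSource::deliverFrame()
{
    // Guard from DeviceSource.cpp: only deliver when the downstream object
    // is actually awaiting data; otherwise just return and wait for the
    // next doGetNextFrame() call.
    if (!isCurrentlyAwaitingData()) return;
    // ... the rest of deliverFrame() as above ...
}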
I have developed a TTS engine in .NET. Now I want to expose it over the web.
I have used base64 string encoding to transfer the WAV format, but it is slow when I pass longer text.
Now I'm considering building some MP3 streaming (maybe with NAudio) where I would convert the WAV-formatted MemoryStream into an MP3 stream and pass it to the client. Does anyone have experience with this?
Does anyone have experience converting a WAV MemoryStream to an MP3 MemoryStream with NAudio?
public class MP3StreamingPanel2 : UserControl
{
enum StreamingPlaybackState
{
Stopped,
Playing,
Buffering,
Paused
}
private BufferedWaveProvider bufferedWaveProvider;
private IWavePlayer waveOut;
private volatile StreamingPlaybackState playbackState;
private volatile bool fullyDownloaded;
private HttpWebRequest webRequest;
public void StreamMP32(string url)
{
Configuration config = ConfigurationManager.OpenExeConfiguration(ConfigurationUserLevel.None);
SettingsSection section = (SettingsSection)config.GetSection("system.net/settings");
section.HttpWebRequest.UseUnsafeHeaderParsing = true;
config.Save();
this.fullyDownloaded = false;
webRequest = (HttpWebRequest)WebRequest.Create(url);
int metaInt = 0; // blocksize of mp3 data
webRequest.Headers.Clear();
webRequest.Headers.Add("GET", "/ HTTP/1.0");
// needed to receive metadata informations
webRequest.Headers.Add("Icy-MetaData", "1");
webRequest.UserAgent = "WinampMPEG/5.09";
HttpWebResponse resp = null;
try
{
resp = (HttpWebResponse)webRequest.GetResponse();
}
catch (WebException e)
{
if (e.Status != WebExceptionStatus.RequestCanceled)
{
//ShowError(e.Message);
}
return;
}
byte[] buffer = new byte[16384 * 4]; // needs to be big enough to hold a decompressed frame
try
{
// read blocksize to find metadata block
metaInt = Convert.ToInt32(resp.GetResponseHeader("icy-metaint"));
}
catch
{
}
IMp3FrameDecompressor decompressor = null;
try
{
using (var responseStream = resp.GetResponseStream())
{
var readFullyStream = new ReadFullyStream(responseStream);
readFullyStream.metaInt = metaInt;
do
{
if (bufferedWaveProvider != null && bufferedWaveProvider.BufferLength - bufferedWaveProvider.BufferedBytes < bufferedWaveProvider.WaveFormat.AverageBytesPerSecond / 4)
{
Debug.WriteLine("Buffer getting full, taking a break");
Thread.Sleep(500);
}
else
{
Mp3Frame frame = null;
try
{
frame = Mp3Frame.LoadFromStream(readFullyStream, true);
}
catch (EndOfStreamException)
{
this.fullyDownloaded = true;
// reached the end of the MP3 file / stream
break;
}
catch (WebException)
{
// probably we have aborted download from the GUI thread
break;
}
if (decompressor == null)
{
// don't think these details matter too much - just help ACM select the right codec
// however, the buffered provider doesn't know what sample rate it is working at
// until we have a frame
WaveFormat waveFormat = new Mp3WaveFormat(frame.SampleRate, frame.ChannelMode == ChannelMode.Mono ? 1 : 2, frame.FrameLength, frame.BitRate);
decompressor = new AcmMp3FrameDecompressor(waveFormat);
this.bufferedWaveProvider = new BufferedWaveProvider(decompressor.OutputFormat);
this.bufferedWaveProvider.BufferDuration = TimeSpan.FromSeconds(20); // allow us to get well ahead of ourselves
//this.bufferedWaveProvider.BufferedDuration = 250;
}
int decompressed = decompressor.DecompressFrame(frame, buffer, 0);
//Debug.WriteLine(String.Format("Decompressed a frame {0}", decompressed));
bufferedWaveProvider.AddSamples(buffer, 0, decompressed);
}
} while (playbackState != StreamingPlaybackState.Stopped);
Debug.WriteLine("Exiting");
// was doing this in a finally block, but for some reason
// we are hanging on response stream .Dispose so never get there
decompressor.Dispose();
}
}
finally
{
if (decompressor != null)
{
decompressor.Dispose();
}
}
}
}
NAudio does not include an MP3 encoder. When I need to encode MP3 I use lame.exe. If you don't want to go via a file, lame.exe allows you to read from stdin and write to stdout, so if you redirect standard in and out on the process you can convert on the fly.