Currently we play 5 sounds one after another, using the Wave output and fetching the data from a TCP socket; playBuffer (below) is what plays each sound. But there is latency between the end of one sound and the start of the next. I don't want any gap between the 5 clips: each should start immediately after the previous one. Is there any way to do that with PlaySound, or can I achieve it using some other library in C++? I am currently on a Windows system. I would really appreciate some help; I've been searching for hours for a solution.
// AudioTask.cpp : Defines the entry point for the console application.
// Adapted from http://www.cplusplus.com/forum/beginner/88542/
#include "stdafx.h"
#define _WIN32_WINNT 0x0500
#include <windows.h>
#include <mmsystem.h>
#include <iostream>
#include <fstream>
#include <conio.h>
#include <math.h>
#include <stdint.h>
#define PI 3.14159265
using namespace std;
typedef struct WAV_HEADER1 {
uint8_t RIFF[4]; // = { 'R', 'I', 'F', 'F' };
uint32_t ChunkSize;
uint8_t WAVE[4]; // = { 'W', 'A', 'V', 'E' };
uint8_t fmt[4]; // = { 'f', 'm', 't', ' ' };
uint32_t Subchunk1Size = 16;
uint16_t AudioFormat = 1;
uint16_t NumOfChan = 1;
uint32_t SamplesPerSec = 16000;
uint32_t bytesPerSec = 16000 * 2;
uint16_t blockAlign = 2;
uint16_t bitsPerSample = 16;
uint8_t Subchunk2ID[4]; // = { 'd', 'a', 't', 'a' };
uint32_t Subchunk2Size;
} wav_hdr1;
void playBuffer(short* audioSamplesData1, short* audioSamplesData2, int count)
{
static_assert(sizeof(wav_hdr1) == 44, "");
wav_hdr1 wav;
wav.NumOfChan = 2;
wav.SamplesPerSec = 44100;
wav.bytesPerSec = 176400;
wav.blockAlign = 4;
wav.bitsPerSample = 16;
// Fixed values
memcpy(wav.RIFF, "RIFF", 4);
memcpy(wav.WAVE, "WAVE", 4);
memcpy(wav.fmt, "fmt ", 4);
memcpy(wav.Subchunk2ID, "data", 4);
wav.Subchunk2Size = count * 2 * 2; // raw sample bytes: count frames * 2 channels * 2 bytes per sample
wav.ChunkSize = wav.Subchunk2Size + sizeof(wav_hdr1) - 8; // everything after the RIFF size field (the original "ChunkSize - 20" overstated the data size by 16 bytes)
char* data = new char[44 + (count * 2 * 2)];
memcpy(data, &wav, sizeof(wav));
int index = sizeof(wav);
//constexpr double max_amplitude = 32766;
for (int i = 0; i < count; i++)
{
short value = audioSamplesData1 ? audioSamplesData1[i] : 0;
memcpy(data + index, &value, sizeof(short));
index += sizeof(short);
value = audioSamplesData2 ? audioSamplesData2[i] : 0;
memcpy(data + index, &value, sizeof(short));
index += sizeof(short);
}
PlaySound((char*)data, GetModuleHandle(0), SND_MEMORY | SND_SYNC);
delete[] data; // SND_SYNC blocks until playback finishes, so the buffer can be freed here
}
void performAction(short audioSamplesData1[], short audioSamplesData2[], int count)
{
playBuffer(audioSamplesData1, audioSamplesData1, count); // sound A on both channels
playBuffer(audioSamplesData2, audioSamplesData2, count); // sound B on both channels
playBuffer(audioSamplesData1, NULL, count); // sound A on the left channel only
playBuffer(NULL, audioSamplesData2, count); // sound B on the right channel only
playBuffer(audioSamplesData1, audioSamplesData2, count); // A on the left, B on the right
}
class Wave {
public:
Wave(char * filename);
~Wave();
void play(bool async = true);
bool isok();
private:
char * buffer;
bool ok;
HINSTANCE HInstance;
int numberOfAudioBytes;
};
Wave::Wave(char * filename)
{
ok = false;
buffer = 0;
HInstance = GetModuleHandle(0);
numberOfAudioBytes = 0;
ifstream infile(filename, ios::binary);
if (!infile)
{
std::cout << "Wave::file error: " << filename << std::endl;
return;
}
infile.seekg(0, ios::end); // get length of file
int length = infile.tellg();
buffer = new char[length]; // allocate memory
infile.seekg(0, ios::beg); // position to start of file
infile.read(buffer, length); // read entire file
std::cout << "Number of elements in buffer : " << length << std::endl;
numberOfAudioBytes = length;
infile.close();
ok = true;
}
Wave::~Wave()
{
PlaySound(NULL, 0, 0); // STOP ANY PLAYING SOUND
delete[] buffer; // before deleting buffer.
}
void Wave::play(bool async)
{
if (!ok)
return;
// Create two arrays of sound data to use as a test for performing the task we need to do.
const int SAMPLE_RATE = 44100; // 44.1 kHz
const int FILE_LENGTH_IN_SECONDS = 3;
const int NUMBER_OF_SAMPLES = SAMPLE_RATE*FILE_LENGTH_IN_SECONDS; // Number of elements of audio data in the array, 132300 in this case.
std::cout << "NUMBER_OF_SAMPLES : " << NUMBER_OF_SAMPLES << std::endl;
// static: two 132300-element short arrays (~517 KB) would eat most of the default 1 MB stack if kept as locals
static short audioSamplesData_A[NUMBER_OF_SAMPLES];
static short audioSamplesData_B[NUMBER_OF_SAMPLES];
float maxVolume = 32767.0; // 2^15 - 1
float frequencyHz_A = 500.0;
float frequencyHz_B = 250.0;
for (int i = 0; i < NUMBER_OF_SAMPLES; i++)
{
float pcmValue_A = sin(i*frequencyHz_A / SAMPLE_RATE * PI * 2);
float pcmValue_B = sin(i*frequencyHz_B / SAMPLE_RATE * PI * 2);
short pcmValueShort_A = (short)(maxVolume * pcmValue_A);
short pcmValueShort_B = (short)(maxVolume * pcmValue_B);
//short pcmValueShort_B = (short)(0.5*maxVolume*(pcmValue_A + pcmValue_B));
audioSamplesData_A[i] = pcmValueShort_A; // This is what you need to play.
audioSamplesData_B[i] = pcmValueShort_B; // This is what you need to play.
// waveData += pack('h', pcmValueShort_A) - Python code from Python equivalent program, perhaps we need something similar.
// See enclosed "Py Mono Stereo.py" file or visit https://swharden.com/blog/2011-07-08-create-mono-and-stereo-wave-files-with-python/
}
// The task that needs to be done for this project:
// The audio data is available in the form of an array of shorts (audioSamplesData_A and audioSamplesData_B created above).
// What needs to happen is this audio data (audioSamplesData_A and audioSamplesData_B) must each be played so we can hear them.
// When this task is over, there will be no need for any WAV file anywhere, the goal is NOT to produce a WAV file. The goal is
// to take the audio data in the form of audioSamplesData_A and play it from memory somehow.
// We need to take the input data (audioSamplesData_A and audioSamplesData_B) and play the same sounds that the 5 WAV files are currently playing, but
// in the end, we will no longer need those WAV files.
// You do NOT need to create any new files.
// In the end, you do not need to read any files either.
// In the final project, all you will need is this current main.cpp file. You run main.cpp and you hear the 5 sounds.
// The 5 sounds, are created BY C++ here in this file (see loop above).
// Display the first 100 elements for one of the audio samples array
for (int i = 0; i < 100; i++)
{
//std::cout << "i = " << i << ", audioSamplesData_B[i] : " << audioSamplesData_B[i] << std::endl;
}
// Display the first 100 elements for the serialized buffer of WAV header data + some audio data, all coming from one of the WAV files on the disk.
for (int i = 0; i < 100; i++) // Last 6 elements is where audio data begins. First 44 elements are WAV header data.
{
//std::cout << "i = " << i << ", buffer[i] : " << (int) buffer[i] << std::endl;
}
performAction(audioSamplesData_A, audioSamplesData_B, NUMBER_OF_SAMPLES);
// Play the sample sound, the one obtained from the WAV file on the disk, not the one created from the audio samples created above.
//PlaySound((char*)(&audioSamplesData_A[0]), HInstance, SND_MEMORY | SND_SYNC);
//PlaySound((char*)audioSamplesData_B, HInstance, SND_MEMORY | SND_SYNC);
//PlaySound((char*)audioSamplesData_AB, HInstance, SND_MEMORY | SND_SYNC);
//PlaySound((char*)buffer, HInstance, SND_MEMORY | SND_SYNC);
}
bool Wave::isok()
{
return ok;
}
int main(int argc, char *argv[]) {
std::cout << "Trying to play sound ...\n";
// Load the WAV files from the disk. These files are here only to help you understand what we need. In the end, we will no longer need them.
Wave outputA("outputA.WAV"); // Audio file equivalent to audioSamplesData_A curve generated in the loop above.
Wave outputB("outputB.WAV"); // Audio file equivalent to audioSamplesData_B curve generated in the loop above.
Wave outputALeftOnly("outputALeftOnly.WAV"); // Audio file that plays sound A on the left only, must be able to take audioSamplesData_A and somehow make it left only.
Wave outputBRightOnly("outputBRightOnly.WAV"); // Audio file that plays sound B on the right only, must be able to take audioSamplesData_B and somehow make it right only.
Wave outputALeftOutputBRight("outputALeftOutputBRight.WAV"); // Must be able to take both audioSamplesData_A and audioSamplesData_B and make it play different sounds in left and right.
// Play the WAV files from the disk, either all of them or a subset of them.
outputA.play(0);
//outputB.play(0);
//outputALeftOnly.play(0);
//outputBRightOnly.play(0);
//outputALeftOutputBRight.play(0);
std::cout << "press key to exit";
while (1) {} // Loop to prevent command line terminal from closing automatically.
return 0;
}
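One way to remove the gap entirely (a hedged sketch, not from the original post): rather than building a WAV image and calling PlaySound once per clip, either concatenate all five clips into a single in-memory WAV and submit one PlaySound call, or open the device with the low-level waveOut API and queue all five raw PCM buffers up front. Buffers queued with waveOutWrite play back-to-back with no audible gap:
#include <windows.h>
#include <mmsystem.h>
#include <vector>
#pragma comment(lib, "winmm.lib")
// clips[i] points to framesPerClip interleaved stereo frames (16-bit PCM), as produced above.
void playQueued(short* clips[], int clipCount, int framesPerClip)
{
    WAVEFORMATEX fmt = {};
    fmt.wFormatTag = WAVE_FORMAT_PCM;
    fmt.nChannels = 2;
    fmt.nSamplesPerSec = 44100;
    fmt.wBitsPerSample = 16;
    fmt.nBlockAlign = fmt.nChannels * fmt.wBitsPerSample / 8;
    fmt.nAvgBytesPerSec = fmt.nSamplesPerSec * fmt.nBlockAlign;
    HWAVEOUT hwo;
    if (waveOutOpen(&hwo, WAVE_MAPPER, &fmt, 0, 0, CALLBACK_NULL) != MMSYSERR_NOERROR)
        return;
    // Prepare and queue every clip before playback; the device plays them seamlessly in order.
    std::vector<WAVEHDR> hdrs(clipCount);
    for (int i = 0; i < clipCount; i++)
    {
        hdrs[i] = {};
        hdrs[i].lpData = (LPSTR)clips[i];
        hdrs[i].dwBufferLength = framesPerClip * fmt.nBlockAlign;
        waveOutPrepareHeader(hwo, &hdrs[i], sizeof(WAVEHDR));
        waveOutWrite(hwo, &hdrs[i], sizeof(WAVEHDR));
    }
    while (!(hdrs[clipCount - 1].dwFlags & WHDR_DONE)) // crude wait for the last buffer
        Sleep(10);
    for (int i = 0; i < clipCount; i++)
        waveOutUnprepareHeader(hwo, &hdrs[i], sizeof(WAVEHDR));
    waveOutClose(hwo);
}
With the five stereo buffers built once, a single playQueued call could replace the five PlaySound calls in performAction.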
Related
I am trying to generate a simple, constant sine tone using SDL_audio. I have a small helper class that can be called to turn the tone on/off, change the frequency, and change the wave shape. I have followed some examples I could find on the web and got the following:
beeper.h
#pragma once
#include <SDL.h>
#include <SDL_audio.h>
#include <cmath>
#include "logger.h"
class Beeper {
private:
//Should there be sound right now
bool soundOn = true;
//Type of wave that should be generated
int waveType = 0;
//Tone that the wave will produce (may or may not be applicable based on wave type)
float waveTone = 440;
//Running index for sampling
float samplingIndex = 0;
//These are useful variables that cannot be changed outside of this file:
//Volume
const Sint16 amplitude = 32000;
//Sampling rate
const int samplingRate = 44100;
//Buffer size
const int bufferSize = 1024;
//Samples a sine wave at a given index
float sampleSine(float index);
//Samples a square wave at a given index
float sampleSquare(float index);
public:
//Initializes SDL audio, audio device, and audio specs
void initializeAudio();
//Function called by SDL audio_callback that fills stream with samples
void generateSamples(short* stream, int length);
//Turn sound on or off
void setSoundOn(bool soundOnOrOff);
//Set timbre of tone produced by beeper
void setWaveType(int waveTypeID);
//Set tone (in Hz) produced by beeper
void setWaveTone(int waveHz);
};
beeper.cpp
#include <beeper.h>
void fillBuffer(void* userdata, Uint8* _stream, int len) {
short * stream = reinterpret_cast<short*>(_stream);
int length = len;
Beeper* beeper = (Beeper*)userdata;
beeper->generateSamples(stream, length);
}
void Beeper::initializeAudio() {
SDL_AudioSpec desired, returned;
SDL_AudioDeviceID devID;
SDL_zero(desired);
desired.freq = samplingRate;
desired.format = AUDIO_S16SYS; //16-bit audio
desired.channels = 1;
desired.samples = bufferSize;
desired.callback = &fillBuffer;
desired.userdata = this;
devID = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(0,0), 0, &desired, &returned, SDL_AUDIO_ALLOW_FORMAT_CHANGE);
SDL_PauseAudioDevice(devID, 0);
}
void Beeper::generateSamples(short *stream, int length) {
int samplesToWrite = length / sizeof(short);
for (int i = 0; i < samplesToWrite; i++) {
if (soundOn) {
if (waveType == 0) {
stream[i] = (short)(amplitude * sampleSine(samplingIndex));
}
else if (waveType == 1) {
stream[i] = (short)(amplitude * 0.8 * sampleSquare(samplingIndex));
}
}
else {
stream[i] = 0;
}
//INFO << "Sampling index: " << samplingIndex;
samplingIndex += (waveTone * M_PI * 2) / samplingRate;
//INFO << "Stream input: " << stream[i];
if (samplingIndex >= (M_PI*2)) {
samplingIndex -= M_PI * 2;
}
}
}
void Beeper::setSoundOn(bool soundOnOrOff) {
soundOn = soundOnOrOff;
//if (soundOnOrOff) {
// samplingIndex = 0;
//}
}
void Beeper::setWaveType(int waveTypeID) {
waveType = waveTypeID;
//samplingIndex = 0;
}
void Beeper::setWaveTone(int waveHz) {
waveTone = waveHz;
//samplingIndex = 0;
}
float Beeper::sampleSine(float index) {
double result = sin((index));
//INFO << "Sine result: " << result;
return result;
}
float Beeper::sampleSquare(float index)
{
// keep the sine value as a double: truncating sin() to an int collapses almost every sample to 0
double unSquaredSin = sin(index);
if (unSquaredSin >= 0) {
return 1;
}
else {
return -1;
}
}
The callback function is being called and the generateSamples function is loading data into the stream, but I cannot hear anything but a very slight click at irregular periods. I have had a look at the data inside the stream and it follows a pattern that I would expect for a scaled sine wave with a 440 Hz frequency. Is there something obvious that I am missing? I did notice that the size of the stream is double what I put when declaring the SDL_AudioSpec and calling SDL_OpenAudioDevice. Why is that?
Answered my own question! When opening the audio device I used the flag SDL_AUDIO_ALLOW_FORMAT_CHANGE which meant that SDL was actually using a float buffer instead of the short buffer that I expected. This was causing issues in a couple of places that were hard to detect (the stream being double the amount of bytes I was expecting should have tipped me off). I changed that parameter in SDL_OpenAudioDevice() to 0 and it worked as expected!
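For reference, a minimal sketch of that fix (the rest of initializeAudio() stays the same, only the last argument changes): with allowed_changes set to 0, SDL converts between the requested S16 format and whatever the hardware wants, so the callback really receives a short buffer.
devID = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(0, 0), 0, &desired, &returned, 0); // 0: no format changes allowed
SDL_PauseAudioDevice(devID, 0);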
I'm working on a voice chat and I need to compress my audio data. I record and play the audio data via the Qt framework. If I record and play the audio data without compressing it, everything is fine. If I compress, decompress, and play the audio data, I just hear a cracking sound.
Edit: I had a look at the demo code and I tried to use that code.
I can hear something, but it is very laggy. If I increase the size of pcm_bytes to e.g. 40000 it sounds better, but my voice still lags and there are cracking sounds.
This is the line (audioinput.cpp at the bottom):
speaker->write((const char*)pcm_bytes,3840);
codecopus.cpp:
#include "codecopus.h"
CodecOpus::CodecOpus()
{
}
void CodecOpus::initDecoder(opus_int32 samplingRate, int channels) //decoder
{
int error;
decoderState = opus_decoder_create(samplingRate,channels,&error);
if(error == OPUS_OK){
std::cout << "Created Opus Decoder struct" << std::endl;
}
}
void CodecOpus::initEncoder(opus_int32 samplingRate, int channels) // Encoder
{
int error;
encoderState = opus_encoder_create(samplingRate,channels,OPUS_APPLICATION_VOIP,&error);
error = opus_encoder_ctl(encoderState,OPUS_SET_BITRATE(64000));
if(error == OPUS_OK){
std::cout << "Created Opus Encoder struct" << std::endl;
}
}
opus_int32 CodecOpus::encodeData(const opus_int16 *pcm, int frameSize, unsigned char *data, opus_int32 maxDataBytes) //Encoder
{
opus_int32 i = opus_encode(encoderState,pcm,frameSize,data,maxDataBytes);
return i;
}
int CodecOpus::decodeData(const unsigned char *data, opus_int32 numberOfBytes, opus_int16* pcm, int maxFrameSize) //Decoder; maxFrameSize and the return value are counted in samples per channel, not bytes
{
int i = opus_decode(decoderState,data,numberOfBytes,pcm,maxFrameSize,0);
return i;
}
CodecOpus::~CodecOpus()
{
opus_decoder_destroy(this->decoderState);
opus_encoder_destroy(this->encoderState);
}
audioinput.h:
#ifndef AUDIOINPUT_H
#define AUDIOINPUT_H
#include <QAudioFormat>
#include <iostream>
#include <QAudioInput>
#include <QAudioOutput>
#include <thread>
#include "codecopus.h"
#include "QDebug"
class AudioInput : public QObject
{
Q_OBJECT
public:
AudioInput();
~AudioInput();
void startRecording();
void CreateNewAudioThread();
private:
CodecOpus opus;
unsigned char cbits[4000] = {};
opus_int16 in[960*2*sizeof(opus_int16)] = {};
opus_int16 out[5760*2] = {};
unsigned char *pcm_bytes;
int MAX_FRAME_SIZE;
QAudioFormat audioFormat;
QAudioInput *audioInput;
QIODevice *mic;
QByteArray data;
int micFrameSize;
QAudioOutput *audioOutput;
QIODevice *speaker;
QAudioFormat speakerAudioFormat;
public slots:
void OnAudioNotfiy();
};
#endif // AUDIOINPUT_H
audioinput.cpp:
#include "audioinput.h"
AudioInput::AudioInput() : audioFormat(),pcm_bytes(new unsigned char[40000])
{
audioFormat.setSampleRate(48000);
audioFormat.setChannelCount(2);
audioFormat.setSampleSize(16);
audioFormat.setSampleType(QAudioFormat::SignedInt);
audioFormat.setByteOrder(QAudioFormat::LittleEndian);
audioFormat.setCodec("audio/pcm");
speakerAudioFormat.setSampleRate(48000);
speakerAudioFormat.setChannelCount(2);
speakerAudioFormat.setSampleSize(16);
speakerAudioFormat.setSampleType(QAudioFormat::SignedInt);
speakerAudioFormat.setByteOrder(QAudioFormat::LittleEndian);
speakerAudioFormat.setCodec("audio/pcm");
QAudioDeviceInfo info = QAudioDeviceInfo::defaultInputDevice();
if(!info.isFormatSupported(audioFormat)){
std::cout << "Mic Format not supported!" << std::endl;
audioFormat = info.nearestFormat(audioFormat);
}
QAudioDeviceInfo speakerInfo = QAudioDeviceInfo::defaultOutputDevice();
if(!speakerInfo.isFormatSupported(speakerAudioFormat)){
std::cout << "Speaker Format is not supported!" << std::endl;
speakerAudioFormat = info.nearestFormat(speakerAudioFormat);
}
std::cout << speakerAudioFormat.sampleRate() << audioFormat.sampleRate() << speakerAudioFormat.channelCount() << audioFormat.channelCount() << std::endl;
audioInput = new QAudioInput(audioFormat);
audioOutput = new QAudioOutput(speakerAudioFormat);
audioInput->setNotifyInterval(20);
micFrameSize = (audioFormat.sampleRate()/1000)*20;
opus.initEncoder(audioFormat.sampleRate(),audioFormat.channelCount());
opus.initDecoder(speakerAudioFormat.sampleRate(),speakerAudioFormat.channelCount());
MAX_FRAME_SIZE = 6*960;
connect(audioInput,SIGNAL(notify()),this,SLOT(OnAudioNotfiy()));
}
AudioInput::~AudioInput()
{
}
void AudioInput::startRecording()
{
mic = audioInput->start();
speaker = audioOutput->start();
std::cout << "Recording started!" << std::endl;
}
void AudioInput::CreateNewAudioThread()
{
std::thread t1(&AudioInput::startRecording,this);
t1.detach();
}
void AudioInput::OnAudioNotfiy()
{
data = mic->readAll();
std::cout << "data size" <<data.size() << std::endl;
if(data.size() > 0){
pcm_bytes = reinterpret_cast<unsigned char*>(data.data());
//convert
for(int i=0;i<2*960;i++){ //TODO HARDCODED
in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
}
opus_int32 compressedBytes = opus.encodeData(in,960,cbits,4000);
opus_int32 decompressedBytes = opus.decodeData(cbits,compressedBytes,out,MAX_FRAME_SIZE);
for(int i = 0; i<2*decompressedBytes;i++) //TODO HARDCODED
{
pcm_bytes[2*i]=out[i]&0xFF;
pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
}
speaker->write((const char*)pcm_bytes,3840);
}
}
1) You encode only 960 samples per channel, while the buffer is much larger. You must split the buffer into several equal parts and pass them to the encoder one at a time. The allowed part sizes are 120, 240, 480, 960, 1920, and 2880 samples.
2) Use the qFromLittleEndian()/qToLittleEndian() functions, or a plain type cast, when converting from a char array to an opus_int16 array and back. This prevents the cracking and poor sound quality.
Example:
void voice::slot_read_audio_input()
{
// Audio settings:
// Sample Rate=48000
// Sample Size=16
// Channel Count=1
// Byte Order=Little Endian
// Sample Type= UnSignedInt
// Encoder settings:
// Sample Rate=48000
// Channel Count=1
// OPUS_APPLICATION_VOIP
// Decoder settings:
// Sample Rate=48000
// Channel Count=1
QByteArray audio_buffer;//mic
QByteArray output_audio_buffer;//speaker
int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
int const FRAME_SIZE=960;
int const MAX_FRAME_SIZE=1276;
int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;
opus_int16 input_frame[FRAME_SIZE] = {};
opus_int16 output_frame[FRAME_SIZE] = {};
unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};
audio_buffer.resize(voice_input->bytesReady());
output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);
input->read(audio_buffer.data(),audio_buffer.size());
for(int i=0;i<FRAME_COUNT;i++)
{
// convert from LittleEndian
for(int j=0;j<FRAME_SIZE;j++)
{
input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
}
opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);
// conver to LittleEndian
for(int j = 0; j<decompressedBytes;j++)
{
qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
// decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
}
audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
// or use this:
// output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
}
}
I had a long answer ready about how you are misinterpreting the return value of opus.decodeData as the number of bytes, where the correct interpretation is "number of decoded samples per channel". But it still looks like you account for that in the byte conversion routine later on. So I'm not precisely sure where the bug is.
In general I think you are making the conversion between unsigned char and int16 more complicated than it needs to be. You should be able to pass the audio buffer directly to and from Opus, reinterpreting its pointer to the needed type inline, without manually doing bit manipulations to convert and copy between different buffers. The audio device should give you little-endian data, but if there is a mismatch you can do a basic byte-swapping routine:
for (int c = 0; c < numSamples; c++)
{
unsigned char tmp = data[2 * c];
data[2 * c] = data[2 * c + 1];
data[2 * c + 1] = tmp;
}
I don't see it here, but I assume you also have code that consumes exactly 960 samples at a time from the mic and keeps the rest in the buffer for the next frame; otherwise you'll drop data. A sketch of that follows.
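A minimal sketch of both points together, inline reinterpretation plus carrying leftover bytes between notify() calls (the QByteArray member "pending" is hypothetical, added here for illustration; sizes assume 48 kHz stereo and 20 ms frames):
void AudioInput::OnAudioNotfiy()
{
    pending.append(mic->readAll());
    const int FRAME_SAMPLES = 960; // 20 ms at 48 kHz
    const int FRAME_BYTES = FRAME_SAMPLES * 2 /*channels*/ * sizeof(opus_int16);
    while (pending.size() >= FRAME_BYTES) {
        // encode straight out of the byte buffer: no per-sample bit twiddling
        opus_int32 packetBytes = opus.encodeData(
            reinterpret_cast<const opus_int16*>(pending.constData()),
            FRAME_SAMPLES, cbits, sizeof(cbits));
        int samplesPerChannel = opus.decodeData(cbits, packetBytes, out, MAX_FRAME_SIZE);
        speaker->write(reinterpret_cast<const char*>(out),
            samplesPerChannel * 2 * sizeof(opus_int16));
        pending.remove(0, FRAME_BYTES); // keep the remainder for the next notify
    }
}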
Not that it matters much, but you can also replace 4000 in cbits with 1275, which is the maximum opus packet size.
I'm currently working on a project similar to OBS, where I'm capturing screen data, encoding it with the x264 library, and then broadcasting it to a twitch server.
Currently, the servers are accepting the data, but no video is being played - it buffers for a moment, then returns an error code "2000: network error"
Like OBS Classic, I'm dividing each NAL provided by x264 by its type and then making changes to each:
int frame_size = x264_encoder_encode(encoder, &nals, &num_nals, &pic_in, &pic_out);
//sort the NAL's into their types and make necessary adjustments
int timeOffset = int(pic_out.i_pts - pic_out.i_dts);
timeOffset = htonl(timeOffset);//host to network translation, ensure the bytes are in the right format
BYTE *timeOffsetAddr = ((BYTE*)&timeOffset) + 1;
videoSection sect;
bool foundFrame = false;
uint8_t * spsPayload = NULL;
int spsSize = 0;
for (int i = 0; i<num_nals; i++) {
//std::cout << "VideoEncoder: EncodedImages Size: " << encodedImages->size() << std::endl;
x264_nal_t &nal = nals[i];
//std::cout << "NAL is:" << nal.i_type << std::endl;
//need to account for pps/sps, seems to always be the first frame sent
if (nal.i_type == NAL_SPS) {
spsSize = nal.i_payload;
spsPayload = (uint8_t*)malloc(spsSize);
memcpy(spsPayload, nal.p_payload, spsSize);
} else if (nal.i_type == NAL_PPS){
//pps always happens after sps
if (spsPayload == NULL) {
std::cout << "VideoEncoder: critical error, sps not set" << std::endl;
}
uint8_t * payload = (uint8_t*)malloc(nal.i_payload + spsSize);
memcpy(payload, spsPayload, spsSize);
memcpy(payload + spsSize, nal.p_payload, nal.i_payload); // append the PPS after the SPS
sect = { nal.i_payload + spsSize, payload, nal.i_type };
encodedImages->push(sect);
} else if (nal.i_type == NAL_SEI || nal.i_type == NAL_FILLER) {
//these need some bytes at the start removed
BYTE *skip = nal.p_payload;
while (*(skip++) != 0x1);
int skipBytes = (int)(skip - nal.p_payload);
int newPayloadSize = (nal.i_payload - skipBytes);
uint8_t * payload = (uint8_t*)malloc(newPayloadSize);
memcpy(payload, nal.p_payload + skipBytes, newPayloadSize);
sect = { newPayloadSize, payload, nal.i_type };
encodedImages->push(sect);
} else if (nal.i_type == NAL_SLICE_IDR || nal.i_type == NAL_SLICE) {
//these packets need an additional section at the start
BYTE *skip = nal.p_payload;
while (*(skip++) != 0x1);
int skipBytes = (int)(skip - nal.p_payload);
std::vector<BYTE> bodyData;
if (!foundFrame) {
if (nal.i_type == NAL_SLICE_IDR) { bodyData.push_back(0x17); } else { bodyData.push_back(0x27); } //add a 17 or a 27 as appropriate
bodyData.push_back(1);
bodyData.push_back(*timeOffsetAddr);
foundFrame = true;
}
//put into the payload the bodyData followed by the nal payload
int newPayloadSize = (nal.i_payload - skipBytes);
uint8_t * payload = (uint8_t*)malloc(newPayloadSize + bodyData.size());
memcpy(payload, bodyData.data(), bodyData.size()); // note: sizeof a pointer was used here before, not the data size
memcpy(payload + bodyData.size(), nal.p_payload + skipBytes, newPayloadSize);
int totalSize = newPayloadSize + (int)bodyData.size();
sect = { totalSize, payload, nal.i_type };
encodedImages->push(sect);
} else {
std::cout << "VideoEncoder: Nal type did not match expected" << std::endl;
continue;
}
}
The NAL payload data is then put into a struct, VideoSection, in a queue buffer
//used to transfer encoded data
struct videoSection {
int frameSize;
uint8_t* payload;
int type;
};
After which it is picked up by the broadcaster, a few more changes are made, and then I send it with RTMP_SendPacket().
videoSection sect = encodedImages->front();
encodedImages->pop();
//std::cout << "Broadcaster: Frame Size: " << sect.frameSize << std::endl;
//two methods of sending RTMP data, _sendpacket and _write. Using sendpacket for greater control
RTMPPacket * packet;
unsigned char* buf = (unsigned char*)sect.payload;
int type = buf[0]&0x1f; // the low 5 bits of the first byte are the NAL unit type
int len = sect.frameSize;
long timeOffset = GetTickCount() - rtmp_start_time;
//assign space packet will need
packet = (RTMPPacket *)malloc(sizeof(RTMPPacket)+RTMP_MAX_HEADER_SIZE + len + 9);
memset(packet, 0, sizeof(RTMPPacket) + RTMP_MAX_HEADER_SIZE);
packet->m_body = (char *)packet + sizeof(RTMPPacket) + RTMP_MAX_HEADER_SIZE;
packet->m_nBodySize = len + 9;
//std::cout << "Broadcaster: Packet Size: " << sizeof(RTMPPacket) + RTMP_MAX_HEADER_SIZE + len + 9 << std::endl;
//std::cout << "Broadcaster: Packet Body Size: " << len + 9 << std::endl;
//set body to point to the packetbody
unsigned char *body = (unsigned char *)packet->m_body;
memset(body, 0, len + 9);
//NAL_SLICE_IDR represents keyframe
//first element determines packet type
body[0] = 0x27; // frame type 2 (inter frame) + codec id 7 (AVC/h.264)
if (sect.type == NAL_SLICE_IDR) {
body[0] = 0x17; // frame type 1 (keyframe) + codec id 7 (AVC/h.264)
}
//-------------------------------------------------------------------------------
//this section taken from https://stackoverflow.com/questions/25031759/using-x264-and-librtmp-to-send-live-camera-frame-but-the-flash-cant-show
//in an effort to understand packet format. it does not resolve my previous issues formatting the data for twitch to play it
//sets body to be NAL unit
body[1] = 0x01;
body[2] = 0x00;
body[3] = 0x00;
body[4] = 0x00;
//>> is a shift right
//shift len to the right, and AND it
/*body[5] = (len >> 24) & 0xff;
body[6] = (len >> 16) & 0xff;
body[7] = (len >> 8) & 0xff;
body[8] = (len) & 0xff;*/
//end code sourced from https://stackoverflow.com/questions/25031759/using-x264-and-librtmp-to-send-live-camera-frame-but-the-flash-cant-show
//-------------------------------------------------------------------------------
//copy from buffer into rest of body
memcpy(&body[9], buf, len);
//DEBUG
//save individual packet body to a file with name rtmp[packetnum]
//determine why some packets do not have 0x27 or 0x17 at the start
//still happening, makes no sense given the above code
/*std::string fileLocation = "rtmp" + std::to_string(packCount++);
std::cout << fileLocation << std::endl;
const char * charConversion = fileLocation.c_str();
FILE* saveFile = NULL;
saveFile = fopen(charConversion, "w+b");//open as write and binary
if (!fwrite(body, len + 9, 1, saveFile)) {
std::cout << "VideoEncoder: Error while trying to write to file" << std::endl;
}
fclose(saveFile);*/
//END DEBUG
//other packet details
packet->m_hasAbsTimestamp = 0;
packet->m_packetType = RTMP_PACKET_TYPE_VIDEO;
if (rtmp != NULL) {
packet->m_nInfoField2 = rtmp->m_stream_id;
}
packet->m_nChannel = 0x04;
packet->m_headerType = RTMP_PACKET_SIZE_LARGE;
packet->m_nTimeStamp = timeOffset;
//send the packet
if (rtmp != NULL) {
RTMP_SendPacket(rtmp, packet, TRUE);
}
I can see in the inspector that Twitch is receiving the data at a steady 3 kbps, so I'm sure something is wrong with how I'm adjusting the data before sending it. Can anyone advise me on what I'm doing wrong here?
The problems start even before the code you included. When you configure x264, be sure to set:
b_aud = 0;
b_repeat_headers = 0;
b_annexb = 0;
This tells x264 to generate the format RTMP needs; then you can skip all the per-NAL preprocessing.
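For example (a hedged sketch; the preset and tune are assumptions, the three flags are the ones named above):
x264_param_t param;
x264_param_default_preset(&param, "veryfast", "zerolatency");
param.b_aud = 0;            // no access unit delimiters
param.b_repeat_headers = 0; // SPS/PPS go into the RTMP sequence header instead of the stream
param.b_annexb = 0;         // 4-byte length prefixes instead of start codes, as FLV/RTMP expects
param.i_keyint_max = 60;    // e.g. 30 fps * 2 s, the keyframe interval Twitch asks for
x264_t *encoder = x264_encoder_open(&param);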
For SPS/PPS, use x264_encoder_headers to retrieve them after x264_encoder_open. Encode them into an "extradata" buffer as documented in "Possible Locations for Sequence/Picture Parameter Set(s) for H.264 Stream". This extradata goes into an RTMP "sequence header" packet sent before any frames. Set the AVCPacketType (body[1] in your case) accordingly: 0 for the sequence header, 1 for everything else.
body[0] = 0x17; // the sequence header counts as a keyframe: frame type 1 + codec id 7 (AVC)
body[1] = 0;
body[2] = 0;
body[3] = 0;
body[4] = 0;
memcpy(&body[5], extradata, extradata_size);
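A hedged sketch of building that extradata (an AVCDecoderConfigurationRecord) from x264_encoder_headers, assuming b_annexb = 0 so every returned payload starts with a 4-byte length:
x264_nal_t *hnals;
int numHeaderNals;
x264_encoder_headers(encoder, &hnals, &numHeaderNals); // returns SPS, PPS, SEI
uint8_t *sps = hnals[0].p_payload + 4; int spsLen = hnals[0].i_payload - 4; // skip the length prefix
uint8_t *pps = hnals[1].p_payload + 4; int ppsLen = hnals[1].i_payload - 4;
std::vector<uint8_t> extradata;
extradata.push_back(1);          // configurationVersion
extradata.push_back(sps[1]);     // AVCProfileIndication (byte after the NAL header)
extradata.push_back(sps[2]);     // profile_compatibility
extradata.push_back(sps[3]);     // AVCLevelIndication
extradata.push_back(0xFF);       // NALU lengths are 4 bytes
extradata.push_back(0xE1);       // one SPS follows
extradata.push_back(spsLen >> 8);
extradata.push_back(spsLen & 0xFF);
extradata.insert(extradata.end(), sps, sps + spsLen);
extradata.push_back(1);          // one PPS follows
extradata.push_back(ppsLen >> 8);
extradata.push_back(ppsLen & 0xFF);
extradata.insert(extradata.end(), pps, pps + ppsLen);
memcpy(&body[5], extradata.data(), extradata.size()); // extradata_size == extradata.size()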
body[2] through body[4] MUST be set to the frame cts (pts - dts) if you have B-frames. If you want it to always be zero, configure x264 for baseline profile, but this will result in reduced image quality. Use the return code from x264_encoder_encode as the frame size, and write the whole frame in one go.
int frame_size = x264_encoder_encode(encoder, &nals, &num_nals, &pic_in, &pic_out);
if (frame_size > 0) {
int cts = (int)(pic_out.i_pts - pic_out.i_dts);
body[0] = pic_out.b_keyframe ? 0x17 : 0x27; // 0x17 = keyframe + AVC, 0x27 = inter frame + AVC
body[1] = 1; // AVCPacketType: NALU
body[2] = cts >> 16;
body[3] = cts >> 8;
body[4] = cts;
memcpy(&body[5], nals->p_payload, frame_size);
}
Finally, Twitch requires that you also send an AAC audio stream, and be sure to set the keyframe interval to 2 seconds.
I've created a very simple MP3 player in C++ using mpg123 and out123, by following this tutorial: https://www.kasrajamshidi.com/blog/audio. However, I have an issue when the player starts: for the first few seconds it plays garbage noise. The sound isn't the same every time; sometimes it lasts longer, sometimes it is almost inaudible.
I thought it was because I write to the buffer and read from it at almost the same time, so I modified the code to use a circular buffer: I first read some data into the buffer, and then, while playing from it, I keep writing bytes at an offset ahead. Unfortunately, the problem remains the same and I'm confused.
Here is my code:
#include <iostream>
#include <mpg123.h>
#include <out123.h>
// If I print messages, the music starts much more smoothly...
// (But the problem itself does not disappear.)
#define PRINT_BUFFER_STATE false
// Size of the circular buffer.
const int BUFFER_SIZE = 4;
int main(int argc, char **argv)
{
// Initialize mpg123
if(mpg123_init() != MPG123_OK)
return 255;
mpg123_handle *mpg_handle = mpg123_new(nullptr, nullptr);
out123_handle *out_handle = out123_new();
// This buffer is circular. The index we write to stays ahead of
// the place we play from. bytes_written[i] tells us how many bytes
// were written to buffer[i], i.e. how many bytes we need to output
// when we play from it.
unsigned char **buffer =
(unsigned char **)alloca(sizeof(unsigned char *) * BUFFER_SIZE);
std::size_t *bytes_written =
(std::size_t *)alloca(sizeof(std::size_t) * BUFFER_SIZE);
// These indexes tell us where we write bytes
// and where we play them from.
int play_buffer = 0;
int write_buffer = 0;
int number_of_clips = 2;
const char **clip_path = (const char **)alloca(sizeof(const char *) * number_of_clips); // string literals bind to const char* in C++
clip_path[0] = "/home/mateusz/Music/guitar.mp3";
clip_path[1] = "/home/mateusz/Music/drums.mp3";
for (int clip = 0; clip < number_of_clips; ++clip)
{
// Open a given file with mpg123 (to get format,
// which will then be passed to out device).
mpg123_open(mpg_handle, clip_path[clip]);
// Set format details:
int channels = 0;
int encoding = 0;
long rate = 0;
mpg123_getformat(mpg_handle, &rate, &channels, &encoding);
// Allocate the contents of the circular buffer.
std::size_t buffer_size = mpg123_outblock(mpg_handle);
for (int i = 0; i < BUFFER_SIZE; ++i)
buffer[i] = (unsigned char *)malloc(sizeof(unsigned char) * buffer_size);
// Start by filling part of the buffer so that the write index
// runs ahead.
for (write_buffer = 0;
write_buffer < BUFFER_SIZE-1;
++write_buffer)
{
mpg123_read(mpg_handle, buffer[write_buffer], buffer_size,
&bytes_written[write_buffer]);
// If there is nothing to read we break the loop.
if (!bytes_written[write_buffer])
break;
}
// Set out handle to the device we'll play from with default parameters.
out123_open(out_handle, nullptr, nullptr);
out123_start(out_handle, rate, channels, encoding);
play_buffer = 0;
// play_buffer should never catch up with write_buffer unless
// the latter has finished its job.
while (write_buffer != play_buffer)
{
#if PRINT_BUFFER_STATE
std::cout << "W: " << write_buffer << " R: " << play_buffer << "\n";
#endif
out123_play(out_handle, buffer[play_buffer],
bytes_written[play_buffer]);
mpg123_read(mpg_handle, buffer[write_buffer], buffer_size,
&bytes_written[write_buffer]);
play_buffer = (play_buffer + 1) % BUFFER_SIZE;
if (bytes_written[write_buffer])
write_buffer = (write_buffer + 1) % BUFFER_SIZE;
}
for (int i = 0; i < BUFFER_SIZE; ++i)
free(buffer[i]);
out123_stop(out_handle);
out123_close(out_handle);
mpg123_close(mpg_handle);
}
out123_del(out_handle);
mpg123_delete(mpg_handle);
mpg123_exit();
return 0;
}
Changing BUFFER_SIZE to a really big number doesn't help, so the problem is not there. Surprisingly enough, when I print some stuff to the console it seems to run much more smoothly, but the problem does not disappear.
My guess is that something is not synchronized, but that's really all I can tell... Should my program sleep in the loop after playing each chunk of sound? I tried putting a sleep command almost everywhere, but it didn't achieve much. There must be something I'm doing wrong, but I can't figure it out. So my question is: how do I prevent my player from playing this terrible sound every time it starts a new file?
I understand that reading files on the GPU is inefficient, since it is bound by the slowest part of the system, namely I/O. However, I came up with another approach: use the CPU for reading the files and let the GPU carry the processing burden. I wrote the following code in C++, but I'm stuck at the integration point: how do I make the GPU handle these files after they've been read by the CPU? In other words, where does C++ AMP hook into this code? Or should I rewrite the whole thing from scratch?
/* This code reads multiple .dat files from the directory that contains the executable (from my Stack Overflow account). */
#include <Windows.h>
#include <ctime>
#include <stdint.h>
#include <iostream>
using std::cout;
using std::endl;
#include <fstream>
using std::ifstream;
#include <cstring>
/* Returns the amount of milliseconds elapsed since the UNIX epoch. Works on both
* windows and linux. */
uint64_t GetTimeMs64()
{
FILETIME ft;
LARGE_INTEGER li;
/* Get the amount of 100 nano seconds intervals elapsed since January 1, 1601 (UTC) and copy it
* to a LARGE_INTEGER structure. */
GetSystemTimeAsFileTime(&ft);
li.LowPart = ft.dwLowDateTime;
li.HighPart = ft.dwHighDateTime;
uint64_t ret;
ret = li.QuadPart;
ret -= 116444736000000000LL; /* Convert from file time to UNIX epoch time. */
ret /= 10000; /* From 100 nano seconds (10^-7) to 1 millisecond (10^-3) intervals */
return ret;
}
const int MAX_CHARS_PER_LINE = 512;
const int MAX_TOKENS_PER_LINE = 20;
const char* const DELIMITER = "|";
int main()
{
// create a file-reading object
uint64_t a = GetTimeMs64();
cout << a << endl;
HANDLE h;
WIN32_FIND_DATA find_data;
h = FindFirstFile( "*.dat", & find_data );
if( h == INVALID_HANDLE_VALUE ) {
cout<<"error"<<endl;
}
do {
char * s = find_data.cFileName;
ifstream fin;
fin.open(s); // open a file
if (!fin.good())
return 1; // exit if file not found
// read each line of the file
char buf[MAX_CHARS_PER_LINE]; // holds one entire line
// loop on getline itself; testing !fin.eof() before reading would process the last line twice
while (fin.getline(buf, MAX_CHARS_PER_LINE))
{
// parse the line into blank-delimited tokens
int n = 0; // a for-loop index
// array to store memory addresses of the tokens in buf
const char* token[MAX_TOKENS_PER_LINE] = {}; // initialize to 0
// parse the line
token[0] = strtok(buf, DELIMITER); // first token
if (token[0]) // zero if line is blank
{
for (n = 1; n < MAX_TOKENS_PER_LINE; n++)
{
token[n] = strtok(0, DELIMITER); // subsequent tokens
if (!token[n]) break; // no more tokens
}
}
// process (print) the tokens
for (int i = 0; i < n; i++) // n = #of tokens
cout << "Token[" << i << "] = " << token[i] << endl;
cout << endl;
}
// Your code here
} while( FindNextFile( h, & find_data ) );
FindClose( h );
uint64_t b = GetTimeMs64();
cout << a << endl;
cout << b << endl;
uint64_t c = b - a;
cout << c << endl;
system("pause");
}
There is no way for the GPU to handle the files directly; as you assumed, the CPU handles the I/O. So you need to store the information you read in main memory, send it to the GPU, compute there, and so on. One good way to work with files is to archive (compress) your information with the GPU: you read the file with the CPU, then extract > compute > archive with the GPU, and store the result with the CPU.
UPD.
(CPU IO READ from file (should be already archived information)) to -> main memory
(CPU SEND) to -> GPU global memory from main memory
(GPU EXTRACT (if archived))
(GPU COMPUTE (your work here))
(GPU ARCHIVE)
(CPU RETRIEVE) to -> main memory from GPU global memory
(CPU IO WRITE to file)
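To make the integration point concrete, here is a minimal C++ AMP sketch (an assumption about what it could look like, not part of the original code): parse the tokens into a float vector on the CPU, wrap it in an array_view, and run the per-element work on the GPU with parallel_for_each.
#include <amp.h>
#include <vector>
using namespace concurrency;
// values holds the numbers parsed from the .dat tokens on the CPU side
void processOnGpu(std::vector<float> &values)
{
    array_view<float, 1> av((int)values.size(), values); // wraps CPU memory, copied to the GPU on demand
    parallel_for_each(av.extent, [=](index<1> idx) restrict(amp)
    {
        av[idx] = av[idx] * 2.0f; // placeholder computation: the real work goes here
    });
    av.synchronize(); // copy the results back to main memory
}
A call to processOnGpu(parsedValues) would slot in at the "// Your code here" marker in main(), after the tokens of each file have been converted to numbers.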