Opus can't encode/decode all data - C++

I'm using Opus to encode and decode audio recorded with PortAudio. I'm trying to encode a recording of around 10 seconds; I've written the captured data to a log file and I get around 480000 lines, which corresponds to NUM_SECONDS * SAMPLE_RATE, i.e. the duration times the sampling rate.
The problem is that when I run it through Opus, encoding and decoding succeed, but the size of the returned data is not the same at all: it is the FRAMES_PER_BUFFER size, and the data itself does not correspond to anything from the original recording. Here is my code:
My encode and decode methods:
typedef struct {
    int size;
    std::vector<unsigned char> *sound;
} AudioEnc;

typedef struct {
    int size;
    std::vector<float> sound;
} AudioData;

#define SAMPLE_RATE       (48000)
#define FRAMES_PER_BUFFER (1920)
#define NUM_SECONDS       (10)
#define NUM_CHANNELS      (1)

AudioEnc &Opus::Encode(AudioData &data)
{
    AudioEnc enc;

    enc.sound = new std::vector<unsigned char>;
    const unsigned int max_size = NUM_SECONDS * SAMPLE_RATE * sizeof(float);
    float sound[max_size];
    enc.sound->resize(NUM_SECONDS * SAMPLE_RATE);
    if (data.size == 0)
        enc.sound->resize(opus_encode_float(_encode, sound, FRAMES_PER_BUFFER, enc.sound->data(), max_size));
    else
        enc.sound->resize(opus_encode_float(_encode, data.sound.data(), FRAMES_PER_BUFFER, enc.sound->data(), max_size));
    return (enc);
}

AudioData Opus::Decode(AudioEnc &data)
{
    AudioData dec;

    dec.sound.resize(NUM_SECONDS * SAMPLE_RATE);
    dec.sound.resize(opus_decode_float(_decode, data.sound->data(), data.sound->size(), dec.sound.data(), FRAMES_PER_BUFFER, 0));
    return (dec);
}
The recorded data is too big for pastebin. Do you have any idea what I'm missing?
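Note that opus_encode_float() consumes exactly one frame (FRAMES_PER_BUFFER samples here) per call, which is why only one frame-sized packet comes back. Below is a minimal sketch of encoding the whole capture frame by frame; the encoder argument and the 4000-byte packet ceiling are assumptions, not part of the code above:

#include <opus/opus.h>
#include <vector>

// Hedged sketch: Opus emits one packet per frame, so a 10-second capture
// has to be split into FRAMES_PER_BUFFER-sized frames, and each packet
// must be kept separately (Opus packets are not self-delimiting).
std::vector<std::vector<unsigned char>> EncodeAll(OpusEncoder *encoder,
                                                  const std::vector<float> &pcm)
{
    std::vector<std::vector<unsigned char>> packets;

    for (size_t off = 0; off + FRAMES_PER_BUFFER <= pcm.size(); off += FRAMES_PER_BUFFER) {
        std::vector<unsigned char> pkt(4000); // recommended max packet size
        opus_int32 n = opus_encode_float(encoder, pcm.data() + off, FRAMES_PER_BUFFER,
                                         pkt.data(), (opus_int32)pkt.size());
        if (n < 0)
            break; // encoding error
        pkt.resize(n);
        packets.push_back(std::move(pkt));
    }
    return packets;
}

The decoder then gets the same packets back one at a time, producing FRAMES_PER_BUFFER samples per call; at 48 kHz, a 1920-sample frame is a valid 40 ms Opus frame size.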

Related

nvJPEG: encode packed BGR

Well, my goal is simple -- trying to create a JPEG-encoded image from a buffer of packed/interleaved BGR data (could be RGB as well).
The NVIDIA docs contain an example, and the proper image input is essentially described here.
So I tried the following:
#include <nvjpeg.h>

// very simple
typedef struct {
    int width;
    int height;
    unsigned char *buffer;
    unsigned long data_size;
} my_bitmap_type;

std::vector<unsigned char> BitmapToJpegCUDA(const my_bitmap_type *image)
{
    nvjpegHandle_t nv_handle;
    nvjpegEncoderState_t nv_enc_state;
    nvjpegEncoderParams_t nv_enc_params;
    cudaStream_t stream = NULL;
    nvjpegStatus_t er;

    nvjpegCreateSimple(&nv_handle);
    nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream);
    nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream);

    nvjpegImage_t nv_image;
    nv_image.channel[0] = image->buffer;
    nv_image.pitch[0] = 3 * image->width;
    // Nope, that's for planar images!
    // nv_image.channel[0] = image->buffer;
    // nv_image.channel[1] = image->buffer + image->width * image->height;
    // nv_image.channel[2] = image->buffer + 2 * image->width * image->height;
    // nv_image.pitch[0] = image->width;
    // nv_image.pitch[1] = image->width;
    // nv_image.pitch[2] = image->width;

    er = nvjpegEncodeImage(nv_handle, nv_enc_state, nv_enc_params, &nv_image,
                           NVJPEG_INPUT_BGRI, image->width, image->height, stream);
    LOG(ERROR) << "enc " << er;

    size_t length = 0;
    nvjpegEncodeRetrieveBitstream(nv_handle, nv_enc_state, NULL, &length, stream);
    cudaStreamSynchronize(stream);
    std::vector<unsigned char> jpeg(length);
    nvjpegEncodeRetrieveBitstream(nv_handle, nv_enc_state, jpeg.data(), &length, 0);

    nvjpegEncoderParamsDestroy(nv_enc_params);
    nvjpegEncoderStateDestroy(nv_enc_state);
    nvjpegDestroy(nv_handle);
    return jpeg;
}
The logger says that nvjpegEncodeImage just returns NVJPEG_STATUS_INVALID_PARAMETER, so nothing works. In case you suspect my_bitmap_type is filled wrong, here's the equivalent turbojpeg-powered encoding:
#include <turbojpeg.h>

std::vector<unsigned char> BitmapToJpegBuffer(const my_bitmap_type *image)
{
    std::vector<unsigned char> out_data(3 * image->width * image->height);
    cudaError_t err = cudaMemcpy(out_data.data(), image->buffer, image->data_size, cudaMemcpyDeviceToHost);
    if (cudaSuccess != err) {
        LOG(ERROR) << "failed to copy CUDA memory: " << err;
    }

    tjhandle jpeg = tjInitCompress();
    unsigned char *encoded_buf = nullptr;
    long unsigned int encoded_sz = 0;
    int tjres = tjCompress2(jpeg,
                            out_data.data(),
                            image->width,
                            image->width * 3,
                            image->height,
                            TJPF_BGR,
                            &encoded_buf,
                            &encoded_sz,
                            TJSAMP_444,
                            95,
                            TJFLAG_FASTDCT);
    if (tjres != 0) {
        LOG(ERROR) << "jpeg compression failed!";
        return {};
    }

    std::vector<unsigned char> result(encoded_buf, encoded_buf + encoded_sz);
    tjFree(encoded_buf);
    tjDestroy(jpeg);
    return result;
}
... aaand it works just fine.
I'm desperately trying to figure out what's missing in the code. I would gratefully appreciate any help or advice.
UPD:
Using CentOS 7 / libnvjpeg-11-1.x86_64 (CUDA 11.1) / gcc 4.8.5
Okaaay, that's a bit strange, but after some time spent on trial and error, it turned out that the NVIDIA docs lack an essential detail:
nvjpegCreateSimple(&nv_handle);
nvjpegEncoderStateCreate(nv_handle, &nv_enc_state, stream);
nvjpegEncoderParamsCreate(nv_handle, &nv_enc_params, stream);
// This has to be done, default params are not sufficient
nvjpegEncoderParamsSetSamplingFactors(nv_enc_params, NVJPEG_CSS_444, stream);
Although the docs clearly state that the default subsampling for JPEG compression is 4:4:4, encoding does not work with the default encoder params: the subsampling has to be set explicitly.
So, that one line of code fixes everything.

Converting a short[] from SoundTouch audio library for playback

I'm attempting to use the SoundTouch C++ library for audio speed and pitch changes in an Android app. I have successfully pushed a Java byte[] array (from a .wav) through JNI, returned it, and played it back with an AudioTrack.
The next step is attempting to push a sample byte[] through the SoundTouch pipeline. I have dissected the source of the SoundStretch console program included with the library and have attempted to adapt it. I am using a stereo, 16-bit source for testing purposes.
With my current temporary setup I am ignoring the RIFF header and converting it along with the .wav data, because the Java AudioTrack object does not need to read the header; it just plays raw PCM. Playing the raw byte[] without sending it through SoundTouch just results in a small click where the header is (a canonical RIFF header is only 44 bytes).
After sending it through the SoundTouch pipeline, I get white noise where the beginning of the audio is supposed to be. I assume the problem is at the end of my write() function, where I am casting shorts to signed chars. Here, the console app writes to a file instead of pushing to a vector:
int res = (int)fwrite(temp, 1, numBytes, fptr);
I have read the documentation for fwrite, but I don't know enough about bit twiddling or audio processing to know how to correctly get this information into a char[] instead of writing it to a file. I know I am losing information with the cast, but I am unsure of how to correct it.
In case anyone is extra motivated, the SoundStretch source can be found here: http://www.surina.net/soundtouch/sourcecode.html
extern "C" DLL_PUBLIC jbyteArray
Java_net_surina_soundtouch_SoundTouch_getMutatedBytes
(JNIEnv *env, jobject thiz, jbyteArray input, jint length)
{
const int BUFF_SIZE = 2048000;
SoundTouch soundTouch;
jboolean isCopy;
jbyte* ar = env->GetByteArrayElements(input, &isCopy);
signed char* cBufferIn = (signed char*)ar;
SAMPLETYPE* fBufferIn = new SAMPLETYPE[length];
vector<signed char> fBufferOut;
//converts the chars to floats per the SoundTouch console app.
convertInput16(cBufferIn, fBufferIn, length);
//channels, sampling rate, speed, pitch change
setup(&soundTouch, 2, 44100, 1.0, 0);
//transform floats from fBufferIn to fBufferout
process(&soundTouch, fBufferIn, fBufferOut, BUFF_SIZE);
signed char* res = &fBufferOut[0];
jbyteArray result = env->NewByteArray(length);
env->SetByteArrayRegion(result, 0, fBufferOut.size(), res);
LOGV("fBufferOut Size: %d", fBufferOut.size());
delete[] fBufferIn;
return result;
}
process():
static void process(SoundTouch* soundTouch, SAMPLETYPE* fBufferIn, vector<signed char>& fBufferOut, int BUFF_SIZE)
{
    int nSamples = BUFF_SIZE / 2;        // 2 bytes per sample, using 16-bit samples for testing
    int buffSizeSamples = BUFF_SIZE / 2; // 2-channel stereo

    soundTouch->putSamples(fBufferIn, nSamples);
    do
    {
        nSamples = soundTouch->receiveSamples(fBufferIn, buffSizeSamples);
        write(fBufferIn, fBufferOut, nSamples / 2); // 2 channels
    } while (nSamples != 0);

    soundTouch->flush();
    do
    {
        nSamples = soundTouch->receiveSamples(fBufferIn, buffSizeSamples);
        write(fBufferIn, fBufferOut, nSamples / 2);
        LOGV("NUMBER OF SAMPLES: %d", nSamples);
    } while (nSamples != 0);
}
write():
static void write(const float *bufferIn, vector<signed char>& bufferOut, int numElems)
{
    int numBytes;
    int bytesPerSample;

    if (numElems == 0) return;

    bytesPerSample = 16 / 8; // 16-bit test sample / bits in a byte
    numBytes = numElems * bytesPerSample;
    short *temp = (short*)getConvBuffer(numBytes);

    switch (bytesPerSample)
    {
    case 2: // 16-bit encoding per the SoundStretch console app
    {
        short *temp2 = (short *)temp;
        for (int i = 0; i < numElems; i++)
        {
            short value = (short)saturate(bufferIn[i] * 32768.0f, -32768.0f, 32767.0f); // magic to me
            temp2[i] = value; // works for little endian only
        }
        break;
    }
    default:
        assert(false);
    }

    for (int i = 0; i < numElems; ++i)
    {
        bufferOut.push_back((signed char)temp[i]); // I think my problem is here.
    }
    delete[] temp;
    // bytesWritten += numBytes;
}
I just needed to push all the bits of each 16-bit sample into the char vector, low byte first (little-endian):
for (int i = 0; i < numElems; ++i)
{
    bufferOut.push_back(temp[i] & 0xff);
    bufferOut.push_back((temp[i] >> 8) & 0xff);
}

Convert raw PCM to FLAC?

EDIT: I've updated the code below to resemble the progress I have made. I'm trying to write the .wav header myself. The code does not work properly as of now, the audio is not being written to the file properly. The code does not contain any attempts to convert it to a .flac file yet.
I am using a Raspberry Pi (Debian Linux) to record audio with the ALSA library. The recording works fine, but I need to encode the input audio into the FLAC codec.
This is where I get lost. I have spent a considerable amount of time trying to figure out how to convert this raw data into FLAC, but I keep coming up with examples of how to convert .wav files into .flac files.
Here is the current (updated) code I have for recording audio with ALSA (it may be a bit rough, I'm still picking up C++):
// Use the newer ALSA API
#define ALSA_PCM_NEW_HW_PARAMS_API

#include <alsa/asoundlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct Riff
{
    char chunkId[4]; // "RIFF" (assuming char is 8 bits)
    int chunkSize;   // (assuming int is 32 bits)
    char format[4];  // "WAVE"
};

struct Format
{
    char chunkId[4]; // "fmt "
    int chunkSize;
    short format;    // assuming short is 16 bits
    short numChannels;
    int sampleRate;
    int byteRate;
    short align;
    short bitsPerSample;
};

struct Data
{
    char chunkId[4]; // "data"
    int chunkSize;   // length of data
    char* data;
};

struct Wave // Actual structure of a PCM WAVE file
{
    Riff riffHeader;
    Format formatHeader;
    Data dataHeader;
};
int main(int argc, char *argv[])
{
    void saveWaveFile(struct Wave *waveFile);
    long loops;
    int rc;
    int size;
    snd_pcm_t *handle;
    snd_pcm_hw_params_t *params;
    unsigned int sampleRate = 44100;
    unsigned int periodTime;
    int dir;
    snd_pcm_uframes_t frames;
    char *buffer;
    char *device = (char*) "plughw:1,0";
    //char *device = (char*) "default";

    printf("Capture device is %s\n", device);

    /* Open PCM device for recording (capture). */
    rc = snd_pcm_open(&handle, device, SND_PCM_STREAM_CAPTURE, 0);
    if (rc < 0)
    {
        fprintf(stderr, "Unable to open PCM device: %s\n", snd_strerror(rc));
        exit(1);
    }

    /* Allocate a hardware parameters object. */
    snd_pcm_hw_params_alloca(&params);
    /* Fill it in with default values. */
    snd_pcm_hw_params_any(handle, params);

    /* Set the desired hardware parameters. */
    /* Interleaved mode */
    snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    /* Signed 16-bit little-endian format */
    snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
    /* Two channels (stereo) */
    snd_pcm_hw_params_set_channels(handle, params, 2);
    /* 44100 frames/second sampling rate (CD quality) */
    snd_pcm_hw_params_set_rate_near(handle, params, &sampleRate, &dir);
    /* Set period size to 32 frames. */
    frames = 32;
    snd_pcm_hw_params_set_period_size_near(handle, params, &frames, &dir);

    /* Write the parameters to the driver */
    rc = snd_pcm_hw_params(handle, params);
    if (rc < 0)
    {
        fprintf(stderr, "Unable to set HW parameters: %s\n", snd_strerror(rc));
        exit(1);
    }

    /* Use a buffer large enough to hold one period */
    snd_pcm_hw_params_get_period_size(params, &frames, &dir);
    size = frames * 4; /* 2 bytes/sample, 2 channels */
    buffer = (char *) malloc(size);

    /* We want to loop for 5 seconds. Use a separate variable for the
       period time so sampleRate is not clobbered before the WAV header
       is filled in below. */
    snd_pcm_hw_params_get_period_time(params, &periodTime, &dir);
    loops = 5000000 / periodTime;
    while (loops > 0)
    {
        loops--;
        rc = snd_pcm_readi(handle, buffer, frames);
        if (rc == -EPIPE)
        {
            /* EPIPE means overrun */
            fprintf(stderr, "Overrun occurred.\n");
            snd_pcm_prepare(handle);
        } else if (rc < 0)
        {
            fprintf(stderr, "Error from read: %s\n", snd_strerror(rc));
        } else if (rc != (int)frames)
        {
            fprintf(stderr, "Short read, read %d frames.\n", rc);
        }
        /* NOTE: buffer only ever holds the most recent period;
           earlier periods are overwritten. */
    }

    Wave wave;
    /* memcpy, not strcpy: chunk IDs are 4 bytes with no NUL terminator */
    memcpy(wave.riffHeader.chunkId, "RIFF", 4);
    wave.riffHeader.chunkSize = 36 + size;
    memcpy(wave.riffHeader.format, "WAVE", 4);
    memcpy(wave.formatHeader.chunkId, "fmt ", 4);
    wave.formatHeader.chunkSize = 16;
    wave.formatHeader.format = 1; // PCM, other value indicates compression
    wave.formatHeader.numChannels = 2; // Stereo
    wave.formatHeader.sampleRate = sampleRate;
    wave.formatHeader.byteRate = sampleRate * 2 * 2;
    wave.formatHeader.align = 2 * 2;
    wave.formatHeader.bitsPerSample = 16;
    memcpy(wave.dataHeader.chunkId, "data", 4);
    wave.dataHeader.chunkSize = size;
    wave.dataHeader.data = buffer;
    saveWaveFile(&wave);

    snd_pcm_drain(handle);
    snd_pcm_close(handle);
    free(buffer);
    return 0;
}
void saveWaveFile(struct Wave *waveFile)
{
    FILE *file = fopen("test.wav", "wb");
    size_t written;

    if (file == NULL)
    {
        fprintf(stderr, "Cannot open file for writing.\n");
        exit(1);
    }
    /* Write the chunks one by one; writing the whole struct would store
       the data *pointer* instead of the samples (assumes no struct padding). */
    written  = fwrite(&waveFile->riffHeader, sizeof(struct Riff), 1, file);
    written += fwrite(&waveFile->formatHeader, sizeof(struct Format), 1, file);
    written += fwrite(&waveFile->dataHeader, 8, 1, file); /* chunk id + size only */
    written += fwrite(waveFile->dataHeader.data, waveFile->dataHeader.chunkSize, 1, file);
    fclose(file);
    if (written < 4)
    {
        fprintf(stderr, "Writing to file failed.\n");
        exit(1);
    }
}
How would I go about converting the PCM data to FLAC and saving it to disk for later use? I have downloaded libflac-dev already and just need an example to go off of.
The way I am doing it right now:
./capture > test.raw // or ./capture > test.flac
The way it should be (program does everything for me):
./capture
If I understand the FLAC::Encoder::File documentation, you can do something like
#include <FLAC++/encoder.h>

FLAC::Encoder::File encoder;
// The library defaults happen to be 2 channels / 16 bps / 44100 Hz,
// but setting them explicitly is safer:
encoder.set_channels(2);
encoder.set_bits_per_sample(16);
encoder.set_sample_rate(44100);
encoder.init("outfile.flac");
encoder.process(buffer, samples);
encoder.finish();
where buffer is an array of pointers, one per channel, each pointing to samples 32-bit integer samples for that channel.
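If the capture buffer is interleaved (as with the ALSA S16_LE setup in the question), process_interleaved() may be the more convenient call. A minimal sketch, assuming each 16-bit sample is widened to FLAC__int32; buffer and frames are the question's variables:

// Hedged sketch: widen one period of interleaved S16_LE capture data to
// FLAC__int32 and feed it to the encoder in a single call.
std::vector<FLAC__int32> widened(frames * 2 /* channels */);
const short *pcm16 = reinterpret_cast<const short *>(buffer); // little-endian host assumed
for (size_t i = 0; i < widened.size(); ++i)
    widened[i] = pcm16[i];
encoder.process_interleaved(widened.data(), frames);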
Unfortunately, I know next to nothing about audio encoding so I can't speak for any other options. Good luck!
Please refer to the below code:
FLAC Encoder Test Code
This example uses a WAV file as input and encodes it into FLAC.
As I understand it, there is no major difference between a WAV file and your RAW data, so I think you can modify this code to read the "buffer" directly and convert it. You already have all the related information (channels/bitrate etc.), so it should not be much of a problem to remove the WAV-header-reading code.
Please note: this is a modified version of the FLAC encoder sample from their git repo.
It includes some comments and hints on how to change it to the OP's requirements; the entire source is a little bit long.
And do note that this is the C API, which tends to be a bit more complex than the C++ one, but it is fairly easy to convert between the two once you get the idea.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "share/compat.h"
#include "FLAC/metadata.h"
#include "FLAC/stream_encoder.h"

/* this callback is what tells your program the progress that the encoder has made */
static void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data);

#define READSIZE 1024
/* assumed here: the number of samples placed into `buffer` per loop
   iteration; set this from your ALSA read */
#define SAMPLES_IN_BUFFER READSIZE

static unsigned total_samples = 0; /* can use a 32-bit number due to WAVE size limitations */

/* buffer is where we record to, in your case what ALSA writes to */
/* Note the calculation here to take the total bytes that the buffer takes */
static FLAC__byte buffer[READSIZE/*samples*/ * 2/*bytes_per_sample*/ * 2/*channels*/];

/* pcm is the input to the FLAC encoder; the samples themselves are 16-bit,
   but libFLAC takes them as 32-bit ints, hence 4 bytes per sample here */
static FLAC__int32 pcm[READSIZE/*samples*/ * 2/*channels*/];

int main(int argc, char *argv[])
{
    FLAC__bool ok = true;
    FLAC__StreamEncoder *encoder = 0;
    FLAC__StreamEncoderInitStatus init_status;
    FLAC__StreamMetadata *metadata[2];
    FLAC__StreamMetadata_VorbisComment_Entry entry;
    FILE *fin;
    unsigned sample_rate = 0;
    unsigned channels = 0;
    unsigned bps = 0;

    if((fin = fopen(argv[1], "rb")) == NULL) {
        fprintf(stderr, "ERROR: opening %s for input\n", argv[1]);
        return 1;
    }

    /* set sample rate, bps, total samples to encode here; these are dummy values */
    sample_rate = 44100;
    channels = 2;
    bps = 16;
    total_samples = 5000;

    /* allocate the encoder */
    if((encoder = FLAC__stream_encoder_new()) == NULL) {
        fprintf(stderr, "ERROR: allocating encoder\n");
        fclose(fin);
        return 1;
    }

    ok &= FLAC__stream_encoder_set_verify(encoder, true);
    ok &= FLAC__stream_encoder_set_compression_level(encoder, 5);
    ok &= FLAC__stream_encoder_set_channels(encoder, channels);
    ok &= FLAC__stream_encoder_set_bits_per_sample(encoder, bps);
    ok &= FLAC__stream_encoder_set_sample_rate(encoder, sample_rate);
    ok &= FLAC__stream_encoder_set_total_samples_estimate(encoder, total_samples);

    /* the sample adds metadata here; I've removed it for clarity */

    /* initialize encoder */
    if(ok) {
        /* client_data is what the progress_callback is called with; any objects
           you need to update on callback can be passed through this pointer */
        init_status = FLAC__stream_encoder_init_file(encoder, argv[2], progress_callback, /*client_data=*/NULL);
        if(init_status != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
            fprintf(stderr, "ERROR: initializing encoder: %s\n", FLAC__StreamEncoderInitStatusString[init_status]);
            ok = false;
        }
    }

    /* read blocks of samples from the WAVE file and feed them to the encoder */
    if(ok) {
        size_t left = (size_t)total_samples;
        while(ok && left) {
            /* record using ALSA here and set SAMPLES_IN_BUFFER accordingly */
            /* convert the packed little-endian 16-bit PCM samples from WAVE into
               an interleaved FLAC__int32 buffer for libFLAC; bps=16 means the
               samples are 16-bit, and they are signed unless stated otherwise */
            size_t i;
            for(i = 0; i < SAMPLES_IN_BUFFER*channels; i++) {
                /* this isn't the only way to convert between formats: at first
                   glance it seems to process two channels here, but it is just
                   sign-extending 16-bit little-endian data into an int array;
                   proper type casting would be cleaner, but this works */
                pcm[i] = (FLAC__int32)(((FLAC__int16)(FLAC__int8)buffer[2*i+1] << 8) | (FLAC__int16)buffer[2*i]);
            }
            /* feed samples to encoder */
            ok = FLAC__stream_encoder_process_interleaved(encoder, pcm, SAMPLES_IN_BUFFER);
            /* guard against total_samples not being a multiple of the buffer */
            left -= (left > SAMPLES_IN_BUFFER) ? SAMPLES_IN_BUFFER : left;
        }
    }

    ok &= FLAC__stream_encoder_finish(encoder);
    fprintf(stderr, "encoding: %s\n", ok ? "succeeded" : "FAILED");
    fprintf(stderr, "   state: %s\n", FLAC__StreamEncoderStateString[FLAC__stream_encoder_get_state(encoder)]);

    FLAC__stream_encoder_delete(encoder);
    fclose(fin);
    return 0;
}

/* the updates from FLAC's encoder system come here */
void progress_callback(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data)
{
    (void)encoder, (void)client_data;
    fprintf(stderr, "wrote %" PRIu64 " bytes, %" PRIu64 "/%u samples, %u/%u frames\n", bytes_written, samples_written, total_samples, frames_written, total_frames_estimate);
}
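To tie this back to the ALSA loop in the question, here is a hedged sketch of feeding one captured period straight to the encoder; handle, frames, and buffer are the question's variables, encoder and pcm come from the listing above, and a little-endian host is assumed:

rc = snd_pcm_readi(handle, buffer, frames);       /* one period of S16_LE stereo */
if (rc > 0) {
    const short *samples = (const short *)buffer; /* S16_LE on a little-endian host */
    for (int i = 0; i < rc * 2 /* channels */; i++)
        pcm[i] = (FLAC__int32)samples[i];         /* widen 16-bit to 32-bit */
    FLAC__stream_encoder_process_interleaved(encoder, pcm, rc);
}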

Get audio samples from byte array

How to get data samples from QAudioInput
I found this in the example code from the audioinput example:
void InputTest::readMore()
{
    if(!m_audioInput)
        return;
    qint64 len = m_audioInput->bytesReady();
    if(len > 4096)
        len = 4096;
    qint64 l = m_input->read(m_buffer.data(), len);
    if(l > 0) {
        m_audioInfo->write(m_buffer.constData(), l);
    }
}
I understand that m_buffer contains the audio data samples, but my audio processing library expects short samples. How can I convert this to a short pointer?
My audio library function looks like this:
putSample(short *Sample, int numberOfSample)
I can get the number of samples from:
Q_ASSERT(m_format.sampleSize() % 8 == 0);
const int channelBytes = m_format.sampleSize() / 8;
const int sampleBytes = m_format.channels() * channelBytes;
Q_ASSERT(len % sampleBytes == 0);
const int numSamples = len / sampleBytes;
This page indicates read() expects a char* to store the data in. If you have set up the format of the audio device properly, the data will indeed be 'segmented' as shorts in the char array, and you can simply cast the char* to a short* before passing it to your library.
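A minimal sketch of that cast, reusing the names from the snippets above; putSample() is the library function from the question, and a 16-bit sample format is assumed:

// Hedged sketch: with a 16-bit sample format, the bytes read from the
// device are already PCM shorts, so the pointer can be reinterpreted.
qint64 len = m_input->read(m_buffer.data(), m_buffer.size());
if (len > 0) {
    short *samples = reinterpret_cast<short *>(m_buffer.data());
    const int numSamples = len / sampleBytes; // sampleBytes as computed above
    putSample(samples, numSamples);
}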

Video for Windows - Adding Audio Stream to AVI

I have a really simple program that adds an audio stream to an AVI file with a pre-existing video stream.
The issue is that the resulting file contains a video stream but there does not appear to be any data in the stream.
The audio file is read by SDKwavefile from the DirectX samples.
AVIFileInit();
PAVIFILE avi;
AVIFileOpen(&avi, argv[1], OF_WRITE, NULL);

CWaveFile wav;
wav.Open(argv[2], NULL, WAVEFILE_READ);
WAVEFORMATEX *wavFormat = wav.GetFormat();

PAVISTREAM audioStream;
AVIFileCreateStream(avi, &audioStream, &audioInfo);

AVISTREAMINFO audioInfo;
memset(&audioInfo, 0, sizeof(AVISTREAMINFO));
audioInfo.fccType = streamtypeAUDIO;
audioInfo.dwScale = wavFormat->nBlockAlign;
audioInfo.dwRate = wavFormat->nSamplesPerSec * wavFormat->nBlockAlign;
audioInfo.dwSampleSize = wavFormat->nBlockAlign;
audioInfo.dwQuality = (DWORD)-1;

AVIStreamSetFormat(audioStream, 0, wavFormat, sizeof(WAVEFORMATEX));

BYTE *data = (BYTE *)malloc(wav.GetSize());
DWORD sizeRead;
wav.Read(data, wav.GetSize(), &sizeRead);
AVIStreamWrite(audioStream, 0, (wav.GetSize() * 8) / wavFormat->wBitsPerSample, data, wav.GetSize(), 0, NULL, NULL);

AVIStreamRelease(audioStream);
free(data);
wav.Close();
AVIFileRelease(avi);
AVIFileExit();
(Also, I know I shouldn't be using VFW anymore, but that decision goes way above my head. And I know I'm not checking the results of anything; that can come later.)
Thanks.
I tried to use this to add a .wav to an existing .avi (although I had a class CWaveSoundRead).
If you check the return codes, you get to AVIStreamWrite() which returns 0x80044065, which turns out to be AVIERR_UNSUPPORTED.
In hindsight, I'd say you called AVIFileCreateStream() before you filled in the AVISTREAMINFO object. Actually, now that I see it, it's hard to imagine your code compiling as-is, since audioInfo is defined AFTER AVIFileCreateStream!
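In other words, the AVISTREAMINFO needs to be filled in before the stream is created from it; reordering the question's own code:

// Fill in the stream description first, then create the stream from it.
AVISTREAMINFO audioInfo;
memset(&audioInfo, 0, sizeof(AVISTREAMINFO));
audioInfo.fccType = streamtypeAUDIO;
audioInfo.dwScale = wavFormat->nBlockAlign;
audioInfo.dwRate = wavFormat->nSamplesPerSec * wavFormat->nBlockAlign;
audioInfo.dwSampleSize = wavFormat->nBlockAlign;
audioInfo.dwQuality = (DWORD)-1;

PAVISTREAM audioStream;
AVIFileCreateStream(avi, &audioStream, &audioInfo);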
Here's something I did, although it still gets the audio stream length wrong:
struct FmtChunk {
    char id[4];                     //="fmt "
    unsigned long size;             //=16 or 0x28
    short wFormatTag;               //=WAVE_FORMAT_PCM=1
    unsigned short wChannels;       //=1 or 2 for mono or stereo
    unsigned long dwSamplesPerSec;  //=11025 or 22050 or 44100
    unsigned long dwAvgBytesPerSec; //=wBlockAlign * dwSamplesPerSec
    unsigned short wBlockAlign;     //=wChannels * (wBitsPerSample==8?1:2)
    unsigned short wBitsPerSample;  //=8 or 16, for bits per sample
};

struct DataChunk {
    char id[4];            //="data"
    unsigned long size;    //=datsize, size of the following array
    unsigned char data[1]; //=the raw data goes here
};

struct WavChunk {
    char id[4];         //="RIFF"
    unsigned long size; //=datsize+8+16+4
    char type[4];       //="WAVE"
};
bool Q_AVI_AddWav(cstring fnameVideo, cstring fnameAudio)
// Adds a .wav file to an existing .avi (with video stream)
{
    IAVIStream* m_pStreamAudio = 0;
    HRESULT hr;

    AVIFileInit();
    PAVIFILE avi;
    hr = AVIFileOpen(&avi, fnameVideo, OF_WRITE, NULL);
    CHECK(hr, "AVIFileOpen");

    WavChunk wav;
    FmtChunk fmt;
    DataChunk dat;

    // read wav file
    FILE *fr;
    int pos;
    fr = qfopen(fnameAudio, "rb");
    // Read header
    fread(&wav, 1, sizeof(wav), fr);
    // Read 'fmt' chunk; may be 16 or 40 in length
    pos = ftell(fr);
    fread(&fmt, 1, sizeof(fmt), fr);
    if(fmt.size == 40) fseek(fr, 40-16, SEEK_CUR); // Skip rest of fmt
    // else it's ok
    // Read data specs
    fread(&dat, sizeof(dat), 1, fr);
    char *buf = new char[dat.size];
    qdbg("Wav data %d bytes\n", dat.size);
    fread(buf, 1, dat.size, fr);
    qfclose(fr);

    // set wave format info
    WAVEFORMATEX wfx;
    wfx.wFormatTag = fmt.wFormatTag;
    wfx.cbSize = 0;
    wfx.nAvgBytesPerSec = fmt.dwAvgBytesPerSec;
    wfx.nBlockAlign = fmt.wBlockAlign;
    wfx.nChannels = fmt.wChannels;
    wfx.nSamplesPerSec = fmt.dwSamplesPerSec;
    wfx.wBitsPerSample = fmt.wBitsPerSample;

    // create audio stream
    AVISTREAMINFO ahdr; ZeroMemory(&ahdr, sizeof(ahdr));
    ahdr.fccType = streamtypeAUDIO;
    ahdr.dwScale = wfx.nBlockAlign;
    ahdr.dwRate = wfx.nSamplesPerSec * wfx.nBlockAlign;
    ahdr.dwSampleSize = wfx.nBlockAlign;
    ahdr.dwQuality = (DWORD)-1;
    hr = AVIFileCreateStream(avi, &m_pStreamAudio, &ahdr);
    CHECK(hr, "AVIFileCreateStream");
    if(hr != AVIERR_OK) { if (buf) QDELETE_ARRAY(buf); /*delete[] buf;*/ return false; }

    hr = AVIStreamSetFormat(m_pStreamAudio, 0, &wfx, sizeof(WAVEFORMATEX));
    CHECK(hr, "AVIStreamSetFormat");
    if(hr != AVIERR_OK) { if (buf) QDELETE_ARRAY(buf); /*delete[] buf;*/ return false; }

    // write audio stream
    unsigned long numbytes = dat.size;
    unsigned long numsamps = fmt.wChannels * numbytes * 8 / wfx.wBitsPerSample;
    hr = AVIStreamWrite(m_pStreamAudio, 0, numsamps, buf, numbytes, 0, 0, 0);
    CHECK(hr, "AVIStreamWrite");
    qdbg("Write numsamps %d, numbytes %d\n", numsamps, numbytes);
    QDELETE_ARRAY(buf); // if(buf) delete[] buf;

    // Release audio stream
    AVIStreamRelease(m_pStreamAudio);
    // Close AVI
    hr = AVIFileRelease(avi);
    CHECK(hr, "AVIFileRelease");
    // Close VFW
    AVIFileExit();
    return hr == AVIERR_OK;
}