How to use espeak_EVENT_TYPE; - c++

I am looking a way to know when espeak ended it's speaking. Somebody told use the espeakEVENT_MSG_TERMINATED. But when I try to put this part to my code it gives me this error:
#include <espeak/speak_lib.h>
espeak_EVENT_TYPE;
if( espeak_EVENT_TYPE == espeakEVENT_MSG_TERMINATED)
{
do something;
}
application.cpp:31:1: error: declaration does not declare anything
[-fpermissive] espeak_EVENT_TYPE; ^ application.cpp: In function
‘void speech(char*)’: application.cpp:116:27: error: expected
primary-expression before ‘==’ token
if( espeak_EVENT_TYPE == espeakEVENT_MSG_TERMINATED)
EDIT: I use this simple code to use espeak:
#include <string.h>
#include <malloc.h>
#include <espeak/speak_lib.h>
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 1000, Options=0;
void* user_data;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
char Voice[] = {"English"};
char text[30] = {"this is an english text"};
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
int main(int argc, char* argv[] )
{
output = AUDIO_OUTPUT_PLAYBACK;
espeak_Initialize(output, Buflength, path, AUDIO_OUTPUT_SYNCHRONOUS ); //Options );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en";
espeak_VOICE voice = {0};
// memset(&voice, 0, sizeof(espeak_VOICE));
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
espeak_SetVoiceByProperties(&voice);
Size = strlen(text)+1;
espeak_Synth( text, Size, position, position_type, end_position, flags,unique_identifier, user_data );
espeak_Synchronize( );
return 0;
}
Edit2:
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>
#include <espeak/speak_lib.h>
#include <string>
#include <iostream>
using namespace std;
#include "pocketsphinx.h"
static ps_decoder_t *ps;
static cmd_ln_t *config;
static FILE *rawfd;
ad_rec_t *ad;
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 1000, Options=0;
void* user_data;
char Voice[] = {"English"};
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
static void initFuncs()
{
output = AUDIO_OUTPUT_PLAYBACK;
espeak_Initialize(output, Buflength, path, AUDIO_OUTPUT_SYNCHRONOUS );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en";
espeak_VOICE voice;
memset(&voice, 0, sizeof(espeak_VOICE));
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
espeak_SetVoiceByProperties(&voice);
}
int receive_espeak_events(short *wav, int numsamples, espeak_EVENT *event)
{
while (event->type != espeakEVENT_LIST_TERMINATED) {
if (event->type == espeakEVENT_MSG_TERMINATED) {
/* do something */
ad_start_rec(ad);
}
++event; // Examine the next event.
}
return 0; // Continue speaking.
}
static void sleep_msec(int32 ms)
{
struct timeval tmo;
tmo.tv_sec = 0;
tmo.tv_usec = ms * 1000;
select(0, NULL, NULL, NULL, &tmo);
}
static void speech(char* hyp)
{
Size = strlen(hyp)+1;
espeak_SetSynthCallback(receive_espeak_events);
espeak_Synth( hyp, Size, position, position_type, end_position, flags,unique_identifier, user_data );
espeak_Synchronize( );
}
static void recognize_from_microphone()
{
ad_rec_t *ad;
int16 adbuf[2048];
uint8 utt_started, in_speech;
int32 k;
char *hyp=0;
if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),(int) cmd_ln_float32_r(config,"-samprate"))) == NULL)
E_FATAL("Failed to open audio device\n");
if (ad_start_rec(ad) < 0)
E_FATAL("Failed to start recording\n");
if (ps_start_utt(ps) < 0)
E_FATAL("Failed to start utterance\n");
utt_started = FALSE;
E_INFO("Ready....\n");
for (;;)
{
if ((k = ad_read(ad, adbuf, 2048)) < 0)
E_FATAL("Failed to read audio\n");
ps_process_raw(ps, adbuf, k, FALSE, FALSE);
in_speech = ps_get_in_speech(ps);
if (in_speech && !utt_started)
{
utt_started = TRUE;
E_INFO("Listening...\n");
}
if (!in_speech && utt_started)
{
ps_end_utt(ps);
hyp = (char*)ps_get_hyp(ps, NULL );
if (hyp != NULL)
{
///////////////////I am passing hyp to espeak heere ////////////////////
ad_stop_rec(ad);
speech(hyp);
printf("%s\n",hyp);
fflush(stdout);
// sleep_msec(3000);
}
if (ps_start_utt(ps) < 0)
E_FATAL("Failed to start utterance\n");
utt_started = FALSE;
E_INFO("Ready....\n");
}
}//for
ad_close(ad);
}
int main(int argc, char *argv[])
{
initFuncs();
config = cmd_ln_init(NULL, ps_args(), TRUE,
"-hmm", "/home/m/myrobot3/robot/model_parameters/robot.cd_cont_1000",
"-lm","/home/m/myrobot3/robot/etc/robot.lm.bin",
"-dict", "/home/m/myrobot3/robot/etc/robot.dic",
NULL);
ps = ps_init(config);
recognize_from_microphone();
ps_free(ps);
cmd_ln_free_r(config);
return 0;
}
ERROR:
FATAL: "application.cpp", line 163: Failed to read audio

espeak_EVENT_TYPE;
This line is not sensible to the compiler. espeak_EVENT_TYPE is a data type. It is not a variable that can be compared to a value like espeakEVENT_MSG_TERMINATED. In order to declare a variable, the syntax would be:
espeak_EVENT_TYPE an_event_type {};
if (an_event_type == espeakEVENT_MSG_TERMINATED) {
/* ... */
However, if we do this, then the variable an_event_type that we just created will not actually contain any information about any real espeak event. And it certainly won't say anything about whether any actual message is terminated or not.
Receiving real event information from espeak
In order to get information about whether a message is terminated, the program needs to obtain a variable of type espeak_EVENT_TYPE from the espeak library.
Looking through this header, espeak_EVENT_TYPE is used as part of the espeak_EVENT struct. To receive espeak_EVENT notifications, it is necessary to write a function which will be called by the espeak library. (This is known as a "callback" function). Then the callback function is registered with the library by calling espeak_SetSynthCallback.
From the same header, the prototype of the callback function must be as follows:
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
wav: is the speech sound data which has been produced. NULL
indicates that the synthesis has been completed.
numsamples: is the number of entries in wav. This number may vary,
may be less than the value implied by the buflength parameter given in > espeak_Initialize, and may sometimes be zero (which does NOT indicate
end of synthesis).
events: an array of espeak_EVENT items which indicate word and
sentence events, and also the occurance if and elements
within the text. The list of events is terminated by an event of type > = 0.
Callback returns: 0=continue synthesis, 1=abort synthesis.
Putting this together, we need a function that loops through the events variable as if it is an array, until it encounters an event of type 0. Then the function needs to return 0 to continue speech activities.
int receive_espeak_events(short *wav, int numsamples, espeak_EVENT *event)
{
while (event->type != espeakEVENT_LIST_TERMINATED) {
if (event->type == espeakEVENT_MSG_TERMINATED) {
/* do something */
}
++event; // Examine the next event.
}
return 0; // Continue speaking.
}
To tell espeak to call this function, pass the function to espeak_SetSynthCallback before starting any synthesis operations.
espeak_SetSynthCallback(receive_espeak_events);

Related

How to find if espeak ended the speech?

I want to use espeak in my program. I'd like to know when espeak stops speaking. Are there any flags or functions to check?
Let's consider this is my program:
Line 1
espeak
Line 2
When I execute this code, espeak starts to say "hello, this is espeak" but before it ends, Line 2 of code is executed, and I don't like this. I am looking for a way to pause the program until espeak ends the speaking!
EDIT:
This is my complete code, I use pocketsphinx to recognize what the user say, then save it inside char* hyp and pass it through espeak by speech function.
static ps_decoder_t *ps;
static cmd_ln_t *config;
static FILE *rawfd;
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 1000, Options=0;
void* user_data;
char Voice[] = {"English"};
char text2[30] = {"this is a english test"};
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
//char* text;
static void initFuncs()
{
output = AUDIO_OUTPUT_PLAYBACK;
espeak_Initialize(output, Buflength, path, Options );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en";
espeak_VOICE voice;
memset(&voice, 0, sizeof(espeak_VOICE));
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
espeak_SetVoiceByProperties(&voice);
}
static void sleep_msec(int32 ms)
{
struct timeval tmo;
tmo.tv_sec = 0;
tmo.tv_usec = ms * 1000;
select(0, NULL, NULL, NULL, &tmo);
}
static void speech(char* hyp)
{
Size = strlen(hyp)+1;
espeak_Synth( hyp, Size, position, position_type, end_position, flags,unique_identifier, user_data );
espeak_Synchronize( );
}
static void recognize_from_microphone()
{
ad_rec_t *ad;
int16 adbuf[2048];
uint8 utt_started, in_speech;
int32 k;
char *hyp;
if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),(int) cmd_ln_float32_r(config,"-samprate"))) == NULL)
E_FATAL("Failed to open audio device\n");
if (ad_start_rec(ad) < 0)
E_FATAL("Failed to start recording\n");
if (ps_start_utt(ps) < 0)
E_FATAL("Failed to start utterance\n");
utt_started = FALSE;
E_INFO("Ready....\n");
for (;;) {
ad_start_rec(ad);
if ((k = ad_read(ad, adbuf, 2048)) < 0)
E_FATAL("Failed to read audio\n");
ps_process_raw(ps, adbuf, k, FALSE, FALSE);
in_speech = ps_get_in_speech(ps);
if (in_speech && !utt_started) {
utt_started = TRUE;
E_INFO("Listening...\n");
}
if (!in_speech && utt_started) {
ps_end_utt(ps);
hyp = (char*)ps_get_hyp(ps, NULL );
if (hyp != NULL) {
ad_stop_rec(ad);
speech(hyp);
printf("%s\n", hyp);
fflush(stdout);
}
if (ps_start_utt(ps) < 0)
E_FATAL("Failed to start utterance\n");
utt_started = FALSE;
E_INFO("Ready....\n");
}
}//for loop
ad_close(ad);
}
int main(int argc, char *argv[])
{
initFuncs();
config = cmd_ln_init(NULL, ps_args(), TRUE,
"-hmm", MODELDIR "/en-us/en-us",
"-lm", MODELDIR "/en-us/en-us.lm.bin",
"-dict", MODELDIR "/en-us/cmudict-en-us.dict",
NULL);
ps = ps_init(config);
recognize_from_microphone();
ps_free(ps);
cmd_ln_free_r(config);
return 0;
}
I adapted the espeak part of your code. In this code espeak is finished before Line 2 begins. Also the callback functionality is implemented. You are setting a voice by name and a voice by property. Maybe this is a problem. You are working with c-style strings and not with std::string. Maybe you are calculating the wrong string length. I don't know where the problem in your code is but the following code has fixed it:
#include <string>
#include <iostream>
#include <espeak/speak_lib.h>
espeak_POSITION_TYPE position_type(POS_CHARACTER);
espeak_AUDIO_OUTPUT output(AUDIO_OUTPUT_PLAYBACK);
void* user_data;
std::string voice("English");
std::string text("this is a english test");
unsigned int Size(0);
unsigned int position(0);
unsigned int end_position(0);
unsigned int flags(espeakCHARS_AUTO);
unsigned int* unique_identifier;
static void initFuncs() {
espeak_Initialize(output, 0, 0, 0);
espeak_SetVoiceByName(voice.c_str());
}
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events) {
std::cout << "Callback: ";
for (unsigned int i(0); events[i].type != espeakEVENT_LIST_TERMINATED; i++) {
if (i != 0) {
std::cout << ", ";
}
switch (events[i].type) {
case espeakEVENT_LIST_TERMINATED:
std::cout << "espeakEVENT_LIST_TERMINATED";
break;
case espeakEVENT_WORD:
std::cout << "espeakEVENT_WORD";
break;
case espeakEVENT_SENTENCE:
std::cout << "espeakEVENT_SENTENCE";
break;
case espeakEVENT_MARK:
std::cout << "espeakEVENT_MARK";
break;
case espeakEVENT_PLAY:
std::cout << "espeakEVENT_PLAY";
break;
case espeakEVENT_END:
std::cout << "espeakEVENT_END";
break;
case espeakEVENT_MSG_TERMINATED:
std::cout << "espeakEVENT_MSG_TERMINATED";
break;
case espeakEVENT_PHONEME:
std::cout << "espeakEVENT_PHONEME";
break;
case espeakEVENT_SAMPLERATE:
std::cout << "espeakEVENT_SAMPLERATE";
break;
default:
break;
}
}
std::cout << std::endl;
return 0;
}
static void speech(std::string hyp) {
Size = hyp.length();
espeak_SetSynthCallback(SynthCallback);
espeak_Synth(hyp.c_str(), Size, position, position_type, end_position, flags,unique_identifier, user_data );
espeak_Synchronize( );
}
int main() {
initFuncs();
std::cout << "Start" << std::endl;
speech(text.c_str());
std::cout << "End" << std::endl;
return 0;
}
The out put is
Start
Callback: espeakEVENT_SENTENCE
Callback: espeakEVENT_WORD
Callback: espeakEVENT_WORD
Callback: espeakEVENT_WORD
Callback: espeakEVENT_WORD
Callback: espeakEVENT_WORD
Callback: espeakEVENT_END
Callback: espeakEVENT_MSG_TERMINATED
End
The timing of the console outout fits to the audio output. When you are working with C++, then you should use its tools and features like strings, cout instead of printf and smart pointers to avoid problems like this.

How can I pause my program inside a for(;;) loop?

I have a for loop inside my program that there is a call to espeak engin inside it:
The text is given by the other app and is different in every cycle of loop, and I am trying to get this text, pass it to espeak, it says the text, and the rest of the code will running(printing the given text for example).
But the problem is that, when espeak is talking, the rest of code doesn't wait till it ends the speaking, and I am looking for a way to pause the program till the espeak ends, then rest of the code will execute, but don't know how?
#include <string.h>
#include <malloc.h>
#include <espeak/speak_lib.h>
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 1000, Options=0;
void* user_data;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
char Voice[] = {"English"};
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
espeak_init()
{
output = AUDIO_OUTPUT_PLAYBACK;
espeak_Initialize(output, Buflength, path, Options );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en";
espeak_VOICE voice = {0};
// memset(&voice, 0, sizeof(espeak_VOICE));
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
espeak_SetVoiceByProperties(&voice);
Size = strlen(text)+1;
}
int main()
{
for(;;)
{
...
espeak_Synth( text, Size, position, position_type, end_position, flags,unique_identifier, user_data );
espeak_Synchronize( );
// REST OF THE CODE...PRINTING THE WORLD THAT ESPEAK SAID FOR EXAMPLE
printf("\n %s \n", text);
...
}
}

I am trying to pass some strings trough espeak and it reads them but I get "segmentation fault"

This is my code. I want to get 5 strings from the user and espeak reads each of them when user interred it. But I get segmentation fault(core dumped) message.
#include <string.h>
#include <malloc.h>
#include <espeak/speak_lib.h>
int test()
{
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 500, Options=0;
void* user_data;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
char Voice[] = {"English"};
int i=0;
char text[1000];
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
output = AUDIO_OUTPUT_PLAYBACK;
espeak_Initialize(output, Buflength, path, Options );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en_US";
espeak_VOICE voice={0};
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
Size = strlen(text)+1;
for (i=0; i<5; i++)
{
scanf("%s ", &text);
printf("%s", text);
espeak_Synth( text, Size, position, position_type, end_position, flags,
unique_identifier, user_data );
espeak_Synchronize( );
fflush(stdout);
}
return 0;
}
int main(int argc, char* argv[] )
{
test();
return 0;
}
I tried some modification but none of them worked. I want the program works like this:
User input: hi
espeak says: hi
user input: one
espeak says: one
(for 5
inputs)
But when I try to interring more than 4 characters as input,it gives segmentation fault error!
The two main issues are:
you use strlen on an uninitialized array of chars;
the unique_identifier argument of espeak_Synth must be NULL or point to an unsigned int (see the source code) while now it is an unsigned pointer to random memory.
Move strlen after scanf, use NULL instead of unique_identifier and your code will suddenly work (kind of).
There are many other issues though: useless variables, uninitialized variables, no input sanitization and more. IMO a better approach would be to throw away the test function and rewrite it from scratch properly.
Addendum
This is how I'd rewrite the above code. It is still suboptimal (no input sanitization, no error checking) but IMO it is much cleaner.
#include <stdio.h>
#include <string.h>
#include <espeak/speak_lib.h>
static void say(const char *text)
{
static int initialized = 0;
if (! initialized) {
espeak_Initialize(AUDIO_OUTPUT_PLAYBACK, 0, NULL, 0);
espeak_SetVoiceByName("en");
initialized = 1;
}
espeak_Synth(text, strlen(text)+1,
0, POS_CHARACTER, 0,
espeakCHARS_UTF8, NULL, NULL);
espeak_Synchronize();
}
int main()
{
char text[1000];
int i;
for (i = 0; i < 5; ++i) {
scanf("%s", text);
say(text);
}
return 0;
}

Why it gives me "segmentation fault" error when I change this code?

I can compile this code without any errors:
#include <string.h>
#include <malloc.h>
#include <espeak/speak_lib.h>
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 500, Options=0;
void* user_data;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
char Voice[] = {"English"};
char text[30] = {"this is a english test"};
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
int main(int argc, char* argv[] )
{
output = AUDIO_OUTPUT_PLAYBACK;
int I, Run = 1, L;
espeak_Initialize(output, Buflength, path, Options );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en"; //Default to US English
espeak_VOICE voice;
memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
// espeak_SetVoiceByProperties(&voice);
Size = strlen(text)+1;
espeak_Synth( text, Size, position, position_type, end_position, flags,
unique_identifier, user_data );
espeak_Synchronize( );
return 0;
}
But I get segmentation fault error when try to compile this code(putting all the code inside man():
#include <string.h>
#include <malloc.h>
#include <espeak/speak_lib.h>
int main(){
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
char *path=NULL;
int Buflength = 500, Options=0;
void* user_data;
t_espeak_callback *SynthCallback;
espeak_PARAMETER Parm;
char Voice[] = {"English"};
unsigned int Size,position=0, end_position=0, flags=espeakCHARS_AUTO, *unique_identifier;
output = AUDIO_OUTPUT_PLAYBACK;
int I, Run = 1, L;
espeak_Initialize(output, Buflength, path, Options );
espeak_SetVoiceByName(Voice);
const char *langNativeString = "en"; //Default to US English
espeak_VOICE voice;
memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
voice.languages = langNativeString;
voice.name = "US";
voice.variant = 2;
voice.gender = 1;
//espeak_SetVoiceByProperties(&voice);
char tx[]="hi there, my name is Eliyaas, what's your name?";
espeak_Synth( tx, strlen(tx)+1, position, position_type, end_position, flags,unique_identifier, user_data );
espeak_Synchronize( );
return 0;}
What is the difference and which line causes this error?
Is it possible to put all of this program inside main() function? How?
(Add more text to pass add more information needed to post error) (Add
more text to pass add more information needed to post error) (Add more
text to pass add more information needed to post error) (Add more text
to pass add more information needed to post error)
Global variables are zero initialized in C++, local variables are not and it is Undefined Behavior to read from them. You have plenty of them in your code, for example:
espeak_POSITION_TYPE position_type;
espeak_AUDIO_OUTPUT output;
you need to edit your code and make sure that all variables are properly initialized. For example with memset if they are POD, the same way as it is done to voice.

Espeak SAPI/dll usage on Windows?

Question: I am trying to use the espeak text-to-speech engine.
So for I got it working wounderfully on linux (code below).
Now I wanted to port this basic program to windows, too, but it's nearly impossible...
Part of the problem is that the windows dll only allows for AUDIO_OUTPUT_SYNCHRONOUS, which means it requires a callback, but I can't figure out how to play the audio from the callback... First it crashed, then I realized, I need a callback function, now I get the data in the callback function, but I don't know how to play it... as it is neither a wav file nor plays automatically as on Linux.
The sourceforge site is rather useless, because it basically says use the SAPI version, but then there is no example on how to use the sapi espeak dll...
Anyway, here's my code, can anybody help?
#ifdef __cplusplus
#include <cstdio>
#include <cstdlib>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include <assert.h>
#include <ctype.h>
//#include "speak_lib.h"
#include "espeak/speak_lib.h"
// libespeak-dev: /usr/include/espeak/speak_lib.h
// apt-get install libespeak-dev
// apt-get install libportaudio-dev
// g++ -o mine mine.cpp -lespeak
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak
char voicename[40];
int samplerate;
int quiet = 0;
static char genders[4] = {' ','M','F',' '};
//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/
const char *data_path = NULL; // use default path for espeak-data
int strrcmp(const char *s, const char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
return memcmp(s + slen - sublen, sub, sublen);
}
char * strrcpy(char *dest, const char *source)
{
// Pre assertions
assert(dest != NULL);
assert(source != NULL);
assert(dest != source);
// tk: parentheses
while((*dest++ = *source++))
;
return(--dest);
}
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
static char szReturnValue[LANGUAGE_LENGTH] ;
memset(szReturnValue, 0, LANGUAGE_LENGTH);
for (int i = 0; pszShortSign[i] != '\0'; ++i)
szReturnValue[i] = (char) tolower(pszShortSign[i]);
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
if( !strrcmp( v->languages, szReturnValue) )
{
strcpy(szReturnValue, v->name);
return szReturnValue;
}
} // End for
strcpy(szReturnValue, "default");
return szReturnValue;
} // End function getvoicename
void ListVoices()
{
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
printf("Shortsign: %s\n", v->languages);
printf("age: %d\n", v->age);
printf("gender: %c\n", genders[v->gender]);
printf("name: %s\n", v->name);
printf("\n\n");
} // End for
} // End function getvoicename
int main()
{
printf("Hello World!\n");
const char* szVersionInfo = espeak_Info(NULL);
printf("Espeak version: %s\n", szVersionInfo);
samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);
strcpy(voicename, "default");
// espeak --voices
strcpy(voicename, "german");
strcpy(voicename, GetLanguageVoiceName("DE"));
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
printf("Espeak setvoice error...\n");
}
static char word[200] = "Hello World" ;
strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3");
strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
int speed = 220;
int volume = 500; // volume in range 0-100 0=silence
int pitch = 50; // base pitch, range 0-100. 50=normal
// espeak.cpp 625
espeak_SetParameter(espeakRATE, speed, 0);
espeak_SetParameter(espeakVOLUME,volume,0);
espeak_SetParameter(espeakPITCH,pitch,0);
// espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
// espeakPUNCTUATION: which punctuation characters to announce:
// value in espeak_PUNCT_TYPE (none, all, some),
espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
voice_spec->gender=2; // 0=none 1=male, 2=female,
//voice_spec->age = age;
espeak_SetVoiceByProperties(voice_spec);
espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();
strcpy(voicename, GetLanguageVoiceName("EN"));
espeak_SetVoiceByName(voicename);
strcpy(word, "Geany was fragged by GSG9 Googlebot");
strcpy(word, "Googlebot");
espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();
espeak_Terminate();
printf("Espeak terminated\n");
return EXIT_SUCCESS;
}
/*
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
memset(&voice_select,0,sizeof(voice_select));
voice_select.languages = voicename;
if(espeak_SetVoiceByProperties(&voice_select) != EE_OK)
{
fprintf(stderr,"%svoice '%s'\n",err_load,voicename);
exit(2);
}
}
*/
The above code is for Linux.
The below code is about as far as I got on Vista x64 (32 bit emu):
#ifdef __cplusplus
#include <cstdio>
#include <cstdlib>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif
#include <assert.h>
#include <ctype.h>
#include "speak_lib.h"
//#include "espeak/speak_lib.h"
// libespeak-dev: /usr/include/espeak/speak_lib.h
// apt-get install libespeak-dev
// apt-get install libportaudio-dev
// g++ -o mine mine.cpp -lespeak
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak
char voicename[40];
int iSampleRate;
int quiet = 0;
static char genders[4] = {' ','M','F',' '};
//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/
//const char *data_path = NULL; // use default path for espeak-data
const char *data_path = "C:\\Users\\Username\\Desktop\\espeak-1.43-source\\espeak-1.43-source\\";
int strrcmp(const char *s, const char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
return memcmp(s + slen - sublen, sub, sublen);
}
char * strrcpy(char *dest, const char *source)
{
// Pre assertions
assert(dest != NULL);
assert(source != NULL);
assert(dest != source);
// tk: parentheses
while((*dest++ = *source++))
;
return(--dest);
}
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
static char szReturnValue[LANGUAGE_LENGTH] ;
memset(szReturnValue, 0, LANGUAGE_LENGTH);
for (int i = 0; pszShortSign[i] != '\0'; ++i)
szReturnValue[i] = (char) tolower(pszShortSign[i]);
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
if( !strrcmp( v->languages, szReturnValue) )
{
strcpy(szReturnValue, v->name);
return szReturnValue;
}
} // End for
strcpy(szReturnValue, "default");
return szReturnValue;
} // End function getvoicename
void ListVoices()
{
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);
const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
printf("Shortsign: %s\n", v->languages);
printf("age: %d\n", v->age);
printf("gender: %c\n", genders[v->gender]);
printf("name: %s\n", v->name);
printf("\n\n");
} // End for
} // End function getvoicename
/* Callback from espeak. Directly speaks using AudioTrack. */
#define LOGI(x) printf("%s\n", x)
static int AndroidEspeakDirectSpeechCallback(short *wav, int numsamples, espeak_EVENT *events)
{
char buf[100];
sprintf(buf, "AndroidEspeakDirectSpeechCallback: %d samples", numsamples);
LOGI(buf);
if (wav == NULL)
{
LOGI("Null: speech has completed");
}
if (numsamples > 0)
{
//audout->write(wav, sizeof(short) * numsamples);
sprintf(buf, "AudioTrack wrote: %d bytes", sizeof(short) * numsamples);
LOGI(buf);
}
return 0; // continue synthesis (1 is to abort)
}
static int AndroidEspeakSynthToFileCallback(short *wav, int numsamples,espeak_EVENT *events)
{
char buf[100];
sprintf(buf, "AndroidEspeakSynthToFileCallback: %d samples", numsamples);
LOGI(buf);
if (wav == NULL)
{
LOGI("Null: speech has completed");
}
// The user data should contain the file pointer of the file to write to
//void* user_data = events->user_data;
FILE* user_data = fopen ( "myfile1.wav" , "ab" );
FILE* fp = static_cast<FILE *>(user_data);
// Write all of the samples
fwrite(wav, sizeof(short), numsamples, fp);
return 0; // continue synthesis (1 is to abort)
}
int main()
{
printf("Hello World!\n");
const char* szVersionInfo = espeak_Info(NULL);
printf("Espeak version: %s\n", szVersionInfo);
iSampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, data_path, 0);
if (iSampleRate <= 0)
{
printf("Unable to initialize espeak");
return EXIT_FAILURE;
}
//samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);
//ListVoices();
strcpy(voicename, "default");
// espeak --voices
//strcpy(voicename, "german");
//strcpy(voicename, GetLanguageVoiceName("DE"));
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
printf("Espeak setvoice error...\n");
}
static char word[200] = "Hello World" ;
strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3");
strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
int speed = 220;
int volume = 500; // volume in range 0-100 0=silence
int pitch = 50; // base pitch, range 0-100. 50=normal
// espeak.cpp 625
espeak_SetParameter(espeakRATE, speed, 0);
espeak_SetParameter(espeakVOLUME,volume,0);
espeak_SetParameter(espeakPITCH,pitch,0);
// espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
// espeakPUNCTUATION: which punctuation characters to announce:
// value in espeak_PUNCT_TYPE (none, all, some),
//espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
//voice_spec->gender=2; // 0=none 1=male, 2=female,
//voice_spec->age = age;
//espeak_SetVoiceByProperties(voice_spec);
//espeak_SetSynthCallback(AndroidEspeakDirectSpeechCallback);
espeak_SetSynthCallback(AndroidEspeakSynthToFileCallback);
unsigned int unique_identifier;
espeak_ERROR err = espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, &unique_identifier, NULL);
err = espeak_Synchronize();
/*
strcpy(voicename, GetLanguageVoiceName("EN"));
espeak_SetVoiceByName(voicename);
strcpy(word, "Geany was fragged by GSG9 Googlebot");
strcpy(word, "Googlebot");
espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();
*/
// espeak_Cancel();
espeak_Terminate();
printf("Espeak terminated\n");
system("pause");
return EXIT_SUCCESS;
}
Have you tried passing the buffer you obtain in your callback to sndplaysnd()??
Declare Function sndPlaySound Lib "winmm.dll" Alias "sndPlaySoundA" (ByVal lpszSoundName As String, ByVal uFlags As Long) As Long
Its standard winAPI is as follows:
sndPlaySound(buffer[0], SND_ASYNC | SND_MEMORY)
Alternately, if you have a wav-file that has the audio to play:
sndPlaySound(filename, SND_ASYNC)
playsound has a ASYNC mode that wouldn't block your program's execution while the audio is being played.
NOTE: I have used it in VB and the above snippets are for use in VB. If you are coding in VC++, you might have to modify them accordingly. But the basic intention remains the same; to pass the buffer to sndPlaySound with the ASYNC flag set.
Good LUCK!!
Several changes in source code are needed to make the windows library have the same functionality as the one on Linux. I listed the changes here. The ready to use binary is also available.
All the patches and the description were also sent to espeak maintainer (publicly, through the mailing list and patches tracker), so maybe in future it will be available directly.