.What to use for Improving speech-to-text recognition accuracy using pocketsphinx? - pocketsphinx-android

I am totally new to using pocketsphinx, I have followed the integration of demo application as mentioned in
Android offline voice recognition using PocketSphinx
It's working fine after integrating pocketsphinx as a library in my application, but the output is not as accurate as desired. It even picks up words from the provided dictionary that were never uttered.
I want to understand how to improve the accuracy of word detection. I had initially used a .lm file; then, instead of that, I simply created a .jsgf text file and used it, but there was still no improvement in accuracy. After switching to a .jsgf file, do I need to compile it somehow, or is simply copying the .jsgf text file into the assets folder enough?
http://cmusphinx.sourceforge.net/wiki/tutorialandroid in this link it is given building pocketsphinx-android. I have not done this. Just integrated it as library project
The Code:
public class SphinxSpeechRecognizerActivity extends Activity implements RecognitionListener {
private static String TAG = SphinxSpeechRecognizerActivity.class.getSimpleName();
private SpeechRecognizer mRecognizer;
private HashMap<String, Integer> mCaptions;
// private static final String KWS_SEARCH = "wakeup";
// private static final String KEYPHRASE = "phone";
private static final String COMMANDS = "command";
private boolean mErrorFlag = false;
private static boolean isRecognizerInProgress = false;
#Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.fragment);
initViews();
}
#Override
public void onResume() {
super.onResume();
}
#Override
public void onPause() {
super.onPause();
}
public void onDestroy() {
super.onDestroy();
Log.d(TAG, "** onDestroy **");
stopRecgonizer(true);
}
#Override
public void onBackPressed() {
super.onBackPressed();
stopRecgonizer(true);
}
private void initViews() {
final ImageView img_close = (ImageView)findViewById(R.id.ttsClose);
final ImageView img_voice_view = (ImageView)findViewById(R.id.tts_voice_view);
final ImageView img_info = (ImageView)findViewById(R.id.ttsInfo);
img_close.setOnClickListener(mOnClickListener);
img_info.setOnClickListener(mOnClickListener);
img_voice_view.setOnClickListener(mOnClickListener);
}
// Set press indicator
private View.OnClickListener mOnClickListener = new View.OnClickListener() {
#Override
public void onClick(View v) {
switch (v.getId()){
case R.id.ttsInfo:
break;
case R.id.tts_voice_view:
if (!isRecognizerInProgress) {
isRecognizerInProgress = true;
setupRecognizerController();
} else {
Log.d(TAG, "Sphinx recognizer is already running");
}
break;
case R.id.ttsClose:
default:
// Call back event
onBackPressed();
break;
}
}
};
#Override
public void onBeginningOfSpeech() {
Log.d(TAG, "** onBeginningOfSpeech **" + mErrorFlag);
}
#Override
public void onEndOfSpeech() {
Log.d(TAG, "** onEndOfSpeech **");
mRecognizer.stop();
}
#Override
public void onPartialResult(Hypothesis hypothesis) {
Log.d(TAG, "** onPartialResult **");
if (hypothesis == null)
return;
mRecognizer.stop();
}
private void switchSearch(String languageModelSearch) {
mRecognizer.stop();
mRecognizer.startListening(languageModelSearch, 2000);
}
#Override
public void onResult(Hypothesis hypothesis) {
hideListeningBackground();
stopRecgonizer(true);
if(hypothesis != null){
final String recognizedCommand = hypothesis.getHypstr();
Log.d(TAG,"Recognized Text: = " + recognizedCommand + " Score: " + hypothesis.getBestScore());
runOnUiThread(new Runnable() {
#Override
public void run() {
if(!recognizedCommand.equals("")) {
if (recognizedCommand.equalsIgnoreCase(<given_command>)) {
Intent speech_converted_intent = new Intent(SphinxSpeechRecognizerActivity.this, Subclass.class);
startActivity(speech_converted_intent);
finish();
}
} else {
showErrorMsg(Constants.MODE_SUCCESS);
}
}
});
} else {
showErrorMsg(Constants.MODE_DEFAULT);
}
}
#Override
public void onError(Exception e) {
Log.e(TAG, "** onError **");
showErrorMsg(Constants.MODE_FAILED);
}
#Override
public void onTimeout() {
Log.i(TAG, "** onTimeout **");
mRecognizer.stop();
}
private void setupRecognizerController() {
new AsyncTask<Void, Void, Exception>() {
#Override
protected Exception doInBackground(Void... params) {
try {
Assets assets = new Assets(SphinxSpeechRecognizerActivity.this);
File assetDir = assets.syncAssets();
setupRecognizer(assetDir);
} catch (IOException e) {
return e;
}
return null;
}
#Override
protected void onPostExecute(Exception result) {
if(result == null){
Log.d(TAG, "Sphinx Recognizer: Start");
mRecognizer.startListening(COMMANDS, 3000);
}
displayListeningBackground();
}
}.execute();
}
private void setupRecognizer(File assetsDir) throws IOException {
mRecognizer = defaultSetup()
.setAcousticModel(new File(assetsDir, "en-us-ptm"))
.setDictionary(new File(assetsDir, "cmudict-en-us.dict"))
.setKeywordThreshold(1e-10f)
.setFloat("-beam", 1e-30f)
.setBoolean("-allphone_ci", true)
.getRecognizer();
mRecognizer.addListener(this);
File languageModel = new File(assetsDir, "command.gram");
mRecognizer.addGrammarSearch(COMMANDS, languageModel);
// reset();
}
private void reset(){
mRecognizer.stop();
// mRecognizer.startListening(COMMANDS);
}
private void stopRecgonizer(boolean flag){
if(flag && mRecognizer != null){
mRecognizer.cancel();
mRecognizer.shutdown();
isRecognizerInProgress = false;
}
hideListeningBackground();
}
String mShowText = "ERROR";
private void showErrorMsg(final int error_type) {
runOnUiThread(new Runnable() {
#Override
public void run() {
switch (error_type) {
case Constants.MODE_FAILED:
// ...
break;
case Constants.MODE_SUCCESS:
//...
break;
case Constants.MODE_DEFAULT:
default:
//../
break;
}
}
});
}
}
My grammar file
#JSGF V1.0;

grammar commands;

// Top-level rule: one or more commands, in any order and any combination.
public <commands> = ( <label> | <mainMenu> | <subMenu> | <track> )+;

// Main-menu destinations.
<mainMenu> = music | phone | navigation | vehicle | homepage | shortcut;

// Labels, some of which take a small index.
<label> = back
        | usb <oneOrTwo>
        | contact
        | sms
        | message
        | dial
        | homepage <oneToThree>
        | shortcut <oneToThree>;

// Sub-menu actions.
<subMenu> = back
          | ( next | previous ) [ station ]
          | fm <oneOrTwo>
          | dr <oneOrTwo>
          | am
          | listen
          | play
          | search [ artists | playlists | songs | albums ]
          | call
          | received
          | missed
          | dial
          | address;

// Track numbers one through ninety nine.
<track> = <units> | <teens> | <tens> [ <units> ];

// Shared number building blocks.
<oneOrTwo>   = one | two;
<oneToThree> = <oneOrTwo> | three;
<units>      = <oneToThree> | four | five | six | seven | eight | nine;
<teens>      = ten | eleven | twelve | thirteen | fourteen | fifteen
             | sixteen | seventeen | eighteen | nineteen;
<tens>       = twenty | thirty | forty | fifty | sixty | seventy | eighty | ninety;
My log shows:
I/cmusphinx: INFO: pocketsphinx.c(993): Writing raw audio log file: /storage/emulated/0/Android/data/com.techmahindra.rngo/files/sync/000000000.raw

Accuracy debugging is a complex process, there could be too many issues - noise in data, bad cpu speed causing delays in recording, bad estimation of channel.
In order to debug performance you first need to collect the data. Uncomment the call to setRawLogDir in demo and see in the logcat that raw data files are being stored on sdcard. Check those files to make sure audio is recorded correctly. Share the data together with the logs and your model to get help on accuracy. Make sure data is properly recorded, has no noise, has proper format, that you speak without accent.
In case you want to listen continuously and ignore words that are not of interest, you need to use keyword spotting mode, not a language model or grammar.

Related

Ranges-v3 transform limitations

I am trying to use ranges-v3 to split an SNMP OID into parts and return them as a std::deque<uint32_t>.
The following code works but only after I added a number of additional un-natural steps:
#include <range/v3/all.hpp>
/// Split the supplied string into nodes, using '.' as a delimiter.
/// @param path the path to split, e.g. "888.1.2.3.4"
/// @return a std::deque<uint32_t> containing the split nodes
static std::deque<uint32_t> splitPath(std::string_view path) {
constexpr std::string_view delim{"."};
// Step 1: materialise the pieces as owning std::string values in a named vector.
auto tmp = path | ranges::views::split(delim)
| ranges::to<std::vector<std::string>>()
;
// Step 2: parse each stored string with std::stoul and collect the numbers into the deque.
return tmp | ranges::views::transform([](std::string_view v) {
return std::stoul(std::string{v}); })
| ranges::to<std::deque<uint32_t>>();
}
Initially I expected the following to simply work:
// First attempt: pipe the split pieces straight into transform, with no intermediate vector.
// This version does not compile: the lambda takes std::string_view, but the pieces produced
// by ranges::views::split are not std::string_view, so no matching operator| is found.
static std::deque<uint32_t> splitPath(std::string_view path) {
constexpr std::string_view delim{"."};
return path | ranges::views::split(delim)
| ranges::views::transform([](std::string_view v) {
return std::stoul(std::string{v}); })
| ranges::to<std::deque<uint32_t>>();
}
But that results in the following error:
error: no match for ‘operator|’ (operand types are
‘ranges::split_view<std::basic_string_view<char>,
std::basic_string_view<char> >’ and
‘ranges::views::view_closure<ranges::detail::
bind_back_fn_<ranges::views::transform_base_fn, ahk::snmp::
{anonymous}::splitPath(std::string_view)::<lambda(std::string_view)> > >’)
36 | return path | ranges::views::split(delim)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| |
| ranges::split_view<std::basic_string_view<char>,
std::basic_string_view<char> >
37 | | ranges::views::transform([](std::string_view v) {
| ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| |
| ranges::views::view_closure<ranges::detail::bind_back_fn_
<ranges::views::transform_base_fn, ahk::snmp::
{anonymous}::splitPath(std::string_view)::<lambda(std::string_view)> > >
38 | return std::stoul(std::string{v}); })
Why is it necessary to convert the result of the first operation to a std::vector and store it in a named value (tmp) before calling ranges::views::transform? Even the following code (which removes the named value tmp) fails:
// Second attempt: same pipeline as the working version, but without the named temporary.
// Reported not to compile.
// NOTE(review): presumably because the vector produced by ranges::to is a temporary, which
// range-v3 views will not adapt — confirm.
static std::deque<uint32_t> splitPath(std::string_view path) {
constexpr std::string_view delim{"."};
return path | ranges::views::split(delim)
| ranges::to<std::vector<std::string>>()
| ranges::views::transform([](std::string_view v) {
return std::stoul(std::string{v}); })
| ranges::to<std::deque<uint32_t>>();
}
The value type of the range returned by ranges::views::split isn't std::string_view; it is an implementation-detail type.
I'm not sure why you were able to | to<std::vector<std::string>> at all.
Because it uses a sentinel, you will need to convert it to a common range. (This is needed prior to C++20, where std::string_view becomes constructible from an iterator and sentinel, and prior to C++23, where it becomes constructible from a range.)
// Splits `path` on "." and converts every piece to an unsigned integer, without an
// intermediate container.
std::deque<uint32_t> splitPath(std::string_view path) {
constexpr std::string_view delim{"."};
// Generic lambda, so it accepts whatever element type the split view yields.
auto toul = [](auto v){
// views::common makes begin() and end() the same type, which the iterator-pair
// constructor of std::string below requires.
auto c = v | ranges::views::common;
return std::stoul(std::string(c.begin(), c.end()));
};
return path | ranges::views::split(delim)
| ranges::views::transform(toul)
| ranges::to<std::deque<uint32_t>>();
}

command push_back results an exit code -1073741819 (0xC0000005) c++

I'm trying to make kind of a little game using c++, and I have to move a character from one point on the map to another one. When I try to do that by push_back and then erase from the source point I get this exit code. What am I doing wrong?
My code for moving is:
// Moves the character standing on src_coordinates to dst_coordinates.
// Throws IllegalCell, CellEmpty, MoveTooFar or CellOccupied when the matching check fails.
void Game::move(const GridPoint & src_coordinates, const GridPoint & dst_coordinates) {
if(checkIfLegalCell(this, src_coordinates) == false ||
checkIfLegalCell(this, dst_coordinates) == false) {
throw IllegalCell();
}
if(searchInGrid(this->grid_characters, src_coordinates) == false){
throw CellEmpty();
}
// Linear scan for the entry whose grid_point equals the source cell.
std::vector<Pair>::iterator it_src=this->grid_characters.begin();
for(;it_src != this->grid_characters.end() ; ++it_src){
if((*it_src).grid_point == src_coordinates){
break;
}
}
if( ((*it_src).character)->checkIfCanMove(src_coordinates, dst_coordinates) == false) {
throw MoveTooFar();
}
if(searchInGrid(this->grid_characters, dst_coordinates) == true){
throw CellOccupied();
}
// NOTE(review): push_back may reallocate the vector's storage, which invalidates it_src;
// the erase below then runs on a dangling iterator (the reported 0xC0000005 crash).
this->grid_characters.push_back(Pair(dst_coordinates,(*it_src).character));
this->grid_characters.erase(it_src);
}
And my main looks like that:
#include <iostream>
#include <cassert>
#include "Exceptions.h"
#include "Game.h"
using namespace mtm;
// Builds an 8x8 game with four characters, prints it, moves the character at (1,1) to (1,2)
// and prints the game again.
void example1() {
std::cout << "------example 1------" << std::endl;
Game g1(8,8);
g1.addCharacter(GridPoint(1,1), Game::makeCharacter(CharacterType::MEDIC, Team::POWERLIFTERS, 10, 2, 4, 5));
g1.addCharacter(GridPoint(1,4), Game::makeCharacter(CharacterType::SNIPER, Team::POWERLIFTERS, 10, 2, 4, 5));
g1.addCharacter(GridPoint(6,1), Game::makeCharacter(CharacterType::SOLDIER, Team::CROSSFITTERS, 10, 2, 4, 5));
g1.addCharacter(GridPoint(6,4), Game::makeCharacter(CharacterType::MEDIC, Team::CROSSFITTERS, 10, 2, 4, 5));
std::cout << g1 << std::endl;
// The run shown in the question ends here: the first board is printed, then the process dies.
g1.move(GridPoint(1,1), GridPoint(1,2));
std::cout << g1 << std::endl;
std::cout << "Nice!" << std::endl;
}
// Entry point: runs example1 and exits with status 0.
int main() {
example1();
return 0;
}
It prints:
C:\Users\User\CLionProjects\gameMakerCurr.1\cmake-build-debug\ex0.exe
------example 1------
*****************
| | | | | | | | |
| |M| | |N| | | |
| | | | | | | | |
| | | | | | | | |
| | | | | | | | |
| | | | | | | | |
| |s| | |m| | | |
| | | | | | | | |
*****************
Process finished with exit code -1073741819 (0xC0000005)
And this is the part of the "Game" that I'm using to create the pairs of- (coordinates, character)
// One occupied cell: a grid position together with a shared handle to the character on it.
struct Pair {
GridPoint grid_point;
std::shared_ptr<Character> character;
// Stores copies of both arguments.
Pair(GridPoint grid_point, std::shared_ptr<Character> character) :
grid_point(grid_point), character(character) {}
};
class Game {
std::vector<Pair> grid_characters; // character by grid point ; key = grid_point ; value = character.
int height;
int width;
Any tips on how to fix this?
If you look attentively, in the program you trying to push the character to the end:
this->grid_characters.push_back(Pair(dst_coordinates,(*it_src).character));
this->grid_characters.erase(it_src);
First of all, after you push a new element into the vector, it will likely reallocate its storage to have enough capacity for one more element. So after the push_back the vector has moved its data to a new location in memory. After that you are trying to erase using it_src, which still points to the location used before the push_back.
So, before doing this, and before calculating it_src, you must ensure the vector has enough capacity and will not relocate your data:
if (this->grid_characters.capacity() <= this->grid_characters.size())
this->grid_characters.reserve(this->grid_characters.size() + 10);
//other variants
//this->grid_characters.reserve(this->grid_characters.size() + 1);
//this->grid_characters.reserve(this->grid_characters.size() * 2);
That should fix the failure.
But logically, why are you trying to move the object to the end? You change the coordinates from 1,1 to 1,2. In the vector it will end up after the object with coordinates 6,4. It looks like the order in the vector is irrelevant. If the order of the objects carries no meaning, then it makes sense to change only the coordinates, without moving the object inside the vector:
it_src->grid_point = dst_coordinates;
If the order matters use set.

Collision filtering with layers (PhysX 3.4)

I would like to filter my collisions with layers like in Unity, but I really don't understand how to do it. I'm following this tutorial : http://docs.nvidia.com/gameworks/content/gameworkslibrary/physx/guide/Manual/RigidBodyCollision.html#collision-filtering
All I want to do is disable the collisions between the objects that have the layer Cube and Plane...
Graph.cpp :
// Assigns a physics layer to each collider, then asks the physics wrapper to filter the
// Cube/Plane pair. The rest of the body is elided in this excerpt.
bool Graph::Init()
{
/*...*/
cubeCollider->SetLayer(Physics::PhysicLayer::Cube);
planeCollider->SetLayer(Physics::PhysicLayer::Plane);
sphereCollider->SetLayer(Physics::PhysicLayer::Sphere);
capsuleCollider->SetLayer(Physics::PhysicLayer::Capsule);
_physX->SetCollisionFiltering(Physics::PhysicLayer::Cube, Physics::PhysicLayer::Plane);
/*...*/
}
And here is how I set the filter shader :
PhysX.cpp :
// Writes one shared filter record (word0 = p_one, word1 = p_two) to the shape of every
// collider whose layer is p_one or p_two. Colliders on other layers are not touched here.
void PhysX::SetCollisionFiltering(PhysicLayer p_one, PhysicLayer p_two)
{
// I don't really know what to do here...
PxFilterData filterData;
filterData.word0 = p_one;
filterData.word1 = p_two;
// no collision between objects with layer ONE and objects with layer TWO ?
// Both layers receive the identical record, so a shape's own layer cannot be told apart
// from the layer it is paired with.
for (unsigned int i = 0; i < _colliders.size(); ++i)
{
if (_colliders[i]->GetLayer() == p_one || _colliders[i]->GetLayer() == p_two)
_colliders[i]->GetShape()->setSimulationFilterData(filterData);
}
}
// Simulation filter shader: chooses the pair flags for two shapes from their filter data.
// Always returns PxFilterFlag::eDEFAULT; only p_pairFlags varies between the branches.
physx::PxFilterFlags CreateFilterShader(PxFilterObjectAttributes p_attributes0, PxFilterData p_filterData0,
PxFilterObjectAttributes p_attributes1, PxFilterData p_filterData1,
PxPairFlags& p_pairFlags, const void* p_constantBlock, PxU32 constantBlockSize)
{
// Trigger
if (PxFilterObjectIsTrigger(p_attributes0) || PxFilterObjectIsTrigger(p_attributes1))
{
p_pairFlags = PxPairFlag::eDETECT_DISCRETE_CONTACT
| PxPairFlag::eSOLVE_CONTACT
| PxPairFlag::eNOTIFY_TOUCH_FOUND
| PxPairFlag::eNOTIFY_TOUCH_LOST;
}
// Normal Collision
else
{
// Not sure
// Contact flags are set only when each shape's word0 shares a bit with the other's word1.
// When the test fails, p_pairFlags is never assigned in this function.
if ((p_filterData0.word0 & p_filterData1.word1) && (p_filterData1.word0 & p_filterData0.word1))
{
p_pairFlags = PxPairFlag::eDETECT_DISCRETE_CONTACT
| PxPairFlag::eSOLVE_CONTACT
| PxPairFlag::eNOTIFY_CONTACT_POINTS
| PxPairFlag::eNOTIFY_THRESHOLD_FORCE_FOUND
| PxPairFlag::eNOTIFY_THRESHOLD_FORCE_LOST
| PxPairFlag::eNOTIFY_THRESHOLD_FORCE_PERSISTS
| PxPairFlag::eNOTIFY_TOUCH_FOUND
| PxPairFlag::eNOTIFY_TOUCH_LOST
| PxPairFlag::eNOTIFY_TOUCH_PERSISTS;
}
}
return PxFilterFlag::eDEFAULT;
}
With this code, none of my objects collide... I don't really understand what are PxFilter.word0, word1 word2 and word3 by the way...
Thanks in advance !
This a very late answer, just a reference for others.
Based on the NVIDIA documentation, you should return PxFilterFlag::eSUPPRESS or PxFilterFlag::eKILL for pairs that must not collide.
https://docs.nvidia.com/gameworks/content/gameworkslibrary/physx/apireference/files/structPxFilterFlag.html
:
// Layer-based simulation filter shader.
// Each shape's filter data holds a collision mask in word0 and its own layer in word1.
// A pair is suppressed unless each shape's mask contains the other shape's layer.
PxFilterFlags PhysicsWorldFilterShader(
    PxFilterObjectAttributes attributes0, PxFilterData filterData0,
    PxFilterObjectAttributes attributes1, PxFilterData filterData1,
    PxPairFlags& pairFlags, const void* constantBlock, PxU32 constantBlockSize
)
{
    const bool firstAcceptsSecond = (filterData0.word0 & filterData1.word1) != 0;
    const bool secondAcceptsFirst = (filterData1.word0 & filterData0.word1) != 0;

    // Both directions must agree, otherwise the pair is filtered out.
    if (!(firstAcceptsSecond && secondAcceptsFirst))
    {
        return PxFilterFlag::eSUPPRESS;
    }

    // Accepted pair: solve contacts, use discrete detection, and apply the default trigger flags.
    pairFlags = PxPairFlag::eSOLVE_CONTACT
              | PxPairFlag::eDETECT_DISCRETE_CONTACT
              | PxPairFlag::eTRIGGER_DEFAULT;
    return PxFilterFlag::eDEFAULT;
}

Deleting a folder and all its contents with Qt?

How to delete a folder and all its contents with Qt?
I tried using:
QFile::remove();
but it seems like it deletes only one file a time.
For Qt5 and above there is QDir::removeRecursively:
QDir dir("C:\\Path\\To\\Folder\\Here");
dir.removeRecursively();
For Qt4 or lower you can use a recursive function that deletes every file:
bool removeDir(const QString & dirName)
{
bool result = true;
QDir dir(dirName);
if (dir.exists(dirName)) {
Q_FOREACH(QFileInfo info, dir.entryInfoList(QDir::NoDotAndDotDot | QDir::System | QDir::Hidden | QDir::AllDirs | QDir::Files, QDir::DirsFirst)) {
if (info.isDir()) {
result = removeDir(info.absoluteFilePath());
} else {
result = QFile::remove(info.absoluteFilePath());
}
if (!result) {
return result;
}
}
result = dir.rmdir(dirName);
}
return result;
}
as stated here.

Realization in C++

Need help in understanding "realization" relationship with classes. Can anyone give me a C++ example on this?
I browsed and I got to know that, a class implementing interface is an example of realization.
I didn't get better picture. How do I represent the same using UML?
Thanks
Realization specifies a contract between two or more types. Where one type (here Interface Imammals) defines the contract and the other type (Cat, Dog) promises to carry out.
Below code is a lazy example of Realization...
#include<iostream>
using namespace std;
// Interface (the "contract" in a realization relationship): anything that can walk.
class IMammals{
public:
    // Virtual destructor so an implementation can be destroyed safely through an
    // IMammals pointer.
    virtual ~IMammals() {}
    // Implemented by each concrete mammal.
    virtual void walk() = 0;
};
// Concrete realization of IMammals for cats.
class Cats : public IMammals
{
public:
    // Prints the cat's walking message followed by a newline.
    virtual void walk()
    {
        std::cout << "Cat is walking" << std::endl;
    }
};
// Concrete realization of IMammals for dogs.
class Dogs : public IMammals
{
public:
    // Prints the dog's walking message followed by a newline.
    virtual void walk()
    {
        std::cout << "Dog is walking" << std::endl;
    }
};
int main(void) {
Cats aCat;
Dogs aDog;
IMammals *ptrMammals = NULL;
ptrMammals = &aCat;
ptrMammals->walk();
ptrMammals = &aDog;
ptrMammals->walk();
return 0;
}
Using UML, realization is represented by a dashed arrow that points from the implementing class (Cats or Dogs, the party that fulfils the contract) to the class that defines the contract (IMammals). The tip of the arrow is an empty triangle.
+-----------------+
| IMammals |
|-----------------|
| |
+---------|>| |<|--------+
| +-----------------+ |
| |
| |
+-----+-----+ +-----+-----+
| Cat | | Dog |
|-----------| |-----------|
| | | |
+-----------+ +-----------+