how to fix my implementation of Auto-complete feature using Trie Tree? - c++

Hi so i've been trying to implement Auto-complete/suggestion using trie tree
but the suggestion part only suggests 1 or 2 first similar words to it and i can't find out what causing this.
This is my Trie structre
struct TrieNode {
struct TrieNode* children[ALPHABET_SIZE];
bool isEndOfWord;
};
struct TrieNode* getNode(void) {
struct TrieNode* pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
bool isLeafNode(struct TrieNode* root) { return root->isEndOfWord != false; }
and this is the suggestion/Auto-complete part
void suggestionsRec(struct TrieNode* root, string currPrefix) {
if (root->isEndOfWord) {
cout << currPrefix;
cout << endl;
}
if (isLeafNode(root))
return;
for (int i = 0; i < ALPHABET_SIZE; i++)
{
if (root->children[i])
{
currPrefix.push_back(97 + i);
suggestionsRec(root->children[i], currPrefix);
currPrefix.pop_back();
}
}
}
int spellchecker(TrieNode* root, const string query) {
struct TrieNode* pCrawl = root;
int level;
int n = query.length();
for (level = 0; level < n; level++) {
int index = CHAR_TO_INDEX(query[level]);
if (!pCrawl->children[index])
return 0;
pCrawl = pCrawl->children[index];
}
bool isWord = (pCrawl->isEndOfWord == true);
bool isLast = isLeafNode(pCrawl);
if (isWord && isLast) {
cout << query << endl;
return -1;
}
if (!isLast) {
string prefix = query;
suggestionsRec(pCrawl, prefix);
return 1;
}
}
for example when i try words like : [ the , there , these ] and input is "t" all it suggests is "the" .
this is the whole code https://pastebin.com/VPd94rny
also any suggestion to improve my code is much appreciated

Related

c++ unable to get trie to give correct search words

I'm creating a word suggestion auto-complete interface. I'm using a trie in c++ to do this. I'd like to enter part of a word into my code and have the trie suggest possible words to complete the end of my word.
#include<bits/stdc++.h>
using namespace std;
#define alphabet (26)
#define CHAR_TO_INDEX(c) ((int)c - (int)'a')
struct TrieNode
{
struct TrieNode *children[alphabet];
// isWordEnd is true if the node represents
// end of a word
bool isWordEnd;
};
struct TrieNode *getNode(void)
{
struct TrieNode *Node = new TrieNode;
Node->isWordEnd = false;
for (int i = 0; i < alphabet; i++)
Node->children[i] = NULL;
return Node;
}
void insert(struct TrieNode *root, const string key)
{
struct TrieNode *Crawl = root;
for (int level = 0; level < key.length(); level++)
{
int index = CHAR_TO_INDEX(key[level]);
if (!Crawl->children[index])
{
Crawl->children[index] = getNode();
//Crawl = Crawl->children[index];
}
Crawl = Crawl->children[index];
}
// mark last node as leaf
Crawl->isWordEnd = true;
}
//returns 0 if current node has a child
// If all children are NULL, return 1.
bool isLastNode(struct TrieNode* root)
{
for (int i = 0; i < alphabet; i++)
if (root->children[i])
return 0;
return 1;
}
void suggestionsRec(struct TrieNode* root, string currPrefix)
{
// found a string in Trie with the given prefix
if (root->isWordEnd)
{
cout << currPrefix;
cout << endl;
}
// All children struct node pointers are NULL
if (isLastNode(root))
{
//currPrefix = "help";
//deleteNode(root);
//delete root;
//root = NULL;
//currPrefix.pop_back();
return;
}
for (int i = 0; i < alphabet; i++)
{
if (root->children[i])
{
currPrefix.push_back(97 + i);
//currPrefix.push_back(i);
//currPrefix.pop_back(97 + i);
/*if (isLastNode(root))
{
currPrefix.erase(3);
}*/
// recur over the rest
suggestionsRec(root->children[i], currPrefix);
//printAutoSuggestions(root->children[i], currPrefix);
}
}
}
// print suggestions for given query prefix.
int printAutoSuggestions(TrieNode* root, const string query)
{
struct TrieNode* Crawl = root;
// Check if prefix is present and find the
// the node (of last level) with last character
// of given string.
int level;
int n = query.length();
for (level = 0; level < n; level++)
{
int index = CHAR_TO_INDEX(query[level]);
// no string in the Trie has this prefix
if (!Crawl->children[index])
return 0;
Crawl = Crawl->children[index];
}
// If prefix is present as a word.
bool isWord = (Crawl->isWordEnd == true);
// If prefix is last node of tree (has no
// children)
bool isLast = isLastNode(Crawl);
// If prefix is present as a word, but
// there is no subtree below the last
// matching node.
if (isWord && isLast)
{
cout << query << endl;
return -1;
}
// If there are are nodes below last
// matching character.
if (!isLast)
{
string prefix = query;
suggestionsRec(Crawl, prefix);
return 1;
}
}
// Driver Code
int main()
{
struct TrieNode* root = getNode();
insert(root, "hello");
insert(root, "dog");
insert(root, "hell");
insert(root, "cat");
insert(root, "a");
insert(root, "hel");
insert(root, "help");
insert(root, "helps");
insert(root, "helping");
int comp = printAutoSuggestions(root, "hel");
if (comp == -1)
cout << "No other strings found with this prefix\n";
else if (comp == 0)
cout << "No string found with this prefix\n";
return 0;
}
When I enter the prefix "hel" I'd like to see
hel
hell
hello
help
helping
helps
But instead, I just see
hel
hell
hello
hellp
hellping
hellpis
In suggestionsRec(...) you have:
for (int i = 0; i < alphabet; i++)
{
currPrefix.push_back(97 + i);
...
suggestionsRec(root->children[i], currPrefix);
}
}
You are adding characters to currPrefix and keeping them. So you call suggestionsRec on later children, with characters in currPrefix that don't belong there.

fix segmentation fault in trie c++

I'm using a trie implementation to store and search words in c++ programming language. In using the search() function, I am getting a segmentation fault when searching for a specific word. It seems that the error occurred in checking if the struct is null.
here is the error message:
Program received signal SIGSEGV, Segmentation fault.
0x000055555555b2ff in search (this=0x55555577ee70,
wordlist=0x55555577ef00, word="a1g6os") at test.cc:30
if (!pCrawl->children[index])
here is the source code:
#include <bits/stdc++.h>
using namespace std;
const int ALPHABET_SIZE = 26;
struct TrieNode {
struct TrieNode *children[ALPHABET_SIZE];
bool isEndOfWord;
};
struct TrieNode *getNode(void) {
struct TrieNode *pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
int main() {
string keys[] = {"the", "a", "there",
"answer", "any", "by",
"bye", "their" };
int n = sizeof(keys)/sizeof(keys[0]);
struct TrieNode *root = getNode();
for (int i = 0; i < n; i++)
insert(root, keys[i]);
// Search for different keys
search(root, "a1g6os")? cout << "Yes\n" :
cout << "No\n";
return 0;
}
Both #Some programmer dude and #JohnnyJohansson have pointed out the root cause. The live test showed where the code read the array out-of-bounds. Actually the fix is easy once you understand what happens. The following is the fixed code if you cannot figure it out by yourself. A live test of it is here cee.studio
#include<iostream>
using namespace std;
const int ALPHABET_SIZE = 75; // increase the range
struct TrieNode {
struct TrieNode *children[ALPHABET_SIZE];
bool isEndOfWord;
};
struct TrieNode *getNode(void) {
struct TrieNode *pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
int main() {
string keys[] = {"the", "a", "there",
"answer", "any", "by",
"bye", "their" };
int n = sizeof(keys)/sizeof(keys[0]);
struct TrieNode *root = getNode();
for (int i = 0; i < n; i++)
insert(root, keys[i]);
// Search for different keys
search(root, "a1g6os")? cout << "Yes\n" :
cout << "No\n";
return 0;
}

Segmentation fault in trie implementation using vector c++

#include <iostream>
#include <vector>
#include <string>
using namespace std;
#define LOWERCASE_ALPHABET_SiZE 26
typedef int (*ptr) (char );
inline int charToIndex(char a) { return a - 'a'; };
class trienode
{
private:
vector< trienode* > child;
bool leaf;
public:
trienode(int );
~trienode();
void initialiseChild(int i);
trienode* getChild(int i);
void setLeaf() { leaf = true; };
bool isLeaf() { return leaf; };
};
trienode::trienode(int size)
{
for(int i = 0; i < size; i++)
{
child.push_back(NULL);
}
leaf = false;
}
trienode::~trienode()
{
for(int i = 0; i < child.size(); i++)
{
delete child.at(i);
child.at(i) = NULL;
}
}
void trienode::initialiseChild(int i)
{
child.at(i) = new trienode(child.size());
}
trienode* trienode::getChild(int i)
{
return child.at(i);
}
class trie
{
private:
trienode* root;
ptr toIndex;
public:
trie(int , ptr );
~trie();
void insert(const string& ref);
bool search(const string& ref);
};
trie::trie(int size, ptr toIndex) : toIndex(toIndex), root(new trienode(size)) { }
trie::~trie()
{
cout << "In destructor trie" << endl;
delete root;
root = NULL;
}
void trie::insert(const string& ref)
{
int size = ref.size();
trienode* root = root;
for(int i = 0; i < size; i++)
{
int index = toIndex(ref[i]);
if(root->getChild(index) == NULL) // crashing in getChild()
{
root->initialiseChild(index);
}
root = root->getChild(index);
}
root->setLeaf();
}
bool trie::search(const string& ref)
{
trienode* root = root;
int size = ref.size();
for(int i = 0; i < size && root != NULL; i++)
{
int index = toIndex(ref[i]);
if((root = root->getChild(index)) == NULL)
{
break;
}
}
return (root != NULL && root->isLeaf());
}
int main(int argc,char* argv[])
{
trie* altrie = new trie(LOWERCASE_ALPHABET_SiZE, charToIndex);
int n;
string temp;
cin >> n;
for(int i = 0; i < n; i++)
{
cin >> temp;
altrie->insert(temp);
}
int k;
for(int i = 0; i < k; i++)
{
cin >> temp;
if(altrie->search(temp))
{
cout << temp << " exists in the trie" << endl;
}
else
{
cout << temp << " doesn`t exist in the trie" << endl;
}
}
return 0;
}
I am creating Trie by supplying no of children it can have in each level and function pointer to convert the given character to index. After that I am Creating the root node of trie and when I`m inserting the first string it is getting Segmentation Fault in getChild Function
First things first explain me the reason behind the crash.
Explain me how I can improve the implementation of trie.
You are using the same name for member and local variables, like this:
trienode* root = root;
The compiler cannot tell the diffirence between the local root and trie::root so you are assigning it to itself.

Trie C++ Strings not completely filling all the values in a text file

i'm trying to sort a large amount of strings alphabetically and by length and it seems i'm only sorting around 1/7 of the amount needed. I am trying to sort around 100,000 words, when I sorted 70000 I ended up with 8000 words sorted and can't seem to find why it isn't working. Any help would be much appreciated
#include<iostream>
#include<fstream>
#include<string>
#include<vector>
using namespace std;
class Node
{
public:
char value; // the character value
bool end; // indicates whether this node completes a word
Node * children[93]; // represents the 93 ascii values for 33-126
Node(char newChar);
~Node();
};
class Trie
{
public:
Trie();
~Trie();
void addWord(string word);
Node * getRoot();
private:
Node * root;
};
Node::Node(char newChar)
{
value = newChar;
for (int i = 0; i < 93; ++i)
children[i] = NULL;
}
Node::~Node()
{
delete[] children;
}
Trie::Trie()
{
root = new Node(' ');
root->end = true;
}
Trie::~Trie()
{
delete root;
}
Node * Trie::getRoot()
{
return root;
}
void Trie::addWord(string word)
{
Node * currentNode = root;
for (int i = 0; i < word.size(); ++i)
{
char currentChar = word.at(i);
int index = currentChar - '!';
if (currentNode->children[index] != NULL)
{
currentNode = currentNode->children[index];
}
else
{
Node * newNode = new Node(currentChar);
currentNode->children[index] = newNode;
currentNode = newNode;
}
if (i == word.size() - 1)
{
currentNode->end = true;
}
}
}
void alphabetize(Node * node, vector<string> & sorting, string prefix = "") //why dont i have to declare this?
{
if (node->end)
{
sorting.push_back(prefix);
}
for (int i = 0; i < 93; ++i)
{
if (node->children[i] != NULL)
{
string currentString = prefix + node->children[i]->value; //store all characters
alphabetize(node->children[i], sorting, currentString);
}
else
{
}
}
}
int main()
{
Trie * t = new Trie();
string tempS;
int lengthCounter = 0;
ifstream fin;
fin.open("test.txt");
vector< vector<string> > sortLength;
vector <string> row(0, "");
vector<string> sorted;
while(fin >> tempS)
{
while(tempS.length() > lengthCounter)
{
sortLength.push_back(row);
lengthCounter++;
}
t->addWord(tempS);
}
alphabetize(t->getRoot(),sorted); //filled with sorted vector
for(int i = 0; i < sorted.size(); i++)
{
sortLength[sorted[i].length()-1].push_back(sorted[i]);
}
for(int k = 0; k < sortLength.size(); k++)
{
for(int l = 0; l < sortLength[k].size(); l++)
{
cout << sortLength[k][l] << "\n";
}
}
cout << sorted.size();
return 0;
}

get all words exist in trie using recursion

I found a function which traverse all the trie and return a list contains all the words exist in my trie. My problem is I can't make this work for me, any help will appreciated.
class Node {
public:
Node();
Node* ch[26];
bool isEnd;
};
Node::Node() {
for(int i = 0; i < 26; i++)
ch[i] = NULL;
isEnd = 0;
}
class Trie {
public:
Node* root;
Trie() {root = new Node();}
void insert(string word, Node* ptr);
bool find(string word, Node* ptr);
list<string> findWords(Node* root);
};
void Trie::insert(string word, Node* ptr) {
for(unsigned int i = 0; i < word.size(); i++) {
if(ptr->ch[word[i]-'a'] == NULL)
ptr->ch[word[i]-'a'] = new Node();
ptr = ptr->ch[word[i]-'a'];
}
ptr->isEnd = 1;
}
list<string> Trie::findWords(Node* ptr) {
list<string> result;
if(ptr->isEnd)
result.push_back("");
for(int i = 0; i < 26; i++)
if(ptr->ch[i] != NULL) {
ptr = ptr->ch[i];
list<string> childResult = findWords(ptr);
char letter = (char) (97 + i);
for(string sufix : childResult)
result.push_back("" + letter + sufix);
}
copy(result.begin(),result.end(),ostream_iterator<string> (cout," "));
return result;
}
test main:
int main() {
Trie T;
string word;
for(int i = 0; i < 10; i++) {
cin >> word;
insert(word, root);
}
system("PAUSE");
return 0;
}