get all words exist in trie using recursion - c++

I found a function which traverse all the trie and return a list contains all the words exist in my trie. My problem is I can't make this work for me, any help will appreciated.
class Node {
public:
Node();
Node* ch[26];
bool isEnd;
};
Node::Node() {
for(int i = 0; i < 26; i++)
ch[i] = NULL;
isEnd = 0;
}
class Trie {
public:
Node* root;
Trie() {root = new Node();}
void insert(string word, Node* ptr);
bool find(string word, Node* ptr);
list<string> findWords(Node* root);
};
void Trie::insert(string word, Node* ptr) {
for(unsigned int i = 0; i < word.size(); i++) {
if(ptr->ch[word[i]-'a'] == NULL)
ptr->ch[word[i]-'a'] = new Node();
ptr = ptr->ch[word[i]-'a'];
}
ptr->isEnd = 1;
}
list<string> Trie::findWords(Node* ptr) {
list<string> result;
if(ptr->isEnd)
result.push_back("");
for(int i = 0; i < 26; i++)
if(ptr->ch[i] != NULL) {
ptr = ptr->ch[i];
list<string> childResult = findWords(ptr);
char letter = (char) (97 + i);
for(string sufix : childResult)
result.push_back("" + letter + sufix);
}
copy(result.begin(),result.end(),ostream_iterator<string> (cout," "));
return result;
}
test main:
int main() {
Trie T;
string word;
for(int i = 0; i < 10; i++) {
cin >> word;
insert(word, root);
}
system("PAUSE");
return 0;
}

Related

fix segmentation fault in trie c++

I'm using a trie implementation to store and search words in c++ programming language. In using the search() function, I am getting a segmentation fault when searching for a specific word. It seems that the error occurred in checking if the struct is null.
here is the error message:
Program received signal SIGSEGV, Segmentation fault.
0x000055555555b2ff in search (this=0x55555577ee70,
wordlist=0x55555577ef00, word="a1g6os") at test.cc:30
if (!pCrawl->children[index])
here is the source code:
#include <bits/stdc++.h>
using namespace std;
const int ALPHABET_SIZE = 26;
struct TrieNode {
struct TrieNode *children[ALPHABET_SIZE];
bool isEndOfWord;
};
struct TrieNode *getNode(void) {
struct TrieNode *pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
int main() {
string keys[] = {"the", "a", "there",
"answer", "any", "by",
"bye", "their" };
int n = sizeof(keys)/sizeof(keys[0]);
struct TrieNode *root = getNode();
for (int i = 0; i < n; i++)
insert(root, keys[i]);
// Search for different keys
search(root, "a1g6os")? cout << "Yes\n" :
cout << "No\n";
return 0;
}
Both #Some programmer dude and #JohnnyJohansson have pointed out the root cause. The live test showed where the code read the array out-of-bounds. Actually the fix is easy once you understand what happens. The following is the fixed code if you cannot figure it out by yourself. A live test of it is here cee.studio
#include<iostream>
using namespace std;
const int ALPHABET_SIZE = 75; // increase the range
struct TrieNode {
struct TrieNode *children[ALPHABET_SIZE];
bool isEndOfWord;
};
struct TrieNode *getNode(void) {
struct TrieNode *pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
int main() {
string keys[] = {"the", "a", "there",
"answer", "any", "by",
"bye", "their" };
int n = sizeof(keys)/sizeof(keys[0]);
struct TrieNode *root = getNode();
for (int i = 0; i < n; i++)
insert(root, keys[i]);
// Search for different keys
search(root, "a1g6os")? cout << "Yes\n" :
cout << "No\n";
return 0;
}

Comparing Hash table values and creating an array of top N words

I've created a hash table of structs. Each struct has count. I'm curious how I can go through each key and separate chain and find the highest count and add it to an array.
struct wordItem
{
std::string word;
int count;
wordItem* next;
};
This is what i have so far. My thought process is to to compare each item with every item. So go to initial key, then traverse down each chain.
Suggestions welcome.
void HashTable::printTopN(int n) {
wordItem* arr[n];
wordItem* temp;
int i;
for (i=0;i<hashTableSize; i++){
temp = hashTable[i];
while (temp!=NULL){
for (int j = 0; j<n; j++){
if(arr[n]->count<temp->count&&arr[n+1]->count<temp->count){
arr[n]=arr[n+1];
arr[n] = temp;
}
}
temp = temp->next;
}
}
for (int k = 0; k < n; k++)
std::cout<<arr[n]->word<<"--"<<arr[n]->count;
}
Also this is my addWord function for more background info.
void HashTable::addWord(std::string word) {
int hash_val = getHash(word);
wordItem* prev = NULL;
wordItem* entry = hashTable[hash_val];
while (entry != NULL)
{
prev = entry;
entry = entry->next;
}
if (entry == NULL)
{
entry = new wordItem;
entry->count = 1;
entry->word = word;
entry ->next = NULL;
if (prev == NULL)
{
hashTable[hash_val]= entry;
}
else
{
prev->next = entry;
}
}
incrementCount(word);
entry->word = word;
}
HPP fiie
struct wordItem
{
std::string word;
int count;
wordItem* next;
};
const int STOPWORD_LIST_SIZE = 50;
class HashTable {
public:
HashTable(int hashTableSize);
~HashTable();
void getStopWords(char *ignoreWordFileName);
bool isStopWord(std::string word);
bool isInTable(std::string word);
void incrementCount(std::string word);
void addWord(std::string word);
int getTotalNumberNonStopWords();
void printTopN(int n);
int getNumUniqueWords();
int getNumCollisions();
int getHash(std::string word);
private:
wordItem* searchTable(std::string word);
int numUniqueWords;
int numCollisions;
int hashTableSize;
wordItem** hashTable;
std::vector<std::string> vecIgnoreWords =
std::vector<std::string>(STOPWORD_LIST_SIZE);
};
Create an array of N items. For each item in the table, go through the array and check if current_array_item <= table_item <= next_array_item. If yes, shift all items in the array that are <= current_array_item by one (erasing the smallest one from the array) and insert table_item in place of current_array_item.

Why the words do not contain in my dictionary (c++ trie)?

I think there's something wrong in my implement of trie. I use the word apple to test it. Although my test file dict.txt contains the word apple, it returns false. What's bug?
class Trie{
private:
class Node{
public:
Node* next[26];
bool isWord;
Node(){ isWord = false; }
};
Node* root;
public:
Trie() {
root = new Node();
}
void load(const string& line) {
Node* node = root;
for(int i = 0; i < line.size(); i++){
char x = line[i];
if(node->next[x-'a'] == nullptr)
node->next[x-'a'] = new Node();
node = node->next[x-'a'];
}
node->isWord = true;
}
bool contains(const string& word) {
Node* node = root;
for(int i = 0; i < word.size(); i++){
char x = word[i];
if(node->next[x-'a'] == nullptr)
return false;
else
node = node->next[x-'a'];
}
return node->isWord;
}
bool startWith(const string& prefix) {
Node* node = root;
for(int i = 0; i < prefix.size(); i++){
char x = prefix[i];
if(node->next[x-'a'] == nullptr)
return false;
else
node = node->next[x-'a'];
}
return true;
}
};
int main() {
Trie trie;
ifstream inFile;
string line;
while(getline(inFile, line)){
trie.load(line);
}
cout << trie.contains("apple") << endl;
cout << trie.startWith("cata") << endl;
return 0;
}
You forgot to initialize pointers Node* next[26] to nullptrs. And, because of that, words may be added incorrectly. Not sure though.

Segmentation fault in trie implementation using vector c++

#include <iostream>
#include <vector>
#include <string>
using namespace std;
#define LOWERCASE_ALPHABET_SiZE 26
typedef int (*ptr) (char );
inline int charToIndex(char a) { return a - 'a'; };
class trienode
{
private:
vector< trienode* > child;
bool leaf;
public:
trienode(int );
~trienode();
void initialiseChild(int i);
trienode* getChild(int i);
void setLeaf() { leaf = true; };
bool isLeaf() { return leaf; };
};
trienode::trienode(int size)
{
for(int i = 0; i < size; i++)
{
child.push_back(NULL);
}
leaf = false;
}
trienode::~trienode()
{
for(int i = 0; i < child.size(); i++)
{
delete child.at(i);
child.at(i) = NULL;
}
}
void trienode::initialiseChild(int i)
{
child.at(i) = new trienode(child.size());
}
trienode* trienode::getChild(int i)
{
return child.at(i);
}
class trie
{
private:
trienode* root;
ptr toIndex;
public:
trie(int , ptr );
~trie();
void insert(const string& ref);
bool search(const string& ref);
};
trie::trie(int size, ptr toIndex) : toIndex(toIndex), root(new trienode(size)) { }
trie::~trie()
{
cout << "In destructor trie" << endl;
delete root;
root = NULL;
}
void trie::insert(const string& ref)
{
int size = ref.size();
trienode* root = root;
for(int i = 0; i < size; i++)
{
int index = toIndex(ref[i]);
if(root->getChild(index) == NULL) // crashing in getChild()
{
root->initialiseChild(index);
}
root = root->getChild(index);
}
root->setLeaf();
}
bool trie::search(const string& ref)
{
trienode* root = root;
int size = ref.size();
for(int i = 0; i < size && root != NULL; i++)
{
int index = toIndex(ref[i]);
if((root = root->getChild(index)) == NULL)
{
break;
}
}
return (root != NULL && root->isLeaf());
}
int main(int argc,char* argv[])
{
trie* altrie = new trie(LOWERCASE_ALPHABET_SiZE, charToIndex);
int n;
string temp;
cin >> n;
for(int i = 0; i < n; i++)
{
cin >> temp;
altrie->insert(temp);
}
int k;
for(int i = 0; i < k; i++)
{
cin >> temp;
if(altrie->search(temp))
{
cout << temp << " exists in the trie" << endl;
}
else
{
cout << temp << " doesn`t exist in the trie" << endl;
}
}
return 0;
}
I am creating Trie by supplying no of children it can have in each level and function pointer to convert the given character to index. After that I am Creating the root node of trie and when I`m inserting the first string it is getting Segmentation Fault in getChild Function
First things first explain me the reason behind the crash.
Explain me how I can improve the implementation of trie.
You are using the same name for member and local variables, like this:
trienode* root = root;
The compiler cannot tell the diffirence between the local root and trie::root so you are assigning it to itself.

Trie C++ Strings not completely filling all the values in a text file

i'm trying to sort a large amount of strings alphabetically and by length and it seems i'm only sorting around 1/7 of the amount needed. I am trying to sort around 100,000 words, when I sorted 70000 I ended up with 8000 words sorted and can't seem to find why it isn't working. Any help would be much appreciated
#include<iostream>
#include<fstream>
#include<string>
#include<vector>
using namespace std;
class Node
{
public:
char value; // the character value
bool end; // indicates whether this node completes a word
Node * children[93]; // represents the 93 ascii values for 33-126
Node(char newChar);
~Node();
};
class Trie
{
public:
Trie();
~Trie();
void addWord(string word);
Node * getRoot();
private:
Node * root;
};
Node::Node(char newChar)
{
value = newChar;
for (int i = 0; i < 93; ++i)
children[i] = NULL;
}
Node::~Node()
{
delete[] children;
}
Trie::Trie()
{
root = new Node(' ');
root->end = true;
}
Trie::~Trie()
{
delete root;
}
Node * Trie::getRoot()
{
return root;
}
void Trie::addWord(string word)
{
Node * currentNode = root;
for (int i = 0; i < word.size(); ++i)
{
char currentChar = word.at(i);
int index = currentChar - '!';
if (currentNode->children[index] != NULL)
{
currentNode = currentNode->children[index];
}
else
{
Node * newNode = new Node(currentChar);
currentNode->children[index] = newNode;
currentNode = newNode;
}
if (i == word.size() - 1)
{
currentNode->end = true;
}
}
}
void alphabetize(Node * node, vector<string> & sorting, string prefix = "") //why dont i have to declare this?
{
if (node->end)
{
sorting.push_back(prefix);
}
for (int i = 0; i < 93; ++i)
{
if (node->children[i] != NULL)
{
string currentString = prefix + node->children[i]->value; //store all characters
alphabetize(node->children[i], sorting, currentString);
}
else
{
}
}
}
int main()
{
Trie * t = new Trie();
string tempS;
int lengthCounter = 0;
ifstream fin;
fin.open("test.txt");
vector< vector<string> > sortLength;
vector <string> row(0, "");
vector<string> sorted;
while(fin >> tempS)
{
while(tempS.length() > lengthCounter)
{
sortLength.push_back(row);
lengthCounter++;
}
t->addWord(tempS);
}
alphabetize(t->getRoot(),sorted); //filled with sorted vector
for(int i = 0; i < sorted.size(); i++)
{
sortLength[sorted[i].length()-1].push_back(sorted[i]);
}
for(int k = 0; k < sortLength.size(); k++)
{
for(int l = 0; l < sortLength[k].size(); l++)
{
cout << sortLength[k][l] << "\n";
}
}
cout << sorted.size();
return 0;
}