TRIE data structure implementation in c++ - c++

I have written a simple code to implement a trie data structure in c++. But when I run this program, it gives segmentation error as an output.
Kindly please correct me, where i have been wrong.
#include <bits/stdc++.h>
using namespace std;
struct trienode {
struct trienode * child[26];
bool isEnd;
trienode()
{
isEnd = false;
for(int i = 0; i < 26; i++)
{
child[i] = NULL;
}
}
};
struct trienode * root;
void insert_str(string &s, int n)
{
trienode * curr = root;
int i;
for(i = 0; i < n; i++)
{
int index = s[i] - 'a';
if(curr -> child[index] == NULL)
{
curr -> child[index] = new trienode();
}
else
{
curr = curr -> child[index];
}
}
curr -> isEnd = true;
}
int main()
{
string s1 = "yash";
insert_str(s1, 4);
}

You haven't allocated any memory for your root node.
Normally you would have a separate class to handle the trie as a whole. It can then allocate the root node.
class trie
{
public:
trie()
{
root = new trienode();
}
void insert_str(string &s, int n)
{
...
}
private:
trienode* root;
};
int main()
{
trie t;
string s1 = "yash";
t.insert_str(s1, 4);
}

Related

strdup for converting const char* to char*

I have designed for Huffman tree convert binary code with shorter bin code. In main if you call a Binary tree.init(q), then the tree would come out with key: frequency and value: bin code. The problem is converting const char* with char*. I've looked at some codes, and here I converted it by using strdup. Sometimes works fine but sometimes doesn't work. so I checked out the parameter for the function. Is there wrong in calling strdup or maybe others?
#pragma once
#include <stdio.h>
#include <queue>
#include <iostream>
#include "pch.h"
#include <string.h>
#include <string>
#define _CRT_SECURE_NO_WARNINGS
//this is a header file
using namespace std;
class Node
{
public:
//key : frequency, value : code
int f;
char* code;
Node* left;
Node* right;
int getFrequency()
{
return f;
}
char* getCode()
{
return code;
}
void init(int frequency, char* codestring)
{
f = frequency;
code = codestring;
}
Node* getLeft() {
return left;
}
Node* getRight()
{
return right;
}
void setLeft(Node* L)
{
left = L;
}
void setRight(Node* R)
{
right = R;
}
void setFrequency(int frequency)
{
f = frequency;
}
void setCode(char* string)
{
code = string;
}
};
class BinaryTree
{
public:
typedef priority_queue<int, vector<int>, greater<int>> pq;
pq q;
Node* proot;
int sizeofqueue;
void init(pq PriorityQueue)
{
q = PriorityQueue;
sizeofqueue = q.size();
N = 0;
int comparetimes = q.size() - 1;
for (int i = 0; i < comparetimes; i++)
{
if (i == 0)
{
put_first_two_nodes();
}
else
{
if (proot->getFrequency() <= q.top())
{
put_right_node();
}
else if (proot->getFrequency() > q.top())
{
put_left_node();
}
q.pop();
}
}
}
void put_first_two_nodes()
{
Node* pleft = new Node();
(*pleft).setFrequency(q.top());
(*pleft).setCode("0");
q.pop();
Node* pright = new Node();
(*pright).setFrequency(q.top());
(*pright).setCode("1");
put(pleft, pright);
q.pop();
}
void put_right_node()
{
Node* pright = new Node();
pright->setFrequency(q.top());
pright->setCode("1");
put(proot, pright);
appendcode(0);
}
void appendcode(int prefix)
{
string pre;
if (prefix == 1) pre = "1";
else pre = "0";
Node* targetNode = proot->getRight();
char* rcode = targetNode->getRight()->getCode();
char* lcode = targetNode->getLeft()->getCode();
string lefts = pre;
string rights = pre;
lefts.append(lcode);
rights.append(rcode);
char* leftstring = strdup(lefts.c_str());
char* rightstring = strdup(rights.c_str());
targetNode->getLeft()->setCode(leftstring);
targetNode->getRight()->setCode(rightstring);
free(leftstring);
free(rightstring);
}
void put_left_node()
{
Node* pleft = new Node();
pleft->setFrequency(q.top());
pleft->setCode("0");
put(pleft, proot);
appendcode(1);
}
char* get(int k)
{
return getItem(*proot, k);
}
char* getItem(Node root, int k)
{
//if there's no node
if (&root == nullptr) return "";
//if f or root > k, search left sibling
if (root.getFrequency() > k) return getItem(*(root.getLeft()), k);
//else, search right sibling
else if (root.getFrequency() < k) return getItem(*(root.getRight()), k);
//get it
else return root.getCode();
}
void put(Node* left, Node* right)
{
put_item(left,right);
}
void put_item(Node* left, Node* right)
{
//make new node that has sibling with left and right
Node* newnode = new Node();
newnode->setLeft(left);
newnode->setRight(right);
//exchange the new node and root without losing data
Node* temp;
temp = proot;
proot = newnode;
newnode = temp;
//proot's frequency : left f + right f
(*proot).setFrequency((*left).getFrequency() + (*right).getFrequency());
}
void printpost()
{
postorder(proot);
}
void postorder(Node* root)
{
if (root != nullptr)
{
if (root->getLeft() != nullptr) postorder(root->getLeft());
if (root->getRight() != nullptr) postorder(root->getRight());
printf("%d : %s ",root->getFrequency(), root->getCode());
}
}
private:
int N;
Node root;
};
You shouldn't use const char* and char* at all in c++ (unless when sometimes dealing with legacy or foreign interfaces).
Switch up your code to use eg. std::string or std::string_view (c++17) instead (string_view requires a bit more understanding to handle correctly and is const so to speak - so I would stick to string off the bat). Pass std::string by reference or by const reference where neccesary. The overhead of std::string is for most programs negliable.

Trouble with pointers C++

Well, i'm tried to build something like Binary Search Tree. And after some iterations i'm creating newnode and it has pointer which has already used. How to solve this problem, without classes. For example test,
9
1
7
5
21
22
27
25
20
10
Build it in reverse order (last is root, first cnt of vertex)
Here code:
#include <bits/stdc++.h>
using namespace std;
const int N = 3000;
int n;
int a[N];
struct node {
int v;
node *left, *right;
};
vector<int> ans;
node qwe;
void add(node *root, int elem) {
if (elem > root->v) {
if (root->right != NULL) {
add(root->right, elem);
} else {
node newnode{};
newnode.v = elem;
newnode.right = NULL;
newnode.left = NULL;
node *lsls;
lsls = &newnode;
root->right = lsls;
}
} else {
if (root->left != NULL) {
add(root->left, elem);
} else {
node newnode;
newnode.v = elem;
newnode.right = NULL;
newnode.left = NULL;
node *lsls;
lsls = &newnode;
root->left = lsls;
}
}
}
int main() {
ios_base::sync_with_stdio(false);
cin.tie(nullptr);
cout.tie(nullptr);
cin >> n;
for (int i = 0; i < n; ++i) {
cin >> a[i];
}
qwe.v = a[n - 1];
qwe.left = NULL;
qwe.right = NULL;
node *pointer;
pointer = &qwe;
for (int i = n - 2; i > -1; --i) {
add(pointer, a[i]);
}
pointer = &qwe;
return 0;
}

fix segmentation fault in trie c++

I'm using a trie implementation to store and search words in c++ programming language. In using the search() function, I am getting a segmentation fault when searching for a specific word. It seems that the error occurred in checking if the struct is null.
here is the error message:
Program received signal SIGSEGV, Segmentation fault.
0x000055555555b2ff in search (this=0x55555577ee70,
wordlist=0x55555577ef00, word="a1g6os") at test.cc:30
if (!pCrawl->children[index])
here is the source code:
#include <bits/stdc++.h>
using namespace std;
const int ALPHABET_SIZE = 26;
struct TrieNode {
struct TrieNode *children[ALPHABET_SIZE];
bool isEndOfWord;
};
struct TrieNode *getNode(void) {
struct TrieNode *pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
int main() {
string keys[] = {"the", "a", "there",
"answer", "any", "by",
"bye", "their" };
int n = sizeof(keys)/sizeof(keys[0]);
struct TrieNode *root = getNode();
for (int i = 0; i < n; i++)
insert(root, keys[i]);
// Search for different keys
search(root, "a1g6os")? cout << "Yes\n" :
cout << "No\n";
return 0;
}
Both #Some programmer dude and #JohnnyJohansson have pointed out the root cause. The live test showed where the code read the array out-of-bounds. Actually the fix is easy once you understand what happens. The following is the fixed code if you cannot figure it out by yourself. A live test of it is here cee.studio
#include<iostream>
using namespace std;
const int ALPHABET_SIZE = 75; // increase the range
struct TrieNode {
struct TrieNode *children[ALPHABET_SIZE];
bool isEndOfWord;
};
struct TrieNode *getNode(void) {
struct TrieNode *pNode = new TrieNode;
pNode->isEndOfWord = false;
for (int i = 0; i < ALPHABET_SIZE; i++)
pNode->children[i] = NULL;
return pNode;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
int main() {
string keys[] = {"the", "a", "there",
"answer", "any", "by",
"bye", "their" };
int n = sizeof(keys)/sizeof(keys[0]);
struct TrieNode *root = getNode();
for (int i = 0; i < n; i++)
insert(root, keys[i]);
// Search for different keys
search(root, "a1g6os")? cout << "Yes\n" :
cout << "No\n";
return 0;
}

Trie C++ Strings not completely filling all the values in a text file

i'm trying to sort a large amount of strings alphabetically and by length and it seems i'm only sorting around 1/7 of the amount needed. I am trying to sort around 100,000 words, when I sorted 70000 I ended up with 8000 words sorted and can't seem to find why it isn't working. Any help would be much appreciated
#include<iostream>
#include<fstream>
#include<string>
#include<vector>
using namespace std;
class Node
{
public:
char value; // the character value
bool end; // indicates whether this node completes a word
Node * children[93]; // represents the 93 ascii values for 33-126
Node(char newChar);
~Node();
};
class Trie
{
public:
Trie();
~Trie();
void addWord(string word);
Node * getRoot();
private:
Node * root;
};
Node::Node(char newChar)
{
value = newChar;
for (int i = 0; i < 93; ++i)
children[i] = NULL;
}
Node::~Node()
{
delete[] children;
}
Trie::Trie()
{
root = new Node(' ');
root->end = true;
}
Trie::~Trie()
{
delete root;
}
Node * Trie::getRoot()
{
return root;
}
void Trie::addWord(string word)
{
Node * currentNode = root;
for (int i = 0; i < word.size(); ++i)
{
char currentChar = word.at(i);
int index = currentChar - '!';
if (currentNode->children[index] != NULL)
{
currentNode = currentNode->children[index];
}
else
{
Node * newNode = new Node(currentChar);
currentNode->children[index] = newNode;
currentNode = newNode;
}
if (i == word.size() - 1)
{
currentNode->end = true;
}
}
}
void alphabetize(Node * node, vector<string> & sorting, string prefix = "") //why dont i have to declare this?
{
if (node->end)
{
sorting.push_back(prefix);
}
for (int i = 0; i < 93; ++i)
{
if (node->children[i] != NULL)
{
string currentString = prefix + node->children[i]->value; //store all characters
alphabetize(node->children[i], sorting, currentString);
}
else
{
}
}
}
int main()
{
Trie * t = new Trie();
string tempS;
int lengthCounter = 0;
ifstream fin;
fin.open("test.txt");
vector< vector<string> > sortLength;
vector <string> row(0, "");
vector<string> sorted;
while(fin >> tempS)
{
while(tempS.length() > lengthCounter)
{
sortLength.push_back(row);
lengthCounter++;
}
t->addWord(tempS);
}
alphabetize(t->getRoot(),sorted); //filled with sorted vector
for(int i = 0; i < sorted.size(); i++)
{
sortLength[sorted[i].length()-1].push_back(sorted[i]);
}
for(int k = 0; k < sortLength.size(); k++)
{
for(int l = 0; l < sortLength[k].size(); l++)
{
cout << sortLength[k][l] << "\n";
}
}
cout << sorted.size();
return 0;
}

Searching words in a very big file

Given a large file, we need to store the words so that searching of a word can be done in constant time. Also how will we find the 10% most frequently occurring words in the file?
What I have achieved so far is searching the word through trie implementation.
Please suggest some way to find the 10% most frequent words.
#include<iostream>
#include<cstdio>
using namespace std;
class Node
{
public:
char value;
Node* right;
Node* down;
Node()
{
right=down=NULL;
}
};
class Trie
{
public:
Node* head;
Trie()
{
head=NULL;
}
void insert(string s);
void search(string s);
};
void Trie::insert(string s)
{
if(head==NULL)
{
Node* f=new Node();
head=f;
Node* temp=f;
f->value=s[0];
for(int i=1;i<s.length();i++)
{
Node* n=new Node();
n->value=s[i];
temp->down=n;
temp=n;
if(i==s.length()-1)
n->down=NULL;
}
}
else
{
Node* ptr=head;
int i=0;
while(1)
{
if(i==s.length())break;
if(ptr->value==s[i])
{
i++;
if(ptr->down)
ptr=ptr->down;
else
{
Node* temp=new Node();
ptr->down=temp;
temp->value=s[i];
ptr=temp;
}
}
else if(ptr->value!=s[i])
{
if(ptr->right)
ptr=ptr->right;
else
{
Node*temp=new Node();
ptr->right=temp;
temp->value=s[i];
ptr=temp;
}
}
}
}
}
void Trie::search(string s)
{
Node* ptr=head;
int i=0;
while(1)
{
if(ptr->value==s[i])
{
//cout<<ptr->value<<endl;
ptr=ptr->down;
i++;
}
else if(ptr->value!=s[i])
{
ptr=ptr->right;
}
if(ptr==NULL)break;
}
if(i==s.length()+1)cout<<"String found\n";
else cout<<"String not found\n";
}
int main()
{
Trie t;
FILE* input;
char s[100];
input=fopen("big.txt","r");
int i=0;
while( (fgets(s,sizeof(s),input) ) !=NULL)
{
int i=0; int j=0;
char str[47];
while(s[i]!='\0')
{
if(s[i]==' ' || s[i+1]=='\0')
{
str[j]='\0';
j=0;
t.insert(str);
i++;
continue;
}
str[j]=s[i];
j++;
i++;
}
}
t.search("Dates");
//t.search("multinational");
fclose(input);
}
A hash will let you look up the words in constant time.
You could probably use some kind of partitioning like that used in quicksort to find a word that occurs at least 10% out of the file.
The obvious solution is to store the contents of the file in some appropriate STL container such as std::set and then run find() on that container.
If you insist on doing this manually, a binary tree will grow increasingly slower the more data you put inside it. Plus you must keep balancing it. A hash table with chaining would be a more efficient ADT for huge amounts of data.
If you use a tree you cannot get constant time. The binary tree you are building has a logarithmic time complexity.
If it is possible to build an index consider an inverted index. This would still not help you with the constant time (I don't see how you can achieve that anyway), but can help you figure what are the words used the most, since for every word it stores the positions in the file where the word is found. You can actually combine that into your tree.
Here is the similar c++ code using priority queue, map and trie.
for simplicity its reading from vector strings but can be easily modified to read words from file.
//Find the top K frequent words in a file or stream, C++
//This is a working solution for priority_queue for your reference.
#include <iostream>
#include <vector>
#include <queue>
#include <unordered_map>
using namespace std;
#define K_TH 3
class TrieNode;
typedef struct HeapNode
{
string word;
int frequency;
HeapNode(): frequency(0), word(""){} ;
TrieNode *trieNode;
}HeapNode;
class TrieNode
{
private:
int frequency = 0;
bool m_isLeaf = false;
string word = "";
unordered_map<char, TrieNode*> children;
HeapNode *heapNode = NULL;
public:
TrieNode() {}
TrieNode(char c)
{
children[c] = new TrieNode();
this->m_isLeaf = false;
}
void setWord(string word)
{
this->word = word;
}
string getWord()
{
return this->word;
}
bool isLeaf(void)
{
return this->m_isLeaf;
}
void setLeaf(bool leaf)
{
this->m_isLeaf = leaf;
}
TrieNode* getChild(char c)
{
if (children[c] != NULL)
return children[c];
return NULL;
}
void insert(char c)
{
children[c] = new TrieNode();
}
int getFrequency()
{
return this->frequency;
}
void setFrequency(int frequency)
{
this->frequency = frequency;
}
void setHeapNode(HeapNode *heapNode)
{
this->heapNode = heapNode;
}
HeapNode* getHeapNode()
{
return heapNode;
}
bool operator()(HeapNode* &a, HeapNode* &b)
{
return (a->frequency > b->frequency);
}
};
class Trie
{
private:
TrieNode *root = NULL;
public:
Trie()
{
if (!root)
{
this->root = new TrieNode();
}
}
TrieNode* insert(string word)
{
if (!root)
root = new TrieNode();
TrieNode* current = root;
int length = word.length();
//insert "abc"
for(int i = 0; i < length; ++i)
{
if (current->getChild(word.at(i)) == NULL)
{
current->insert(word.at(i));
}
current = current->getChild(word.at(i));
}
current->setLeaf(true);
current->setWord(word);
current->setFrequency(current->getFrequency() + 1);
return current;
}
};
struct cmp
{
bool operator()(HeapNode* &a, HeapNode* &b)
{
return (a->frequency > b->frequency);
}
};
typedef priority_queue<HeapNode*, vector<HeapNode*>, cmp > MinHeap;
void insertUtils(Trie *root, MinHeap &pq, string word )
{
if (!root)
return;
TrieNode* current = root->insert(word);
HeapNode *heapNode = current->getHeapNode();
if(heapNode)// if word already present in heap
{
heapNode->frequency += 1;
}else if (pq.empty() || pq.size() < K_TH)
{// if word not present in heap and heap is not full;
heapNode = new HeapNode();
heapNode->word = word;
heapNode->frequency = 1;
heapNode->trieNode = current;
current->setHeapNode(heapNode);
pq.push(heapNode);
}else if (pq.top()->frequency < current->getFrequency())
{ // if word is not present and heap is full;
HeapNode *temp = pq.top();
//remove first element and add current word
pq.pop();
delete temp;
heapNode = new HeapNode();
current->setHeapNode(heapNode);
pq.push(heapNode);
}
}
void printKMostFrequentWords(vector<std::string> input)
{
Trie *root = new Trie();
MinHeap minHeap;
for (vector<string>::iterator it = input.begin(); it != input.end(); ++it)
{
insertUtils(root, minHeap, *it);
}
while(!minHeap.empty())
{
HeapNode *heapNode = minHeap.top();
cout << heapNode->word << ":" << heapNode->frequency << endl;
minHeap.pop();
}
}
int main() {
vector<std::string>input( {
"abc", "def", "ghi",
"jkl", "abc", "def",
"mno", "xyz", "abc"
} ) ;
printKMostFrequentWords(input);
}