Wrong Answer for SPOJ PHONELST - c++

Here is the link to the problem: http://www.spoj.com/problems/PHONELST/
The judge reports a wrong answer around the second set of test cases. Here is my code for the problem; please help me find the mistake. Thanks in advance.
#include<iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include<vector>
using namespace std;
// Number of elements in a C array. Fully parenthesized so the macro stays
// correct inside larger expressions such as `x / ARRAY_SIZE(a)`.
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
// Alphabet size (# of symbols): the decimal digits '0'..'9'
#define ALPHABET_SIZE (10)
// Converts a digit character into its child index (0..9).
// Only '0' through '9' are valid inputs; the argument is parenthesized so
// that compound expressions are converted as a whole.
#define CHAR_TO_INDEX(c) ((int)(c) - (int)'0')
// One node of the digit trie.
struct TrieNode
{
    // children[d] points at the subtree reached through symbol d,
    // or is NULL when no stored key continues that way
    TrieNode *children[ALPHABET_SIZE];
    // set when a stored key ends exactly at this node
    bool isLeaf;
};
// Allocates one trie node with malloc, clears isLeaf and sets every child
// link to NULL. Returns NULL when the allocation fails.
struct TrieNode *getNode(void)
{
    struct TrieNode *fresh = (struct TrieNode *)malloc(sizeof(struct TrieNode));
    if (fresh == NULL)
        return NULL;

    fresh->isLeaf = false;
    int slot = 0;
    while (slot < ALPHABET_SIZE)
    {
        fresh->children[slot] = NULL;
        ++slot;
    }
    return fresh;
}
// If not present, inserts key into trie
// If the key is prefix of trie node, just marks leaf node
bool insert(struct TrieNode *root, string key)
{
int level;
int length = key.length();
int index;
struct TrieNode *pCrawl = root;
for (level = 0; level < length; level++)
{
index = CHAR_TO_INDEX(key[level]);
if(pCrawl->isLeaf)
{
return 0;
}
else if (!pCrawl->children[index])
{
pCrawl->children[index] = getNode();
}
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isLeaf = true;
return 1;
}
int main()
{
int t;
cin>>t;
while(t--)
{
int n;
cin>>n;
struct TrieNode *root = getNode();
vector<string>v;
bool ok=1;
string keys;
for(int z=0;z<n;z++)
{
cin>>keys;
v.push_back(keys);
}
for(int z=0;z<n&&ok;++z)
{
ok=insert(root,v[z]);
}
if(ok)
cout<<"YES"<<endl;
else
cout<<"NO"<<endl;
}
return 0;
}

After all the phone numbers have been read into the vector, the vector needs to be sorted before the numbers are inserted into the trie. If the numbers are inserted unsorted, a longer number can be inserted before a number that is its prefix, and for the test case below the code gives a wrong answer.
2
91190
911
The judge accepts the solution after the change mentioned above is made.

Related

TRIE data structure implementation in c++

I have written some simple code to implement a trie data structure in C++, but when I run the program it crashes with a segmentation fault.
Please point out where I have gone wrong.
#include <bits/stdc++.h>
using namespace std;
// Trie node with 26 child links and an end-of-word flag.
struct trienode {
    trienode * child[26];
    bool isEnd;
    // A new node has no children and does not end a word.
    trienode() : isEnd(false)
    {
        int slot = 0;
        while (slot < 26)
        {
            child[slot] = NULL;
            ++slot;
        }
    }
};
// Root of the global trie. Starts out NULL and is created on first use by
// insert_str.
struct trienode * root;

// Inserts the first n characters of s into the global trie and marks the
// node of the last character as the end of a word.
// s must contain only 'a'..'z' in those positions, and n must not exceed
// the length of s.
void insert_str(string &s, int n)
{
    // The root was never allocated, so the very first dereference crashed.
    if (root == NULL)
    {
        root = new trienode();
    }
    trienode * curr = root;
    for (int i = 0; i < n; i++)
    {
        int index = s[i] - 'a';
        if (curr -> child[index] == NULL)
        {
            curr -> child[index] = new trienode();
        }
        // Always step down, including right after creating the child;
        // otherwise every new character was attached to the same node.
        curr = curr -> child[index];
    }
    curr -> isEnd = true;
}
// Inserts the 4-character word "yash" into the global trie.
int main()
{
    string s1("yash");
    const int length = 4;
    insert_str(s1, length);
}
You haven't allocated any memory for your root node.
Normally you would have a separate class to handle the trie as a whole. It can then allocate the root node.
// Wrapper that holds the trie as a whole and allocates its root node.
class trie
{
public:
// Allocates the root node so that it exists before any insertion.
trie()
{
root = new trienode();
}
// Inserts into this trie; the body is elided in this sketch.
void insert_str(string &s, int n)
{
...
}
private:
// Root node, allocated in the constructor.
trienode* root;
};
int main()
{
trie t;
string s1 = "yash";
t.insert_str(s1, 4);
}

C++ Implement the Node Count in Trie

I have the code below and need help adding a node count to it. Can anyone help me write that function?
I already have the word count, but I need help with the node count.
// C++ implementation to count words in a trie
#include <bits/stdc++.h>
using namespace std;
// Number of elements in a C array. Fully parenthesized so the macro stays
// correct inside larger expressions such as `x / ARRAY_SIZE(a)`.
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
// Alphabet size (# of symbols)
#define ALPHABET_SIZE (26)
// Converts key current character into index
// use only 'a' through 'z' and lower case; the argument is parenthesized so
// that compound expressions are converted as a whole.
#define CHAR_TO_INDEX(c) ((int)(c) - (int)'a')
// Node of the lowercase-letter trie.
struct TrieNode
{
    // one link per symbol of the alphabet; NULL where no key continues
    TrieNode *children[ALPHABET_SIZE];
    // set when a stored word ends exactly at this node
    bool isLeaf;
};
// Creates a node with `new`, sets all child links to NULL and clears isLeaf.
struct TrieNode *getNode(void)
{
    struct TrieNode *fresh = new TrieNode;
    int slot = 0;
    while (slot < ALPHABET_SIZE)
    {
        fresh->children[slot] = NULL;
        ++slot;
    }
    fresh->isLeaf = false;
    return fresh;
}
// Walks the trie along the characters of key, creating any missing node on
// the way, and marks the node reached by the last character as a leaf.
// If key is already present, only the leaf flag is (re)set.
void insert(struct TrieNode *root, const char *key)
{
    struct TrieNode *cursor = root;
    for (const char *p = key; *p != '\0'; ++p)
    {
        const char symbol = *p;
        struct TrieNode *&link = cursor->children[CHAR_TO_INDEX(symbol)];
        if (link == NULL)
            link = getNode();
        cursor = link;
    }
    // the last node visited terminates the word
    cursor->isLeaf = true;
}
// Returns how many nodes in the subtree rooted at root are flagged as leaf,
// i.e. the number of words stored below (and at) root.
// root must not be NULL.
int wordCount(struct TrieNode *root)
{
    // this node contributes one word when it ends a key
    int total = root->isLeaf ? 1 : 0;
    for (int i = 0; i < ALPHABET_SIZE; ++i)
    {
        struct TrieNode *child = root->children[i];
        if (child != NULL)
            total += wordCount(child);
    }
    return total;
}
// Driver: builds a trie from a fixed list of lowercase words and prints the
// number of words it contains.
int main()
{
    // Each key fits in 8 bytes including the terminating NUL.
    char keys[][8] = {"the", "a", "there", "answer",
                      "any", "by", "bye", "their"};
    const int keyCount = ARRAY_SIZE(keys);

    struct TrieNode *root = getNode();
    int next = 0;
    while (next < keyCount)
    {
        insert(root, keys[next]);
        ++next;
    }

    cout << wordCount(root);
    return 0;
}
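You can count the nodes with a simple recursive depth-first traversal of the trie: each call returns 1 for the current node plus the counts of all of its non-null children.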
// Returns the number of nodes in the subtree rooted at pNode (0 for NULL):
// one for pNode itself plus the totals of all its children.
int inorderTraversal(TrieNode* pNode)
{
    if (pNode == NULL)
        return 0;

    int nodes = 1; // pNode itself
    for (int slot = 0; slot != ALPHABET_SIZE; slot++)
        nodes += inorderTraversal(pNode->children[slot]);
    return nodes;
}

fix segmentation fault in trie c++

I'm using a trie implementation to store and search words in c++ programming language. In using the search() function, I am getting a segmentation fault when searching for a specific word. It seems that the error occurred in checking if the struct is null.
here is the error message:
Program received signal SIGSEGV, Segmentation fault.
0x000055555555b2ff in search (this=0x55555577ee70,
wordlist=0x55555577ef00, word="a1g6os") at test.cc:30
if (!pCrawl->children[index])
here is the source code:
#include <bits/stdc++.h>
using namespace std;
// Number of child links per trie node.
const int ALPHABET_SIZE = 26;
// Trie node: one child link per symbol and an end-of-word flag.
struct TrieNode {
    TrieNode *children[ALPHABET_SIZE];
    bool isEndOfWord;
};
// Allocates a node whose child links are all NULL and which does not mark
// the end of a word.
struct TrieNode *getNode(void) {
    struct TrieNode *fresh = new TrieNode;
    int slot = ALPHABET_SIZE;
    while (slot-- > 0)
        fresh->children[slot] = NULL;
    fresh->isEndOfWord = false;
    return fresh;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - 'a';
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
// Builds a trie from a fixed word list, then looks up "a1g6os" and prints
// Yes or No.
int main() {
    string keys[] = {"the", "a", "there",
                     "answer", "any", "by",
                     "bye", "their" };
    const int n = sizeof(keys)/sizeof(keys[0]);

    struct TrieNode *root = getNode();
    int next = 0;
    while (next < n) {
        insert(root, keys[next]);
        ++next;
    }

    // Search for different keys
    if (search(root, "a1g6os"))
        cout << "Yes\n";
    else
        cout << "No\n";
    return 0;
}
Both @Some programmer dude and @JohnnyJohansson have pointed out the root cause. The live test showed where the code read the array out of bounds. The fix is easy once you understand what happens: widen the child array and lower the base character so that digits as well as letters map to valid indices. The fixed code follows in case you cannot work it out yourself. A live test of it is available on cee.studio.
#include<iostream>
#include <stdexcept>
#include <string>
using namespace std;
// Number of child links per node; 75 slots cover the characters '0'..'z'.
const int ALPHABET_SIZE = 75; // increase the range
// Trie node: one child link per supported character and an end-of-word flag.
struct TrieNode {
    TrieNode *children[ALPHABET_SIZE];
    bool isEndOfWord;
};
// Returns a freshly allocated node: not an end of word, every child NULL.
struct TrieNode *getNode(void) {
    struct TrieNode *fresh = new TrieNode;
    fresh->isEndOfWord = false;
    struct TrieNode **link = fresh->children;
    struct TrieNode **const stop = fresh->children + ALPHABET_SIZE;
    while (link != stop) {
        *link = NULL;
        ++link;
    }
    return fresh;
}
void insert(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
pCrawl->children[index] = getNode();
pCrawl = pCrawl->children[index];
}
// mark last node as leaf
pCrawl->isEndOfWord = true;
}
// Returns true if key presents in trie, else
// false
bool search(struct TrieNode *root, string key) {
struct TrieNode *pCrawl = root;
for (int i = 0; i < key.length(); i++) {
int index = key[i] - '0'; // lower the low bound
if (!pCrawl->children[index])
return false;
pCrawl = pCrawl->children[index];
}
return (pCrawl != NULL && pCrawl->isEndOfWord);
}
// Inserts a fixed list of words into a new trie, then reports whether
// "a1g6os" is stored by printing Yes or No.
int main() {
    string keys[] = {"the", "a", "there",
                     "answer", "any", "by",
                     "bye", "their" };
    struct TrieNode *root = getNode();

    const int n = sizeof(keys)/sizeof(keys[0]);
    for (int k = 0; k != n; ++k)
        insert(root, keys[k]);

    // Search for different keys
    const bool present = search(root, "a1g6os");
    if (present)
        cout << "Yes\n";
    else
        cout << "No\n";
    return 0;
}

Using an array of struct counting the number of occurrence of a word in a text file C++

Hi everyone this is my first time in Stackoverflow. I have a question regarding counting the occurrence of words in text file using C++. This is my code so far. I have to create an array struct of index of the word and the counter of each word then store all of them in an AVL tree. After opening the file and read a word, I look for it in the avl tree or trie. If it is there, use the node's index to increment the word's Cnt. If it is not there, add it to the word array and put its position in the next struct and put the structs position in the avl tree. Also I set the struct Cnt to 1. The problem I am having now is it seems like my program doesn't process the counting properly therefore it only prints out 0. Please give me recommendation on how I can fix the bug. Please find my code below:
#include <iostream>
#include <fstream>
#include <string>
#include <cstdlib>
#include <cstring>
#include <ctype.h>
#include <stdio.h>
#include <string>
#include <cctype>
#include <stdlib.h>
#include <stdbool.h>
using namespace std;
struct Node* insert(struct Node* node, int key) ;
void preOrder(struct Node *root) ;
void removePunct(char str[]);
int compareWord(char word1[], char word2[] );
// Bookkeeping for one word: a position value and an occurrence counter.
struct Stats {
    int wordPos;
    int wordCnt;
};
// Table of per-word records (capacity 50000).
Stats record[50000];
// Index of the next free slot in record; also the number of records in use.
int indexRec = 0;
// Scratch buffer that receives the word currently being processed.
char word[50000*10] ;
// Offset at which main stores the next new word in its word array.
int indexWord = 0;
int main() {
ifstream fin;
string fname;
char line[200], wordArray[500000];
cout << "Enter the text file name:" << endl;
cin >> fname;
fin.open(fname.c_str());
if (!fin) {
cerr << "Unable to open file" << endl;
exit(1);
}
struct Node *root = NULL;
while (!fin.eof() && fin >> line) { //use getline
for(int n=0,m=0; m!=strlen(line); m+=n) {
sscanf(&line[m],"%s%n",word,&n);
removePunct(word);
//strcpy(&wordArray[indexWord],word);
int flag = compareWord(wordArray, word);
if(flag==-1) {
strcpy(&wordArray[indexWord],word);
record[indexRec].wordPos = indexWord;
record[indexRec].wordCnt = 1;
root = insert(root, record[indexRec].wordPos);
indexWord+=strlen(word)+1;
// indexes of the word array
indexRec++;
cout << wordArray[indexWord] << " ";
} else
record[flag].wordCnt++;
cout << record[indexRec].wordCnt;
cout << endl;
}
/*for(int x = 0; x <= i; x++)
{
cout << record[x].wordPos << record[x].wordCnt << endl;
}*/
}
fin.close();
return 0;
}
// Removes every punctuation and whitespace character from str in place and
// converts the remaining characters to lower case. str must be a
// NUL-terminated, writable string; the result is never longer than the input.
void removePunct(char str[]) {
    int kept = 0; // write position for the next character that is kept
    for (int cur = 0; str[cur] != '\0'; cur++) {
        // <ctype.h> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behavior.
        const unsigned char ch = static_cast<unsigned char>(str[cur]);
        if (ispunct(ch) || isspace(ch))
            continue;
        str[kept] = static_cast<char>(tolower(ch));
        kept++;
    }
    str[kept] = '\0';
}
// Compares two NUL-terminated strings.
// Returns 0 when they are equal and -1 otherwise. Every path now returns a
// value explicitly; the original relied on `return x++` and could appear to
// fall off the end of the function.
int compareWord(char word1[], char word2[] ) {
    return (strcmp(word1, word2) == 0) ? 0 : -1;
}
// Binary tree node holding an integer key, two child links and a height.
struct Node {
    int key;
    Node *left;
    Node *right;
    int height;
};
// A utility function to get maximum of two integers
int max(int a, int b);
// Returns the height stored in N, or 0 when N is NULL.
int height(struct Node *N) {
    return (N != NULL) ? N->height : 0;
}
// Returns the larger of the two integers (b when they are equal).
int max(int a, int b) {
    if (a > b)
        return a;
    return b;
}
/* Allocates a node with malloc, stores key in it, and sets both child links
   to NULL and the height to 1. The malloc result is not checked. */
struct Node* newNode(int key) {
    struct Node* fresh = (struct Node*)malloc(sizeof(struct Node));
    fresh->height = 1; // a new node has no children yet
    fresh->right = NULL;
    fresh->left = NULL;
    fresh->key = key;
    return fresh;
}
// Rotates the subtree rooted at y to the right: y's left child becomes the
// new subtree root, y becomes its right child, and the former right subtree
// of that child is re-attached as y's left subtree.
// y and y->left must not be NULL. Returns the new subtree root.
struct Node *rightRotate(struct Node *y) {
    struct Node *pivot = y->left;
    struct Node *moved = pivot->right;

    // Re-link the three nodes involved.
    y->left = moved;
    pivot->right = y;

    // y is now below pivot, so its height must be refreshed first.
    y->height = 1 + max(height(y->left), height(y->right));
    pivot->height = 1 + max(height(pivot->left), height(pivot->right));

    return pivot;
}
// Rotates the subtree rooted at x to the left: x's right child becomes the
// new subtree root, x becomes its left child, and the former left subtree of
// that child is re-attached as x's right subtree.
// x and x->right must not be NULL. Returns the new subtree root.
struct Node *leftRotate(struct Node *x) {
    struct Node *pivot = x->right;
    struct Node *moved = pivot->left;

    // Re-link the three nodes involved.
    x->right = moved;
    pivot->left = x;

    // x is now below pivot, so its height must be refreshed first.
    x->height = 1 + max(height(x->left), height(x->right));
    pivot->height = 1 + max(height(pivot->left), height(pivot->right));

    return pivot;
}
// Returns height(left subtree) - height(right subtree) of N, or 0 for NULL.
int getBalance(struct Node *N) {
    return (N == NULL) ? 0 : height(N->left) - height(N->right);
}
// Inserts key into the subtree rooted at node, rebalancing with rotations on
// the way back up, and returns the (possibly new) root of that subtree.
// A key that is already present leaves the subtree unchanged.
struct Node* insert(struct Node* node, int key) {
    // Plain binary-search-tree insertion.
    if (node == NULL)
        return newNode(key);
    if (key == node->key)
        return node; // duplicates are not stored
    if (key < node->key)
        node->left = insert(node->left, key);
    else
        node->right = insert(node->right, key);

    // Refresh this ancestor's height and check whether it is out of balance.
    node->height = 1 + max(height(node->left), height(node->right));
    const int balance = getBalance(node);

    if (balance > 1) {
        // Left-heavy.
        if (key < node->left->key)
            return rightRotate(node);            // left-left
        if (key > node->left->key) {
            node->left = leftRotate(node->left); // left-right
            return rightRotate(node);
        }
    } else if (balance < -1) {
        // Right-heavy.
        if (key > node->right->key)
            return leftRotate(node);               // right-right
        if (key < node->right->key) {
            node->right = rightRotate(node->right); // right-left
            return leftRotate(node);
        }
    }

    // Still balanced: the subtree root is unchanged.
    return node;
}
// Prints the keys of the tree in pre-order (node, left subtree, right
// subtree), each followed by a space.
void preOrder(struct Node *root) {
    if (root == NULL)
        return;
    printf("%d ", root->key);
    preOrder(root->left);
    preOrder(root->right);
}
One problem (I cannot see if this is the only problem) is that you have code like this, deleting all the intermediate lines:
record[indexRec].wordCnt = 1;
if find word fails
indexRec++;
cout << record[indexRec].wordCnt;
So when you have a new word (if I understand the code correctly!) you are printing out the next record. One fix would be:
// Print the count of the record that was just touched: a new word was stored
// at indexRec before the increment, an existing word lives at record[flag].
if (flag==-1)
    cout << record[indexRec-1].wordCnt;
else
    cout << record[flag].wordCnt;
There's a lot of other issues, like compareWord() is very wrong, you should decide if you really want to use C++ or just C with std::cout, the file reading code is odd, you're including both C and C++ versions of standard headers, etc, but these are issues for another question!

Searching words in a very big file

Given a large file, we need to store the words so that searching of a word can be done in constant time. Also how will we find the 10% most frequently occurring words in the file?
What I have achieved so far is searching the word through trie implementation.
Please suggest some way to find the 10% most frequent words.
#include<iostream>
#include<cstdio>
using namespace std;
// Node of a linked trie: `right` chains alternative characters at the same
// position, `down` leads to the nodes for the following position.
class Node
{
public:
    char value;  // character stored in this node
    Node* right; // next alternative at this position, or NULL
    Node* down;  // first node of the following position, or NULL
    // All members start in a defined state; value used to be left
    // uninitialized.
    Node() : value('\0'), right(NULL), down(NULL)
    {
    }
};
class Trie
{
public:
Node* head;
Trie()
{
head=NULL;
}
void insert(string s);
void search(string s);
};
// Adds s to the trie.
//
// The word is stored as its characters followed by a terminating '\0' node,
// which is what search() matches to recognise a complete word. The original
// code only added that terminator on some paths, so the first inserted word
// and any word that is a prefix of an earlier one could never be found.
void Trie::insert(string s)
{
    // link always refers to the pointer that should hold the node for the
    // current position: first head, then the `down` link of each matched node.
    Node** link = &head;
    for (size_t i = 0; i <= s.length(); i++)
    {
        // Position s.length() stands for the terminating '\0'.
        const char c = (i < s.length()) ? s[i] : '\0';
        // Walk the chain of alternatives until c is found or the chain ends.
        while (*link != NULL && (*link)->value != c)
            link = &(*link)->right;
        if (*link == NULL)
        {
            *link = new Node();
            (*link)->value = c;
        }
        link = &(*link)->down;
    }
}
// Prints "String found" when s, including its terminating '\0' node, can be
// matched in the trie, and "String not found" otherwise.
// An empty trie (head == NULL) is handled instead of being dereferenced, and
// the character index never runs past the terminator.
void Trie::search(string s)
{
    const Node* ptr = head;
    size_t matched = 0; // number of positions matched so far
    while (ptr != NULL && matched <= s.length())
    {
        // Position s.length() stands for the terminating '\0'.
        const char c = (matched < s.length()) ? s[matched] : '\0';
        if (ptr->value == c)
        {
            ptr = ptr->down;
            matched++;
        }
        else
        {
            ptr = ptr->right;
        }
    }
    if (matched == s.length() + 1) cout << "String found\n";
    else cout << "String not found\n";
}
int main()
{
Trie t;
FILE* input;
char s[100];
input=fopen("big.txt","r");
int i=0;
while( (fgets(s,sizeof(s),input) ) !=NULL)
{
int i=0; int j=0;
char str[47];
while(s[i]!='\0')
{
if(s[i]==' ' || s[i+1]=='\0')
{
str[j]='\0';
j=0;
t.insert(str);
i++;
continue;
}
str[j]=s[i];
j++;
i++;
}
}
t.search("Dates");
//t.search("multinational");
fclose(input);
}
A hash will let you look up the words in constant time.
You could probably use some kind of partitioning like that used in quicksort to find a word that occurs at least 10% out of the file.
The obvious solution is to store the contents of the file in some appropriate STL container such as std::set and then run find() on that container.
If you insist on doing this manually, a binary tree will grow increasingly slower the more data you put inside it. Plus you must keep balancing it. A hash table with chaining would be a more efficient ADT for huge amounts of data.
If you use a tree you cannot get constant time. The binary tree you are building has a logarithmic time complexity.
If it is possible to build an index consider an inverted index. This would still not help you with the constant time (I don't see how you can achieve that anyway), but can help you figure what are the words used the most, since for every word it stores the positions in the file where the word is found. You can actually combine that into your tree.
Here is similar C++ code that uses a priority queue, a map and a trie.
For simplicity it reads the words from a vector of strings, but it can easily be modified to read them from a file.
//Find the top K frequent words in a file or stream, C++
//This is a working solution for priority_queue for your reference.
#include <iostream>
#include <vector>
#include <queue>
#include <unordered_map>
using namespace std;
// Maximum number of entries kept in the heap.
#define K_TH 3
class TrieNode;
// Heap entry for one tracked word.
typedef struct HeapNode
{
    std::string word;   // the tracked word
    int frequency;      // occurrence count used to order the heap
    TrieNode *trieNode; // trie node associated with this entry, or NULL
    // Initializers follow the declaration order, and trieNode no longer
    // starts out with an indeterminate value.
    HeapNode(): word(""), frequency(0), trieNode(NULL) {}
}HeapNode;
// Trie node whose children are keyed by character. Besides the usual leaf
// flag it stores the complete word, an occurrence count and a pointer to the
// heap entry associated with the word (NULL when there is none).
class TrieNode
{
private:
    int frequency = 0;
    bool m_isLeaf = false;
    string word = "";
    unordered_map<char, TrieNode*> children;
    HeapNode *heapNode = NULL;
public:
    TrieNode() {}
    // Creates a node that already has an empty child for c.
    TrieNode(char c)
    {
        children[c] = new TrieNode();
        this->m_isLeaf = false;
    }
    void setWord(string word)
    {
        this->word = word;
    }
    string getWord() const
    {
        return this->word;
    }
    bool isLeaf(void) const
    {
        return this->m_isLeaf;
    }
    void setLeaf(bool leaf)
    {
        this->m_isLeaf = leaf;
    }
    // Returns the child for c, or NULL when there is none.
    // Uses find() so that a failed lookup no longer inserts a NULL entry
    // into the map, as operator[] did.
    TrieNode* getChild(char c) const
    {
        auto it = children.find(c);
        return (it != children.end()) ? it->second : NULL;
    }
    // Creates the child for c. An existing child is kept: replacing it would
    // leak the old node and drop the subtree below it.
    void insert(char c)
    {
        TrieNode*& slot = children[c];
        if (slot == NULL)
            slot = new TrieNode();
    }
    int getFrequency() const
    {
        return this->frequency;
    }
    void setFrequency(int frequency)
    {
        this->frequency = frequency;
    }
    void setHeapNode(HeapNode *heapNode)
    {
        this->heapNode = heapNode;
    }
    HeapNode* getHeapNode() const
    {
        return heapNode;
    }
    // Orders heap entries by descending frequency; kept for compatibility.
    bool operator()(HeapNode* &a, HeapNode* &b)
    {
        return (a->frequency > b->frequency);
    }
};
// Trie of words that counts how many times each word has been inserted.
class Trie
{
private:
    TrieNode *root = NULL;
public:
    // Creates the root node.
    Trie()
    {
        if (root == NULL)
            root = new TrieNode();
    }
    // Inserts word character by character, creating missing nodes, then
    // marks the final node as a leaf, stores the word in it and increments
    // its frequency by one. Returns that final node.
    TrieNode* insert(string word)
    {
        if (root == NULL)
            root = new TrieNode();

        TrieNode* node = root;
        for (char c : word)
        {
            if (node->getChild(c) == NULL)
                node->insert(c);
            node = node->getChild(c);
        }

        node->setLeaf(true);
        node->setWord(word);
        const int seenBefore = node->getFrequency();
        node->setFrequency(seenBefore + 1);
        return node;
    }
};
struct cmp
{
bool operator()(HeapNode* &a, HeapNode* &b)
{
return (a->frequency > b->frequency);
}
};
typedef priority_queue<HeapNode*, vector<HeapNode*>, cmp > MinHeap;
// Records one occurrence of word: inserts it into the trie and keeps pq
// holding at most K_TH heap entries for the most frequent words seen so far.
// Does nothing when root is NULL.
void insertUtils(Trie *root, MinHeap &pq, string word )
{
    if (!root)
        return;
    TrieNode* current = root->insert(word);
    HeapNode *heapNode = current->getHeapNode();
    if (heapNode)// if word already present in heap
    {
        // Changing a key while the entry sits inside the queue would break
        // the heap ordering, so drain the queue, bump the count, and refill.
        vector<HeapNode*> entries;
        while (!pq.empty())
        {
            entries.push_back(pq.top());
            pq.pop();
        }
        heapNode->frequency += 1;
        for (size_t i = 0; i < entries.size(); ++i)
            pq.push(entries[i]);
    }else if (pq.size() < static_cast<size_t>(K_TH))
    {// if word not present in heap and heap is not full;
        heapNode = new HeapNode();
        heapNode->word = word;
        heapNode->frequency = 1;
        heapNode->trieNode = current;
        current->setHeapNode(heapNode);
        pq.push(heapNode);
    }else if (pq.top()->frequency < current->getFrequency())
    { // if word is not present and heap is full;
        HeapNode *evicted = pq.top();
        //remove first element and add current word
        pq.pop();
        // The evicted word's trie node must not keep a pointer to the entry
        // that is about to be deleted.
        if (evicted->trieNode)
            evicted->trieNode->setHeapNode(NULL);
        delete evicted;
        // The replacement entry was previously left blank (empty word,
        // frequency 0); fill it in from the current trie node.
        heapNode = new HeapNode();
        heapNode->word = word;
        heapNode->frequency = current->getFrequency();
        heapNode->trieNode = current;
        current->setHeapNode(heapNode);
        pq.push(heapNode);
    }
}
// Feeds every word of input through insertUtils and then prints the entries
// left in the heap as "word:frequency", least frequent first.
void printKMostFrequentWords(vector<std::string> input)
{
    // Automatic storage: the Trie object itself is no longer leaked.
    // NOTE(review): the trie's nodes are still never freed because Trie
    // defines no destructor.
    Trie trie;
    MinHeap minHeap;
    for (vector<string>::const_iterator it = input.begin(); it != input.end(); ++it)
    {
        insertUtils(&trie, minHeap, *it);
    }
    while(!minHeap.empty())
    {
        HeapNode *heapNode = minHeap.top();
        cout << heapNode->word << ":" << heapNode->frequency << endl;
        minHeap.pop();
        // The entry is not used after printing; release it.
        delete heapNode;
    }
}
// Runs the frequency report on a fixed list of nine sample words.
int main() {
    const char* const samples[] = {
        "abc", "def", "ghi",
        "jkl", "abc", "def",
        "mno", "xyz", "abc"
    };
    const size_t count = sizeof(samples) / sizeof(samples[0]);
    vector<std::string> input(samples, samples + count);
    printKMostFrequentWords(input);
}