Aho-Corasick Algo in c/c++ for hex - c++

my problem - i am trying to use aho-corasick algo that i found for c++ while searching net, it currently only search for char based string, i want it to modify it to search for hex based string of varied character. any help to improve the code is greatly appreciated. if i just modify my string text it goes into an ifinite loop.
int buildMatchingMachine(const vector<string> &words, char lowestChar = 'a', char highestChar = 'z')
{
memset(out, 0, sizeof out);
memset(f, -1, sizeof f);
memset(g, -1, sizeof g);
int states = 1; // Initially, we just have the 0 state
for (int i = 0; i < words.size(); ++i)
{
const string &keyword = words[i];
int currentState = 0;
for (int j = 0; j < keyword.size(); ++j)
{
int c = keyword[j] - lowestChar;
if (g[currentState][c] == -1)
{ // Allocate a new node
g[currentState][c] = states++;
}
currentState = g[currentState][c];
}
out[currentState] |= (1 << i); // There's a match of keywords[i] at node currentState.
}
// State 0 should have an outgoing edge for all characters.
for (int c = 0; c < MAXC; ++c)
{
if (g[0][c] == -1)
{
g[0][c] = 0;
}
}
// Now, let's build the failure function
queue<int> q;
for (int c = 0; c <= highestChar - lowestChar; ++c)
{ // Iterate over every possible input
// All nodes s of depth 1 have f[s] = 0
if (g[0][c] != -1 && g[0][c] != 0)
{
f[g[0][c]] = 0;
q.push(g[0][c]);
}
}
while (q.size())
{
int state = q.front();
q.pop();
for (int c = 0; c <= highestChar - lowestChar; ++c)
{
if (g[state][c] != -1)
{
int failure = f[state];
while (g[failure][c] == -1)
{
failure = f[failure];
}
failure = g[failure][c];
f[g[state][c]] = failure;
out[g[state][c]] |= out[failure]; // Merge out values
q.push(g[state][c]);
}
}
}
return states;
}
int openFile::findNextState(int currentState, char nextInput, char lowestChar = 'a')
{
int answer = currentState;
int c = nextInput - lowestChar;
while (g[answer][c] == -1)
answer = f[answer];
return g[answer][c];
}

I have found a working solution, you just need to redefine the lowest char and highest char of hex based symbols to its corresponding ascii value not int value also change the MAXS and MAXC to suitable number, now the code works for hex based values.

Related

leetcode problem 1255 Maximum Score Words Formed by Letters

Given a list of words, a list of single letters (might be repeating), and score of every character.
Return the maximum score of any valid set of words formed by using the given letters (words[i] cannot be used two or more times).
It is not necessary to use all characters in letters and each letter can only be used once. Score of letters 'a', 'b', 'c', ... ,'z' is given by score[0], score[1], ... , score[25] respectively
My approach is
finding score of a word
then can that word be formed or not
and finally, get the result code is given below
class Solution {
public:
// score finder func
int scoreFinder(vector<int>& score , string s){
int ans = 0;
for(int i = 0; i < s.size(); i++){
char ch = s[i];
ans += score[ch -'a'];
}
return ans;
}
// word can be formed or not
bool canFormed(string s , unordered_map<char,int>& myMap){
for(int i = 0; i < s.size(); i++){
if(myMap.count(s[i]) <= 0){
return false;
break;
}else{
myMap[s[i]]--;
}
}
return true;
}
int maxScoreWords(vector<string>& words, vector<char>& letters, vector<int>& score){
// freq Count of letters
/* unordered_map<char,int> map;
for(int i = 0; i < letters.size(); i++){
map[letters[i]]++;
}*/
int result = 0; // final score is stored in it
int idx = 0;
while(idx < words.size()){
// creating new map every time so that check for all possible words combinations
unordered_map<char,int> myMap;
for(int j = 0; j < letters.size(); j++){
myMap[letters[j]] ++; //= map[letters[j]];
}
int tempResult = 0;
for(int i = idx; i < words.size(); i++){
string temp = words[i];
if(canFormed(temp , myMap)){
tempResult += scoreFinder(score , temp);
}
}
result = max(result , tempResult);
idx++;
}
return result;
}
};
Input:
words = ["dog","cat","dad","good"],
letters = ["a","a","c","d","d","d","g","o","o"],
score = [1,0,9,5,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0]
Output:
23
but I am getting the wrong output and I am unable to find the error in my code, my output is 33 for this testcase.
There is a minor bug in your code.
In the function canFormed you decrement the counter als in the case where it is 0 already. We could simply add an additional if-statement or rewrite the whole condition.
Please see one possible solution below:
// word can be formed or not
bool canFormed(string s, unordered_map<char, int>& myMap) {
for (int i = 0; i < s.size(); i++) {
if (myMap.count(s[i]) <= 0) {
return false;
}
else {
if (myMap[s[i]] > 0) {
myMap[s[i]]--;
}
else return false;
}
}
return true;
}
Then it should work.
Solved with backtracking in JAVA
public int maxScoreWords(String[] words, char[] letters, int[] score) {
int[] counts = new int[26];
for (char letter : letters) {
counts[letter - 'a']++;
}
return wordHelper(words, counts, score, 0);
}
static int wordHelper(String[] words, int[] counts, int[] score, int index) {
// base case
if (index > words.length - 1) {
return 0;
}
// recursive case - exclude
int excludedSum = wordHelper(words, counts, score, index + 1);
// recursive case - include
int includedSum = 0;
boolean recursionCall = true;
int wordScore = 0;
for (char c : words[index].toCharArray()) {
counts[c - 'a']--;
if (counts[c - 'a'] < 0) {
recursionCall = false;
}
wordScore += score[c - 'a'];
}
if (recursionCall) {
includedSum = wordScore + wordHelper(words, counts, score, index + 1);
}
for (char c : words[index].toCharArray()) {
counts[c - 'a']++;
}
return Math.max(excludedSum, includedSum);
}

Read a char array and modify it

So for this program the user gives a text and it must print the text back modifying in these ways by adding and removing space between words:
Each line should contain 60 characters and be aligned left and right
In each line the biggest space between two words must be bigger than the smallest by only one character and a bigger space must be righter than a smaller one. This is what I have managed to do yet, but without any success.
I tried C++ strings instead of a char array but my knowledge about them is limited yet.
#include <iostream>
using namespace std;
bool isletter(char c) {
return c >= 'a' and c <= 'z' or c >= 'A' and c <= 'Z';
}
int main() {
int c, i = 0, constant = 0, counter = 0;
char text[1500], original[1500];
do {
c = getchar();
original[i] = c;
i++;
} while (c != EOF);
for (int j = 0; j <= i; j++) {
if (original[j] == ' ') {
int n = 0;
j = constant;
while (not isletter(original[j])) {
n++;
text[constant] = original[j];
j = j + n;
}
counter++;
} else if (text[j] == '\n') {
text[j] = ' ';
counter++;
} else {
text[j] = original[j];
counter++;
}
}
for (int i = 0; i < counter; i++) {
cout << text[i];
}
}

Huffman coding c++

So I am working on Huffman coding for a project. However, my code just doesn't work. When i ran it on visual studio, it didn't give me an error. What I was trying to do is to read a file and put all of them into a string. And get the frequency for each character in that string. But I think when the file got a little bit large, it seems like my code is running in a infinite loop. Can anyone explain anything to me? By the way, I had a sorted function that I used to sort a vector of node* by their frequency.
ifstream infile;
infile.open(filename);
string q;
string line;
while (getline(infile, line))
{
q += line;
}
char y;
int count = 0;
int check = 0;
for (int i = 0; i < q.size(); i++) //if the string gets big, it seems to become an infinite loop in here
{
y = q[i];
for (int x = i - 1; x > 0; x--) //make sure not counting the same char
{
if (y == q[x])
{
check++;
}
}
if (check == 0)
{
for (int i = 0; i < q.size(); i++)
{
if (q[i] == y)
{
count++;
}
}
node*x = new node;
x->char1 = y; //my node have char
x->freq = count; //my node has frequency
list1.push_back(x);
}
count = 0;
check = 0;
}
sort(list1.begin(), list1.end(), sorter); //sort them from small to big
while (list1.size() > 1)
{
node*left = list1[0];
node*right = list1[1];
list1.erase(list1.begin(), list1.begin() + 2);
double sum = left->freq + right->freq;
node* x = new node;
x->freq = sum;
x->left = left;
x->right = right;
list1.push_back(x);
sort(list1.begin(), list1.end(), sorter);
}
list1.clear();
return true;
The following is my sort function
static struct {
bool operator()(NodeInterface* a, NodeInterface* b) {
if (a->getFrequency() == b->getFrequency()) {//if the frequencies are even,
if (b->getCharacter() == '\0') return false;
if (a->getCharacter() != '\0') {
return (int)a->getCharacter() < (int)b->getCharacter();
}
return false;
}
return a->getFrequency() < b->getFrequency();
}
} sorter;
I see two major problems.
You have a for loop inside a for loop both initializing and using int i
Change the variable name of the inner loop.
for (int i = 0; i < q.size(); i++) //if the string gets big, it seems to become an infinite loop in here
.
.
if (check == 0)
{
for (int i = 0; i < q.size(); i++) //Change this to int j for example
{
.
.
And the Sorter struct. I would rewrite it as this.
static struct {
bool operator()(NodeInterface* a, NodeInterface* b) {
if (a->getFrequency() == b->getFrequency()) {//if the frequencies are even,
if (b->getCharacter() == '\0') return false;
if (a->getCharacter() == '\0') return true;
return (int)a->getCharacter() < (int)b->getCharacter();
}
return a->getFrequency() < b->getFrequency();
}
} sorter;
A few suggestions for your for loop:
for (int i = 0; i < q.size(); i++) //if the string gets big, it seems to become an infinite loop in here
{
y = q[i];
//You can avoid this entire loop by using a structure like map
for (int x = i - 1; x > 0; x--) //make sure not counting the same char
{
if (y == q[x])
{
check++;
//break; //if you use a loop, break it once you find the character.
}
}
if (check == 0)
{
for (int j = 0; j < q.size(); j++)//Renamed variable + you can start this loop from j = i as you know there is no occurrence of y before that.
{
if (q[i] == y)
{
count++;
}
}
node*x = new node;
x->char1 = y; //my node have char
x->freq = count; //my node has frequency
list1.push_back(x);
}
count = 0;
check = 0;
}

Printing an integer value from an ASCII character

I am trying to implement the function stoi() in c++. I have made an int array arr to store the integer ASCII of all elements of char_arr. This works fine if I print the values from my char_arr array because its a character array. But, how do I transfer my integer values from the char array to an int array and print only the numbers and not their ASCII?
Code:
int stoi(){
int *arr = new int [strlen(char_arr)];
for (int i=0; char_arr[i]!='\0'; ++i){
arr[i] = char_arr[i];
}
for (int i=0; char_arr[i] != '\0'; ++i){
if (arr[i] >= 48 && arr[i] <= 57){
cout << char_arr[i];
}
}
}
First of all, remove the first loop and use char_arr directly. You don't need to hold ints to make it work.
As for printing int values, you can use this:
for (int i = 0; char_arr[i] != '\0'; ++i) {
if (char_arr[i] >= '0' && char_arr[i] <= '9') { //I would suggest you to use this syntax instead of raw ASCII codes.
cout << (char_arr[i] - '0');
}
}
int stoi(){
/* if you do not use arr.
int *arr = new int[strlen(char_arr)];
for (int i = 0; char_arr[i] != '\0'; ++i){
arr[i] = char_arr[i];
}
*/
int sign = 1, value = 0;
if (*char_arr == '+') {
++char_arr;
}
else if (*char_arr == '-') {
++char_arr;
sign = -1;
}
while (*char_arr) {
if (*char_arr >= '0' && *char_arr <= '9') {
value = value * 10 + *char_arr - '0';
++char_arr;
} else {
break;
}
}
return sign * value;
}
Here's the one I came up with:
#include <cstdio>
#include <cstring>
#define _BASE_ 10
int main(int argc, char **argv)
{
char ascii[] = "474927";
signed int value = 0;
signed int ascii_len = strlen(ascii);
int pos = 0;
for(signed int i = ascii_len-1; i >= 0; i--)
{
if(i == 0 && ascii[i] == '-')
{
value *= -1;
continue;
}
int base = 1;
if(pos > 0)
{
base = _BASE_;
for(int j = 1; j < pos; j++)
base *= _BASE_;
}
value += base * (ascii[i] - 48);
pos++;
}
printf("Value: %d\n", value);
return 0;
}

Trouble with finding a c-string substring in C++

long time lurker, first time poster. I have been working on this problem for the last six hours hours.
Problem:
Implement the following functions. Each function deals with null terminated C-Style strings. You can assume that any char array passed into the functions will contain null terminated data. Place all of the functions in a single file and then create a main() function that tests the functions thoroughly.
Note: You may not use any c-string functions other than strlen().
I am having trouble with the fourth function.
The desired behavior is: This function returns the index in string s where the substring can first be found. For example if s is "Skyscraper" and substring is "ysc" the function would return 2. It should return -1 if the substring does not appear in the string.
prototype:
int findSubstring(char *str, char substring[])
Here's my two starts for function definitions, I'm not really sure if either is going in the right direction, I'm having a lot of trouble keeping the loop iterations in my head, any help would be TREMENDOUSLY appreciated.
int findSubstring(char *str, char substring[]){
int subS = -1, index1 = 0, index2 = 0;
int length1 = (strlen(str) - 1);
int length2 = (strlen(substring) - 1);
if(length1 > length2){
for(int i = 0; i <= length2; i++){
for(int j = 0; j <= length1; j++){
if(*(substring + i) == *(str + j) && *(substring +i) != '\0' ){
i++;
if(index1 == 0){
index1 = i;
}
}
if( *(substring + i) == '\0'){
subS = i + 2;
}
}
}
}
if (length1 < length2){
cout << "Invalid, substring exceeds size of string!" << endl;
}
return subS;
}
int findSubstring(char *str, char substring[]){
int index = -1;
int lengthStr = (strlen(str) - 1);
int lengthSub = (strlen(substring) - 1);
if (lengthStr < lengthSub){
cout << "Invalid input, substring exceeds size of string!" << endl;
}
if( lengthSub == 0){
cout << "";
}
if (lengthStr > lengthSub){
for(int i = 0; i <= lengthSub; i++){
for(int j = 0; j <= lengthStr; j++){
}
return index;
}
//You can replace my str.size() and subString.size() by the size of each c-string.
int stringPointerOperation( string str, string subString )
{
int pos=0;
bool notFound;
for(int i = 0; i < str.size() ; i++)
{
notFound= false;
if(str[i] == subString[0])
{
pos=i;
for(int k = 0 ; k < subString.size() && k < str.size() ; k++,i++)
{
if(subString[k] != str[i] )
{
notFound=true;
break;
}
}
}
}
if(notFound)
return -1;
else
return pos;
}
You are using the wrong strategy for finding a sub-string in a string. The outer for loop needs to iterate over the main string and the inner for loop needs to iterate over the sub-string.
Say you are looking for "de" in "abcdef". The strategy that I find easier to understand and implement is:
Can I find "de" starting from 0 of "abcdef". No, I can't.
Can I find "de" starting from 1 of "abcdef". No, I can't.
Can I find "de" starting from 2 of "abcdef". No, I can't.
Can I find "de" starting from 3 of "abcdef". Yes, I can. Return 3.
Here's a version that works for me.
int findSubstring(char *str, char substring[]){
int i;
int j;
int length1 = strlen(str);
int length2 = strlen(substring);
if(length1 < length2){
std::cout << "Invalid, substring exceeds size of string!" << std::endl;
return -1;
}
for(i = 0; i < length1; i++){
for(j = 0; j < length2; j++){
// The index to use access the element of str
// needs to be offset by i.
if( str[i+j] != substring[j] )
{
break;
}
}
if ( j == length2 )
{
return i;
}
}
return -1;
}