Struct Node {
Node *N[SIZE];
int value;
};
struct Trie {
Node *root;
Node* findNode(Key *key) {
Node *C = &root;
char u;
while (1) {
u = key->next();
if (u < 0) return C;
// if (C->N[0] == C->N[0]); // this line will speed up execution significantly
C = C->N[u];
if (C == 0) return 0;
}
}
void addNode(Key *key, int value){...};
};
In this implementation of Prefix Tree (aka Trie) I found out that 90% of findNode() execution time is taken by a single operation C=C->N[u];
In my attempt to speed up this code, I randomly added the line that is commented in the snipped above, and code became 30% faster! Why is that?
UPDATE
Here is complete program.
#include "stdio.h"
#include "sys/time.h"
long time1000() {
timeval val;
gettimeofday(&val, 0);
val.tv_sec &= 0xffff;
return val.tv_sec * 1000 + val.tv_usec / 1000;
}
struct BitScanner {
void *p;
int count, pos;
BitScanner (void *p, int count) {
this->p = p;
this->count = count;
pos = 0;
}
int next() {
int bpos = pos >> 1;
if (bpos >= count) return -1;
unsigned char b = ((unsigned char*)p)[bpos];
if (pos++ & 1) return (b >>= 4);
return b & 0xf;
}
};
struct Node {
Node *N[16];
__int64_t value;
Node() : N(), value(-1) { }
};
struct Trie16 {
Node root;
bool add(void *key, int count, __int64_t value) {
Node *C = &root;
BitScanner B(key, count);
while (true) {
int u = B.next();
if (u < 0) {
if (C->value == -1) {
C->value = value;
return true; // value added
}
C->value = value;
return false; // value replaced
}
Node *Q = C->N[u];
if (Q) {
C = Q;
} else {
C = C->N[u] = new Node;
}
}
}
Node* findNode(void *key, int count) {
Node *C = &root;
BitScanner B(key, count);
while (true) {
char u = B.next();
if (u < 0) return C;
// if (C->N[0] == C->N[1]);
C = C->N[0+u];
if (C == 0) return 0;
}
}
};
int main() {
int T = time1000();
Trie16 trie;
__int64_t STEPS = 100000, STEP = 500000000, key;
key = 0;
for (int i = 0; i < STEPS; i++) {
key += STEP;
bool ok = trie.add(&key, 8, key+222);
}
printf("insert time:%i\n",time1000() - T); T = time1000();
int err = 0;
key = 0;
for (int i = 0; i < STEPS; i++) {
key += STEP;
Node *N = trie.findNode(&key, 8);
if (N==0 || N->value != key+222) err++;
}
printf("find time:%i\n",time1000() - T); T = time1000();
printf("errors:%i\n", err);
}
This is largely a guess but from what I read about CPU data prefetcher it would only prefetch if it sees multiple access to the same memory location and that access matches prefetch triggers, for example looks like scanning. In your case if there is only single access to C->N the prefetcher would not be interested, however if there are multiple and it can predict that the later access is further into the same bit of memory that can make it to prefetch more than one cache line.
If the above was happening then C->N[u] would not have to wait for memory to arrive from RAM therefore would be faster.
It looks like what you are doing is preventing processor stalls by delaying the execution of code until the data is available locally.
Doing it this way is very error prone unlikely to continue working consistently. The better way is to get the compiler to do this. By default most compilers generate code for a generic processor family. BUT if you look at the available flags you can usually find flags for specifying your specific processor so it can generate more specific code (like pre-fetches and stall code).
See: GCC: how is march different from mtune? the second answer goes into some detail: https://stackoverflow.com/a/23267520/14065
Since each write operation is costly than the read.
Here If you see that,
C = C->N[u]; it means CPU is executing write in each iteration for the variable C.
But when you perform if (C->N[0] == C->N[1]) dummy++; write on dummy is executed only if C->N[0] == C->N[1]. So you have save many write instructions of CPU by using if condition.
Related
The snippet below is not returning the correct text. The code takes in a pointer to the root node of a Huffman code tree and a binary text, which it then converts. However, every time it returns a single letter repeated.
string decode(Node *root, string code) {
string d = ""; char c; Node *node = root;
for (int i = 0; i < code.size(); i++) {
node = (code[i] == '0') ? node->left_child : node->right_child;
if ((c = node->value) < 128) {
d += c;
node = root;
}
}
return d;
}
The code for the Node object:
class Node {
public:
Node(int i, Node *l = nullptr, Node *r = nullptr) {
value = i;
left_child = l;
right_child = r;
}
int value;
Node *left_child;
Node *right_child;
};
The code for building the tree:
Node* buildTree(vector<int> in, vector<int> post, int in_left, int in_right, int *post_index) {
Node *node = new Node(post[*post_index]);
(*post_index)--;
if (in_left == in_right) {
return node;
}
int in_index;
for (int i = in_left; i <= in_right; i++) {
if (in[i] == node->value) {
in_index = i;
break;
}
}
node->right_child = buildTree(in, post, in_index + 1, in_right, post_index);
node->left_child = buildTree(in, post, in_left, in_index - 1, post_index);
return node;
}
Example tree:
130
/ \
129 65
/ \
66 128
/ \
76 77
Example I/O:
Input: 101010010111
Output: A�A�A��A�AAA
The diamond characters are the numbers greater than 128.
You are putting the value in a char, which for most C++ compilers is signed. But not all -- whether char is signed or unsigned is implementation defined. A signed char is in the range –128 to 127, so it is always less than 128. (Your compiler should have warned you about that.)
You need to use int c; instead of char c; in decode(), and do d += (char)c;. Then your first code snippet will correctly return ALABAMA.
By the way, there needs to be an error check in decode(), verifying that you exit the loop with node equal to root. Otherwise, there were some bits provided that ended in the middle of a code, and so were not decoded to a symbol.
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 4 years ago.
Improve this question
When implementing a call stack trace for tracking allocation in my overridden new function, I am using ::malloc to create an untracked call stack object that is then put into a linked list. When my harness finishes new-ing off all of the test cases, the list is sound. However, when i go to report the list (print to console) there are now values that should not be there and are causing it to crash. Below is the simplified version (I apologize that even simplified it is still a lot of code), I am hoping someone can make since of this:
Macros
#define convertToKiB(size) size * 1024UL
#define convertToMiB(size) size * (1024UL * 1024UL)
#define convertToGiB(size) size * (1024UL * 1024UL * 1024UL)
#define convertToReadableBytes(size) ((uint32_t)size > convertToKiB(2) && (uint32_t)size < convertToMiB(2)) ? (float)size / (float)convertToKiB(1) : ((uint32_t)size > convertToMiB(2) && (uint32_t)size < convertToGiB(2)) ? (float)size / (float)convertToMiB(1) : ((uint32_t)size > convertToGiB(2)) ? (float)size / (float)convertToMiB(1) : (float)size
#define convertToReadableBytesString(size) ((uint32_t)size > convertToKiB(2) && (uint32_t)size < convertToMiB(2)) ? "KiB" : ((uint32_t)size > convertToMiB(2) && (uint32_t)size < convertToGiB(2)) ? "MiB" : ((uint32_t)size > convertToGiB(2)) ? "GiB" : "B"
Globals
const uint8_t MAX_FRAMES_PER_CALLSTACK = 128;
const uint16_t MAX_SYMBOL_NAME_LENGTH = 128;
const uint32_t MAX_FILENAME_LENGTH = 1024;
const uint16_t MAX_DEPTH = 128;
typedef BOOL(__stdcall *sym_initialize_t)(IN HANDLE hProcess, IN PSTR UserSearchPath, IN BOOL fInvadeProcess);
typedef BOOL(__stdcall *sym_cleanup_t)(IN HANDLE hProcess);
typedef BOOL(__stdcall *sym_from_addr_t)(IN HANDLE hProcess, IN DWORD64 Address, OUT PDWORD64 Displacement, OUT PSYMBOL_INFO Symbol);
typedef BOOL(__stdcall *sym_get_line_t)(IN HANDLE hProcess, IN DWORD64 dwAddr, OUT PDWORD pdwDisplacement, OUT PIMAGEHLP_LINE64 Symbol);
static HMODULE g_debug_help;
static HANDLE g_process;
static SYMBOL_INFO* g_symbol;
static sym_initialize_t g_sym_initialize;
static sym_cleanup_t g_sym_cleanup;
static sym_from_addr_t g_sym_from_addr;
static sym_get_line_t g_sym_get_line_from_addr_64;
static int g_callstack_count = 0;
static callstack_list* g_callstack_root = nullptr;
CallStack Object
struct callstack_line_t
{
char file_name[128];
char function_name[256];
uint32_t line;
uint32_t offset;
};
class CallStack
{
public:
CallStack();
uint32_t m_hash;
uint8_t m_frame_count;
void* m_frames[MAX_FRAMES_PER_CALLSTACK];
};
CallStack::CallStack()
: m_hash(0)
, m_frame_count(0) {}
bool CallstackSystemInit()
{
// Load the dll, similar to OpenGL function fecthing.
// This is where these functions will come from.
g_debug_help = LoadLibraryA("dbghelp.dll");
if (g_debug_help == nullptr) {
return false;
}
// Get pointers to the functions we want from the loded library.
g_sym_initialize = (sym_initialize_t)GetProcAddress(g_debug_help, "SymInitialize");
g_sym_cleanup = (sym_cleanup_t)GetProcAddress(g_debug_help, "SymCleanup");
g_sym_from_addr = (sym_from_addr_t)GetProcAddress(g_debug_help, "SymFromAddr");
g_sym_get_line_from_addr_64 = (sym_get_line_t)GetProcAddress(g_debug_help, "SymGetLineFromAddr64");
// Initialize the system using the current process [see MSDN for details]
g_process = ::GetCurrentProcess();
g_sym_initialize(g_process, NULL, TRUE);
// Preallocate some memory for loading symbol information.
g_symbol = (SYMBOL_INFO *) ::malloc(sizeof(SYMBOL_INFO) + (MAX_FILENAME_LENGTH * sizeof(char)));
g_symbol->MaxNameLen = MAX_FILENAME_LENGTH;
g_symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
return true;
}
void CallstackSystemDeinit()
{
// cleanup after ourselves
::free(g_symbol);
g_symbol = nullptr;
g_sym_cleanup(g_process);
FreeLibrary(g_debug_help);
g_debug_help = NULL;
}
// Can not be static - called when
// the callstack is freed.
void DestroyCallstack(CallStack *ptr)
{
::free(ptr);
}
CallStack* CreateCallstack(uint8_t skip_frames)
{
// Capture the callstack frames - uses a windows call
void *stack[MAX_DEPTH];
DWORD hash;
// skip_frames: number of frames to skip [starting at the top - so don't return the frames for "CreateCallstack" (+1), plus "skip_frame_" layers.
// max_frames to return
// memory to put this information into.
// out pointer to back trace hash.
uint32_t frames = CaptureStackBackTrace(1 + skip_frames, MAX_DEPTH, stack, &hash);
// create the callstack using an untracked allocation
CallStack *cs = (CallStack*) ::malloc(sizeof(CallStack));
// force call the constructor (new in-place)
cs = new (cs) CallStack();
// copy the frames to our callstack object
unsigned int frame_count = min(MAX_FRAMES_PER_CALLSTACK, frames);
cs->m_frame_count = frame_count;
::memcpy(cs->m_frames, stack, sizeof(void*) * frame_count);
cs->m_hash = hash;
return cs;
}
//------------------------------------------------------------------------
// Fills lines with human readable data for the given callstack
// Fills from top to bottom (top being most recently called, with each next one being the calling function of the previous)
//
// Additional features you can add;
// [ ] If a file exists in yoru src directory, clip the filename
// [ ] Be able to specify a list of function names which will cause this trace to stop.
uint16_t CallstackGetLines(callstack_line_t *line_buffer, const uint16_t max_lines, CallStack *cs)
{
IMAGEHLP_LINE64 line_info;
DWORD line_offset = 0; // Displacement from the beginning of the line
line_info.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
unsigned int count = min(max_lines, cs->m_frame_count);
unsigned int idx = 0;
for (unsigned int i = 0; i < count; ++i) {
callstack_line_t *line = &(line_buffer[idx]);
DWORD64 ptr = (DWORD64)(cs->m_frames[i]);
if (FALSE == g_sym_from_addr(g_process, ptr, 0, g_symbol)) {
continue;
}
strcpy_s(line->function_name, 256, g_symbol->Name);
BOOL bRet = g_sym_get_line_from_addr_64(
GetCurrentProcess(), // Process handle of the current process
ptr, // Address
&line_offset, // Displacement will be stored here by the function
&line_info); // File name / line information will be stored here
if (bRet)
{
line->line = line_info.LineNumber;
strcpy_s(line->file_name, 128, line_info.FileName);
line->offset = line_offset;
}
else {
// no information
line->line = 0;
line->offset = 0;
strcpy_s(line->file_name, 128, "N/A");
}
++idx;
}
return idx;
}
Operators
// Treat as Linked List Node
struct callstack_list
{
CallStack* current_stack = nullptr;
uint16_t total_allocation = 0;
callstack_list* next = nullptr;
};
struct allocation_meta
{
uint16_t size;
callstack_list callstack_node;
};
void* operator new(const size_t size)
{
uint16_t alloc_size = (uint16_t)size + (uint16_t)sizeof(allocation_meta);
allocation_meta *ptr = (allocation_meta*)::malloc((size_t)alloc_size);
ptr->size = (uint16_t)size;
ptr->callstack_node.current_stack = CreateCallstack(0);
ptr->callstack_node.total_allocation = (uint16_t)size;
ptr->callstack_node.next = nullptr;
bool run = true;
callstack_list* currentNode = nullptr;
while (g_callstack_root != nullptr && run)
{
if (currentNode == nullptr)
{
currentNode = g_callstack_root;
}
if (currentNode->next != nullptr)
{
currentNode = currentNode->next;
}
else
{
currentNode->next = &ptr->callstack_node;
run = false;
}
}
if (g_callstack_root == nullptr)
{
g_callstack_root = &ptr->callstack_node;
}
return ptr + 1;
}
void operator delete(void* ptr)
{
if (nullptr == ptr)
return;
allocation_meta *data = (allocation_meta*)ptr;
data--;
if (data->callstack_node.current_stack != nullptr)
DestroyCallstack(data->callstack_node.current_stack);
bool run = true;
callstack_list* currentNode = nullptr;
while (g_callstack_root != nullptr && run && &data->callstack_node != NULL)
{
if (currentNode == nullptr && g_callstack_root != &data->callstack_node)
{
currentNode = g_callstack_root;
}
else
{
g_callstack_root = nullptr;
run = false;
continue;
}
if (currentNode->next != nullptr && currentNode->next != &data->callstack_node)
{
currentNode = currentNode->next;
}
else
{
currentNode->next = nullptr;
run = false;
}
}
::free(data);
}
Test Harness
void ReportVerboseCallStacks(const char* start_time_str = "", const char* end_time_str = "")
{
callstack_list* currentNode = g_callstack_root;
unsigned int totalSimiliarAllocs = 0;
uint32_t totalSimiliarSize = 0;
while (currentNode != nullptr)
{
callstack_list* nextNode = currentNode->next;
uint32_t& currentHash = currentNode->current_stack->m_hash;
uint32_t nextHash;
if (nextNode == nullptr)
nextHash = currentHash + 1;
else
nextHash = nextNode->current_stack->m_hash;
if (nextHash == currentHash)
{
totalSimiliarSize += currentNode->total_allocation;
totalSimiliarAllocs++;
}
if (nextHash != currentHash)
{
//Print total allocs for type and total size
float reportedBytes = convertToReadableBytes(totalSimiliarSize);
std::string size = convertToReadableBytesString(totalSimiliarSize);
char collection_buffer[128];
sprintf_s(collection_buffer, 128, "\nGroup contained %s allocation(s), Total: %0.3f %s\n", std::to_string(totalSimiliarAllocs).c_str(), reportedBytes, size.c_str());
printf(collection_buffer);
//Reset total allocs and size
totalSimiliarAllocs = 0;
totalSimiliarSize = 0;
}
// Printing a call stack, happens when making report
char line_buffer[512];
callstack_line_t lines[128];
unsigned int line_count = CallstackGetLines(lines, 128, currentNode->current_stack);
for (unsigned int i = 0; i < line_count; ++i)
{
// this specific format will make it double click-able in an output window
// taking you to the offending line.
//Print Line For Call Stack
sprintf_s(line_buffer, 512, " %s(%u): %s\n", lines[i].file_name, lines[i].line, lines[i].function_name);
printf(line_buffer);
}
currentNode = currentNode->next;
}
}
void Pop64List(int64_t* arr[], int size)
{
for (int index = 0; index < size; ++index)
{
arr[index] = new int64_t;
*arr[index] = (int64_t)index;
}
}
void Pop8List(int8_t* arr[], int size)
{
for (int index = 0; index < size; ++index)
{
arr[index] = new int8_t;
*arr[index] = (int8_t)index;
}
}
int main()
{
if (!CallstackSystemInit())
return 1;
const int SIZE_64 = 8000;
int64_t* arr_64[SIZE_64];
const int SIZE_8 = 10000;
int8_t* arr_8[SIZE_8];
Pop64List(arr_64, SIZE_64);
Pop8List(arr_8, SIZE_8);
ReportVerboseCallStacks();
CallstackSystemDeinit();
return 0;
}
I finally figured out the answer. In my reporting function I was using std::string to create some of the reporting objects. std::string calls ::new internally to create a small allocation, and then hammers additional memory as the string's internal array reallocates memory. Switching to C-strings solved my problem.
I'm having a problem with the code attached below. Essentially it generates a huge memory leak but I can't see where it happens.
What the code does is receiving an array of strings, called prints, containing numbers (nodes) separated by ',' (ordered by desc number of nodes), finding other compatible prints (compatible means that the other string has no overlapping nodes 0 excluded because every print contains it) and when all nodes are covered it calculates a risk function on the basis of a weighted graph. In the end it retains the solution having the lowest risk.
The problem is that leak you see in the picture. I really can't get where it comes from.
Here's the code:
#include "Analyzer.h"
#define INFINITY 999999999
// functions prototypes
bool areFullyCompatible(int *, int, string);
bool contains(int *, int, int);
bool selectionComplete(int , int);
void extractNodes(string , int *, int &, int);
void addNodes(int *, int &, string);
Analyzer::Analyzer(Graph *graph, string *prints, int printsLen) {
this->graph = graph;
this->prints = prints;
this->printsLen = printsLen;
this->actualResult = new string[graph->nodesNum];
this->bestResult = new string[graph->nodesNum];
this->bestReSize = INFINITY;
this->bestRisk = INFINITY;
this-> actualSize = -1;
}
void Analyzer::getBestResult(int &size) {
for (int i = 0; i < bestReSize; i++)
cout << bestResult[i] << endl;
}
void Analyzer::analyze() {
// the number of selected paths is at most equal to the number of nodes
int maxSize = this->graph->nodesNum;
float totRisk;
int *actualNodes = new int[maxSize];
int nodesNum;
bool newCycle = true;
for (int i = 0; i < printsLen - 1; i++) {
for (int j = i + 1; j < printsLen; j++) {
// initializing the current selection
if (newCycle) {
newCycle = false;
nodesNum = 0;
extractNodes(prints[i], actualNodes, nodesNum, maxSize);
this->actualResult[0] = prints[i];
this->actualSize = 1;
}
// adding just fully compatible prints
if (areFullyCompatible(actualNodes, nodesNum, prints[j])) {
this->actualResult[actualSize] = prints[j];
actualSize++;
addNodes(actualNodes, nodesNum, prints[j]);
}
if (selectionComplete(nodesNum, maxSize)) {
// it means it's no more a possible best solution with the minimum number of paths
if (actualSize > bestReSize) {
break;
}
// calculating the risk associated to the current selection of prints
totRisk = calculateRisk();
// saving the best result
if (actualSize <= bestReSize && totRisk < bestRisk) {
bestReSize = actualSize;
bestRisk = totRisk;
for(int k=0;k<actualSize; k++)
bestResult[k] = actualResult[k];
}
}
}
newCycle = true;
}
}
float Analyzer::calculateRisk() {
float totRisk = 0;
int maxSize = graph->nodesNum;
int *nodes = new int[maxSize];
int nodesNum = 0;
for (int i = 0; i < actualSize; i++) {
extractNodes(this->actualResult[i], nodes, nodesNum, maxSize);
// now nodes containt all the nodes from the print but 0, so I add it (it's already counted but misses)
nodes[nodesNum-1] = 0;
// at this point I use the graph to calculate the risk
for (int i = 0; i < nodesNum - 1; i++) {
float add = this->graph->nodes[nodes[i]].edges[nodes[i+1]]->risk;
totRisk += this->graph->nodes[nodes[i]].edges[nodes[i+1]]->risk;
//cout << "connecting " << nodes[i] << " to " << nodes[i + 1] << " with risk " << add << endl;
}
}
delete nodes;
return totRisk;
}
// -------------- HELP FUNCTIONS--------------
bool areFullyCompatible(int *nodes, int nodesNum, string print) {
char *node;
char *dup;
int tmp;
bool flag = false;
dup = strdup(print.c_str());
node = strtok(dup, ",");
while (node != NULL && !flag)
{
tmp = atoi(node);
if (contains(nodes, nodesNum, tmp))
flag = true;
node = strtok(NULL, ",");
}
// flag signals whether an element in the print is already contained. If it is, there's no full compatibility
if (flag)
return false;
delete dup;
delete node;
return true;
}
// adds the new nodes to the list
void addNodes(int *nodes, int &nodesNum, string print) {
char *node;
char *dup;
int tmp;
// in this case I must add the new nodes to the list
dup = strdup(print.c_str());
node = strtok(dup, ",");
while (node != NULL)
{
tmp = atoi(node);
if (tmp != 0) {
nodes[nodesNum] = tmp;
nodesNum++;
}
node = strtok(NULL, ",");
}
delete dup;
delete node;
}
// verifies whether a node is already contained in the nodes list
bool contains(int *nodes, int nodesNum, int node) {
for (int i = 0; i < nodesNum; i++)
if (nodes[i] == node)
return true;
return false;
}
// verifies if there are no more nodes to be added to the list (0 excluded)
bool selectionComplete(int nodesNum, int maxSize) {
return nodesNum == (maxSize-1);
}
// extracts nodes from a print add adds them to the nodes list
void extractNodes(string print, int *nodes, int &nodesNum, int maxSize) {
char *node;
char *dup;
int idx = 0;
int tmp;
dup = strdup(print.c_str());
node = strtok(dup, ",");
while (node != NULL)
{
tmp = atoi(node);
// not adding 0 because every prints contains it
if (tmp != 0) {
nodes[idx] = tmp;
idx++;
}
node = strtok(NULL, ",");
}
delete dup;
delete node;
nodesNum = idx;
}
You have forgotten to delete several things and used the wrong form of delete for arrays where you have remembered, e.g.
float Analyzer::calculateRisk() {
float totRisk = 0;
int maxSize = graph->nodesNum;
int *nodes = new int[maxSize];
//...
delete [] nodes; //<------- DO THIS not delete nodes
The simplest solution is to avoid using raw pointers and use smart ones instead. Or a std::vector if you just want to store stuff somewhere to index into.
You have new without corresponding delete
this->actualResult = new string[graph->nodesNum];
this->bestResult = new string[graph->nodesNum];
These should be deleted somewhere using delete [] ...
You allocate actualNodes in analyze() but you don't release the memory anywhere:
int *actualNodes = new int[maxSize];
In Addition, Analyzer::bestResult and Analyzer::actualResult are allocated in the constructor of Analyzer but not deallocated anywhere.
this->actualResult = new string[graph->nodesNum];
this->bestResult = new string[graph->nodesNum];
If you must use pointers, I really suggest to use smart pointers, e.g. std::unique_ptr and/or std::shared_ptr when using C++11 or later, or a Boost equivalent when using C++03 or earlier. Otherwise, using containers, e.g. std::vector is preferred.
PS: You're code also has a lot of mismatches in terms of allocation and deallocation. If memory is allocated using alloc/calloc/strdup... it must be freed using free. If memory is allocated using operator new it must be allocated with operator delete. If memory is allocated using operator new[] it must be allocated with operator delete[]. And I guess you certainly should not delete the return value of strtok.
I'am facing a problem which should be solved using Aho-Corasick automaton. I'am given a set of words (composed with '0' or '1') - patterns and I must decide if it is possible to create infinite text, which wouldn't contain any of given patterns. I think, the solution is to create Aho-Corasick automaton and search for a cycle without matching states, but I'm not able to propose a good way to do that. I thought of searching the states graph using DFS, but I'm not sure if it will work and I have an implementation problem - let's assume, that we are in a state, which has an '1' edge - but state pointed by that edge is marked as matching - so we cannot use that edge, we can try fail link (current state doesn't have '0' edge) - but we must also remember, that we could not go with '1' edge from state pointed by fail link of the current one.
Could anyone correct me and show me how to do that? I've written Aho-Corasick in C++ and I'am sure it works - I also understand the entire algorithm.
Here is the base code:
class AhoCorasick
{
static const int ALPHABET_SIZE = 2;
struct State
{
State* edge[ALPHABET_SIZE];
State* fail;
State* longestMatchingSuffix;
//Vector used to remember which pattern matches in this state.
vector< int > matching;
short color;
State()
{
for(int i = 0; i < ALPHABET_SIZE; ++i)
edge[i] = 0;
color = 0;
}
~State()
{
for(int i = 0; i < ALPHABET_SIZE; ++i)
{
delete edge[i];
}
}
};
private:
State root;
vector< int > lenOfPattern;
bool isFailComputed;
//Helper function used to traverse state graph.
State* move(State* curr, char letter)
{
while(curr != &root && curr->edge[letter] == 0)
{
curr = curr->fail;
}
if(curr->edge[letter] != 0)
curr = curr->edge[letter];
return curr;
}
//Function which computes fail links and longestMatchingSuffix.
void computeFailLink()
{
queue< State* > Q;
root.fail = root.longestMatchingSuffix = 0;
for(int i = 0; i < ALPHABET_SIZE; ++i)
{
if(root.edge[i] != 0)
{
Q.push(root.edge[i]);
root.edge[i]->fail = &root;
}
}
while(!Q.empty())
{
State* curr = Q.front();
Q.pop();
if(!curr->fail->matching.empty())
{
curr->longestMatchingSuffix = curr->fail;
}
else
{
curr->longestMatchingSuffix = curr->fail->longestMatchingSuffix;
}
for(int i = 0; i < ALPHABET_SIZE; ++i)
{
if(curr->edge[i] != 0)
{
Q.push(curr->edge[i]);
State* state = curr->fail;
state = move(state, i);
curr->edge[i]->fail = state;
}
}
}
isFailComputed = true;
}
public:
AhoCorasick()
{
isFailComputed = false;
}
//Add pattern to automaton.
//pattern - pointer to pattern, which will be added
//fun - function which will be used to transform character to 0-based index.
void addPattern(const char* const pattern, int (*fun) (const char *))
{
isFailComputed = false;
int len = strlen(pattern);
State* curr = &root;
const char* pat = pattern;
for(; *pat; ++pat)
{
char tmpPat = fun(pat);
if(curr->edge[tmpPat] == 0)
{
curr = curr->edge[tmpPat] = new State;
}
else
{
curr = curr->edge[tmpPat];
}
}
lenOfPattern.push_back(len);
curr->matching.push_back(lenOfPattern.size() - 1);
}
};
int alphabet01(const char * c)
{
return *c - '0';
}
I didn't look through your code, but I know very simple and efficient implementation.
First of all, lets add Dictionary Suffix Links to the tree (their description you can find in Wikipedia). Then you have to look through all your tree and somehow mark matching nodes and nodes that have Dict Suffix Links as bad nodes. The explanation of these actions is obvious: you don't need all the matching nodes, or nodes that have a matching suffix in them.
Now you have an Aho-Corasick tree without any matching nodes. If you just run DFS algo on the resulting tree, you will get what you want.
I was trying to implement exponential tree from documentation, but here is one place in the code which is not clear for me how to implement it:
#include<iostream>
using namespace std;
struct node
{
int level;
int count;
node **child;
int data[];
};
int binary_search(node *ptr,int element)
{
if(element>ptr->data[ptr->count-1]) return ptr->count;
int start=0;
int end=ptr->count-1;
int mid=start+(end-start)/2;
while(start<end)
{
if(element>ptr->data[mid]) { start=mid+1;}
else
{
end=mid;
}
mid=start+(end-start)/2;
}
return mid;
}
void insert(node *root,int element)
{
node *ptr=root,*parent=NULL;
int i=0;
while(ptr!=NULL)
{
int level=ptr->level,count=ptr->count;
i=binary_search(ptr,element);
if(count<level){
for(int j=count;j<=i-1;j--)
ptr->data[j]=ptr->data[j-1];
}
ptr->data[i]=element;
ptr->count=count+1;
return ;
}
parent=ptr,ptr=ptr->child[i];
//Create a new Exponential Node at ith child of parent and
//insert element in that
return ;
}
int main()
{
return 0;
}
Here is a link for the paper I'm referring to:
http://www.ijcaonline.org/volume24/number3/pxc3873876.pdf
This place is in comment, how can I create a new exponential node at level i? Like this?
parent->child[i]=new node;
insert(parent,element);
The presence of the empty array at the end of the structure indicates this is C style code rather than C++ (it's a C Hack for flexible arrays). I'll continue with C style code as idiomatic C++ code would prefer use of standard containers for the child and data members.
Some notes and comments on the following code:
There were a number of issues with the pseudo-code in the linked paper to a point where it is better to ignore it and develop the code from scratch. The indentation levels are unclear where loops end, all the loop indexes are not correct, the check for finding an insertion point is incorrect, etc....
I didn't include any code for deleting the allocated memory so the code will leak as is.
Zero-sized arrays may not be supported by all compilers (I believe it is a C99 feature). For example VS2010 gives me warning C4200 saying it will not generate the default copy/assignment methods.
I added the createNode() function which gives the answer to your original question of how to allocate a node at a given level.
A very basic test was added and appears to work but more thorough tests are needed before I would be comfortable with the code.
Besides the incorrect pseudo-code the paper has a number of other errors or at least questionable content. For example, concerning Figure 2 it says "which clearly depicts that the slope of graph is linear" where as the graph is clearly not linear. Even if the author meant "approaching linear" it is at least stretching the truth. I would also be interested in the set of integers they used for testing which doesn't appear to be mentioned at all. I assumed they used a random set but I would like to see at least several sets of random numbers used as well as several predefined sets such as an already sorted or inversely sorted set.
.
int binary_search(node *ptr, int element)
{
if (ptr->count == 0) return 0;
if (element > ptr->data[ptr->count-1]) return ptr->count;
int start = 0;
int end = ptr->count - 1;
int mid = start + (end - start)/2;
while (start < end)
{
if (element > ptr->data[mid])
start = mid + 1;
else
end = mid;
mid = start + (end - start)/2;
}
return mid;
}
node* createNode (const int level)
{
if (level <= 0) return NULL;
/* Allocate node with 2**(level-1) integers */
node* pNewNode = (node *) malloc(sizeof(node) + sizeof(int)*(1 << (level - 1)));
memset(pNewNode->data, 0, sizeof(int) * (1 << (level - 1 )));
/* Allocate 2**level child node pointers */
pNewNode->child = (node **) malloc(sizeof(node *)* (1 << level));
memset(pNewNode->child, 0, sizeof(int) * (1 << level));
pNewNode->count = 0;
pNewNode->level = level;
return pNewNode;
}
void insert(node *root, int element)
{
node *ptr = root;
node *parent = NULL;
int i = 0;
while (ptr != NULL)
{
int level = ptr->level;
int count = ptr->count;
i = binary_search(ptr, element);
if (count < (1 << (level-1)))
{
for(int j = count; j >= i+1; --j)
ptr->data[j] = ptr->data[j-1];
ptr->data[i] = element;
++ptr->count;
return;
}
parent = ptr;
ptr = ptr->child[i];
}
parent->child[i] = createNode(parent->level + 1);
insert(parent->child[i], element);
}
void InOrderTrace(node *root)
{
if (root == NULL) return;
for (int i = 0; i < root->count; ++i)
{
if (root->child[i]) InOrderTrace(root->child[i]);
printf ("%d\n", root->data[i]);
}
if (root->child[root->count]) InOrderTrace(root->child[root->count]);
}
void testdata (void)
{
node* pRoot = createNode(1);
for (int i = 0; i < 10000; ++i)
{
insert(pRoot, rand());
}
InOrderTrace(pRoot);
}