I used the library streed2006.cpp from source. The code has memory leak in deletion of edges. I cleared the number of edges from hashtable using the following code:
//throwing away the edges from hashtable
for(int t=0;t<HASH_TABLE_SIZE;t++)
{
Edges[t].Remove();
Edges[t].start_node == -1
}
valgrind output:
3,920 bytes in 245 blocks are definitely lost in loss record 9 of 12
==6301== at 0x4029F34: operator new(unsigned int) (in /usr/lib/valgrind /vgpreload_memcheck-x86-linux.so)
==6301== by 0x804A683: Edge::SplitEdge(Suffix&) (suffix_tree.cpp:555)
==6301== by 0x804B02F: AddPrefix(Suffix&, int) (suffix_tree.cpp:753)
Please guide me how to delete the edges.
was able to remove the memory leak. Following is the solution:
void AddPrefix( Suffix &active, int last_char_index )
{
int parent_node;
int last_parent_node = -1;
for ( ; ; ) {
Edge edge;
parent_node = active.origin_node;
if ( active.Explicit() ) {
edge = Edge::Find( active.origin_node, T[ last_char_index ] );
if ( edge.start_node != -1 )
break;
} else { //implicit node, a little more complicated
edge = Edge::Find( active.origin_node, T[ active.first_char_index ] );
int span = active.last_char_index - active.first_char_index;
if ( T[ edge.first_char_index + span + 1 ] == T[ last_char_index ] )
break;
parent_node = edge.SplitEdge( active );
}
Edge *new_edge = new Edge( last_char_index, T.N, parent_node );
new_edge->Insert();
//cout << "Created edge to new leaf: " << *new_edge << "\n";
AddSuffixLink( last_parent_node, parent_node );
if ( active.origin_node == 0 ) {
//cout << "Can't follow suffix link, I'm at the root\n";
active.first_char_index++;
} else {
/*
cout << "Following suffix link from node "
<< active.origin_node
<< " to node "
<< Suffix_Nodes[ active.origin_node ].suffix_node
<< ".\n";
*/
active.origin_node = Suffix_Nodes[ active.origin_node ].suffix_node;
//cout << "New prefix : " << active << "\n";
}
active.Canonize();
delete(new_edge);
new_edge = NULL;
}
AddSuffixLink( last_parent_node, parent_node );
active.last_char_index++; //Now the endpoint is the next active point
active.Canonize();
};
and
int Edge::SplitEdge( Suffix &s )
{
//cout << "Splitting edge: " << *this << "\n";
Remove();
Edge *new_edge =
new Edge( first_char_index,
first_char_index + s.last_char_index - s.first_char_index,
s.origin_node );
new_edge->Insert();
Suffix_Nodes[ new_edge->end_node ].suffix_node = s.origin_node;
first_char_index += s.last_char_index - s.first_char_index + 1;
start_node = new_edge->end_node;
Insert();
//cout << "New edge: " << *new_edge << "\n";
//cout << "Old edge: " << *this << "\n";
delete(new_edge);
//return new_edge->end_node;
return(start_node);
}
void AddPrefix( Suffix &active, int last_char_index )
{
int parent_node;
int last_parent_node = -1;
for ( ; ; ) {
Edge edge;
parent_node = active.origin_node;
if ( active.Explicit() ) {
edge = Edge::Find( active.origin_node, T[ last_char_index ] );
if ( edge.start_node != -1 )
break;
} else { //implicit node, a little more complicated
edge = Edge::Find( active.origin_node, T[ active.first_char_index ] );
int span = active.last_char_index - active.first_char_index;
if ( T[ edge.first_char_index + span + 1 ] == T[ last_char_index ] )
break;
parent_node = edge.SplitEdge( active );
}
Edge *new_edge = new Edge( last_char_index, T.N, parent_node );
new_edge->Insert();
//cout << "Created edge to new leaf: " << *new_edge << "\n";
AddSuffixLink( last_parent_node, parent_node );
if ( active.origin_node == 0 ) {
//cout << "Can't follow suffix link, I'm at the root\n";
active.first_char_index++;
} else {
/*
cout << "Following suffix link from node "
<< active.origin_node
<< " to node "
<< Suffix_Nodes[ active.origin_node ].suffix_node
<< ".\n";
*/
active.origin_node = Suffix_Nodes[ active.origin_node ].suffix_node;
//cout << "New prefix : " << active << "\n";
}
active.Canonize();
//ADDED THIS DELETE HERE
delete(new_edge);
new_edge = NULL;
}
AddSuffixLink( last_parent_node, parent_node );
active.last_char_index++; //Now the endpoint is the next active point
active.Canonize();
};
and
int Edge::SplitEdge( Suffix &s )
{
//cout << "Splitting edge: " << *this << "\n";
Remove();
Edge *new_edge =
new Edge( first_char_index,
first_char_index + s.last_char_index - s.first_char_index,
s.origin_node );
new_edge->Insert();
Suffix_Nodes[ new_edge->end_node ].suffix_node = s.origin_node;
first_char_index += s.last_char_index - s.first_char_index + 1;
start_node = new_edge->end_node;
Insert();
//cout << "New edge: " << *new_edge << "\n";
//cout << "Old edge: " << *this << "\n";
//ADDED THIS DELETE HERE
delete(new_edge);
//return new_edge->end_node;
return(start_node);
}
Related
all!
I request a JSON from TMDB and save it as a file on the local harddisk. Then I read in the file and decode it with nlohmann::json. The next step is to iterate the json data and extract a few parts of the info. While it is no problem getting types „string“, „boolean“ etc. I’m struggling with an „array“ type. (Later on the type „object“ might show same problems…) Goal is to transform the json data into some „ini“ style type like
[tt1234567]
title = abcdefghij
runtime = 123
...
I iterate through the root of the decoded json by:
using json = nlohmann::json;
{
auto jsonData = json::parse( jsonText );
// std::cout << jsonData.dump( 1 ) << "\n";
for ( const auto &jsonItem : jsonData.items() )
{
jsonKey = jsonItem.key();
jsonValue = "";
if ( jsonItem.value().is_null() ) { jsonValue = "(null)"; }
else if ( jsonItem.value().is_boolean() ) { if ( jsonItem.value() ) { jsonValue = "(boolean) yes"; } else { jsonValue = "(boolean) no"; } }
else if ( jsonItem.value().is_string() ) { jsonValue = "(string) '" + string_left( jsonItem.value(), 25 ) + "'"; }
[ . . . ]
std::cout << jsonKey << ": " << jsonValue << "\n";
Screen output is like:
adult: (boolean) no
belongs_to_collection: (null)
budget: (unsigned) 45000000
credits: (object)
genres: (array) [ . . . ]
[ et al ]
My problem is that I don’t know the correct syntax to handle the „array“ type and, in fact, I’m not quite sure if it is really an array despite the fact it is enclosed in []. The code block
else if ( jsonItem.key() == "genres" ) // array
{
std::cout << " jsonItem: " << jsonItem << "\n"; // {"genres":[{"id":12,"name":"Abenteuer"},{"id":28,"name":"Action"}]}
jsonKey = jsonItem.key();
std::cout << " jsonKey: " << jsonKey << "\n"; // genres
// jsonValue = jsonItem.value(); // <-- returns array, but jsonValue expects string
// std::cout << " jsonValue: " << jsonValue << "\n";
auto jsonValueArray = jsonItem.value().array();
std::cout << " jsonValueArray: " << jsonValueArray << " (" << sizeof( jsonValueArray ) << ")\n"; // [] (16)
auto jsonValueFlat = jsonItem.value().flatten();
std::cout << " jsonValueFlat: " << jsonValueFlat << "\n"; // {"/0/id":12,"/0/name":"Abenteuer","/1/id":28,"/1/name":"Action"}
std::cout << " " << jsonKey << " elements: " << jsonValueArray.size() << "\n"; // 0
i = 0;
// for ( const auto &jsonValue : jsonValueArray )
// for ( i = jsonValueArray.begin(); i < jsonValueArray.end(); i++ )
for ( i = 0; i < jsonValueArray.size(); i++ )
{
std::cout << jsonValue << "\n";
iniKey = "Genre" + std::to_string( i );
iniValue = "";
iniValue = jsonValue;
iniText.append( iniKey );
iniText.append( " = " );
iniText.append( iniValue );
iniText.append( "\n" );
// i++;
}
}
produces
jsonItem: {"genres":[{"id":12,"name":"Abenteuer"},{"id":28,"name":"Action"}]}
jsonKey: genres
jsonValueArray: [] (16)
jsonValueFlat: {"/0/id":12,"/0/name":"Abenteuer","/1/id":28,"/1/name":"Action"}
genres elements: 0
So I see a jsonItem with „genres: [xxx]“ content, thus it is identified as an array. The „sizeof“ returns 16 and I interpret it as 4 pointer with 4 bytes each (or 2 with 8 bytes?). On the other hand the array() function seems to return an empty array [] with 0 elements. And now I’m stuck…
What I want to achieve: Extracting the „genres“ list from the json and concatenate the elements with „;“ like
genres = Abenteuer;Action
in the above exampe.
Michael
Ok, I figured it out. The central point was the misunderstanding of .array() from the JSON object. After fiddling around a bit the following branch
else if ( jsonItem.key() == "genres" ) // array
{
std::cout << " jsonItem: " << jsonItem << "\n"; // {"genres":[{"id":12,"name":"Abenteuer"},{"id":28,"name":"Action"}]}
jsonKey = jsonItem.key();
std::cout << " jsonKey: " << jsonKey << "\n"; // genres
auto jsonValueArray = jsonItem.value(); // [{"id":12,"name":"Abenteuer"},{"id":28,"name":"Action"}] (16)
std::cout << " jsonValueArray: " << jsonValueArray << " (" << sizeof( jsonValueArray ) << ")\n"; // [] (16)
i = 0;
for ( const auto &jsonValue : jsonValueArray )
{
iniKey = "Genres";
iniValue = jsonValue["name"];
std::cout << " " << jsonValue << " --> key:'" << iniKey << "', value:'" << iniValue << "'\n";
if ( i == 0 )
{
iniText.append( iniKey );
iniText.append( " = " );
}
else
{
iniText.append( ";" );
}
iniText.append( iniValue );
i++;
}
iniText.append( "\n" );
std::cout << " genres added: " << std::to_string( i ) << "\n";
}
now produces
genres: (array) [ . . . ]
jsonItem: {"genres":[{"id":12,"name":"Abenteuer"},{"id":28,"name":"Action"}]}
jsonKey: genres
jsonValueArray: [{"id":12,"name":"Abenteuer"},{"id":28,"name":"Action"}] (16)
{"id":12,"name":"Abenteuer"} --> key:'Genres', value:'Abenteuer'
{"id":28,"name":"Action"} --> key:'Genres', value:'Action'
genres added: 2
resp.
[tt0446013]
adult = no
Genres = Abenteuer;Action
ID_TMDB = 1534
ID_IMDB = tt0446013
Title_Original = Pathfinder
Overview = An sich stammt der junge
Release_Date = 2007-01-11
Runtime = 99
Tagline = Zwei Welten , ein Krieger
Title = Pathfinder - Fährte des
and that's the output I wanted to have.
Everything is working good instead I want to get top 3 students by their marks to be displayed but I am getting top 3 students of the stack. I am a beginner. Please help
Add some students (5 or 6) and their marks.
when I click 4, it want it to display top 3 students by marks.
I think I am missing some part where I need to compare marks of the students but having some trouble.
#include <iostream>
#include <string>
using namespace std;
static std::size_t sizeOfStack { 0 };
class Student
{
public:
string name;
int marks;
Student* nextStudent;
void setName( string setn )
{
name = setn;
}
string getName( )
{
return name;
}
void setMarks( int setm )
{
marks = setm;
}
int getMarks( )
{
return marks;
}
void setNextStudent( Student* setAddress )
{
nextStudent = setAddress;
}
Student* getNextStudent()
{
return nextStudent;
}
};
class Stack
{
public:
Student* headStudent { NULL };
bool isEmpty( )
{
if ( headStudent == NULL )
{
cout << '\n' << "Stack is Empty" << '\n';
return true;
}
else
{
return false;
}
}
void push( )
{
string studentName;
int marks;
Student* newNode = new Student;
cout << "Enter the name of Student: " << '\n';
cin >> studentName;
newNode->setName( studentName );
cout << "Enter the Marks of Student: " << '\n';
cin >> marks;
newNode->setMarks( marks );
newNode->setNextStudent( NULL );
if ( headStudent == NULL )
{
headStudent = newNode;
}
else
{
Student* ptr = headStudent;
while ( ptr->getNextStudent( ) != NULL )
{
ptr = ptr->getNextStudent();
}
ptr->setNextStudent( newNode );
}
cout << '\n' << "Student Data saved successfully" << '\n';
++sizeOfStack;
}
void pop( )
{
if ( !isEmpty( ) )
{
Student* pre = headStudent;
Student* ptr = headStudent;
while ( ptr->getNextStudent() != NULL )
{
pre = ptr;
ptr = ptr->getNextStudent( );
}
if ( ptr == headStudent )
{
headStudent = NULL;
}
else
{
pre->setNextStudent( NULL );
}
delete ptr;
--sizeOfStack;
cout << '\n' << "Student Data Remove Successfully from database" << '\n';
}
}
void display( )
{
if ( !isEmpty( ) )
{
string studentNames[ sizeOfStack ] { };
int studentMarks[ sizeOfStack ] { };
int i { };
int j { };
cout << "*************************" << '\n';
cout << "The Data of All students: " << '\n';
cout << "Name\t\tMarks"<< '\n';
Student* ptr = headStudent;
while ( ptr != NULL )
{
studentNames[i++] = ptr->getName( );
studentMarks[j++] = ptr->getMarks( );
ptr = ptr->getNextStudent( );
}
for ( std::size_t idx = sizeOfStack - 1; idx >= 0; --idx )
{
cout<< studentNames[ idx ] << "\t\t" << studentMarks[ idx ] <<'\n';
}
cout << "*************************" << '\n';
}
}
void shortStack( )
{
for ( std::size_t i = 0; i < sizeOfStack; ++i )
{
Student* ptr = headStudent;
for ( std::size_t j = 0; j < sizeOfStack - 1; ++j )
{
string tmp;
if ( ptr->getMarks( ) < ptr->getNextStudent( )->getMarks( ) )
{
// ptr->setMarks(ptr->getMarks() + ptr->getNextStudent() );
// some statement Goes here
}
ptr = ptr->getNextStudent( );
}
}
}
void top( )
{
if ( !isEmpty( ) )
{
// shortStack();
cout << "\n\n" << "Top Positions:" << '\n';
cout << '\n' << "Name\t\tMarks";
Student* ptr = headStudent;
for( std::size_t idx = 0; idx < 3; ++idx )
{
cout << '\n' << ptr->getName( ) << "\t\t" << ptr->getMarks( );
ptr = ptr->getNextStudent( );
}
cout << "\n\n";
}
}
};
int main( )
{
Stack s;
while ( true )
{
int choice { };
cout << '\n' << "*************************************************" << '\n';
cout << "1. To Add a student in the stack" << '\n';
cout << "2. To Remove a student from stack" << '\n';
cout << "3. Display all students of stack" << '\n';
cout << "4. Display top 3 students of stack" << '\n';
cout << "5. Press 5 or any other key to close the program" << '\n';
cout << "*************************************************" << '\n';
cout << "Enter your choice (1, 2, 3, 4, 5):" << '\n';
cin >> choice;
switch ( choice )
{
case 1:
s.push( );
break;
case 2:
s.pop( );
break;
case 3:
s.display( );
break;
case 4:
s.top( );
break;
default:
exit( 0 );
break;
}
}
return 0;
}
First problem that i encountered within your code is in your display function:
for ( std::size_t idx = sizeOfStack - 1; idx >= 0; --idx )
This for statement can be interpreted as "Decrement idx until it becomes less than 0". Unfortunately this can never be the case because of the type of your idx variable.
std::size_t is defined for unsigned values so if you decrement it further it becomes a very large number rather than being negative, which ends up as a segmentation fault in your code.
Following lines might be useful to overcome that issue if you really need to use std::size_t.
for ( std::size_t idx = sizeOfStack; idx > 0; --idx )
{
cout << studentNames[ idx-1 ] << "\t\t" << studentMarks[ idx-1 ] <<'\n';
}
Secondly, sorting the elements in your stack can help with the problem that you stated in your post. That way you will be able to print the top 3 marks easily.
Here's the sorting algorithm that i can provide for you:
void sortStack( )
{
Student* ptr = headStudent;
Student std_array[sizeOfStack];
// copy the elements in the stack
for(std::size_t i = 0; i < sizeOfStack; i++)
{
std_array[i] = *ptr;
ptr = ptr->getNextStudent();
}
// sort elements of the array
for(std::size_t j = 0; j < sizeOfStack-1; j++)
{
for(std::size_t k = j+1; k < sizeOfStack; k++)
{
if(std_array[j].getMarks() < std_array[k].getMarks())
{
Student tmpStd = std_array[j];
std_array[j] = std_array[k];
std_array[k] = tmpStd;
}
}
}
// update the stack according to the sorted values
ptr = headStudent;
for(std::size_t l = 0; l < sizeOfStack; l++)
{
ptr->setName(std_array[l].getName());
ptr->setMarks(std_array[l].getMarks());
ptr = ptr->getNextStudent();
}
}
You are doing virtual university assignment I think. In this task instructor told us to use only stack using link list. No need of array implementation will be considered !!!!1
I'm relatively new programming c++. I'm implementing a tree like index for a db using unorderd_map on the implementation of the tree data structure to store the children nodes. As im working with tree like structures the construction an search methods are recursive, also i store the pointers of the nodes, so i suspect i may have a sort of not well handled memory issue. I'm getting a segmentation fault. Next is my code and the output of it.
#include <memory>
#include <sstream>
#include <unordered_map>
#include <iostream>
#include <string>
#include <sqlite3.h>
#include "aux_functions.cpp"
#include <math.h>
using namespace std;
class TreeLikeIndex
{
public:
TreeLikeIndex(string attribute, string indices, int indices_count, short int is_leaf, unordered_map<string, TreeLikeIndex*> children);
TreeLikeIndex(string indices, int indices_count);
TreeLikeIndex();
string search(unordered_map<string, string> *);
private:
string indices;
int indices_count;
short int is_leaf;
string attribute;
unordered_map<string, TreeLikeIndex*> children;
};
string TreeLikeIndex::search(unordered_map<string, string> * _tuple)
{
if((*_tuple).empty() || this->is_leaf ) return this->indices;
string att_val = (*_tuple)[this->attribute];
(*_tuple).erase(this->attribute);
TreeLikeIndex * child_with_that_value = this->children[att_val];
return (*child_with_that_value).search(_tuple);
}
class DecisionTreeLikeIndexer
{
public:
DecisionTreeLikeIndexer(string, string, string);
int rebuild_index();
TreeLikeIndex * get_index();
private:
TreeLikeIndex * build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count);
TreeLikeIndex * index;
string source_db_address;
string dest_folder_address;
time_t time_of_last_build;
unordered_set<string> columns_names;
string source_table_name;
unordered_set<string> temp_tables_names;
string id_column_name;
sqlite3 * source_db_connection;
int table_count;
};
int DecisionTreeLikeIndexer::rebuild_index()
{
this->index = this->build_index(this->columns_names, 0, "", 0);
this->time_of_last_build = time(NULL);
return 0;
}
TreeLikeIndex * DecisionTreeLikeIndexer::get_index()
{
return this->index;
}
DecisionTreeLikeIndexer::DecisionTreeLikeIndexer(string source_db_address, string table_name, string dest_folder_address)
{
this->source_db_address = source_db_address;
this->dest_folder_address = dest_folder_address;
this->columns_names = Aux::get_column_names(source_db_address, table_name);
this->source_table_name = table_name;
this->id_column_name = "rowid";
this->source_db_connection = Aux::get_db_connection(this->source_db_address);
// Getting count of this table
sqlite3_stmt* statement;
string query = "SELECT count(*) FROM " + this->source_table_name + ";";
if(sqlite3_prepare(this->source_db_connection, query.c_str(), -1, &statement, 0) == SQLITE_OK)
{
int res = sqlite3_step(statement);
const unsigned char * count_char = sqlite3_column_text(statement,0);
if(res == SQLITE_ROW)
{
stringstream _temp;
_temp << count_char;
_temp >> this->table_count;
}
sqlite3_finalize(statement);
}
else
{
cout << "Error initializating Indexer (Getting initial table count): " << sqlite3_errmsg(this->source_db_connection) << endl;
}
}
TreeLikeIndex * DecisionTreeLikeIndexer::build_index(unordered_set<string> attributes_list, int depth, string comma_separated_ids, int ids_list_count)
{
if( attributes_list.size() <=1 || (depth > 0 && ids_list_count <= 1))
{
Aux::tabs(depth);
cout << "Leaf at depth: " << depth << " Ids are: " << comma_separated_ids << " Ids count: " << ids_list_count << endl;
static TreeLikeIndex * node = new TreeLikeIndex((string)comma_separated_ids, (int)ids_list_count);
return node;
}
string source_table = this->source_table_name;
int count = this->table_count;
if(depth > 0)
{
while(1)
{
source_table = *Aux::get_random_list_of_strings(1).begin();
if(this->temp_tables_names.insert(source_table).second) break;
}
const string create_temp_table_stmnt = "CREATE TEMP TABLE " + source_table + " AS SELECT * FROM " + this->source_table_name + " WHERE " + this->id_column_name + " IN(" + comma_separated_ids + ")";
sqlite3_exec(this->source_db_connection, create_temp_table_stmnt.c_str(),Aux::sqlt_callback,0,NULL);
count = ids_list_count;
Aux::tabs(depth);
cout << "Not root node" << endl;
}
Aux::tabs(depth);
cout << "Source table is: " << source_table << " Table count is: " << count << endl;
Aux::tabs(depth);
cout << "Attributes list is: "; for_each(attributes_list.begin(), attributes_list.end(),[](string v){cout << v << " ";});
cout << endl;
const double E = log2(count) ;
Aux::tabs(depth);
cout << "Entropy of node: " << E << endl;
string best_attribute;
double best_gain;
unordered_set<string> best_attribute_values;
for(string attr: attributes_list)
{
Aux::tabs(depth+1);
cout << "Analysing attribute: " << attr << endl;
const string get_at_count_values_query = "SELECT " + attr + ", count(" + attr + ") FROM " + source_table + " GROUP BY " + attr + ";";
sqlite3_stmt * stmnt;
double weighted_entropy = 0;
unordered_set<string> this_att_values;
if(sqlite3_prepare(this->source_db_connection, get_at_count_values_query.c_str(), -1, &stmnt, 0) == SQLITE_OK)
{
for(;;)
{
int res = sqlite3_step(stmnt);
if(res == SQLITE_DONE || res==SQLITE_ERROR)
{
double gAti = E - weighted_entropy;
Aux::tabs(depth+1);
cout << "Finish computing WE for att: " << attr << " Gain is: " << gAti << endl;
if(gAti > best_gain)
{
Aux::tabs(depth+1);
cout << "Found attribute with better gain." << endl;
best_gain = gAti;
best_attribute = attr;
best_attribute_values.clear();
Aux::tabs(depth+1);
for(string v:this_att_values)
{
best_attribute_values.insert(v);
}
cout << endl;
this_att_values.clear();
}
sqlite3_finalize(stmnt);
//delete &res;
break;
}
if(res == SQLITE_ROW)
{
string val = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
int vSize = sqlite3_column_int(stmnt,1);
Aux::tabs(depth+2);
this_att_values.insert(val);
double ratio = double(vSize) / double(count);
weighted_entropy += double(ratio) * double(log2(vSize));
Aux::tabs(depth+2);
cout << "Processing value: " << val << " With vSize: " << vSize << " Current WE is: " << weighted_entropy << endl;
}
}
}
}
Aux::tabs(depth);
cout << "Finish processing attributes list. Best attribute is: " << best_attribute << " Best gain is: " << best_gain << endl;
Aux::tabs(depth);
cout << "Best attribute values are: "; for_each(best_attribute_values.begin(), best_attribute_values.end(), [](string v){cout << v << ",";}); cout << endl;
unordered_map<string, TreeLikeIndex *> children;
for(string val: best_attribute_values)
{
const string get_ids_of_bestatt_val = "SELECT rowid FROM " + source_table + " WHERE " + best_attribute + " = " + val + ";";
int ids_count = 0;
sqlite3_stmt * stmnt;
string ids = "";
bool first = 1;
int next_depth = depth + 1;
unordered_set<string> next_attributes_set;
for(string attr: attributes_list) if(attr != best_attribute) next_attributes_set.insert(attr);
if(sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK)
{
for(;;)
{
int res = sqlite3_step(stmnt);
if(res == SQLITE_ROW)
{
string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
if(!first) ids += "," + id;
else ids += id;
ids_count++;
}
if(res == SQLITE_DONE || res == SQLITE_ERROR)
{
Aux::tabs(depth+1);
cout << "Adding branch for val: " << val << endl;
Aux::tabs(depth+1);
cout << " Next attributes are: "; for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";});
cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl;
sqlite3_finalize(stmnt);
static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count);
pair<string, TreeLikeIndex*> child (val, temp_child);
children.insert(child);
}
}
}
}
Aux::tabs(depth);
cout << "Finish processing node, will return." << endl;
static TreeLikeIndex * no_leaf_node = new TreeLikeIndex(best_attribute, "all", count, 0, children);
return no_leaf_node;
}
}
TreeLikeIndex::TreeLikeIndex(std::string attribute, std::string indices, int indices_count, short int is_leaf, unordered_map<std::string, TreeLikeIndex*> children)
{
this->attribute = attribute;
this->indices = indices;
this->is_leaf = is_leaf;
this->children = children;
this->children.clear();
for(pair<string, TreeLikeIndex*> p: children) this->children.insert(p);
this->indices_count = indices_count;
}
TreeLikeIndex::TreeLikeIndex(string indices, int indices_count)
{
this->indices = indices;
this->indices_count = indices_count;
this->is_leaf = 1;
}
TreeLikeIndex::TreeLikeIndex()
{
this->indices = "";
this->indices_count = 0;
this->is_leaf = 1;
}
int main()
{
string source_db_address = "my_table";
string table_name = "b";
string dest_folder_address = ".";
DecisionTreeLikeIndexer indexer(source_db_address, table_name, dest_folder_address);
indexer.rebuild_index();
}
And the output is:
Source table is: b Table count is: 9
Attributes list is: cant_n_dec cant_n_des cant_n_control
Entropy of node: 3.16993
Analysing attribute: cant_n_dec
Processing value: 1 With vSize: 1 Current WE is: 0
Processing value: 2 With vSize: 4 Current WE is: 0.888889
Processing value: 3 With vSize: 2 Current WE is: 1.11111
Processing value: 4 With vSize: 1 Current WE is: 1.11111
Processing value: 5 With vSize: 1 Current WE is: 1.11111
Finish computing WE for att: cant_n_dec Gain is: 2.05881
Found attribute with better gain.
Analysing attribute: cant_n_des
Processing value: 1 With vSize: 2 Current WE is: 0.222222
Processing value: 2 With vSize: 4 Current WE is: 1.11111
Processing value: 3 With vSize: 2 Current WE is: 1.33333
Processing value: 5 With vSize: 1 Current WE is: 1.33333
Finish computing WE for att: cant_n_des Gain is: 1.83659
Analysing attribute: cant_n_control
Processing value: 1 With vSize: 2 Current WE is: 0.222222
Processing value: 2 With vSize: 3 Current WE is: 0.750543
Processing value: 3 With vSize: 3 Current WE is: 1.27886
Processing value: 5 With vSize: 1 Current WE is: 1.27886
Finish computing WE for att: cant_n_control Gain is: 1.89106
Finish processing attributes list. Best attribute is: cant_n_dec Best gain is: 2.05881
Best attribute values are: 1,2,3,4,5,
Adding branch for val: 1
Next attributes are: cant_n_control,cant_n_des, Depth is: 1 Ids are: 3 Ids count: 1
Leaf at depth: 1 Ids are: 3 Ids count: 1
Segmentation fault
I'm not shure but....
I think the problem can be in the following cycle
for(;;)
{
int res = sqlite3_step(stmnt);
if(res == SQLITE_ROW)
{
string id = std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
if(!first) ids += "," + id;
else ids += id;
ids_count++;
}
if(res == SQLITE_DONE || res == SQLITE_ERROR)
{
Aux::tabs(depth+1);
cout << "Adding branch for val: " << val << endl;
Aux::tabs(depth+1);
cout << " Next attributes are: "; for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";});
cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl;
sqlite3_finalize(stmnt);
static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count);
pair<string, TreeLikeIndex*> child (val, temp_child);
children.insert(child);
}
}
I don't understand when terminate (no exit conditions in the for(;;), no return's and no break's in the block).
And I suspect that che segmentation fault is caused by the following instruction
int res = sqlite3_step(stmnt);
when, after the SQLITE_DONE or SQLITE_ERROR case (whith a call to
sqlite3_finalize(stmnt);
), the cycle is iterated again, with a stmnt invalid.
The following can be a solution?
if ( sqlite3_prepare(this->source_db_connection, get_ids_of_bestatt_val.c_str(), -1, &stmnt,0) == SQLITE_OK)
{
while ( sqlite3_step(stmnt) == SQLITE_ROW )
{
ids += ( first ? "" : "," )
+ std::string(reinterpret_cast<const char*>(sqlite3_column_text(stmnt,0)));
ids_count++;
}
Aux::tabs(depth+1);
cout << "Adding branch for val: " << val << endl;
Aux::tabs(depth+1);
cout << " Next attributes are: ";
for_each(next_attributes_set.begin(), next_attributes_set.end(), [](string v){cout << v << ",";});
cout << " Depth is: " << next_depth << " Ids are: " << ids << " Ids count: " << ids_count << endl;
sqlite3_finalize(stmnt);
static TreeLikeIndex * temp_child = this->build_index(next_attributes_set, next_depth, ids, ids_count);
pair<string, TreeLikeIndex*> child (val, temp_child);
children.insert(child);
}
for clarity this bug is from a program for a school assignment but the bug itself is related to problems with malloc rather than understanding the assignment. In the assignment I am only using one instance of this class so the question is mostly for future reference. The problem I am running into occurs when I utilize 2 different instances of my Heap class declared here:
a4.h
#include <iostream>
using namespace std;
/*
Class Declarations
********************************************************************
*/
// A Heap implemented with a growing array
class Heap{
public:
Heap();
~Heap();
void insert ( int item );
int remove();
void printheap();
private:
void trickleup ( int pos );
void trickledown ( int pos );
void swap ( int pos1 , int pos2 );
int* A;
int size;
int capacity;
};
/*
Class Methods
*********************************************************************
*/
// For Heap
Heap::Heap(){
A = NULL;
size = 0;
capacity = 0;
}
Heap::~Heap(){
delete A;
}
void Heap::insert ( int item ){
if ( size == capacity ){
int* newpointer = new int[(capacity*2)+1];
for (int i = 0; i < size; i++) newpointer[i] = A[i];
delete A;
A = newpointer;
}
A[size] = item;
size += 1;
trickleup (size-1);
return;
}
int Heap::remove(){
size -= 1;
int temp = A[0];
swap ( 0 , size );
trickledown (0);
return temp;
}
void Heap::printheap(){
cout << "Root -> [ ";
for (int i = 0; i < size; i++) cout << A[i] << " ";
cout << "]\n";
return;
}
void Heap::trickleup ( int pos ){
int p0 = pos;
int p1 = (pos-1)/2;
if ( p0 == 0 ){
trickledown (0);
return;
}
if ( A[p0] > A[p1] ){
swap ( p0 , p1 );
trickleup ( p1 );
}
else trickledown (p0);
return;
}
void Heap::trickledown ( int pos ){
int p0 = pos;
int p1 = (2*pos)+1;
int p2 = (2*pos)+2;
if ( p1 >= size ) return;
if ( p2 >= size ){
if ( A[p0] < A[p1] ) swap ( p0 , p1 );
return;
}
bool f1 = ( A[p0] < A[p1] );
bool f2 = ( A[p0] < A[p2] );
if ( (A[p1] >= A[p2]) && f1 ){
swap ( p0 , p1 );
trickledown ( p1 );
}
else if ( (A[p1] < A[p2]) && f2 ){
swap ( p0 , p2 );
trickledown ( p2 );
}
return;
}
void Heap::swap ( int pos1 , int pos2 ){
int temp = A[pos1];
A[pos1] = A[pos2];
A[pos2] = temp;
return;
}
The only time I use new to request memory is in the insert function.
The problem occurs when I run my test program compiled from htest.cpp and run both the sections for the h1 test and the h2 test. If I only run one of the two tests the problem does not occur. Here is the test program:
htest.cpp
#include <cstdlib>
#include <iostream>
#include "a4.h"
using namespace std;
int main(){
cout << "\nCreating h1 And h2\n\n";
Heap* h1 = new Heap();
Heap* h2 = new Heap();
cout << "\nAdding 0-6 To h1\n\n";
h1->insert ( 0 ); cout << "h1: "; h1->printheap();
h1->insert ( 1 ); cout << "h1: "; h1->printheap();
h1->insert ( 2 ); cout << "h1: "; h1->printheap();
h1->insert ( 3 ); cout << "h1: "; h1->printheap();
h1->insert ( 4 ); cout << "h1: "; h1->printheap();
h1->insert ( 5 ); cout << "h1: "; h1->printheap();
h1->insert ( 6 ); cout << "h1: "; h1->printheap();
cout << "\nRemoving All Elements From h1\n\n";
cout << "Removed: " << h1->remove();
cout << " h1: "; h1->printheap();
cout << "Removed: " << h1->remove();
cout << " h1: "; h1->printheap();
cout << "Removed: " << h1->remove();
cout << " h1: "; h1->printheap();
cout << "Removed: " << h1->remove();
cout << " h1: "; h1->printheap();
cout << "Removed: " << h1->remove();
cout << " h1: "; h1->printheap();
cout << "Removed: " << h1->remove();
cout << " h1: "; h1->printheap();
cout << "Removed: " << h1->remove();
cout << " h1: "; h1->printheap();
cout << "\nAdding 6-0 To h2\n\n";
h2->insert ( 6 ); cout << "h2: "; h2->printheap();
h2->insert ( 5 ); cout << "h2: "; h2->printheap();
h2->insert ( 4 ); cout << "h2: "; h2->printheap();
h2->insert ( 3 ); cout << "h2: "; h2->printheap();
h2->insert ( 2 ); cout << "h2: "; h2->printheap();
h2->insert ( 1 ); cout << "h2: "; h2->printheap();
h2->insert ( 0 ); cout << "h2: "; h2->printheap();
cout << "\nRemoving All Elements From h2\n\n";
cout << "Removed: " << h2->remove();
cout << " h2: "; h2->printheap();
cout << "Removed: " << h2->remove();
cout << " h2: "; h2->printheap();
cout << "Removed: " << h2->remove();
cout << " h2: "; h2->printheap();
cout << "Removed: " << h2->remove();
cout << " h2: "; h2->printheap();
cout << "Removed: " << h2->remove();
cout << " h2: "; h2->printheap();
cout << "Removed: " << h2->remove();
cout << " h2: "; h2->printheap();
cout << "Removed: " << h2->remove();
cout << " h2: "; h2->printheap();
cout << "\n";
return 0;
}
After compiling this program and running it (with the GNU C++ compiler) I get the following output:
Output
Creating h1 And h2
Adding 0-6 To h1
h1: Root -> [ 0 ]
h1: Root -> [ 1 0 ]
h1: Root -> [ 2 0 1 ]
h1: Root -> [ 3 2 1 0 ]
h1: Root -> [ 4 3 1 0 2 ]
h1: Root -> [ 5 3 4 0 2 1 ]
h1: Root -> [ 6 3 5 0 2 1 4 ]
Removing All Elements From h1
Removed: 6 h1: Root -> [ 5 3 4 0 2 1 ]
Removed: 5 h1: Root -> [ 4 3 1 0 2 ]
Removed: 4 h1: Root -> [ 3 2 1 0 ]
Removed: 3 h1: Root -> [ 2 0 1 ]
Removed: 2 h1: Root -> [ 1 0 ]
Removed: 1 h1: Root -> [ 0 ]
Removed: 0 h1: Root -> [ ]
Adding 6-0 To h2
htest: malloc.c:2372: sysmalloc: Assertion `(old_top == (((mbinptr) (((char *) &((av)->bins[((1) - 1) * 2])) - __builtin_offsetof (struct malloc_chunk, fd)))) && old_size == 0) || ((unsigned long) (old_size) >= (unsigned long)((((__builtin_offsetof (struct malloc_chunk, fd_nextsize))+((2 *(sizeof(size_t))) - 1)) & ~((2 *(sizeof(size_t))) - 1))) && ((old_top)->size & 0x1) && ((unsigned long) old_end & pagemask) == 0)' failed.
Aborted (core dumped)
I am wondering why the error appears as it doesn't seem like I am doing anything illegal with requesting memory. I would really appreciate it if someone could try to explain the problem clearly as I only have about one year of C++ experience.
*Edit: Changed the destructor Function to not delete size and capacity and added a delete A line to the insert function before re-sizing the array.
Sad that I didn't spot this one before. I'll throw myself onto my sword after answering.
Heap::insert never sets capacity. It stays 0, so inserts after the first do not trigger if (size == capacity) and do not resize A. As a result, A is run out of bounds and trashes the heap (the the memory heap, not the class Heap).
I recommend a small edit:
void Heap::insert(int item)
{
if (size == capacity)
{
capacity = (capacity * 2) + 1; // Note: many tests have shown that 1.5 is a
// better expansion factor than 2.
int* newpointer = new int[capacity];
for (int i = 0; i < size; i++)
newpointer[i] = A[i];
delete A;
A = newpointer;
}
A[size] = item;
size += 1;
trickleup(size - 1);
return;
}
In addition
Heap* h1 = new Heap();
Heap* h2 = new Heap();
do not need to be dynamically allocated and can be defined as
Heap h1;
Heap h2;
Among other advantages temporary allocation brings, such as improved spacial locality, this does not require the programmer to delete h1 and h2, something that is currently not done.
Now if you will excuse me, I must find where I left my sword.
I am trying to build a 2-dimensional tree based on recursion. I can sum up the algorithm as follows:
> ALGORITHM BuildKDTree(P,depth)
> 1. if P contains only one point
> 2. then return a leaf storing this point
> 3. else if depth is even
> 4. then split P with a vertical line through median x into P1 and P2 (left and right of the line, respectively)
> 5. else split P with a horizontal line through median y into P1 and P2 like before
> 6. RECURSION STEP -> v_left = BuildKDTree(P1,depth+1)
> 7. RECURSION STEP -> v_right = BuildKDTree(P2,depth+1)
> 8. Create a node v storing the line, make v_left the left child and v_right the right child
> 9. return the node v
Since it is the first time I am implementing recursion, I am having quite a lot of problems related to it. The code that I have written so far seems to be in an infinite loop until a segmentation fault is thrown. I was not able to find the bug so far in the code, I would appreciate some help.
// Point
struct Point{
int idx;
double xpos;
double ypos;
};
// Node in the k-d tree
struct Node{
char type;
Point coord;
Node* leftChild;
Node* rightChild;
double split;
};
// Function to find the median point
int findMedian( const vector<Point>& P, char line ){
vector<double> positions;
map<double,int> indices;
// Store the corresponding positions (vertical or horizontal splitting)
switch ( line ){
case 'x':
for( auto p: P ){
positions.push_back( p.xpos );
indices.insert( pair<double,int>(p.xpos,p.idx) );
}
break;
case 'y':
for( auto p: P ){
positions.push_back( p.ypos );
indices.insert( pair<double,int>(p.ypos,p.idx) );
}
break;
}
sort( positions.begin(), positions.end() );
cout << positions.size() << endl;
int middle_pt = (int)floor(positions.size()/2);
cout << indices[positions[middle_pt]] << "\t" << middle_pt << "\t" << positions[middle_pt] << endl;
return ( indices[positions[middle_pt]] );
}
// Function to build a k-d tree
Node buildKDTree( vector<Point> P, int depth ){
Node v;
// if P contains only one point, return a leaf storing this point;
// else if depth is even, split P with a vertical line through the median x ..
// .. into P1 (left of l) and P2 (right of l);
// when the depth is odd, do the vice versa.
if( P.size() == 1 ){
cout << "I am at the leaf!" << endl;
v.coord = P[0];
v.type = 'l';
return v;
}
else if( P.size() < 1 ){
cout << "Points size smaller than 1 " << P.size() << endl;
v.type = 'n';
return v;
}
else{
vector<Point> P1; // left of median
vector<Point> P2; // right of median
if( depth % 2 == 0 ) {
// Verical line through median x
char line = 'x';
v.type = line;
int mid_idx = findMedian( P, line );
v.split = P[mid_idx].xpos;
v.coord = P[mid_idx];
for( auto p: P ){
if( p.xpos < v.split ){
//cout << "Through x, left " << "\t" << p.xpos << "\t" << mid_coord << endl;
P1.push_back( p );
}
else{
//cout << "Through x, right " << "\t" << p.xpos << "\t" << mid_coord << endl;
P2.push_back( p );
}
}
}
else{
// Horizontal line through median y
char line = 'y';
v.type = line;
int mid_idx = findMedian( P, line );
v.split = P[mid_idx].ypos;
v.coord = P[mid_idx];
for( auto p: P ){
if( p.ypos < v.split ){
//cout << "Through y, left " << "\t" << p.ypos << "\t" << mid_coord << endl;
P1.push_back( p );
}
else{
//cout << "Through y, right " << "\t" << p.ypos << "\t" << mid_coord << endl;
P2.push_back( p );
}
}
}
cout << "depth is before at " << depth << endl;
Node temp1 = buildKDTree( P1, depth+1 );
depth = 2;
cout << "depth is after at " << depth << endl;
Node temp2 = buildKDTree( P2, depth+1 );
v.leftChild = &temp1;
v.rightChild = &temp2;
return v;
}
}
// +++++++
int main( int argc, char *argv[] ){
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//++ Get the data
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Choose the data to be used
const int nsamp = samplePostData; // Sampling interval
const double dtSamp = nsamp*dt; // Time units between two data points
// Instantiate the data structure
vector<Cell> cells( M );
// Set filenames
char * x_input_file = argv[1]; // Filename for the x data
char * y_input_file = argv[2]; // Filename for the y data
// Read the data to the cells
int sample_cnt = -1;
int sample_data = 1;
char getX = 'x';
readData( cells, x_input_file, getX, sample_cnt, sample_data );
sample_cnt = -1;
char getY = 'y';
readData( cells, y_input_file, getY, sample_cnt, sample_data );
// Set general simulation variables
Data simData;
simData.setNumStep( cells[0].xpos.size() );
simData.setNumDelay( sqrt( cells[0].xpos.size() ) );
simData.setNumTotalDelay();
const double T = simData.getNumStep(); // Total time
const double D = simData.getNumDelay(); // Last delay time
const double TD = simData.getNumTotalDelay(); // Total time - last delay time
// Set the box
Box box;
box.setWidth( boxSize_x );
box.setHeight( boxSize_y );
const double Lx = box.getWidth();
const double Ly = box.getHeight();
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//++ Do the analysis
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
vector<Point> points;
int i = 1000;
for( int m = 0; m < M; m++ ){
Point point_temp;
point_temp.xpos = (cells[m].xpos[i] - Lx*ifloor(cells[m].xpos[i]/Lx));
point_temp.ypos = (cells[m].ypos[i] - Ly*ifloor(cells[m].ypos[i]/Ly));
point_temp.idx = m;
points.push_back( point_temp );
}
vector<Node> tree;
int depth = 2;
tree.push_back( buildKDTree( points, depth ) );
cout << tree.size() << endl;
// for( auto j: tree ){
// cout << j.type << " " << j.coord.idx << " " << j.coord.xpos << " " << j.coord.ypos << " " << j.leftChild->coord.idx << " " << j.rightChild->coord.idx << " " << j.leftChild->coord.xpos << " " << j.rightChild->coord.ypos << "\n";
// }
}
The problem is you don't check for marking the same point twice as the median. It could easily be the case that (especially in dense systems) there are more than one point on the median line. If you don't mark explicitly the points that are used as the median before, then you will just use them again which will create infinite recursion in the tree.
My suggestion is to make a boolean array for each point and as you use these points as the median, just mark them, so that you don't use them again later.