Debugging large text files taking too much time - c++

I created a data bag structure. I read the text of a file and convert to lexicographical order. In order to do this I have to convert two string to lower case to compare them(One for the current node and one for the node that is next to it). But my problem is when I have big text files, it has to keep converting the string to lower case for each node I insert and sometimes it takes a long time to process. I was wondering if there are any ways better ways of adjusting this so I can increase the performance time.
void insert(string v)
{
if(head == NULL){ //empty list
head = new BagNode;
head->dataValue = v;
//head->dataCount = 0;
head->next = NULL;
}
else
{
BagNode * n = new BagNode; // new node
n->dataValue = v;
BagNode * current = head; //for traversal
//current = head;
n->dataCount = 0;
if(!isBefore(current->dataValue, v)) //new head
{
n->next = head;
head = n;
}
else{ //mid and tail insert
while(current->next && isBefore(current->next->dataValue,v))
{
current = current->next;
}
n->next = current->next;
current->next = n;
}
}
}
Compare Two Nodes
bool isBefore(string a, string b)
{
transform(a.begin(), a.end(), a.begin(), ::tolower);
transform(b.begin(), b.end(), b.begin(), ::tolower);
if(a == b) {
return true;
}
else {
return false;
}
}

I would remove the repeated calls to transform like this:
void insert(string v)
{
transform(v.begin(), v.end(), v.end(), ::tolower);
if(head == NULL){ //empty list
head = new BagNode;
head->dataValue = v;
//head->dataCount = 0;
head->next = NULL;
}
else
{
BagNode * n = new BagNode; // new node
n->dataValue = v;
BagNode * current = head; //for traversal
//current = head;
n->dataCount = 0;
if(!isBefore(current->dataValue, v)) //new head
{
n->next = head;
head = n;
}
else{ //mid and tail insert
while(current->next && isBefore(current->next->dataValue,v))
{
current = current->next;
}
n->next = current->next;
current->next = n;
}
}
}
bool isBefore(string a, string b)
{
if(a == b) {
return true;
}
else {
return false;
}
}
You can see some addition information on transform here. This suggestions that transform is loop over your range. A way to find something like this would be compile with -pg flags on GCC. This will enable profiling and you would see a hotspots with the functions isBefore and transform.

Related

Cloning a linked list where each node has a random pointer to any other node in linked list

Please help me find what is wrong with my code
(1).
You are given a Singly Linked List with N nodes where each node next pointing to its next node. You are also given M random pointers , where you will be given M number of pairs denoting two nodes a and b i.e. a->arb = b.
The task is to complete the function copyList() which takes one argument the head of the linked list to be cloned and should return the head of the cloned linked list.
NOTE : If their is any node whose arbitrary pointer is not given then its by default null.
I tried to write code for the above problem..but it is not working
// { Driver Code Starts
#include <bits/stdc++.h>
using namespace std;
struct Node {
int data;
Node *next;
Node *arb;
Node(int x) {
data = x;
next = NULL;
arb = NULL;
}
};
void print(Node *root) {
Node *temp = root;
while (temp != NULL) {
int k;
if (temp->arb == NULL)
k = -1;
else
k = temp->arb->data;
cout << temp->data << " " << k << " ";
temp = temp->next;
}
}
Node *copyList(Node *head);
void append(Node **head_ref, Node **tail_ref, int new_data) {
Node *new_node = new Node(new_data);
if (*head_ref == NULL) {
*head_ref = new_node;
} else
(*tail_ref)->next = new_node;
*tail_ref = new_node;
}
bool validation(Node *head, Node *res, Node *cloned_addr,
Node *generated_addr) {
if (generated_addr == cloned_addr) return false;
Node *temp1 = head;
Node *temp2 = res;
int len1 = 0, len2 = 0;
while (temp1 != NULL) {
len1++;
temp1 = temp1->next;
}
while (temp2 != NULL) {
len2++;
temp2 = temp2->next;
}
/*if lengths not equal */
if (len1 != len2) return false;
temp1 = head;
temp2 = res;
while (temp1 != NULL) {
if (temp1->data != temp2->data) return false;
if (temp1->arb != NULL and temp2->arb != NULL) {
if (temp1->arb->data != temp2->arb->data) return false;
} else if (temp1->arb != NULL and temp2->arb == NULL)
return false;
else if (temp1->arb == NULL and temp2->arb != NULL)
return false;
temp1 = temp1->next;
temp2 = temp2->next;
}
return true;
}
/* Driver program to test above function*/
int main() {
int T, i, n, l, k;
Node *generated_addr = NULL;
/*reading input stuff*/
cin >> T;
while (T--) {
generated_addr = NULL;
struct Node *head = NULL, *tail = NULL;
cin >> n >> k;
for (i = 1; i <= n; i++) {
cin >> l;
append(&head, &tail, l);
}
for (int i = 0; i < k; i++) {
int a, b;
cin >> a >> b;
Node *tempA = head;
int count = -1;
while (tempA != NULL) {
count++;
if (count == a - 1) break;
tempA = tempA->next;
}
Node *tempB = head;
count = -1;
while (tempB != NULL) {
count++;
if (count == b - 1) break;
tempB = tempB->next;
}
// when both a is greater than N
if (a <= n) tempA->arb = tempB;
}
/*read finished*/
generated_addr = head;
Node *res = copyList(head);
Node *cloned_addr = res;
cout << validation(head, res, cloned_addr, generated_addr) << endl;
}
return 0;
}
// } Driver Code Ends
/* the node structure is as follows
struct Node {
int data;
Node *next;
Node *arb;
Node(int x) {
data = x;
next = NULL;
arb = NULL;
}
};
*/
// Should return the head of the copied linked list the
// output will be 1 if successfully copied
Node *copyList(Node *head) {
if(!head)
return nullptr;
Node*q=head;
Node*clone=new Node(q->data);
clone->next=0;
clone->arb=q->arb;
Node*p=clone;
Node*r=q;
q=q->next;
while(q)
{
r->next=p;
p->next=new Node(q->data);
p=p->next;
p->next=0;
p->arb=q->arb;
r=q;
q=q->next;
}
r->next=p;
p=clone;
while(p)
{
if(p->arb)
p->arb=p->arb->next;
p=p->next;
}
return clone;
}
The pointers inside the list cannot be assigned until you have constructed the cloned list itself, because until then the nodes to point will not exist.
Therefore, you need two iterations: the first one to clone the list and make a dictionary that associates the original node with the clone, and the second one to update the pointers. The code would look like this:
Node *copyList(Node *head) {
if(!head)
return nullptr;
Node* it1 = head;
Node* clone = new Node;
Node* it2 = clone;
std::map<Node*, Node*> nodeDict;
nodeDict[nullptr] = nullptr;
// first iteration: create the list and the values
while(it1){
it2->data = it1->data;
nodeDict[it1] = it2;
it1 = it1->next;
it2->next = it1 ? new Node: nullptr;
it2 = it2->next;
}
// second iteration: connect the nodes
it1 = head;
it2 = clone;
while(it1){
it2->arb = nodeDict[it1->arb];
it1 = it1->next;
it2 = it2->next;
}
return clone;
}

Debug time issue

I created a data bag structure. I read the text of a file and convert to lexicographical order. In order to do this I have to convert two string to lower case to compare them(One for the current node and one for the node that is next to it). But my problem is when I have big text files, it has to keep converting the string to lower case for each node I insert and sometimes it takes a long time to process. I was wondering if there are any ways better ways of adjusting this so I can increase the performance time.
void insert(string v)
{
if(head == NULL){ //empty list
head = new BagNode;
head->dataValue = v;
//head->dataCount = 0;
head->next = NULL;
}
else
{
BagNode * n = new BagNode; // new node
n->dataValue = v;
BagNode * current = head; //for traversal
//current = head;
n->dataCount = 0;
if(!isBefore(current->dataValue, v)) //new head
{
n->next = head;
head = n;
}
else{ //mid and tail insert
while(current->next && isBefore(current->next->dataValue,v))
{
current = current->next;
}
n->next = current->next;
current->next = n;
}
}
}
Compare the two nodes
bool isBefore(string a, string b)
{
transform(a.begin(), a.end(), a.begin(), ::tolower);
transform(b.begin(), b.end(), b.begin(), ::tolower);
if(a == b) {
return true;
}
else {
return false;
}
}
to debugging a program in C or C++ using g++ or gcc
1) gcc/g++ -g myprogram.c/myprogram.cpp
the result will be a.out
2) gdb a.out
I hope it help you!

Need help solving memory leak error

When I run valgrind, I get one error at method insert for operator new;
I know this probably means that I have to delete node n how I tried so many things to try to delete it but it just gives me even more errors. Please hel.
class key_value_sequences {
public:
struct node{
int key;
vector<int> values;
node* next;
node* prev;
};
key_value_sequences() {
}
~key_value_sequences() {
}
key_value_sequences(const key_value_sequences& A) {
n = A.n;
head = A.head;
tail = A.tail;
v = A.v;
}
key_value_sequences& operator=(const key_value_sequences& A) {
if (this == &A) return *this;
n = A.n;
head = A.head;
tail = A.tail;
v = A.v;
return *this;
}
// YOU SHOULD USE C++ CONTAINERS TO AVOID RAW POINTERS
// IF YOU DECIDE TO USE POINTERS, MAKE SURE THAT YOU MANAGE MEMORY PROPERLY
// IMPLEMENT ME: SHOULD RETURN SIZE OF A SEQUENCE FOR GIVEN KEY
// IF NO SEQUENCE EXISTS FOR A GIVEN KEY RETURN -1
int size(int key) const {
if (find(v.begin(), v.end(), key)!=v.end()) {
node* temp = head;
while(temp != NULL) {
if (temp->key == key) {
return temp->values.size();
}
else temp = temp->next;
}
}
else return -1;
}
// IMPLEMENT ME: SHOULD RETURN POINTER TO A SEQUENCE FOR GIVEN KEY
// IF NO SEQUENCE EXISTS FOR A GIVEN KEY RETURN nullptr
const int* data(int key) const {
if (find(v.begin(), v.end(), key)!=v.end()) {
node* temp = head;
while(temp != NULL) {
if (temp->key == key) {
return temp->values.data();
}
else temp = temp->next;
}
}
else return nullptr;
}
// IMPLEMENT ME: INSERT VALUE INTO A SEQUENCE IDENTIFIED BY GIVEN KEY
void insert(int key, int value) {
if(v.size() == 0) { //empty list
v.push_back(key);
n = new node;
n->prev = NULL;
n->key = key;
n->values.push_back(value);
head = n;
tail = n;
}
else if((find(v.begin(), v.end(), key)!=v.end())) { //if key exists already
node* temp = head;
while(temp != NULL) {
if (temp->key == key) {
temp->values.push_back(value);
break;
}
else temp = temp->next;
}
}
else { //if theres no existing key
v.push_back(key);
n = new node;
n->key = key;
n->values.push_back(value);
n->prev = tail;
tail->next = n;
tail = n;
tail->next = NULL;
}
}
private:
vector<int> v;
node* n;
node* head;
node* tail;
}; // class key_value_sequences
#endif // A3_HPP
In insert method:
if(v.size() == 0) { //empty list
v.push_back(key);
n = new node;
n->prev = NULL;
n->key = key;
n->values.push_back(value);
head = n;
tail = n;
}
You are not setting the head->next to NULL. I suspect that could be the problem while inserting second value.
while(temp != NULL) { // <<<<< Is temp uninitialized?
if (temp->key == key) {
temp->values.push_back(value);
break;
}
else temp = temp->next;
Its pretty dangerous to not initialize the pointer to NULL

Linked list cstring insertion sort

This is my code for an insertion-sort, sorting via the cstring member of the node. The current code only inserts before the head. The code encapsulated in comments is the sorted insertion I am trying to make work. I'm thinking I have to use a predecessor and successor pointer, but maybe it's the comparison that is confusing me. Any help would be appreciated. Thanks!
#include <iostream>
#include "groclist.h"
void insert_item(Grocery_Item_Ptr &head, int quantity, const char name[])
{
bool exists = false;
bool done = false;
Grocery_Item_Ptr temp = NULL;
Grocery_Item_Ptr current = NULL;
Grocery_Item_Ptr pred = NULL;
Grocery_Item_Ptr succ = NULL;
if (head == NULL) {
head = new Grocery_Item;
head->quantity = quantity;
strncpy(head->name, name, MAX_ITEM_NAME_LEN);
head->next = NULL;
return;
}
else {
current = head;
while (current != NULL) {
if (strncmp(current->name, name, MAX_ITEM_NAME_LEN) == 0) {
current->quantity += quantity;
exists = true;
}
current = current->next;
}
if (exists) {
current = NULL;
return;
}
else {
current = head;
}
if (!exists) {
temp = new Grocery_Item;
temp->quantity = quantity;
strncpy(temp->name, name, MAX_ITEM_NAME_LEN);
/*
while (!done || current != NULL) {
if (strncmp(current->name, name, MAX_ITEM_NAME_LEN) < 0) {
pred = current;
succ = current->next;
current->next = temp;
temp->next = succ;
done = true;
}
if (!done) {
current = current->next;
}
}
*/
temp->next = head;
head = temp;
temp = NULL;
}
}
return;
}
One thing you are missing is keeping a reference to the predecessor while searching. This is necessary in order to keep the chain intact.
Here is a draft that should work (currently untested!):
while (!done || current != NULL)
{
//If we found the place to insert
if (strncmp(current->name, name, MAX_ITEM_NAME_LEN) < 0)
{
//If the place to insert was at head
if(pred == NULL)
{
//The new node becomes the head
head = temp;
}
else
{
//Set the previous nodes next to point at this node.
pred->next = temp;
}
//Always set the node to be inserted's next
//pointing to the node we should be inserted before
temp->next = current;
done = true;
}
if (!done)
{
//No match, keep looking but keep an updated pred pointer
pred = current;
current = current->next;
}
}
It's just pseudocode, but maybe it helps:
if(head == NULl)
{
//make newnode as head
}
if(head.name == new name)
{
//update quantity
}
if(head.name <new name)
{
//insert newnode before head
//make newnode as head
}
if (new name > head.name)
{
current = head;
succ = current.next;
while (succ && new name <succ.name)
{
curent = succ;
succ = succ.next
}
if(succ = NULL)
current->next = newnode
else
if new name = succ->name
update quantity
else
curent->next = newnode
newnode->next = succ;
}
I appreciate your input guys, if only for having me think about it in different ways. However my result is quite different, and I must truly thank my whiteboard.
if (strncmp(name, head->name, MAX_ITEM_NAME_LEN) < 0) {
temp->next = head;
head = temp;
temp = NULL;
}
else {
pred = head;
current = head->next;
do {
if (strncmp(name, current->name, MAX_ITEM_NAME_LEN) < 0) {
pred->next = temp;
temp->next = current;
done = true;
}
else if (current->next == NULL) {
current->next = temp;
done = true;
}
else {
pred = current;
current = current->next;
}
if (done) {
pred = NULL;
current = NULL;
}
} while (!done && current != NULL);
}

Double Linked List Insertion Sorting Bug

I have implemented an insertion sort in a double link list (highest to lowest) from a file of 10,000 ints, and output to file in reverse order.
To my knowledge I have implemented such a program, however I noticed in the ouput file, a single number is out of place. Every other number is in correct order.
The number out of place is a repeated number, but the other repeats of this number are in correct order. Its just strange how this number is incorrectly placed. Also the unsorted number is only 6 places out of sync.
I have looked through my program for days now with no idea where the problem lies, so I turn to you for help.
Below is the code in question,
(side note: can my question be deleted by myself? rather my colleges dont thieve my code, if not how can it be deleted?)
void DLLIntStorage::insertBefore(int inValue, node *nodeB)
{
node *newNode;
newNode = new node();
newNode->prev = nodeB->prev;
newNode->next = nodeB;
newNode->value = inValue;
if(nodeB->prev==NULL)
{
this->front = newNode;
}
else
{
nodeB->prev->next = newNode;
}
nodeB->prev = newNode;
}
void DLLIntStorage::insertAfter(int inValue, node *nodeB)
{
node *newNode;
newNode = new node();
newNode->next = nodeB->next;
newNode->prev = nodeB;
newNode->value = inValue;
if(nodeB->next == NULL)
{
this->back = newNode;
}
else
{
nodeB->next->prev = newNode;
}
nodeB->next = newNode;
}
void DLLIntStorage::insertFront(int inValue)
{
node *newNode;
if(this->front == NULL)
{
newNode = new node();
this->front = newNode;
this->back = newNode;
newNode->prev = NULL;
newNode->next = NULL;
newNode->value = inValue;
}
else
{
insertBefore(inValue, this->front);
}
}
void DLLIntStorage::insertBack(int inValue)
{
if(this->back == NULL)
{
insertFront(inValue);
}
else
{
insertAfter(inValue, this->back);
}
}
ifstream& operator>> (ifstream &in, DLLIntStorage &obj)
{
int readInt, counter = 0;
while(!in.eof())
{
if(counter==dataLength) //stops at 10,000
{
break;
}
in >> readInt;
if(obj.front != NULL )
{
obj.insertion(readInt);
}
else
{
obj.insertBack(readInt);
}
counter++;
}
return in;
}
void DLLIntStorage::insertion(int inValue)
{
node* temp;
temp = this->front;
if(temp->value >= inValue)
{
insertFront(inValue);
return;
}
else
{
while(temp->next!=NULL && temp!=this->back)
{
if(temp->value >= inValue)
{
insertBefore(inValue, temp);
return;
}
temp = temp->next;
}
}
if(temp == this->back)
{
insertBack(inValue);
}
}
Thankyou for your time.
I don't like this part
else
{
while(temp->next!=NULL && temp!=this->back)
{
if(temp->value >= inValue)
{
insertBefore(inValue, temp);
return;
}
temp = temp->next;
}
}
if(temp == this->back)
{
insertBack(inValue);
}
Imagine what happens if inValue is greater than all values except this->back->value. It gets inserted at the end instead before this->back. By the way, You are inserting equal integers in the reversed order, they are read. For integers it doesn't matter that much, but it could if You inserted other objects. I would change the code of the insertion method to this:
node* temp;
temp = this->front;
while(temp!=NULL)
{
if(temp->value > inValue)
{
insertBefore(inValue, temp);
return;
}
temp = temp->next;
}
insertBack(inValue);
Just some remarks.
while(!in.eof())
This will not stop the inside of the loop from seeing an EOF error. You want
while ( in >> readInt )
Also,
if(this->front == NULL)
and
void DLLIntStorage::insertion(int inValue)
{
node* temp;
temp = this->front;
if(temp->value >= inValue)
do not mix. Either the front can be NULL, or it cannot. Likewise, you need to decide whether to use temp->next!=NULL or temp!=this->back, but not both, as a loop termination condition.
My guess would be that some inconsistency between multiple linking conventions is causing the errant value to get pushed into the middle of the list.