Segmentation fault after main exit - c++

I Have tried executing the following code in gdb, but with gdb I don`t see any segmentation fault but without gdb If I run the following code in standalone mode segmentation fault occurs. The code is related to range sum query implemented using segment tree.
#include <iostream>
#include <vector>
using namespace std;
class segmentTree
{
private:
vector<int> a;
void constructUtil(vector<int>& , int , int , int );
int queryUtil(int , int , int , int , int );
void updateUtil(int , int , int , int , int );
public:
segmentTree(vector<int> );
int query(int , int );
void update(int , int );
~segmentTree();
};
segmentTree::segmentTree(vector<int> v)
{
int n = v.size();
a.resize((2*n) - 1);
constructUtil(v, 0 , n - 1, 0);
}
segmentTree::~segmentTree()
{
a.clear();
}
void segmentTree::constructUtil(vector<int>& v, int start, int end, int i)
{
if(start == end)
{
a[i] = v[start];
}
else
{
int mid = start + ((end - start) >> 1);
constructUtil(v, start, mid, ((2*i) + 1));
constructUtil(v, mid + 1, end, ((2*i) + 2));
a[i] = a[(2*i) + 1] + a[(2*i) + 2];
}
}
int segmentTree::queryUtil(int ss, int se, int rs, int re, int i)
{
if(se < rs || re < ss)
{
return 0;
}
else if(rs <= ss && se <= re)
{
return a[i];
}
else
{
int sm = ss + ((se - ss) >> 1);
return queryUtil(ss, sm, rs, re, 2*i + 1) + queryUtil(sm + 1, se, rs, re, 2*i + 2);
}
}
int segmentTree::query(int l, int r)
{
int n = ((a.size() + 1) >> 1);
if(l < 0 || r > n-1)
{
return 0;
}
return queryUtil(0, n-1, l , r, 0);
}
void segmentTree::updateUtil(int ss, int se, int i, int si, int x)
{
if(ss > i || se < i)
{
return ;
}
else if(ss == se)
{
a[si] = x;
}
else
{
int sm = ss + ((se - ss) >> 1);
updateUtil(ss, sm, i, (2*si) + 1, x);
updateUtil(sm + 1, se, i, (2*si) + 2, x);
a[si] = a[(2*si) + 1] + a[(2*si) + 2];
}
}
void segmentTree::update(int i, int x)
{
int n = ((a.size() + 1) >> 1);
if(i < 0 || i > n-1)
{
return ;
}
else
{
updateUtil(0, n-1, i, 0, x);
}
}
int main()
{
int arr[] = {1, 3, 5, 7, 9, 11};
int n = sizeof(arr)/sizeof(arr[0]);
vector<int> v(arr, arr + n);
segmentTree st(v);
// Print sum of values in array from index 1 to 3
cout << "Sum of values in given range = " << st.query(1, 3) << endl;
// Update: set arr[1] = 10 and update corresponding
// segment tree nodes
st.update(1, 10);
// Find sum after the value is updated
cout << "Updated sum of values in given range = " << st.query(1, 3) << endl;
return 0;
}

Consider segmentTree::constructUtil with v.size() == 3. Then in the inital call to constructUtil you have start == 0 and end == 2.
Thus we get mid = 1.
In the second recursive call we are then passing start = 1, end = 2 and i = 2. start != end and so the else is executed.
However in the else block a[(2*i)+2] is accessed (by the way, no need for the parantheses there). This index will be 6.
But if you look at the size of a, it was given as 2*n-1. 2*3-1 = 5, so 6 is clearly out-of-bounds.
I don't know what your intentions with the code are, but that there is undefined behavior. You can easily catch it by either using something like valgrind, by replacing a[...] with a.at(...) for debug purposes, by stepping through the code with gdb and actually following all the variables (there does not need to be a segmentation fault for your program to have undefined behavior) or by entering debug std::cout statements with the variable content everywhere that could cause the issue.

Related

Reason for Segmentation Fault During Depth First Search on Tree of Large Size C++

I'm trying to solve https://open.kattis.com/problems/rootedsubtrees and part of the solution requires finding the minimum distance between any 2 nodes on the tree. To do this, I'm using Lowest Common Ancestor as a subroutine. Part of my LCA code uses a DFS to traverse the tree. Somehow, running this code on a line graph of size 200000 leads to a segmentation fault during the DFS section of the code.
#pragma GCC optimize("Ofast")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,fma")
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef vector<int> vi;
#define fast_cin() \
ios_base::sync_with_stdio(false); \
cin.tie(NULL); \
cout.tie(NULL);
int n, q, idx;
vector<int> adjlist[200009];
vector<int> L, E,
H; // depth at traversal index, node at traversal index, first traversal index of node
void dfs(int cur, int depth) {
cout << "dfs " << cur << " " << idx << endl;
H[cur] = idx;
E[idx] = cur;
L[idx++] = depth;
for (int &nxt : adjlist[cur]) {
if (H[nxt] != -1) continue;
dfs(nxt, depth + 1);
E[idx] = cur; // backtrack to current node
L[idx++] = depth;
}
}
class SparseTable { // OOP style
private:
vi A, P2, L2;
vector<vi> SpT; // the Sparse Table
public:
SparseTable() {} // default constructor
SparseTable(vi &initialA) { // pre-processing routine
A = initialA;
int n = (int)A.size();
int L2_n = (int)log2(n) + 1;
P2.assign(L2_n, 0);
L2.assign(1 << L2_n, 0);
for (int i = 0; i <= L2_n; ++i) {
P2[i] = (1 << i); // to speed up 2^i
L2[(1 << i)] = i; // to speed up log_2(i)
}
for (int i = 2; i < P2[L2_n]; ++i)
if (L2[i] == 0) L2[i] = L2[i - 1]; // to fill in the blanks
// the initialization phase
SpT = vector<vi>(L2[n] + 1, vi(n));
for (int j = 0; j < n; ++j) SpT[0][j] = j; // RMQ of sub array [j..j]
// the two nested loops below have overall time complexity = O(n log n)
for (int i = 1; P2[i] <= n; ++i) // for all i s.t. 2^i <= n
for (int j = 0; j + P2[i] - 1 < n; ++j) { // for all valid j
int x = SpT[i - 1][j]; // [j..j+2^(i-1)-1]
int y = SpT[i - 1][j + P2[i - 1]]; // [j+2^(i-1)..j+2^i-1]
SpT[i][j] = A[x] <= A[y] ? x : y;
}
}
int RMQ(int i, int j) {
int k = L2[j - i + 1]; // 2^k <= (j-i+1)
int x = SpT[k][i]; // covers [i..i+2^k-1]
int y = SpT[k][j - P2[k] + 1]; // covers [j-2^k+1..j]
return A[x] <= A[y] ? x : y;
}
};
int LCA(int u, int v, SparseTable &SpT) {
if (H[u] > H[v]) swap(u, v);
return E[SpT.RMQ(H[u], H[v])];
}
int APSP(int u, int v, SparseTable &SpT) {
int ancestor = LCA(u, v, SpT);
return L[H[u]] + L[H[v]] - 2 * L[H[ancestor]];
}
int main() {
fast_cin();
cin >> n >> q;
L.assign(2 * (n + 9), 0);
E.assign(2 * (n + 9), 0);
H.assign(n + 9, -1);
idx = 0;
int u, v;
for (int i = 0; i < n - 1; i++) {
cin >> u >> v;
u--;
v--;
adjlist[u].emplace_back(v);
adjlist[v].emplace_back(u);
}
dfs(0, 0);
SparseTable SpT(L);
ll d;
while (q--) {
cin >> u >> v;
u--;
v--;
d = (ll) APSP(u, v, SpT) + 1;
cout << (ll) n - d + (d) * (d + 1) / 2 << endl;
}
return 0;
}
Using the following Python Code to generate the input of a large line graph
n = 200000
q = 1
print(n, q)
for i in range(1, n):
print(i, i+1)
print(1, 200000)
I get the following last few lines of output before my program crashes.
.
.
.
dfs 174494 174494
dfs 174495 174495
dfs 174496 174496
dfs 174497 174497
dfs 174498 174498
Segmentation fault (core dumped)
Is the problem an issue of exhausting stack space with the recursion or something else?
You posted a lot of code, but here is one obvious error in the SparseMatrix class:
std::vector<int> P2;
//...
P2.assign(L2_n, 0);
for (int i = 0; i <= L2_n; ++i)
{
P2[i] = (1 << i); // <-- Out of bounds access when i == L2_n
To show you the error, change that line of code to this:
P2.at(i) = (1 << i); // <-- Out of bounds access when i == L2_n
You will now get a std::out_of_range exception thrown.
If you write a loop using <=, that loop will be considered suspicious, since a lot of off-by-one and buffer overrun errors occur with loop conditions written this way.
I believe stack exhaustion was the main problem in running the code on my machine. I re-implemented the DFS in an iterative fashion.
stack<tuple<int, int, bool>> st; // cur, depth, first_time
st.push ({0, 0, 1});
while (!st.empty()) {
auto [cur, depth, first_time] = st.top();
st.pop();
if (first_time){
H[cur] = idx;
}
E[idx] = cur;
L[idx++] = depth;
for (int &nxt : adjlist[cur]) {
if (H[nxt] != -1) continue;
st.push({cur, depth, 0});
st.push({nxt, depth+1, 1});
break;
}
}
and my code was able to run the large testcase on my machine.
I'm not sure is this is relevant to the original question, but after this change, the code still flagged a run-time error on the online judge and I eventually realized that the issue was that the sparse table was using too much memory, so I fixed that by avoiding wasted declared but not used memory spaces in rows of the sparse table. Then the online judge deemed it as being too slow. So I reverted the DFS code back to the recursive version, and it was accepted. Note that the accepted solution actually crashes on my machine when running the large testcase... I guess my machine has a more limited stack space than the online grader.
The accepted solution is here
#pragma GCC optimize("Ofast")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,fma")
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef vector<int> vi;
#define fast_cin() \
ios_base::sync_with_stdio(false); \
cin.tie(NULL); \
cout.tie(NULL);
int n, q, idx;
vector<int> adjlist[(int)2e5 + 9];
vector<int> L, E,
H; // depth at traversal index, node at traversal index, first traversal index of node
void dfs(int cur, int depth) {
H[cur] = idx;
E[idx] = cur;
L[idx++] = depth;
for (int &nxt : adjlist[cur]) {
if (H[nxt] != -1) continue;
dfs(nxt, depth + 1);
E[idx] = cur; // backtrack to current node
L[idx++] = depth;
}
}
class SparseTable { // OOP style
private:
vi A, P2, L2;
vector<vi> SpT; // the Sparse Table
public:
SparseTable() {} // default constructor
SparseTable(vi &initialA) { // pre-processing routine
A = initialA;
int n = (int)A.size();
int L2_n = (int)log2(n) + 1;
P2.assign(L2_n + 1, 0);
L2.assign((1 << L2_n) + 1, 0);
for (int i = 0; i <= L2_n; ++i) {
P2[i] = (1 << i); // to speed up 2^i
L2[(1 << i)] = i; // to speed up log_2(i)
}
for (int i = 2; i < P2[L2_n]; ++i)
if (L2[i] == 0) L2[i] = L2[i - 1]; // to fill in the blanks
// the initialization phase
SpT = vector<vi>(L2[n] + 1, vi());
SpT[0] = vi(n, 0);
for (int j = 0; j < n; ++j) SpT[0][j] = j; // RMQ of sub array [j..j]
// the two nested loops below have overall time complexity = O(n log n)
for (int i = 1; P2[i] <= n; ++i) { // for all i s.t. 2^i <= n
SpT[i] = vi(n + 1 - P2[i]); // initialize SpT[i]
for (int j = 0; j + P2[i] - 1 < n; ++j) { // for all valid j
int x = SpT[i - 1][j]; // [j..j+2^(i-1)-1]
int y = SpT[i - 1][j + P2[i - 1]]; // [j+2^(i-1)..j+2^i-1]
SpT[i][j] = A[x] <= A[y] ? x : y;
}
}
}
int RMQ(int i, int j) {
int k = L2[j - i + 1]; // 2^k <= (j-i+1)
int x = SpT[k][i]; // covers [i..i+2^k-1]
int y = SpT[k][j - P2[k] + 1]; // covers [j-2^k+1..j]
return A[x] <= A[y] ? x : y;
}
};
int LCA(int u, int v, SparseTable &SpT) {
if (H[u] > H[v]) swap(u, v);
return E[SpT.RMQ(H[u], H[v])];
}
int APSP(int u, int v, SparseTable &SpT) {
int ancestor = LCA(u, v, SpT);
return L[H[u]] + L[H[v]] - 2 * L[H[ancestor]];
}
int main() {
fast_cin();
cin >> n >> q;
L.assign(2 * (n), 0);
E.assign(2 * (n), 0);
H.assign(n, -1);
idx = 0;
int u, v;
for (int i = 0; i < n - 1; i++) {
cin >> u >> v;
u--;
v--;
adjlist[u].emplace_back(v);
adjlist[v].emplace_back(u);
}
dfs(n - 1, 0);
SparseTable SpT(L);
ll d;
while (q--) {
cin >> u >> v;
u--;
v--;
d = (ll)APSP(u, v, SpT) + 1LL;
cout << (ll)n - d + (d) * (d + 1) / (ll)2 << endl;
}
return 0;
}

How to find indexes of elements in recursive-knapsack?

I'm trying to implement recursive Knapsack which would return 2 things:
Max value we get by filling knapsack.
Indexes of the elements considered for filling the knapsack.
Please note that I don't want to use Dynamic Programming Approach to get this done (by reverse iterating the 2-D matrix to get the indexes of elements). I want to understand how it can be done in recursive knapsack approach?
This is what I have tried below (getting correct MaxValue (op#1) BUT not getting correct list of indexes (op#2)):
int knapsack(vector<int> wt, vector<int> val, int W, int N, vector<int>& idx)
{
if (W == 0 || N == 0) return 0;
if (wt[N - 1] <= W)
{
int consider = val[N - 1] + knapsack(wt, val, W - wt[N - 1], N - 1, idx);
int dontconsider = knapsack(wt, val, W, N - 1, idx);
if (consider > dontconsider)
{
idx.push_back(N-1);
}
return max(consider, dontconsider);
}
else
{
return knapsack(wt, val, W, N - 1, idx);
}
}
int main()
{
vector<int> wt = { 10, 20, 30 };
vector<int> val = { 60, 100, 120 };
int W = 50;
vector<int> idx; // this should retain the indexes of the elements considered for knapsack.
cout << knapsack(wt, val, W, wt.size(), idx);
cout << "\nIndex of elements considered for knapsack: ";
for (int i = 0; i < idx.size(); i++)
cout << idx[i] << " ";
getchar();
return 0;
}
Expected output should be:
220
Index of elements considered for knapsack: 1 2
Please help me. Thank you.
The vector idx is passed by reference: vector<int>& idx.
The issue is that here:
int consider = val[N - 1] + knapsack(wt, val, W - wt[N - 1], N - 1, idx);
int dontconsider = knapsack(wt, val, W, N - 1, idx);
This vector idx is modified twice.
One solution is to create a temporary vector for the first call...
#include <iostream>
#include <vector>
#include <algorithm>
int knapsack(const std::vector<int>& wt, const std::vector<int>& val, int W, int N, std::vector<int>& idx) {
if (W == 0 || N == 0) return 0;
if (wt[N - 1] <= W) {
std::vector<int> idx0;
int consider = val[N - 1] + knapsack(wt, val, W - wt[N - 1], N - 1, idx0);
int dontconsider = knapsack(wt, val, W, N - 1, idx);
if (consider > dontconsider) {
idx = idx0;
idx.push_back(N-1);
return consider;
}
return dontconsider;
}
else {
return knapsack(wt, val, W, N - 1, idx);
}
}
int main()
{
std::vector<int> wt = { 10, 20, 30 };
std::vector<int> val = { 60, 100, 120 };
int W = 50;
std::vector<int> idx; // this should retain the indexes of the elements considered for knapsack.
std::cout << knapsack(wt, val, W, wt.size(), idx);
std::
cout << "\nIndex of elements considered for knapsack: ";
for (int i = 0; i < idx.size(); i++)
std::cout << idx[i] << " ";
std::cout << "\n";
//getchar();
return 0;
}
Damien's diagnostic is correct. The problem is the repetition of already considered entries in the idx vector in the multiple evaluated branches of the iterations.
I would only add a new approach which I think would be more simple. Instead of using a vector, you can simply change your code making idx a set object, which is a container like a vector, but with one important difference: it does not allow repeated elements. So, if you insert an element already contained in a set, it will not add a new one.
#include <iostream>
#include <vector>
#include <algorithm>
#include <set>
using namespace std;
int knapsack(const vector<int> wt, const vector<int> val, const int W, int N, set<int>& idx)
{
if (W == 0 || N == 0) return 0;
if (wt[N - 1] <= W)
{
int consider = val[N - 1] + knapsack(wt, val, W - wt[N - 1], N - 1, idx);
int dontconsider = knapsack(wt, val, W, N - 1, idx);
if (consider > dontconsider)
{
idx.insert(N-1);
}
return max(consider, dontconsider);
}
else
{
return knapsack(wt, val, W, N - 1, idx);
}
}
int main()
{
vector<int> wt = { 10, 20, 30 };
vector<int> val = { 60, 100, 120 };
int W = 50;
set<int> idx; // this should retain the indexes of the elements considered for knapsack.
cout << "Maximum value obtained: " << knapsack(wt, val, W, wt.size(), idx);
cout << "\nIndex of elements considered for knapsack: ";
for (auto i : idx)
cout << i << " ";
cout << endl;
getchar();
return 0;
}
Here is my example code using a set (note I had to make small adjustments, since there are some methods like pusk_back that are vector specific):

How to get the minimum XOR of a given value and the value from a query of range for a given array

Given an array A of n integers and given queries in the form of range [l , r] and a value x, find the minimum of A[i] XOR x where l <= i <= r and x will be different for different queries.
I tried solving this problem using segment trees but I am not sure what type of information I should store in them as x will be different for different queries.
0 < number of queries <= 1e4
0 < n <= 1e4
To solve this I used a std::vector as basis (not an array, or std::array), just for flexibility.
#include <algorithm>
#include <stdexcept>
#include <vector>
int get_xored_max(const std::vector<int>& values, const size_t l, const size_t r, const int xor_value)
{
// check bounds of l and r
if ((l >= values.size()) || (r >= values.size()))
{
throw std::invalid_argument("index out of bounds");
}
// todo check l < r
// create left & right iterators to create a smaller vector
// only containing the subset we're interested in.
auto left = values.begin() + l;
auto right = values.begin() + r + 1;
std::vector<int> range{ left, right };
// xor all the values in the subset
for (auto& v : range)
{
v ^= xor_value;
}
// use the standard library function for finding the iterator to the maximum
// then use the * to dereference the iterator and get the value
auto max_value = *std::max_element(range.begin(), range.end());
return max_value;
}
int main()
{
std::vector<int> values{ 1,3,5,4,2,4,7,9 };
auto max_value = get_xored_max(values, 0u, 7u, 3);
return 0;
}
Approach - Trie + Offline Processing
Time Complexity - O(N32)
Space Complexity - O(N32)
Edit:
This Approach will fail. I guess, we have to use square root decomposition instead of two pointers approach.
I have solved this problem using Trie for finding minimum xor in a range of [l,r]. I solved queries by offline processing by sorting them.
Input format:
the first line has n (no. of elements) and q (no. of queries). the second line has all n elements of the array. each subsequent line has a query and each query has 3 inputs l, r and x.
Example -
Input -
3 3
2 1 2
1 2 3
1 3 2
2 3 5
First, convert all 3 queries into queries sorted by l and r.
converted queries -
1 2 3
1 3 2
2 3 5
Key here is processing over sorted queries using two pointers approach.
#include <bits/stdc++.h>
using namespace std;
const int N = (int)2e4 + 77;
int n, q, l, r, x;
int a[N], ans[N];
vector<pair<pair<int, int>, pair<int, int>>> queries;
// Trie Implementation starts
struct node
{
int nxt[2], cnt;
void newnode()
{
memset(nxt, 0, sizeof(nxt));
cnt = 0;
}
} trie[N * 32];
int tot = 1;
void update(int x, int v)
{
int p = 1;
for (int i = 31; i >= 0; i--)
{
int id = x >> i & 1;
if (!trie[p].nxt[id])
{
trie[++tot].newnode();
trie[p].nxt[id] = tot;
}
p = trie[p].nxt[id];
trie[p].cnt += v;
}
}
int minXor(int x)
{
int res = 0, p = 1;
for (int i = 31; i >= 0; i--)
{
int id = x >> i & 1;
if (trie[p].nxt[id] and trie[trie[p].nxt[id]].cnt)
p = trie[p].nxt[id];
else
{
p = trie[p].nxt[id ^ 1];
res |= 1 << i;
}
}
return res;
}
// Trie Implementation ends
int main()
{
cin >> n >> q;
for (int i = 1; i <= n; i += 1)
{
cin >> a[i];
}
for (int i = 1; i <= q; i += 1)
{
cin >> l >> r >> x;
queries.push_back({{l, r}, {x, i}});
}
sort(queries.begin(), queries.end());
int left = 1, right = 1;
for (int i = 0; i < q; i += 1)
{
int l = queries[i].first.first;
int r = queries[i].first.second;
int x = queries[i].second.first;
int index = queries[i].second.second;
while (left < l)
{
update(a[left], -1);
left += 1;
}
while (right <= r)
{
update(a[right], 1);
right += 1;
}
ans[index] = minXor(x);
}
for (int i = 1; i <= q; i += 1)
{
cout << ans[i] << " \n";
}
return 0;
}
Edit: with O(number of bits) code
Use a binary tree to store the values of A, look here : Minimum XOR for queries
What you need to change is adding to each node the range of indexes for A corresponding to the values in the leafs.
# minimal xor in a range
nbits=16 # Number of bits for numbers
asize=5000 # Array size
ntest=50 # Number of random test
from random import randrange
# Insert element a iindex iin the tree (increasing i only)
def tinsert(a,i,T):
for b in range(nbits-1,-1,-1):
v=((a>>b)&1)
T[v+2].append(i)
if T[v]==[]:T[v]=[[],[],[],[]]
T=T[v]
# Buildtree : builds a tree based on array V
def build(V):
T=[[],[],[],[]] # Init tree
for i,a in enumerate(V): tinsert(a,i,T)
return(T)
# Binary search : is T intersec [a,b] non empty ?
def binfind(T,a,b):
s,e,om=0,len(T)-1,-1
while True:
m=(s+e)>>1
v=T[m]
if v<a:
s=m
if m==om: return(a<=T[e]<=b)
elif v>b:
e=m
if m==om: return(a<=T[s]<=b)
else: return(True) # a<=T(m)<=b
om=m
# Look for the min xor in a give range index
def minx(x,s,e,T):
if s<0 or s>=(len(T[2])+len(T[3])) or e<s: return
r=0
for b in range(nbits-1,-1,-1):
v=((x>>b)&1)
if T[v+2]==[] or not binfind(T[v+2],s,e): # not nr with b set to v ?
v=1-v
T=T[v]
r=(r<<1)|v
return(r)
# Tests the code on random arrays
max=(1<<nbits)-1
for i in range(ntest):
A=[randrange(0,max) for i in range(asize)]
T=build(A)
x,s=randrange(0,max),randrange(0,asize-1)
e=randrange(s,asize)
if min(v^x for v in A[s:e+1])!=x^minx(x,s,e,T):
print('error')
I was able to solve this using segment tree and tries as suggested by #David Eisenstat
Below is an implementation in c++.
I constructed a trie for each segment in the segment tree. And finding the minimum xor is just traversing and matching the corresponding trie using each bit of the query value (here)
#include <bits/stdc++.h>
#define rep(i, a, b) for (int i = a; i < b; i++)
using namespace std;
const int bits = 7;
struct trie {
trie *children[2];
bool end;
};
trie *getNode(void)
{
trie *node = new trie();
node->end = false;
node->children[0] = NULL;
node->children[1] = NULL;
return node;
}
trie *merge(trie *l, trie *r)
{
trie *node = getNode();
// Binary 0:
if (l->children[0] && r->children[0])
node->children[0] = merge(l->children[0], r->children[0]);
else if (!r->children[0])
node->children[0] = l->children[0];
else if (!l->children[0])
node->children[0] = r->children[0];
// Binary 1:
if (l->children[1] && r->children[1])
node->children[1] = merge(l->children[1], r->children[1]);
else if (!r->children[1])
node->children[1] = l->children[1];
else if (!l->children[1])
node->children[1] = r->children[1];
return node;
}
void insert(trie *root, int num)
{
int mask = 1 << bits;
int bin;
rep(i, 0, bits + 1)
{
bin = ((num & mask) >> (bits - i));
if (!root->children[bin]) root->children[bin] = getNode();
root = root->children[bin];
mask = mask >> 1;
}
root->end = true;
}
struct _segTree {
int n, height, size;
vector<trie *> tree;
_segTree(int _n)
{
n = _n;
height = (int)ceil(log2(n));
size = (int)(2 * pow(2, height) - 1);
tree.resize(size);
}
trie *construct(vector<int> A, int start, int end, int idx)
{
if (start == end) {
tree[idx] = getNode();
insert(tree[idx], A[start]);
return tree[idx];
}
int mid = start + (end - start) / 2;
tree[idx] = merge(construct(A, start, mid, 2 * idx + 1),
construct(A, mid + 1, end, 2 * idx + 2));
return tree[idx];
}
int findMin(int num, trie *root)
{
int mask = 1 << bits;
int bin;
int rnum = 0;
int res = 0;
rep(i, 0, bits + 1)
{
bin = ((num & mask) >> (bits - i));
if (!root->children[bin]) {
bin = 1 - bin;
if (!root->children[bin]) return res ^ num;
}
rnum |= (bin << (bits - i));
root = root->children[bin];
if (root->end) res = rnum;
mask = mask >> 1;
}
return res ^ num;
}
int Query(int X, int start, int end, int qstart, int qend, int idx)
{
if (qstart <= start && qend >= end) return findMin(X, tree[idx]);
if (qstart > end || qend < start) return INT_MAX;
int mid = start + (end - start) / 2;
return min(Query(X, start, mid, qstart, qend, 2 * idx + 1),
Query(X, mid + 1, end, qstart, qend, 2 * idx + 2));
}
};
int main()
{
int n, q;
vector<int> A;
vector<int> L;
vector<int> R;
vector<int> X;
cin >> n;
A.resize(n, 0);
rep(i, 0, n) cin >> A[i];
cin >> q;
L.resize(q);
R.resize(q);
X.resize(q);
rep(i, 0, q) cin >> L[i] >> R[i] >> X[i];
//---------------------code--------------------//
_segTree segTree(n);
segTree.construct(A, 0, n - 1, 0);
rep(i, 0, q)
{
cout << segTree.Query(X[i], 0, n - 1, L[i], R[i], 0) << " ";
}
return 0;
}
Time complexity : O((2n - 1)*k + qklogn)
Space complexity : O((2n - 1)*2k)
k -> number of bits

My code gets a TLE (time limit exceeded) on an online judge even though I have coded according to the editorial

This is the question link - QSET - Codechef
This is the editorial link - QSET - Editorial
Basically the question queries for the number of substring in some range [L, R]. I have implemented a segment tree to solve this question. I have closely followed the editorial.
I have created a struct to represent a node of the segment tree.
Can someone explain to me how to make this program faster? I'm guessing faster I/O is the key here. Is that so?
#include <iostream>
#include <vector>
#include <algorithm>
#include <string>
#define ll long long
using namespace std;
struct stnode
{
ll ans; // the answer for this interval
ll pre[3]; // pre[i] denotes number of prefixes of interval which modulo 3 give i
ll suf[3]; // suf[i] denotes number of suffixes of interval which modulo 3 give i
ll total; // sum of interval modulo 3
void setLeaf(int value)
{
if (value % 3 == 0) ans = 1;
else ans = 0;
pre[0] = pre[1] = pre[2] = 0;
suf[0] = suf[1] = suf[2] = 0;
pre[value % 3] = 1;
suf[value % 3] = 1;
total = value % 3;
}
void merge(stnode leftChild, stnode rightChild)
{
ans = leftChild.ans + rightChild.ans;
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
if ((i + j) % 3 == 0) ans += leftChild.suf[i] * rightChild.pre[j];
pre[0] = pre[1] = pre[2] = 0;
suf[0] = suf[1] = suf[2] = 0;
for (int i = 0; i < 3; i++)
{
pre[i] += leftChild.pre[i] + rightChild.pre[(3 - leftChild.total + i) % 3];
suf[i] += rightChild.suf[i] + leftChild.suf[(3 - rightChild.total + i) % 3];
}
total = (leftChild.total + rightChild.total) % 3;
}
} segtree[400005];
void buildST(string digits, int si, int ss, int se)
{
if (ss == se)
{
segtree[si].setLeaf(digits[ss] - '0');
return;
}
long left = 2 * si + 1, right = 2 * si + 2, mid = (ss + se) / 2;
buildST(digits, left, ss, mid);
buildST(digits, right, mid + 1, se);
segtree[si].merge(segtree[left], segtree[right]);
}
stnode getValue(int qs, int qe, int si, int ss, int se)
{
if (qs == ss && se == qe)
return segtree[si];
stnode temp;
int mid = (ss + se) / 2;
if (qs > mid)
temp = getValue(qs, qe, 2 * si + 2, mid + 1, se);
else if (qe <= mid)
temp = getValue(qs, qe, 2 * si + 1, ss, mid);
else
{
stnode temp1, temp2;
temp1 = getValue(qs, mid, 2 * si + 1, ss, mid);
temp2 = getValue(mid + 1, qe, 2 * si + 2, mid + 1, se);
temp.merge(temp1, temp2);
}
return temp;
}
void updateTree(int si, int ss, int se, int index, int new_value)
{
if (ss == se)
{
segtree[si].setLeaf(new_value);
return;
}
int mid = (ss + se) / 2;
if (index <= mid)
updateTree(2 * si + 1, ss, mid, index, new_value);
else
updateTree(2 * si + 2, mid + 1, se, index, new_value);
segtree[si].merge(segtree[2 * si + 1], segtree[2 * si + 2]);
}
int main()
{
ios_base::sync_with_stdio(false);
int n, m; cin >> n >> m;
string digits; cin >> digits;
buildST(digits, 0, 0, n - 1);
while (m--)
{
int q; cin >> q;
if (q == 1)
{
int x; int y; cin >> x >> y;
updateTree(0, 0, n - 1, x - 1, y);
}
else
{
int c, d; cin >> c >> d;
cout << getValue(c-1, d-1, 0, 0, n - 1).ans << '\n';
}
}
}
I am getting TLE for larger test cases, ie subtasks 3 and 4 (check the problem page). For subtasks 1 and 2, it gets accepted.
[www.codechef.com/viewsolution/5909107] is an accepted solution. It has pretty much the same code structure except that scanf is used instead of cin. But, I turned off the sync_with_stdio so that shouldn't be a differentiator, right?
I found out what was making this program slow. In the buildST function, I pass the string digits. Since the function is recursive, and the input is fairly large, this creates many copies of the string digits thus incurring large overhead.
I declared a char digits[] at the start of the program and modified the method buildST as follows (basically same but without string digits as a parameter:
void buildST(int si, int ss, int se)
{
if (ss == se)
{
segtree[si].setLeaf(digits[ss] - '0');
return;
}
long left = 2 * si + 1, right = 2 * si + 2, mid = (ss + se) / 2;
buildST(left, ss, mid);
buildST(right, mid + 1, se);
segtree[si].merge(segtree[left], segtree[right]);
}
This solution got accepted.

C++ implementation of knapsack branch and bound

I am trying to a C++ implementation of this knapsack problem using branch and bounding. There is a Java version on this website here: Implementing branch and bound for knapsack
I'm trying to make my C++ version print out the 90 that it should, however it's not doing that, instead, it's printing out 5.
Does anyone know where and what the problem may be?
#include <queue>
#include <iostream>
using namespace std;
struct node
{
int level;
int profit;
int weight;
int bound;
};
int bound(node u, int n, int W, vector<int> pVa, vector<int> wVa)
{
int j = 0, k = 0;
int totweight = 0;
int result = 0;
if (u.weight >= W)
{
return 0;
}
else
{
result = u.profit;
j = u.level + 1;
totweight = u.weight;
while ((j < n) && (totweight + wVa[j] <= W))
{
totweight = totweight + wVa[j];
result = result + pVa[j];
j++;
}
k = j;
if (k < n)
{
result = result + (W - totweight) * pVa[k]/wVa[k];
}
return result;
}
}
int knapsack(int n, int p[], int w[], int W)
{
queue<node> Q;
node u, v;
vector<int> pV;
vector<int> wV;
Q.empty();
for (int i = 0; i < n; i++)
{
pV.push_back(p[i]);
wV.push_back(w[i]);
}
v.level = -1;
v.profit = 0;
v.weight = 0;
int maxProfit = 0;
//v.bound = bound(v, n, W, pV, wV);
Q.push(v);
while (!Q.empty())
{
v = Q.front();
Q.pop();
if (v.level == -1)
{
u.level = 0;
}
else if (v.level != (n - 1))
{
u.level = v.level + 1;
}
u.weight = v.weight + w[u.level];
u.profit = v.profit + p[u.level];
u.bound = bound(u, n, W, pV, wV);
if (u.weight <= W && u.profit > maxProfit)
{
maxProfit = u.profit;
}
if (u.bound > maxProfit)
{
Q.push(u);
}
u.weight = v.weight;
u.profit = v.profit;
u.bound = bound(u, n, W, pV, wV);
if (u.bound > maxProfit)
{
Q.push(u);
}
}
return maxProfit;
}
int main()
{
int maxProfit;
int n = 4;
int W = 16;
int p[4] = {2, 5, 10, 5};
int w[4] = {40, 30, 50, 10};
cout << knapsack(n, p, w, W) << endl;
system("PAUSE");
}
I think you have put the profit and weight values in the wrong vectors. Change:
int p[4] = {2, 5, 10, 5};
int w[4] = {40, 30, 50, 10};
to:
int w[4] = {2, 5, 10, 5};
int p[4] = {40, 30, 50, 10};
and your program will output 90.
I believe what you are implementing is not a branch & bound algorithm exactly. It is more like an estimation based backtracking if I have to match it with something.
The problem in your algorithm is the data structure that you are using. What you are doing is to simply first push all the first levels, and then to push all second levels, and then to push all third levels to the queue and get them back in their order of insertion. You will get your result but this is simply searching the whole search space.
Instead of poping the elements with their insertion order what you need to do is to branch always on the node which has the highest estimated bound. In other words you are always branching on every node in your way regardless of their estimated bounds. Branch & bound technique gets its speed benefit from branching on only one single node each time which is most probable to lead to the result (has the highest estimated value).
Example : In your first iteration assume that you have found 2 nodes with estimated values
node1: 110
node2: 80
You are pushing them both to your queue. Your queue became "n2-n1-head" In the second iteration you are pushing two more nodes after branching on node1:
node3: 100
node4: 95
and you are adding them to you queue as well("n4-n3-n2-head". There comes the error. In the next iteration what you are going to get will be node2 but instead it should be node3 which has the highest estimated value.
So if I don't miss something in your code both your implementation and the java implementation are wrong. You should rather use a priority queue (heap) to implement a real branch & bound.
You are setting the W to 16, so the result is 5. The only item you can take into the knapsack is item 3 with profit 5 and weight 10.
#include <bits/stdc++.h>
using namespace std;
struct Item
{
float weight;
int value;
};
struct Node
{
int level, profit, bound;
float weight;
};
bool cmp(Item a, Item b)
{
double r1 = (double)a.value / a.weight;
double r2 = (double)b.value / b.weight;
return r1 > r2;
}
int bound(Node u, int n, int W, Item arr[])
{
if (u.weight >= W)
return 0;
int profit_bound = u.profit;
int j = u.level + 1;
int totweight = u.weight;
while ((j < n) && (totweight + arr[j].weight <= W))
{
totweight = totweight + arr[j].weight;
profit_bound = profit_bound + arr[j].value;
j++;
}
if (j < n)
profit_bound = profit_bound + (W - totweight) * arr[j].value /
arr[j].weight;
return profit_bound;
}
int knapsack(int W, Item arr[], int n)
{
sort(arr, arr + n, cmp);
queue<Node> Q;
Node u, v;
u.level = -1;
u.profit = u.weight = 0;
Q.push(u);
int maxProfit = 0;
while (!Q.empty())
{
u = Q.front();
Q.pop();
if (u.level == -1)
v.level = 0;
if (u.level == n-1)
continue;
v.level = u.level + 1;
v.weight = u.weight + arr[v.level].weight;
v.profit = u.profit + arr[v.level].value;
if (v.weight <= W && v.profit > maxProfit)
maxProfit = v.profit;
v.bound = bound(v, n, W, arr);
if (v.bound > maxProfit)
Q.push(v);
v.weight = u.weight;
v.profit = u.profit;
v.bound = bound(v, n, W, arr);
if (v.bound > maxProfit)
Q.push(v);
}
return maxProfit;
}
int main()
{
int W = 55; // Weight of knapsack
Item arr[] = {{10, 60}, {20, 100}, {30, 120}};
int n = sizeof(arr) / sizeof(arr[0]);
cout << "Maximum possible profit = "
<< knapsack(W, arr, n);
return 0;
}
**SEE IF THIS HELPS**