Matlab Spdiags equivalent in EIGEN C++ - c++

I'm searching for an equivalent of A=Spdiags(B,d,N,N)in C++. This function extracts the diagonals element of the matrix B by taking the columns of B and placing them along the diagonals specified by the vector d. N N are the size of the output matrix A.
I've searched in Eigen, but it seems that it does not exist.
any ideas?

There's no built in method as far as I know but it's not too hard to do this by building a new matrix via indices. Notice that the kth diagonal runs from index (max(1, 1-k), max(1, 1-k)+k) to (min(m, n-k), min(m, n-k)+k)
template <typename Scalar>
Eigen::SparseMatrix<Scalar> spdiags(const Matrix<Scalar, -1, -1>& B, const Eigen::Matrix<int, -1, 1>& d, size_t m, size_t n) {
Eigen::SparseMatrix<Scalar> A(m,n);
typedef Eigen::Triplet<Scalar> T;
std::vector<T> triplets;
triplets.reserve(std::min(m,n)*d.size());
for (int k = 0; k < d.size(); k++) {
int i_min = std::max(0, -d(k));
int i_max = std::min(m - 1, n - d(k) - 1);
int B_idx_start = m >= n ? d(k) : 0;
for (int i = i_min; i <= i_max; i++) {
triplets.push_back( T(i, i+k, B(B_idx_start + i, k)) );
}
}
A.setFromTriplets(triplets.begin(), triplets.end());
return A;
}
Note I haven't tested this but you get the idea. The first index into B is a little weird but I think it's right.
Other version, spdiags(A):
Eigen::MatrixXd spdiags(const Eigen::SparseMatrix<double>& A) {
// find nonzero diagonals by iterating over all nonzero elements
// d(i) = 1 if the ith diagonal of A contains a nonzero, 0 else
Eigen::VectorXi d = Eigen::VectorXi::Zero(A.rows() + A.cols() - 1);
for (int k=0; k < A.outerSize(); ++k) {
for (SparseMatrix<double>::InnerIterator it(A,k); it; ++it) {
d(it.col() - it.row() + A.rows() - 1) = 1;
}
}
int num_diags = d.sum();
Eigen::MatrixXd B(std::min(A.cols(), A.rows()), num_diags);
// fill B with diagonals
int B_col_idx = 0;
int B_row_sign = A.rows() >= A.cols() ? 1 : -1;
for (int i = 1 - A.rows(); i <= A.cols() - 1; i++) {
if (d(i + A.rows() - 1)) {
const auto& diag = A.diagonal(i);
int B_row_start = std::max(0, B_row_sign * i);
B.block(B_row_start, B_col_idx, diag.size(), 1) = diag;
B_col_idx++;
}
}
return B;
}
same disclaimer: haven't tested, but should work. Replace double with template <typename Scalar> as before if you want

here is a solution i've made. I've implemented the diagonal(i) because this function is not taken account by my eigen version (how can i know which version i use?). I obtain a good results with this, but i don't know if can more optimize it :
void spdiags(Eigen::SparseMatrix<double> A)
{
//Extraction of the diagnols before the main diagonal
vector<double> vec1; int flag=0;int l=0;
int i=0; int j=0; vector<vector<double> > diagD;
vector<vector<double> > diagG; int z=0; int z1=0;
for(int i=0;i<A.rows();i++)
{l=i;
do
{
if(A.coeff(l,j)!=0)
flag=1;
vec1.push_back(A.coeff(l,j));
l++;j++;
}while(l<A.rows() && j<A.cols());
if(flag==1) {diagG.resize(diagG.size()+1);diagG[z]=vec1; z++; }
vec1.clear(); l=0;j=0; flag=0; cout<<endl;
}
flag=0;z=0; vec1.clear();
// Extraction of the diagonals after the main diagonal
for(int i=1;i<A.cols();i++)
{l=i;
do
{
if(A.coeff(j,l)!=0)
flag=1;
vec1.push_back(A.coeff(j,l));
l++;j++;
}while(l<A.cols() && j<A.rows());
if(flag==1) {diagD.resize(diagD.size()+1);diagD[z]=vec1; z++; }
vec1.clear(); l=0;j=0; flag=0; cout<<endl;
}
// End extraction of the diagonals
Eigen::VectorXi d = Eigen::VectorXi::Zero(A.rows() + A.cols() - 1);
for (int k=0; k < A.outerSize(); ++k)
{
for (SparseMatrix<double>::InnerIterator it(A,k); it; ++it)
{
d(it.col() - it.row() + A.rows() - 1) = 1;
}
}
int num_diags = d.sum();
Eigen::MatrixXd B(std::min(A.cols(), A.rows()), num_diags);
// fill B with diagonals
Eigen::ArrayXd v;
int B_col_idx = 0;
int B_row_sign = A.rows() >= A.cols() ? 1 : -1;
int indG=diagG.size()-1; int indD=0;
for (int i = 1 - A.rows(); i <=A.cols() - 1; i++)
{
if (d(i + A.rows() - 1))
{
if(i<1)
{ v.resize(diagG[indG].size());
for(int i=0;i<diagG[indG].size();i++)
{
v(i)=diagG[indG][i];
}
int B_row_start = std::max(0, B_row_sign * i);
B.block(B_row_start, B_col_idx, diagG[indG].size(), 1) = v;
B_col_idx++;
indG--;
}
else
{
v.resize(diagD[indD].size());
for(int i=0;i<diagD[indD].size();i++)
{
v(i)=diagD[indD][i] ;
}
int B_row_start = std::max(0, B_row_sign * i);
B.block(B_row_start, B_col_idx, diagD[indD].size(), 1) = v;
B_col_idx++;
indD++;
}
}
}
cout<<B<<endl; //the result of the function
}//end of the function

Related

double free or corruption (!prev) occured using the vector in C++

When I was writing the Iteration Merge Sort Code using the vector in C++,I met the error double free or corruption (!prev) when the vector size n is between 17 and 30;When n is smaller than 17 or larger than 30,there is no error.And the code Implemented sorting function.But I didn't use free or delete,why this error will occur?
#include <iostream>
#include <vector>
#include <cstdlib>
#include <random>
using namespace std;
// v1[left...middle] and v1[middle+1...right] are Ordered,merge them to v2;
void Merge(vector<int> &v1, vector<int> &v2, int left, int middle, int right)
{
int i = left, j = left, k = middle + 1;
while (i <= middle && k <= right)
{
if (v1[i] <= v1[k])
v2[j] = v1[i++];
else
v2[j] = v1[k++];
++j;
}
while (i <= middle)
{
v2[j++] = v1[i++];
}
while (k <= right)
{
v2[j++] = v1[k++];
}
}
// Merge adjacent subsequences of length interval in v1 into v2
void MergePass(vector<int> &v1, vector<int> &v2, int interval)
{
int i = 0, k = v1.size() - 2 * interval + 1;
while (i < k)
{
Merge(v1, v2, i, i + interval - 1, i + 2 * interval - 1);
i += 2 * interval;
}
/*for (i = 0; i < v1.size() - 2 * interval + 1; i += 2 * interval)
{
Merge(v1, v2, i, i + interval - 1, i + 2 * interval - 1);
}*/
if (i < v1.size() - interval)
Merge(v1, v2, i, i + interval - 1, v1.size() - 1);
else
{
for (; i < v1.size(); i++)
{
v2[i] = v1[i];
}
}
}
void MergeSort(vector<int> &v)
{
int k= v.size();
vector<int> v1(k);
int i = 1;
while (i < v.size())
{
MergePass(v, v1, i);
i *= 2;
MergePass(v1, v, i);
i *= 2;
}
}
int main()
{
vector<int> v;
int n;
cout << "input the size:";
cin >> n;
for (int j = 0; j < n; j++)
{
v.push_back(rand() % 1000 + 1);
}
MergeSort(v);
I have solved this problem.
In MergePass function
if (i < v1.size() - interval)
Merge(v1, v2, i, i + interval - 1, v1.size() - 1);
the size() returns std::size_t,when v1.size()<interval,v1.size()<interval will get a very large positive integer,Merge(v1, v2, i, i + interval - 1, v1.size() - 1) will still be run.So there is out-of-bounds.I change it to
if (i < (int)v1.size() - interval)
Merge(v1, v2, i, i + interval - 1, v1.size() - 1);
Thanks to #PaulMcKenzie and everyone helped me.

Reason for Segmentation Fault During Depth First Search on Tree of Large Size C++

I'm trying to solve https://open.kattis.com/problems/rootedsubtrees and part of the solution requires finding the minimum distance between any 2 nodes on the tree. To do this, I'm using Lowest Common Ancestor as a subroutine. Part of my LCA code uses a DFS to traverse the tree. Somehow, running this code on a line graph of size 200000 leads to a segmentation fault during the DFS section of the code.
#pragma GCC optimize("Ofast")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,fma")
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef vector<int> vi;
#define fast_cin() \
ios_base::sync_with_stdio(false); \
cin.tie(NULL); \
cout.tie(NULL);
int n, q, idx;
vector<int> adjlist[200009];
vector<int> L, E,
H; // depth at traversal index, node at traversal index, first traversal index of node
void dfs(int cur, int depth) {
cout << "dfs " << cur << " " << idx << endl;
H[cur] = idx;
E[idx] = cur;
L[idx++] = depth;
for (int &nxt : adjlist[cur]) {
if (H[nxt] != -1) continue;
dfs(nxt, depth + 1);
E[idx] = cur; // backtrack to current node
L[idx++] = depth;
}
}
class SparseTable { // OOP style
private:
vi A, P2, L2;
vector<vi> SpT; // the Sparse Table
public:
SparseTable() {} // default constructor
SparseTable(vi &initialA) { // pre-processing routine
A = initialA;
int n = (int)A.size();
int L2_n = (int)log2(n) + 1;
P2.assign(L2_n, 0);
L2.assign(1 << L2_n, 0);
for (int i = 0; i <= L2_n; ++i) {
P2[i] = (1 << i); // to speed up 2^i
L2[(1 << i)] = i; // to speed up log_2(i)
}
for (int i = 2; i < P2[L2_n]; ++i)
if (L2[i] == 0) L2[i] = L2[i - 1]; // to fill in the blanks
// the initialization phase
SpT = vector<vi>(L2[n] + 1, vi(n));
for (int j = 0; j < n; ++j) SpT[0][j] = j; // RMQ of sub array [j..j]
// the two nested loops below have overall time complexity = O(n log n)
for (int i = 1; P2[i] <= n; ++i) // for all i s.t. 2^i <= n
for (int j = 0; j + P2[i] - 1 < n; ++j) { // for all valid j
int x = SpT[i - 1][j]; // [j..j+2^(i-1)-1]
int y = SpT[i - 1][j + P2[i - 1]]; // [j+2^(i-1)..j+2^i-1]
SpT[i][j] = A[x] <= A[y] ? x : y;
}
}
int RMQ(int i, int j) {
int k = L2[j - i + 1]; // 2^k <= (j-i+1)
int x = SpT[k][i]; // covers [i..i+2^k-1]
int y = SpT[k][j - P2[k] + 1]; // covers [j-2^k+1..j]
return A[x] <= A[y] ? x : y;
}
};
int LCA(int u, int v, SparseTable &SpT) {
if (H[u] > H[v]) swap(u, v);
return E[SpT.RMQ(H[u], H[v])];
}
int APSP(int u, int v, SparseTable &SpT) {
int ancestor = LCA(u, v, SpT);
return L[H[u]] + L[H[v]] - 2 * L[H[ancestor]];
}
int main() {
fast_cin();
cin >> n >> q;
L.assign(2 * (n + 9), 0);
E.assign(2 * (n + 9), 0);
H.assign(n + 9, -1);
idx = 0;
int u, v;
for (int i = 0; i < n - 1; i++) {
cin >> u >> v;
u--;
v--;
adjlist[u].emplace_back(v);
adjlist[v].emplace_back(u);
}
dfs(0, 0);
SparseTable SpT(L);
ll d;
while (q--) {
cin >> u >> v;
u--;
v--;
d = (ll) APSP(u, v, SpT) + 1;
cout << (ll) n - d + (d) * (d + 1) / 2 << endl;
}
return 0;
}
Using the following Python Code to generate the input of a large line graph
n = 200000
q = 1
print(n, q)
for i in range(1, n):
print(i, i+1)
print(1, 200000)
I get the following last few lines of output before my program crashes.
.
.
.
dfs 174494 174494
dfs 174495 174495
dfs 174496 174496
dfs 174497 174497
dfs 174498 174498
Segmentation fault (core dumped)
Is the problem an issue of exhausting stack space with the recursion or something else?
You posted a lot of code, but here is one obvious error in the SparseMatrix class:
std::vector<int> P2;
//...
P2.assign(L2_n, 0);
for (int i = 0; i <= L2_n; ++i)
{
P2[i] = (1 << i); // <-- Out of bounds access when i == L2_n
To show you the error, change that line of code to this:
P2.at(i) = (1 << i); // <-- Out of bounds access when i == L2_n
You will now get a std::out_of_range exception thrown.
If you write a loop using <=, that loop will be considered suspicious, since a lot of off-by-one and buffer overrun errors occur with loop conditions written this way.
I believe stack exhaustion was the main problem in running the code on my machine. I re-implemented the DFS in an iterative fashion.
stack<tuple<int, int, bool>> st; // cur, depth, first_time
st.push ({0, 0, 1});
while (!st.empty()) {
auto [cur, depth, first_time] = st.top();
st.pop();
if (first_time){
H[cur] = idx;
}
E[idx] = cur;
L[idx++] = depth;
for (int &nxt : adjlist[cur]) {
if (H[nxt] != -1) continue;
st.push({cur, depth, 0});
st.push({nxt, depth+1, 1});
break;
}
}
and my code was able to run the large testcase on my machine.
I'm not sure is this is relevant to the original question, but after this change, the code still flagged a run-time error on the online judge and I eventually realized that the issue was that the sparse table was using too much memory, so I fixed that by avoiding wasted declared but not used memory spaces in rows of the sparse table. Then the online judge deemed it as being too slow. So I reverted the DFS code back to the recursive version, and it was accepted. Note that the accepted solution actually crashes on my machine when running the large testcase... I guess my machine has a more limited stack space than the online grader.
The accepted solution is here
#pragma GCC optimize("Ofast")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,fma")
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef vector<int> vi;
#define fast_cin() \
ios_base::sync_with_stdio(false); \
cin.tie(NULL); \
cout.tie(NULL);
int n, q, idx;
vector<int> adjlist[(int)2e5 + 9];
vector<int> L, E,
H; // depth at traversal index, node at traversal index, first traversal index of node
void dfs(int cur, int depth) {
H[cur] = idx;
E[idx] = cur;
L[idx++] = depth;
for (int &nxt : adjlist[cur]) {
if (H[nxt] != -1) continue;
dfs(nxt, depth + 1);
E[idx] = cur; // backtrack to current node
L[idx++] = depth;
}
}
class SparseTable { // OOP style
private:
vi A, P2, L2;
vector<vi> SpT; // the Sparse Table
public:
SparseTable() {} // default constructor
SparseTable(vi &initialA) { // pre-processing routine
A = initialA;
int n = (int)A.size();
int L2_n = (int)log2(n) + 1;
P2.assign(L2_n + 1, 0);
L2.assign((1 << L2_n) + 1, 0);
for (int i = 0; i <= L2_n; ++i) {
P2[i] = (1 << i); // to speed up 2^i
L2[(1 << i)] = i; // to speed up log_2(i)
}
for (int i = 2; i < P2[L2_n]; ++i)
if (L2[i] == 0) L2[i] = L2[i - 1]; // to fill in the blanks
// the initialization phase
SpT = vector<vi>(L2[n] + 1, vi());
SpT[0] = vi(n, 0);
for (int j = 0; j < n; ++j) SpT[0][j] = j; // RMQ of sub array [j..j]
// the two nested loops below have overall time complexity = O(n log n)
for (int i = 1; P2[i] <= n; ++i) { // for all i s.t. 2^i <= n
SpT[i] = vi(n + 1 - P2[i]); // initialize SpT[i]
for (int j = 0; j + P2[i] - 1 < n; ++j) { // for all valid j
int x = SpT[i - 1][j]; // [j..j+2^(i-1)-1]
int y = SpT[i - 1][j + P2[i - 1]]; // [j+2^(i-1)..j+2^i-1]
SpT[i][j] = A[x] <= A[y] ? x : y;
}
}
}
int RMQ(int i, int j) {
int k = L2[j - i + 1]; // 2^k <= (j-i+1)
int x = SpT[k][i]; // covers [i..i+2^k-1]
int y = SpT[k][j - P2[k] + 1]; // covers [j-2^k+1..j]
return A[x] <= A[y] ? x : y;
}
};
int LCA(int u, int v, SparseTable &SpT) {
if (H[u] > H[v]) swap(u, v);
return E[SpT.RMQ(H[u], H[v])];
}
int APSP(int u, int v, SparseTable &SpT) {
int ancestor = LCA(u, v, SpT);
return L[H[u]] + L[H[v]] - 2 * L[H[ancestor]];
}
int main() {
fast_cin();
cin >> n >> q;
L.assign(2 * (n), 0);
E.assign(2 * (n), 0);
H.assign(n, -1);
idx = 0;
int u, v;
for (int i = 0; i < n - 1; i++) {
cin >> u >> v;
u--;
v--;
adjlist[u].emplace_back(v);
adjlist[v].emplace_back(u);
}
dfs(n - 1, 0);
SparseTable SpT(L);
ll d;
while (q--) {
cin >> u >> v;
u--;
v--;
d = (ll)APSP(u, v, SpT) + 1LL;
cout << (ll)n - d + (d) * (d + 1) / (ll)2 << endl;
}
return 0;
}

Translate 2D Sequence in Array; C++

I want to implement a function that is able to translate(rotate) the sequence of a 2d array to the desired destination index. A, B, and C represents the length of the sequence. Source is the beginning of the sequence to be rotated. Source in the example below would be A. Dst is the index of the beginning of the target moving. Input/Output example: Before:
double A[][2] = { {0,0}, {1,1}, {2,2}, {3,3}, {4,4}, {5,5}, {6,6}, {7,7} };
^dst A B C
Calling function translate(A, 8, 5, 3, 1);
{ {0,0}, {5,5}, {6,6}, {7,7}, {1,1}, {2,2}, {3,3}, {4,4} };
A B C
When I run my code, the final index doesn't make it to the output array. What am I missing on the conditions?
/*
A-list of locations; 2d array
n- number of cities
src-index of the beginning of the sequence to be moved
len- length of sequence to translate
dst-index of the beginning of the target of moving
*/
void translate ( double A[][2], int n, int src, int len, int dst ) {
vector<vector<int>> variable;
variable.resize(n);
//to move sequence
for(int i = 0; i <= n - 1; i++) {
int source_index = (src + i)%(n - 1);
//cout << source_index << endl;
int destination_index = (dst - 1 + i)%(n - 1) + 1;
vector<int> variable2;
variable2.push_back(A[source_index][0]);
variable2.push_back(A[source_index][1]);
variable.at(destination_index) = variable2;
}
//get vector into array
for(int i = 1; i < n; i++){
A[i][0] = variable[i][0];
A[i][1] = variable[i][1];
}
}
My output:
(0, 0),(5, 5),(6, 6),(0, 0),(1, 1),(2, 2),(3, 3),(4, 4)
After working through it, I think I finally got it.
void translate ( double A[][2], int n, int src, int len, int dst ) {
vector<vector<int>> variable;
variable.resize(n);
//to move sequence
for(int i = 0; i <= n - 1; i++) {
int source_index = (src - 1 + i)%(n - 1) + 1;
//cout << source_index << endl;
int destination_index = (dst - 1 + i)%(n - 1) + 1;
vector<int> variable2;
variable2.push_back(A[source_index][0]);
variable2.push_back(A[source_index][1]);
variable.at(destination_index) = variable2;
}
//get vector into array
for(int i = 1; i < n; i++){
A[i][0] = variable[i][0];
A[i][1] = variable[i][1];
}
}

OpenMP Race Condition when finding Closest Pair

I'm doing an assignment to find the closest pair between two disjoint sets A and B. I'm using OpenMP to parallelize the recursion of the algorithm, but I am running into some data races. I am very new to OpenMP, so I think it has something to do with incorrect privating/sharing of variables. I have put the full algorithm below:
float OMPParticleSim::efficient_closest_pair(int n, vector<Particle> & p, vector<Particle> & q)
{
// brute force
if(n <= 3) {
float m = numeric_limits<float>::max();
for(int i = 0; i < n - 2; i++) {
for(int j = i + 1; j < n - 1; j++) {
if((set_A.find(p[i].id) != set_A.end() && set_A.find(p[j].id) != set_A.end()) || (set_B.find(p[i].id) != set_B.end() && set_B.find(p[j].id) != set_B.end())) {
continue;
}
float distsq = pow(p[i].x - p[j].x, 2) + pow(p[i].y - p[j].y, 2) + pow(p[i].z - p[j].z, 2);
pair<pair<Particle, Particle>, float> pa = make_pair(make_pair(p[i], p[j]), sqrt(distsq));
#pragma omp critical
insert(pa);
m = min(m, distsq);
}
}
return sqrt(m);
}
// copy first ceil(n/2) points of p to pl
vector<Particle> pl;
int ceiling = ceil(n/2);
for(int i = 0; i < ceiling; i++) {
pl.push_back(p[i]);
}
// copy first ceil(n/2) points of q to ql
vector<Particle> ql;
for(int i = 0; i < ceiling; i++) {
ql.push_back(q[i]);
}
// copy remaining floor(n/2) points of p to pr
vector<Particle> pr;
for(int i = ceiling; i < p.size(); i++) {
pr.push_back(p[i]);
}
// copy remaining floor(n/2) points of q to qr
vector<Particle> qr;
for(int i = ceiling; i < q.size(); i++) {
qr.push_back(p[i]);
}
float dl, dr, d;
#pragma omp task firstprivate(pl, ql, p, q, n) private(dl) shared(closest_pairs)
dl = efficient_closest_pair(ceil(n / 2), pl, ql);
#pragma omp task firstprivate(pl, ql, p, q, n) private(dr) shared(closest_pairs)
dr = efficient_closest_pair(ceil(n / 2), pr, qr);
#pragma omp taskwait
d = min(dl, dr);
float m = p[ceil(n / 2) - 1].x;
vector<Particle> s;
for(int i = 0; i < q.size(); i++) {
if(fabs(q[i].x - m) < d) {
s.push_back(Particle(q[i]));
}
}
int num = s.size();
float dminsq = d * d;
for (int i = 0; i < num - 2; i++) {
int k = i + 1;
while(k <= num - 1 && pow(s[k].y - s[i].y, 2) < dminsq) {
if((set_A.find(s[i].id) != set_A.end() && set_A.find(s[k].id) != set_A.end()) || (set_B.find(s[i].id) != set_B.end() && set_B.find(s[k].id) != set_B.end())) {
k++;
continue;
}
float dist = pow(s[k].x - s[i].x, 2) + pow(s[k].y - s[i].y, 2) + pow(s[k].z - s[i].z, 2);
pair<pair<Particle, Particle>, float> pa = make_pair(make_pair(s[i], s[k]), sqrt(dist));
#pragma omp critical
insert(pa);
dminsq = min(dist, dminsq);
k++;
}
}
return sqrt(dminsq);
}
The insert method looks like this:
void OMPParticleSim::insert(pair<pair<Particle, Particle>, float> & pair) {
if(closest_pairs.size() == 0) {
closest_pairs.push_back(pair);
return;
}
for(int i = 0; i < closest_pairs.size(); ++i) {
if(closest_pairs[i].second > pair.second) {
closest_pairs.insert(closest_pairs.begin() + i, 1, pair);
break;
}
}
if(closest_pairs.size() > k) {
closest_pairs.pop_back();
}
}
The start of the parallel region is here:
void OMPParticleSim::do_closest_pair(int num_threads) {
vector<Particle> p = set;
// presort on x
sort(p.begin(), p.end(), sortxomp);
vector<Particle> q = p;
// presort on y
sort(q.begin(), q.end(), sortyomp);
float cp;
#pragma omp parallel num_threads(num_threads)
{
#pragma omp single
{
cp = efficient_closest_pair(set.size(), p, q);
}
}
sort(closest_pairs.begin(), closest_pairs.end(), sortpairsomp);
}
All of the results are stored in a list closest_pairs and output to a file. The reason I know there are data races is because some of the Particle id's are negative (all of them start positive), and running the program multiple times results in different values being written to the file. Any help would be great!
The error was the dl and dr should have been shared between the tasks.

How to solve another version of Kakuro

The problem is, in a table of (h+1)*(w+1),the first row contains w values: a[1] ... a[w] which fill in the 2rd ... (w+1)th column. The first column contains h values: b[1] ... b[h] which fill in the 2rd ... (h+1)th row. sum(a[i]) is equal to sum(b[i]).
The question is to give one possible solution: result, so that sum(result[i][K]) for a certain K, is equal to a[i] with result[i][K] != result[j][K] (i != j and 0 < i < h+1). And the same rule for rows. PS: All the integers are positive.
For example:
a[] = {10, 3, 3}, b[] = {9, 7}
// 10 3 3
// 9 6 2 1
// 7 4 1 2
result = {6, 2, 1;
4, 1, 2}
It is like Kakuro but not the same. I cannot figure out which algorithm to apply, if anyone knows how to solve it, please give me some help. Thanks a lot.
You can always solve your problem with backtracking. Basic idea here: from top-to-bottom and left-to-right try a valid value in the partially filled table, backtrack when this value doesn't lead to a solution.
Minimal example in C++ with annotated solve:
#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
class Problem {
public:
template<class AIter, class BIter>
Problem(AIter abegin, AIter aend, BIter bbegin, BIter bend)
: m_width(std::distance(abegin, aend))
, m_height(std::distance(bbegin, bend))
, m_table(new int[(m_width + 1) * (m_height + 1)])
{
std::fill(m_table.get(), m_table.get() + (m_width + 1) * (m_height + 1), 0);
for(size_t i = 0; i < m_width; ++i)
m_table[i + 1] = *abegin++;
for(size_t j = 0; j < m_height; ++j)
m_table[(j + 1) * (m_width + 1)] = *bbegin++;
}
bool Solve() { return solve(0, 0); }
int operator()(size_t i, size_t j) const;
private:
int a(size_t i) const { return m_table[i + 1]; }
int b(size_t j) const { return m_table[(j + 1) * (m_width + 1)]; }
int get(size_t i, size_t j) const { return m_table[(j + 1) * (m_width + 1) + i + 1]; }
void set(size_t i, size_t j, int value) { m_table[(j + 1) * (m_width + 1) + i + 1] = value; }
int colSum(size_t i) const;
int rowSum(size_t j) const;
bool solve(size_t i, size_t j);
size_t m_width, m_height;
std::unique_ptr<int[]> m_table; // (width + 1) x (height + 1)
};
int Problem::colSum(size_t i) const {
int sum = 0;
for(size_t j = 0; j < m_height; ++j)
sum += get(i, j);
return sum;
}
int Problem::rowSum(size_t j) const {
int sum = 0;
for(size_t i = 0; i < m_width; ++i)
sum += get(i, j);
return sum;
}
// solves column-wise using backtracking
bool Problem::solve(size_t i, size_t j) {
size_t width = m_width, height = m_height;
// past last column?
if(i >= width) {
// found solution
return true;
}
// remainder in column and row
int remColSum = a(i) - colSum(i);
int remRowSum = b(j) - rowSum(j);
// early break
if(remColSum <= 0 || remRowSum <= 0)
return false;
// starting at the minimal required value (1 or remColSum if on last row)
int startValue = j + 1 < height ? 1 : remColSum;
// remaining row sum cannot support the starting value
if(remRowSum < startValue)
return false;
// end value minimum remaining sum
int endValue = remColSum < remRowSum ? remColSum : remRowSum;
// on last element must equal starting value
if(i + 1 == width && j + 1 == height && startValue != endValue)
return false;
// column-wise i.e. next cell is (i, j + 1) wrapped
int nextI = i + (j + 1) / height;
int nextJ = (j + 1) % height;
for(int value = startValue; value <= endValue; ++value) {
bool valid = true;
// check row up to i
for(size_t u = 0; u < i && valid; ++u)
valid = (get(u, j) != value);
// check column up to j
for(size_t v = 0; v < j && valid; ++v)
valid = (get(i, v) != value);
if(!valid) {
// value is invalid in partially filled table
continue;
}
// value produces a valid, partially filled table, now try recursing
set(i, j, value);
// upon first solution break
if(solve(nextI, nextJ))
return true;
}
// upon failure backtrack
set(i, j, 0);
return false;
}
int Problem::operator()(size_t i, size_t j) const {
return get(i, j);
}
int main() {
int a[] = { 10, 3, 3 };
int b[] = { 9, 7 };
size_t width = sizeof(a) / sizeof(*a);
size_t height = sizeof(b) / sizeof(*b);
Problem problem(a, a + width, b, b + height);
if(!problem.Solve()) {
std::cout << "No solution" << std::endl;
}
for(size_t j = 0; j < height; ++j) {
if(j == 0) {
std::cout << " ";
for(size_t i = 0; i < width; ++i)
std::cout << " " << a[i];
std::cout << std::endl;
}
std::cout << b[j];
for(size_t i = 0; i < width; ++i) {
int value = problem(i, j);
if(value == 0)
std::cout << " ";
else
std::cout << " " << value;
}
std::cout << std::endl;
}
return 0;
}