I must implement in C++ using diblok The Seeded Region Growing algorithm due to Adams and Bischof which can be found here http://bit.ly/1nIxphj.
It is the fig.2 pseudocode.
After I choose the seeded points using the mouse , it throws this message : Unhandled exception at 0x00416ca0 in diblook.exe: 0xC0000005: Access violation reading location 0x3d2f6e68.
This is the code of the function:
void CDibView::OnLButtonDblClk(UINT nFlags, CPoint point)
{ BEGIN_SOURCE_PROCESSING;
int** labels = new int* [dwHeight];
for(int k = 0;k < dwHeight; k++)
labels[k] = new int[dwWidth];
int noOfRegions = 2;
double meanRegion[2];
double noOfPointsInRegion[2];
for(int i = 0; i < dwHeight ; i++)
for(int j = 0; j < dwWidth ; j++)
{
labels[i][j] = -1;
}
if(noOfPoints < 6)
{
CPoint p = GetScrollPosition() + point;
pos[noOfPoints].x = p.x;
pos[noOfPoints].y = p.y;
int regionLabel = 0;
if(noOfPoints <= noOfPoints / 2)
labels[p.x][p.y] = regionLabel;
else
labels[p.x][p.y] = regionLabel + 1;
noOfPoints++;
}
else
{
// Calculate the mean of each region
for(int i = 0; i < noOfRegions; i++)
{
for(int j = 0 ; j < noOfPoints; j++)
{
if(labels[pos[j].x][pos[j].y] == i)
{
meanRegion[i] += lpSrc[pos[j].x * w + pos[j].y];
}
}
meanRegion[i] /= 3;
noOfPointsInRegion[i] = 3;
}
for(int seedPoint = 0; seedPoint < noOfPoints; seedPoint++)
{
// define list
node *start, *temp;
start = (node *) malloc (sizeof(node));
temp = start;
temp -> next = NULL;
for(int i = -1; i <= 1; i++)
for(int j = -1; j<= 1; j++)
{
if(i == 0 && j == 0) continue;
int gamma = lpSrc[(pos[seedPoint].x + i) * + pos[seedPoint].y + j] - lpSrc[pos[seedPoint].x * w + pos[seedPoint].y];
push(start, pos[seedPoint].x + i, pos[seedPoint].y + j, gamma);
}
sort(start);
if(start != NULL)
{
node *y = start;
pop(start);
int sameNeighbour = 1;
int neighValue = -1;
for(int k = -1; k <= 1; k++)
for(int l = -1; l <= 1;l++)
{
if(k ==0 && l==0) continue;
if(labels[y -> x + k][y -> y + l] != -1)
{
neighValue = labels[y -> x + k][y -> y + l];
break;
}
}
for(int k = -1; k <= 1; k++)
for(int l = -1; l <= 1;l++)
{
if(k == 0 && l==0) continue;
if(labels[y -> x + k][y -> y = 1] != -1 && labels[y -> x + k][y -> y + l] != neighValue)
sameNeighbour = 0;
}
if(sameNeighbour == 1)
{
labels[y -> x][y -> y] = neighValue;
meanRegion[neighValue] = meanRegion[neighValue] * noOfPointsInRegion[neighValue] / noOfPointsInRegion[neighValue] + 1;
noOfPointsInRegion[neighValue]++;
for(int k = -1; k <= 1; k++)
for(int l = -1; l <= 1;l++)
{
if(k == 0 && l == 0) continue;
if(labels[y -> x + k][y -> y + l] == -1 && find(start, y->x + k, y->y + l) == 0)
{
int gammak = meanRegion[neighValue] - lpSrc[(y->x +k) * w + (y->y + l)];
push(start, y->x + k, y->y + l, gammak);
sort(start);
}
}
}
else
{
labels[y->x][y->y] = -1;
}
}
}
int noOfRegionOne = 0;
int noOfRegionTwo = 0;
int noOfBoundary = 0;
for(int i = 0; i< dwHeight; i++)
for(int j = 0;j<dwWidth; j++)
{
if(labels[i][j] == -1)
noOfBoundary++;
else if(labels[i][j] == 0)
noOfRegionOne++;
else if(labels[i][j] == 1)
noOfRegionTwo++;
}
CString info;
info.Format("Boundary %d, One %d, Two %d", noOfBoundary, noOfRegionOne, noOfRegionTwo);
AfxMessageBox(info);
noOfPoints = 0;
}
CScrollView::OnLButtonDblClk(nFlags, point);
END_SOURCE_PROCESSING;
}
After a choose to break the running, this is what is shown http://postimg.org/image/j2sh9k0a1/
Can anybody tell what is wrong and why it doesn't work?
Thanks.
Your screenshot shows that your node (Y is a terrible name, incidentally) has garbage values in it. Offhand, I suspect that 'sort' is overwriting your node values, resulting in garbage. I would create a static copy of your current node to prevent it from changing during processing:
Change
node *y = start;
pop(start);
to
node y = *start;
pop(start);
Related
So here is the code I wrote for the question(https://codingcompetitions.withgoogle.com/kickstart/round/0000000000436140/000000000068c509#problem). For the sample input I am getting the right answer but it is not clearing test set 1.
I have created the code such that it checks for "1" up down right and left for each element of array and sees whether from that junction an L can be made.
For reference these are the conditions in the question:
"A segment is called "good" if all the cells in the segment contain only 1s.
An "L-shape" is defined as an unordered pair of segments, which has all the following properties:
Each of the segments must be a "good" segment.
The two segments must be perpendicular to each other.
The segments must share one cell that is an endpoint of both segments.
Segments must have length at least 2.
The length of the longer segment is twice the length of the shorter segment."
#include <bits/stdc++.h>
int main()
{
using namespace std;
int t, u;
cin >> t;
for (u = 1; u <= t; u++) {
int i, j, k, l = 0, a[40][40], r, c, right = 0, left = 0, up = 0, down = 0, downc, upc, lc, rc;
cin >> r >> c;
for (i = 0; i < r; i++) {
for (j = 0; j < c; j++) {
cin >> a[i][j];
}
}
for (i = 0; i < r; i++) {
for (j = 0; j < c; j++) {
if (a[i][j] == 1) {
right = 0;
left = 0;
up = 0;
down = 0;
downc = 0;
upc = 0;
lc = 0;
rc = 0;
for (k = i; k < r; k++) {
if (a[k][j] == 0)
break;
else
down++;
}
for (k = i; k >= 0; k--) {
if (a[k][j] == 0)
break;
else
up++;
}
for (k = j; k < c; k++) {
if (a[i][k] == 0)
break;
else
right++;
}
for (k = j; k >= 0; k--) {
if (a[i][k] == 0)
break;
else
left++;
}
if (!(up > 1 && down > 1 && right > 1 && left > 1)) {
downc = down;
upc = up;
rc = right;
lc = left;
if (up >= 2) {
if (right >= 4) {
while ((upc * 2) > right)
upc--;
l = l + upc - 1;
}
upc = up;
if (left >= 4) {
while ((upc * 2) > left)
upc--;
l = l + upc - 1;
}
upc = up;
}
if (down >= 2) {
if (right >= 4) {
while ((downc * 2) > right)
downc--;
l = l + downc - 1;
}
downc = down;
if (left >= 4) {
while ((downc * 2) > left)
downc--;
l = l + downc - 1;
}
downc = down;
}
if (right >= 2) {
if (up >= 4) {
while ((rc * 2) > up)
rc--;
l = l + rc - 1;
}
rc = right;
if (down >= 4) {
while ((rc * 2) > down)
rc--;
l = l + rc - 1;
}
rc = right;
}
if (left >= 2) {
if (up >= 4) {
while ((lc * 2) > up)
lc--;
l = l + lc - 1;
}
lc = left;
if (down >= 4) {
while ((lc * 2) > down)
lc--;
l = l + lc - 1;
}
lc = left;
}
}
}
}
}
cout << "Case #" << u << ": " << l << "\n";
}
}
Ok so I found the issue finally :
There was no need for " if (!(up > 1 && down > 1 && right > 1 && left > 1)) {.....}"
But still this code is too time consuming to run 1000x1000 grid case. Hence it fails test 2.
Edit: It was a mistake on my end. I bounded the array as 40x40. On changing it to a[1000][1000] it ran both tests successfully. Congos to me.
I was learning MO's Algorithm. In that I found a question. In which we have to make a program to take input n for n nodes of a tree then n-1 pairs of u and v denoting the connection between node u and node v. After that giving the n node values.
Then we will ask q queries. For each query we take input of k and l which denote the two nodes of that tree. Now we have to find the product of all the nodes in the path of k and l (including k and l).
I want to use MO's algorithm. https://codeforces.com/blog/entry/43230
But I am unable to make the code. Can anybody help me out in this.
The basic code for that would be:
int n, q;
int nxt[ N ], to[ N ], hd[ N ];
struct Que{
int u, v, id;
} que[ N ];
void init() {
// read how many nodes and how many queries
cin >> n >> q;
// read the edge of tree
for ( int i = 1 ; i < n ; ++ i ) {
int u, v; cin >> u >> v;
// save the tree using adjacency list
nxt[ i << 1 | 0 ] = hd[ u ];
to[ i << 1 | 0 ] = v;
hd[ u ] = i << 1 | 0;
nxt[ i << 1 | 1 ] = hd[ v ];
to[ i << 1 | 1 ] = u;
hd[ v ] = i << 1 | 1;
}
for ( int i = 0 ; i < q ; ++ i ) {
// read queries
cin >> que[ i ].u >> que[ i ].v;
que[ i ].id = i;
}
}
int dfn[ N ], dfn_, block_id[ N ], block_;
int stk[ N ], stk_;
void dfs( int u, int f ) {
dfn[ u ] = dfn_++;
int saved_rbp = stk_;
for ( int v_ = hd[ u ] ; v_ ; v_ = nxt[ v_ ] ) {
if ( to[ v_ ] == f ) continue;
dfs( to[ v_ ], u );
if ( stk_ - saved_rbp < SQRT_N ) continue;
for ( ++ block_ ; stk_ != saved_rbp ; )
block_id[ stk[ -- stk_ ] ] = block_;
}
stk[ stk_ ++ ] = u;
}
bool inPath[ N ];
void SymmetricDifference( int u ) {
if ( inPath[ u ] ) {
// remove this edge
} else {
// add this edge
}
inPath[ u ] ^= 1;
}
void traverse( int& origin_u, int u ) {
for ( int g = lca( origin_u, u ) ; origin_u != g ; origin_u = parent_of[ origin_u ] )
SymmetricDifference( origin_u );
for ( int v = u ; v != origin_u ; v = parent_of[ v ] )
SymmetricDifference( v );
origin_u = u;
}
void solve() {
// construct blocks using dfs
dfs( 1, 1 );
while ( stk_ ) block_id[ stk[ -- stk_ ] ] = block_;
// re-order our queries
sort( que, que + q, [] ( const Que& x, const Que& y ) {
return tie( block_id[ x.u ], dfn[ x.v ] ) < tie( block_id[ y.u ], dfn[ y.v ] );
} );
// apply mo's algorithm on tree
int U = 1, V = 1;
for ( int i = 0 ; i < q ; ++ i ) {
pass( U, que[ i ].u );
pass( V, que[ i ].v );
// we could our answer of que[ i ].id
}
}
This problem is a slight modification of the blog that you have shared.
Problem Tags:- MO's Algorithm, Trees, LCA, Binary Lifting, Sieve, Precomputation, Prime Factors
Precomputations:- Just we need to do some precomputations with seiveOfErothenesis to store the highest prime factor of each element possible in input constraints. Then using this we will store all the prime factors and their powers for each element in input array in another matrix.
Observation:- with the constraints you can see the there can be very few such primes possible for each element. For an element (10^6) there can be a maximum of 7 prime factors possible.
Modify MO Algo Given in blog:- Now in our compute method we just need to maintain a map that will store the current count of the prime factor. While adding or subtracting each element in solving the queries we will iterate on the prime factors of that element and divide our result(storing total no. of factors) with the old count of that prime and then update the count of that prime and the multiple our result with the new count.(This will be O(7) max for each addition/subtraction).
Complexity:- O(T * ((N + Q) * sqrt(N) * F)) where F is 7 in our case. F is the complexity of your check method().
T - no of test cases in input file.
N - the size of your input array.
Q - No. of queries.
Below is an implementation of the above approach in JAVA. computePrimePowers() and check() are the methods you would be interested in.
import java.util.*;
import java.io.*;
public class Main {
static int BLOCK_SIZE;
static int ar[];
static ArrayList<Integer> graph[];
static StringBuffer sb = new StringBuffer();
static boolean notPrime[] = new boolean[1000001];
static int hpf[] = new int[1000001];
static void seive(){
notPrime[0] = true;
notPrime[1] = true;
for(int i = 2; i < 1000001; i++){
if(!notPrime[i]){
hpf[i] = i;
for(int j = 2 * i; j < 1000001; j += i){
notPrime[j] = true;
hpf[j] = i;
}
}
}
}
static long modI[] = new long[1000001];
static void computeModI() {
for(int i = 1; i < 1000001; i++) {
modI[i] = pow(i, 1000000005);
}
}
static long pow(long x, long y) {
if (y == 0)
return 1;
long p = pow(x, y / 2);
p = (p >= 1000000007) ? p % 1000000007 : p;
p = p * p;
p = (p >= 1000000007) ? p % 1000000007 : p;
if ((y & 1) == 0)
return p;
else {
long tt = x * p;
return (tt >= 1000000007) ? tt % 1000000007 : tt;
}
}
public static void main(String[] args) throws Exception {
Reader s = new Reader();
int test = s.nextInt();
seive();
computeModI();
for(int ii = 0; ii < test; ii++){
int n = s.nextInt();
lcaTable = new int[19][n + 1];
graph = new ArrayList[n + 1];
arrPrimes = new int[n + 1][7][2];
primeCnt = new int[1000001];
visited = new int[n + 1];
ar = new int[n + 1];
for(int i = 0; i < graph.length; i++) graph[i] = new ArrayList<>();
for(int i = 1; i < n; i++){
int u = s.nextInt(), v = s.nextInt();
graph[u].add(v);
graph[v].add(u);
}
int ip = 1; while(ip <= n) ar[ip++] = s.nextInt();
computePrimePowers();
int q = s.nextInt();
LVL = new int[n + 1];
dfsTime = 0;
dfs(1, -1);
BLOCK_SIZE = (int) Math.sqrt(dfsTime);
int Q[][] = new int[q][4];
int i = 0;
while(q-- > 0) {
int u = s.nextInt(), v = s.nextInt();
Q[i][0] = lca(u, v);
if (l[u] > l[v]) {
int temp = u; u = v; v = temp;
}
if (Q[i][0] == u) {
Q[i][1] = l[u];
Q[i][2] = l[v];
}
else {
Q[i][1] = r[u]; // left at col1 in query
Q[i][2] = l[v]; // right at col2
}
Q[i][3] = i;
i++;
}
Arrays.sort(Q, new Comparator<int[]>() {
#Override
public int compare(int[] x, int[] y) {
int block_x = (x[1] - 1) / (BLOCK_SIZE + 1);
int block_y = (y[1] - 1) / (BLOCK_SIZE + 1);
if(block_x != block_y)
return block_x - block_y;
return x[2] - y[2];
}
});
solveQueries(Q);
}
System.out.println(sb);
}
static long res;
private static void solveQueries(int [][] Q) {
int M = Q.length;
long results[] = new long[M];
res = 1;
int curL = Q[0][1], curR = Q[0][1] - 1;
int i = 0;
while(i < M){
while (curL < Q[i][1]) check(ID[curL++]);
while (curL > Q[i][1]) check(ID[--curL]);
while (curR < Q[i][2]) check(ID[++curR]);
while (curR > Q[i][2]) check(ID[curR--]);
int u = ID[curL], v = ID[curR];
if (Q[i][0] != u && Q[i][0] != v) check(Q[i][0]);
results[Q[i][3]] = res;
if (Q[i][0] != u && Q[i][0] != v) check(Q[i][0]);
i++;
}
i = 0;
while(i < M) sb.append(results[i++] + "\n");
}
static int visited[];
static int primeCnt[];
private static void check(int x) {
if(visited[x] == 1){
for(int i = 0; i < 7; i++) {
int c = arrPrimes[x][i][1];
int pp = arrPrimes[x][i][0];
if(pp == 0) break;
long tem = res * modI[primeCnt[pp] + 1];
res = (tem >= 1000000007) ? tem % 1000000007 : tem;
primeCnt[pp] -= c;
tem = res * (primeCnt[pp] + 1);
res = (tem >= 1000000007) ? tem % 1000000007 : tem;
}
}
else if(visited[x] == 0){
for(int i = 0; i < 7; i++) {
int c = arrPrimes[x][i][1];
int pp = arrPrimes[x][i][0];
if(pp == 0) break;
long tem = res * modI[primeCnt[pp] + 1];
res = (tem >= 1000000007) ? tem % 1000000007 : tem;
primeCnt[pp] += c;
tem = res * (primeCnt[pp] + 1);
res = (tem >= 1000000007) ? tem % 1000000007 : tem;
}
}
visited[x] ^= 1;
}
static int arrPrimes[][][];
static void computePrimePowers() {
int n = arrPrimes.length;
int i = 0;
while(i < n) {
int ele = ar[i];
int k = 0;
while(ele > 1) {
int c = 0;
int pp = hpf[ele];
while(hpf[ele] == pp) {
c++; ele /= pp;
}
arrPrimes[i][k][0] = pp;
arrPrimes[i][k][1] = c;
k++;
}
i++;
}
}
static int dfsTime;
static int l[] = new int[1000001], r[] = new int[1000001], ID[] = new int[1000001], LVL[], lcaTable[][];
static void dfs(int u, int p){
l[u] = ++dfsTime;
ID[dfsTime] = u;
int i = 1;
while(i < 19) {
lcaTable[i][u] = lcaTable[i - 1][lcaTable[i - 1][u]];
i++;
}
i = 0;
while(i < graph[u].size()){
int v = graph[u].get(i);
i++;
if (v == p) continue;
LVL[v] = LVL[u] + 1;
lcaTable[0][v] = u;
dfs(v, u);
}
r[u] = ++dfsTime;
ID[dfsTime] = u;
}
static int lca(int u, int v){
if (LVL[u] > LVL[v]) {
int temp = u;
u = v; v = temp;
}
int i = 18;
while(i >= 0) {
if (LVL[v] - (1 << i) >= LVL[u]) v = lcaTable[i][v];
i--;
}
if (u == v) return u;
i = 18;
while(i >= 0){
if (lcaTable[i][u] != lcaTable[i][v]){
u = lcaTable[i][u];
v = lcaTable[i][v];
}
i--;
}
return lcaTable[0][u];
}
}
// SIMILAR SOLUTION FOR FINDING NUMBER OF DISTINCT ELEMENTS FROM U TO V
// USING MO's ALGORITHM
#include <bits/stdc++.h>
using namespace std;
const int MAXN = 40005;
const int MAXM = 100005;
const int LN = 19;
int N, M, K, cur, A[MAXN], LVL[MAXN], DP[LN][MAXN];
int BL[MAXN << 1], ID[MAXN << 1], VAL[MAXN], ANS[MAXM];
int d[MAXN], l[MAXN], r[MAXN];
bool VIS[MAXN];
vector < int > adjList[MAXN];
struct query{
int id, l, r, lc;
bool operator < (const query& rhs){
return (BL[l] == BL[rhs.l]) ? (r < rhs.r) : (BL[l] < BL[rhs.l]);
}
}Q[MAXM];
// Set up Stuff
void dfs(int u, int par){
l[u] = ++cur;
ID[cur] = u;
for (int i = 1; i < LN; i++) DP[i][u] = DP[i - 1][DP[i - 1][u]];
for (int i = 0; i < adjList[u].size(); i++){
int v = adjList[u][i];
if (v == par) continue;
LVL[v] = LVL[u] + 1;
DP[0][v] = u;
dfs(v, u);
}
r[u] = ++cur; ID[cur] = u;
}
// Function returns lca of (u) and (v)
inline int lca(int u, int v){
if (LVL[u] > LVL[v]) swap(u, v);
for (int i = LN - 1; i >= 0; i--)
if (LVL[v] - (1 << i) >= LVL[u]) v = DP[i][v];
if (u == v) return u;
for (int i = LN - 1; i >= 0; i--){
if (DP[i][u] != DP[i][v]){
u = DP[i][u];
v = DP[i][v];
}
}
return DP[0][u];
}
inline void check(int x, int& res){
// If (x) occurs twice, then don't consider it's value
if ( (VIS[x]) and (--VAL[A[x]] == 0) ) res--;
else if ( (!VIS[x]) and (VAL[A[x]]++ == 0) ) res++;
VIS[x] ^= 1;
}
void compute(){
// Perform standard Mo's Algorithm
int curL = Q[0].l, curR = Q[0].l - 1, res = 0;
for (int i = 0; i < M; i++){
while (curL < Q[i].l) check(ID[curL++], res);
while (curL > Q[i].l) check(ID[--curL], res);
while (curR < Q[i].r) check(ID[++curR], res);
while (curR > Q[i].r) check(ID[curR--], res);
int u = ID[curL], v = ID[curR];
// Case 2
if (Q[i].lc != u and Q[i].lc != v) check(Q[i].lc, res);
ANS[Q[i].id] = res;
if (Q[i].lc != u and Q[i].lc != v) check(Q[i].lc, res);
}
for (int i = 0; i < M; i++) printf("%d\n", ANS[i]);
}
int main(){
int u, v, x;
while (scanf("%d %d", &N, &M) != EOF){
// Cleanup
cur = 0;
memset(VIS, 0, sizeof(VIS));
memset(VAL, 0, sizeof(VAL));
for (int i = 1; i <= N; i++) adjList[i].clear();
// Inputting Values
for (int i = 1; i <= N; i++) scanf("%d", &A[i]);
memcpy(d + 1, A + 1, sizeof(int) * N);
// Compressing Coordinates
sort(d + 1, d + N + 1);
K = unique(d + 1, d + N + 1) - d - 1;
for (int i = 1; i <= N; i++) A[i] = lower_bound(d + 1, d + K + 1, A[i]) - d;
// Inputting Tree
for (int i = 1; i < N; i++){
scanf("%d %d", &u, &v);
adjList[u].push_back(v);
adjList[v].push_back(u);
}
// Preprocess
DP[0][1] = 1;
dfs(1, -1);
int size = sqrt(cur);
for (int i = 1; i <= cur; i++) BL[i] = (i - 1) / size + 1;
for (int i = 0; i < M; i++){
scanf("%d %d", &u, &v);
Q[i].lc = lca(u, v);
if (l[u] > l[v]) swap(u, v);
if (Q[i].lc == u) Q[i].l = l[u], Q[i].r = l[v];
else Q[i].l = r[u], Q[i].r = l[v];
Q[i].id = i;
}
sort(Q, Q + M);
compute();
}
}
Demo
I have to implement the CSR matrix data structure in C++ using 3 dynamic arrays (indexing starts at 0) and I've got stuck. So I have to implement 2 functions:
1) modify(int i, int j, TElem e) - modifies the value of (i,j) to e or adds if (if it does not exist) or deletes it if e is null.
2) element(int i, int j) const - returns the value found on (i,j)
I wanted to test my code in the next way:
Matrix m(10, 10);
for (int j = 0; j < m.nrColumns(); j++) {
m.modify(4, j, 3);
}
for (int i = 0; i < m.nrLines(); i++)
for (int j = 0; j < m.nrColumns(); j++)
if (i == 4)
assert(m.element(i, j) == 3);
else
assert(m.element(i, j) == NULL_TELEM);
And I got a surprise to see that m.element(4,j) will be 0 for j in the range (0,8) and only 3 for j=9.
This is my implementation of element(int i, int j) :
int currCol;
for (int pos = this->lines[i]; pos < this->lines[i+1]; pos++) {
currCol = this->columns[pos];
if (currCol == j)
return this->values[pos];
else if (currCol > j)
break;
}
return NULL_TELEM;
The constructor looks like this:
Matrix::Matrix(int nrLines, int nrCols) {
if (nrLines <= 0 || nrCols <= 0)
throw exception();
this->nr_lines = nrLines;
this->nr_columns = nrCols;
this->values = new TElem[1000];
this->values_capacity = 1;
this->values_size = 0;
this->lines = new int[nrLines + 1];
this->columns = new TElem[1000];
this->columns_capacity = 1;
this->columns_size = 0;
for (int i = 0; i <= nrLines; i++)
this->lines[i] = NULL_TELEM;
}
This is the "modify" method:
TElem Matrix::modify(int i, int j, TElem e) {
if (i < 0 || j < 0 || i >= this->nr_lines || j >= nr_columns)
throw exception();
int pos = this->lines[i];
int currCol = 0;
for (; pos < this->lines[i + 1]; i++) {
currCol = this->columns[pos];
if (currCol >= j)
break;
}
if (currCol != j) {
if (!(e == 0))
add(pos, i, j, e);
}
else if (e == 0)
remove(pos, i);
else
this->values[pos] = e;
return NULL_TELEM;
}
And this is the inserting method:
void Matrix::add(int index, int line, int column, TElem value)
{
this->columns_size++;
this->values_size++;
for (int i = this->columns_size; i >= index + 1; i--) {
this->columns[i] = this->columns[i - 1];
this->values[i] = this->values[i - 1];
}
this->columns[index] = column;
this->values[index] = value;
for (int i = line + 1; i <= this->nr_lines; i++)
this->lines[i]++;
}
Can somebody help me, please? I can't figure out why this happens and I really need to finish this implementation these days. It's pretty weird that is sees those positions having the value 0.
So having the next test that starts in the next way, I get a memory acces violation:
Matrix m(200, 300);
for (int i = m.nrLines() / 2; i < m.nrLines(); i++) {
for (int j = 0; j <= m.nrColumns() / 2; j++)
{
int v1 = j;
int v2 = m.nrColumns() - v1 - 1;
if (i % 2 == 0 && v1 % 2 == 0)
m.modify(i, v1, i * v1);
else
if (v1 % 3 == 0)
m.modify(i, v1, i + v1);
if (i % 2 == 0 && v2 % 2 == 0)
m.modify(i, v2, i * v2);
else
if (v2 % 3 == 0)
m.modify(i, v2, i + v2);
}
The error is thrown in the method "modify" at currCol = this->column[pos];
And if I look into the debugger it looks like:i=168, lines[i]=-842150451, lines[i+1]=10180,pos=-842150451.
Does anybody have any ideas why it looks this way?
Your code has two small errors.
When you try to find the insertion position in modify, you loop over the non-empty elements in the row:
int currCol = 0;
for (; pos < this->lines[i + 1]; i++) {
currCol = this->columns[pos];
if (currCol >= j)
break;
}
Here, you must update pos++ in each iteration instead of i++.
The second error occurs when you insert an element into column 0. The currCol will be zero, but your condition for adding a new element is
if (currCol != j) {
if (!(e == 0))
add(pos, i, j, e);
}
But j is zero, too, so nothing will be inserted. You can fix this by starting with a non-existing column:
int currCol = -1;
I have used openm to parallelize my c++ code as below:
int shell_num = 50, grparallel[shell_num],grbot[shell_num];
double p_x,p_y,grp[shell_num];
for (int f = 0; f < shell_num; f++)
{
grp[f] = 0;
grparallel[f] = 0;
grbot[f] = 0;
}
//some code...
#pragma omp parallel for reduction(+ : grp,grparallel,grbot)
for(int i = 0; i < N; i++){ //some code
for(int j = 0; j < N; j++){
if (j==i) continue;
double delta_x = x[i]-x[j],
delta_y = y[i]-y[j],
e_dot_e = e_x[i] * e_x[j] + e_y[i] * e_y[j],
e_cross_e = e_x[i] * e_y[j] - e_y[i] * e_x[j];
if (j > i)
{
double fasele = sqrt(dist(x[i],y[i],x[j],y[j],L));
for (int h = 0; h < shell_num; h++) //determine periodic distance between i and j is in which shel
{
if( L * h / 100 < fasele && fasele < L * (h + 1) / 100 )
{grp[h]+= e_dot_e;
double pdotr = abs(periodic(delta_x,L) * p_x + periodic(delta_y,L) * p_y)/fasele;
if (pdotr > 0.9659)
{
grparallel[h]+= 1;}else if(pdotr < 0.2588)
{
grbot[h]+= 1;
}
break;
}
}
}
}
}
When I run the code in terminal, there is an error:
‘grp’ has invalid type for ‘reduction’
The same error occurs for grparallel and grbot.
How can I remove the error?
I have a code that does Singular Value Decomposition (SVD) for square matrices. Code does the job however, it is quite slow and when matrix size increases it gets unbearable. As I am not familiar with parallel programming hence, I am asking advise from experts before I start digging deeper and eventually realize the action I want to achieve is not even possible.
Thank you in advance.
void SVD::decompose() {
bool flag;
int i, its, j, jj, k, l, nm;
double anorm, c, f, g, h, s, scale, x, y, z;
Row rv1(n);
g = scale = anorm = 0.0; //Householder reduction to bidiagonal form.
for (i = 0; i < n; i++) {
l = i + 2;
rv1[i] = scale*g;
g = s = scale = 0.0;
if (i < m) {
for (k = i; k < m; k++) scale += abs(u[k][i]);
if (scale != 0.0) {
for (k = i; k < m; k++) {
u[k][i] /= scale;
s += u[k][i] * u[k][i];
}
f = u[i][i];
g = -SIGN(sqrt(s), f);
h = f*g - s;
u[i][i] = f - g;
for (j = l - 1; j < n; j++) {
for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
f = s / h;
for (k = i; k < m; k++) u[k][j] += f*u[k][i];
}
for (k = i; k < m; k++) u[k][i] *= scale;
}
}
w[i] = scale *g;
g = s = scale = 0.0;
if (i + 1 <= m && i + 1 != n) {
for (k = l - 1; k < n; k++) scale += abs(u[i][k]);
if (scale != 0.0) {
for (k = l - 1; k < n; k++) {
u[i][k] /= scale;
s += u[i][k] * u[i][k];
}
f = u[i][l - 1];
g = -SIGN(sqrt(s), f);
h = f*g - s;
u[i][l - 1] = f - g;
for (k = l - 1; k < n; k++) rv1[k] = u[i][k] / h;
for (j = l - 1; j < m; j++) {
for (s = 0.0, k = l - 1; k < n; k++) s += u[j][k] * u[i][k];
for (k = l - 1; k < n; k++) u[j][k] += s*rv1[k];
}
for (k = l - 1; k < n; k++) u[i][k] *= scale;
}
}
anorm = MAX(anorm, (abs(w[i]) + abs(rv1[i])));
}
for (i = n - 1; i >= 0; i--) { //Accumulation of right-hand tranformations.
if (i < n - 1) {
if (g != 0.0) {
for (j = l; j < n; j++) // Double division to avoid possible underflow.
v[j][i] = (u[i][j] / u[i][l]) / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
for (k = l; k < n; k++) v[k][j] += s*v[k][i];
}
}
for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
}
v[i][i] = 1.0;
g = rv1[i];
l = i;
}
for (i = MIN(m, n) - 1; i >= 0; i--) { //Accumulation of left-hand transformations.
l = i + 1;
g = w[i];
for (j = l; j < n; j++) u[i][j] = 0.0;
if (g != 0.0) {
g = 1.0 / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
f = (s / u[i][i])*g;
for (k = i; k < m; k++) u[k][j] += f*u[k][i];
}
for (j = i; j < m; j++) u[j][i] *= g;
}
else for (j = i; j < m; j++) u[j][i] = 0.0;
++u[i][i];
}
for (k = n - 1; k >= 0; k--) { //Diagonalization of the bidiagonal form: Loop over
for (its = 0; its < 30; its++) { //singular values, and over allowed iterations.
flag = true;
for (l = k; l >= 0; l--) { //Test ofr splitting.
nm = l - 1;
if (l == 0 || abs(rv1[l]) <= eps*anorm) {
flag = false;
break;
}
if (abs(w[nm]) <= eps*anorm) break;
}
if (flag) {
c = 0.0; //Cancellatin of rv[l], if l>0.
s = 1.0;
for (i = l; i < k + 1; i++) {
f = s*rv1[i];
rv1[i] = c*rv1[i];
if (abs(f) <= eps*anorm) break;
g = w[i];
h = pythag(f, g);
w[i] = h;
h = 1.0 / h;
c = g*h;
s = -f*h;
for (j = 0; j < m; j++) {
y = u[j][nm];
z = u[j][i];
u[j][nm] = y*c + z*s;
u[j][i] = z*c - y*s;
}
}
}
z = w[k];
if (l == k) { //Convergence.
if (z < 0.0) { //Singular value is made nonnegative.
w[k] = -z;
for (j = 0; j < n; j++) v[j][k] = -v[j][k];
}
break;
}
x = w[l]; //Shift from bottom 2-by-2 minor.
nm = k - 1;
y = w[nm];
g = rv1[nm];
h = rv1[k];
f = ((y - z)*(y + z) + (g - h)*(g + h)) / (2.0*h*y);
g = pythag(f, 1.0);
f = ((x - z)*(x + z) + h*((y / (f + SIGN(g, f))) - h)) / x;
c = s = 1.0; //Next QR transformation:
for (j = l; j <= nm; j++) {
i = j + 1;
g = rv1[i];
y = w[i];
h = s*g;
g = c*g;
z = pythag(f, h);
rv1[j] = z;
c = f / z;
s = h / z;
f = x*c + g*s;
g = g*c - x*s;
h = y*s;
y *= c;
for (jj = 0; jj < n; jj++) {
x = v[jj][j];
z = v[jj][i];
v[jj][j] = x*c + z*s;
v[jj][i] = z*c - x*s;
}
z = pythag(f, h);
w[j] = z; //Rotation can be arbitrary if z = 0.
if (z) {
z = 1.0 / z;
c = f*z;
s = h*z;
}
f = c*g + s*y;
x = c*y - s*g;
for (jj = 0; jj < m; jj++) {
y = u[jj][j];
z = u[jj][i];
u[jj][j] = y*c + z*s;
u[jj][i] = z*c - y*s;
}
}
rv1[l] = 0.0;
rv1[k] = f;
w[k] = x;
}
}
}
Parts of your code can certainly be parallelized. How much you gain, that is an other question.
The easy way would be to use a common math library.
The fun way would be to maybe use OpenMP to do it yourself.
But befor you even think about OpenMP, consider to rearange your indices. You tend to loop over the first index alot, like in for (k = i; k < m; k++) u[k][i] *= scale;. This has a very bad cache hit rate in c++ for u[k][i] is basicly u[k*second_index_size+i]. If you swap the indices you get for (k = i; k < m; k++) u[i][k] *= scale; which makes perfect use of the cache.
You should see quite a speedup by implementing this.
Now for the OpenMP part.
Find out where the hot regions in your code are. Maybe use Visual Studio to do so. And then you could use OpenMP to parallelize certain for loops, like
#pragma omp parallel for
for (k = i; k < m; k++) u[i][k] *= scale;
What you will gain depends on where the hot regions are and how big your matrices are. Benchmarks will have to show.