summarize variables from current row and its previous - sas

This is my input:
data a;
policy_no = 1;
X_NET_PREMIUM_AMT = 1;
X_NET_PREMIUM_V_AMT = 8;
output;
policy_no = 1;
X_NET_PREMIUM_AMT = 2;
X_NET_PREMIUM_V_AMT = 6;
output;
policy_no = 1;
X_NET_PREMIUM_AMT = 9;
X_NET_PREMIUM_V_AMT = 4;
output;
policy_no = 1;
X_NET_PREMIUM_AMT = 12;
X_NET_PREMIUM_V_AMT = 5;
output;
policy_no = 1;
X_NET_PREMIUM_AMT = 6;
X_NET_PREMIUM_V_AMT = 3;
output;
policy_no = 2;
X_NET_PREMIUM_AMT = 18;
X_NET_PREMIUM_V_AMT = 78;
output;
policy_no = 2;
X_NET_PREMIUM_AMT = 25;
X_NET_PREMIUM_V_AMT = 16;
output;
policy_no = 2;
X_NET_PREMIUM_AMT = 91;
X_NET_PREMIUM_V_AMT = 42;
output;
policy_no = 3;
X_NET_PREMIUM_AMT = 123;
X_NET_PREMIUM_V_AMT = 52;
output;
policy_no = 3;
X_NET_PREMIUM_AMT = 4;
X_NET_PREMIUM_V_AMT = 1;
output;
run;
What I need to do is for every policy_no:
X_NET_PREMIUM_AMT = X_NET_PREMIUM_AMT OF PREVIOUS ROW + X_NET_PREMIUM_V_AMT OF CURRENT ROW.
So far - no problems.
What makes it difficult for me to do is,
I need the X_NET_PREMIUM_AMT to be updated for the next calculation,
I will explain:
row #1 (policy_no = 1):
X_NET_PREMIUM_AMT = 0(no previouse row) + 8 = 8;
row #2 (policy_no = 1):
X_NET_PREMIUM_AMT = 8 + 6 = 14;
row #3 (policy_no = 1):
X_NET_PREMIUM_AMT = 14 + 4 = 18;
And so on....
Unfortunately,
I have no idea how to do this...
Would aprreciate your help

What you are looking for is a cumulative sum of X_NET_PREMIUM_AMT per policy_no. Because overwriting your original variable is not a good idea I'll name the new variable cumsum.
data b;
set a;
by policy_no;
retain cumsum;
if first.policy_no then cumsum=X_NET_PREMIUM_V_AMT; else cumsum=X_NET_PREMIUM_V_AMT+cumsum;
run;
So when a different policy_no is encountered cumsum will be the same as X_NET_PREMIUM_V_AMT, for the other rows of this policy_no it will be X_NET_PREMIUM_V_AMT+cumsum. Your data has to be sorted by policy_no for this code which is the case here.

data out;
set a;
by policy_no;
OUT_X_NET_PREMIUM_AMT = sum(X_NET_PREMIUM_AMT, lag1(X_NET_PREMIUM_AMT));
OUT_X_NET_PREMIUM_V_AMT = sum(X_NET_PREMIUM_V_AMT, lag1(X_NET_PREMIUM_V_AMT));
if last.policy_no then output;
run;
Use lag1() function for that - returns previous value of variable.
Maybe you mean last 3 values:
OUT_X_NET_PREMIUM_AMT = sum(X_NET_PREMIUM_AMT, lag1(X_NET_PREMIUM_AMT), lag2(X_NET_PREMIUM_AMT));
OUT_X_NET_PREMIUM_V_AMT = sum(X_NET_PREMIUM_V_AMT, lag1(X_NET_PREMIUM_V_AMT), lag2(X_NET_PREMIUM_V_AMT));

Related

Wrong cost adjacency matrix

I try to obtain the adjacency matrix of weights, and then use it in the calculation of the minimum weight path. There is a problem, when I try to display it, I get a wrong result :
By logic, the diagonal must have only 0, and in the places where the vertices are adjacent, must be the weight of the edge
//set the source and destination of each edge
g->edge[0]->src = 0;
g->edge[0]->dest = 1;
g->edge[0]->weight = 9;
g->edge[1]->src = 0;
g->edge[1]->dest = 10;
g->edge[1]->weight = 6;
g->edge[2]->src = 1;
g->edge[2]->dest = 2;
g->edge[2]->weight = 3;
g->edge[3]->src = 1;
g->edge[3]->dest = 10;
g->edge[3]->weight = 2;
g->edge[4]->src = 2;
g->edge[4]->dest = 3;
g->edge[4]->weight = 2;
g->edge[5]->src = 2;
g->edge[5]->dest = 6;
g->edge[5]->weight = 3;
g->edge[6]->src = 2;
g->edge[6]->dest = 5;
g->edge[6]->weight = 3;
g->edge[7]->src = 3;
g->edge[7]->dest = 4;
g->edge[7]->weight = 5;
g->edge[8]->src = 4;
g->edge[8]->dest = 5;
g->edge[8]->weight = 4;
g->edge[9]->src = 6;
g->edge[9]->dest = 10;
g->edge[9]->weight = 2;
g->edge[10]->src = 6;
g->edge[10]->dest = 7;
g->edge[10]->weight = 9;
g->edge[11]->src = 7;
g->edge[11]->dest = 8;
g->edge[11]->weight = 7;
g->edge[12]->src = 7;
g->edge[12]->dest = 9;
g->edge[12]->weight = 2;
g->edge[13]->src = 8;
g->edge[13]->dest = 9;
g->edge[13]->weight = 7;
g->edge[14]->src = 9;
g->edge[14]->dest = 10;
g->edge[14]->weight = 5;
My code :
for (i = 0; i < numberOfVertices; i++)
{
adjacency_matrix[i][i] = 0;
for (j = i + 1; j < numberOfVertices; j++)
{
adjacency_matrix[i][j] = g->edge[i]->weight;
adjacency_matrix[j][i] = g->edge[i]->weight;
}
}
What's wrong?
for (i = 0; i < numberOfVertices; i++)
{
adjacency_matrix[i][i] = 0;
for (j = i + 1; j < numberOfVertices; j++)
{
adjacency_matrix[i][j] = g->edge[i]->weight;
adjacency_matrix[j][i] = g->edge[i]->weight;
}
}
In this code you are setting every edge from vertex i to every other vertex to the same weight. I do not think this is what you want.
( Note: It is hard to know what you do want. When reporting a problem you need to include a description of both what happens AND what you wanted to happen. "It's wrong!" is almost useless as a bug report. )

meshgrid matlab to c++ different results

here is the matlab code i'm trying to convert to c++
where
size(Iorig) == 1334X 2026
%% label checkers
Label = zeros(size(Iorig));
Margins = 11;
[X,Y] = meshgrid(1:size(Iorig,2),1:size(Iorig,1));
k = 1;
for i = 1:4
for j = 1:6
rr = rect{i,j};
x1 = rr(1);
x2 = rr(1) + rr(3);
y1 = rr(2);
y2 = rr(2) + rr(4);
Label(X>=x1+Margins&X<x2-Margins&Y>=y1+Margins&Y<y2-Margins) = k;
k = k+1;
end
end
I understand that we want to label the rectangles which are found in the previous step, there are 24 of those.
but I don't understand how to convert this line into easy c++ code without allocating a huge buffer of X and Y which basically just holds... indices..
thanks for your help here is what i started doing.
//label Checkers
List<List<int>^>^ label = gcnew List<List<int>^>();
int margins = 11;
int k = 1;
for (size_t i = 0; i < 4; i++)
{
for (size_t j = 0; j < 6; j++)
{
MacbethCheckerBatchesColor^ rect = autoDetectMacbethResult[i * 6 + j];
Point^ r = rect->Points[0];
int x1 = r->X;
int y1 = r->Y;
r = rect->Points[2];
int x2 = r->X;
int y2 = r->Y;
for (int h = 0; h < inputImage->HeightLines; h++)
{
List<int>^ tempRow = gcnew List<int>();
for (int w = 0; w < inputImage->WidthColumns; w++)
{
if ( (w>= x1+margins) & (w<x2-margins) & (h >= y1+margins) & (h<y2-margins) )
{
tempRow->Add(k);
}
else
{
tempRow->Add(0);
}
}
label->Add(tempRow);
}
k= k+100;//i tried here many other numbers... same result
}
}
Here is my result can you please help me find my mistake, the rectangles are the same, I guesss I have some other logical mistake.

Free memory after malloc in a loop

I got some memory allocated in a loop - how to free it when I am done with tr_data variable ?
(I am fairly new to C++)
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
struct svm_problem tr_data;
tr_data.l = (int) prm_num_samples_anchored.array[bar];
tr_data.y = Malloc(double, tr_data.l);
tr_data.x = Malloc(struct svm_node*, tr_data.l);
for (int row = 0; row < tr_data.l; row++)
{
tr_data.y[row] = ta0.array[bar-row-1];
//leak
svm_node* tr_data_x_onerow = Malloc(svm_node, num_features+1);
tr_data_x_onerow[0].index = 1; tr_data_x_onerow[0].value = in0.array[bar-row-1]; tr_data_x_onerow[1].index = 2; tr_data_x_onerow[1].value = in1.array[bar-row-1]; tr_data_x_onerow[2].index = 3; tr_data_x_onerow[2].value = in2.array[bar-row-1]; tr_data_x_onerow[3].index = 4; tr_data_x_onerow[3].value = in3.array[bar-row-1]; tr_data_x_onerow[4].index = 5; tr_data_x_onerow[4].value = in4.array[bar-row-1]; tr_data_x_onerow[5].index = 6; tr_data_x_onerow[5].value = in5.array[bar-row-1]; tr_data_x_onerow[6].index = 7; tr_data_x_onerow[6].value = in6.array[bar-row-1]; tr_data_x_onerow[7].index = 8; tr_data_x_onerow[7].value = in7.array[bar-row-1]; tr_data_x_onerow[8].index = 9; tr_data_x_onerow[8].value = in8.array[bar-row-1]; tr_data_x_onerow[9].index = 10;
tr_data_x_onerow[num_features].index = -1; //Each row of properties should be terminated with a -1 according to the readme
tr_data.x[row] = tr_data_x_onerow;
}
... few operation on tr_data
... and this does not work
for (int row = 0; row <tr_data.l; row++)
{
free(tr_data_x_onerow);
}
for (int row = 0; row <tr_data.l; row++)
{
free(tr_data.x[row]);
}
But please, just don't do this. This is C++. Use a vector or some other sane collection.

How to create training data for libsvm (as an svm_node struct)

I am trying to train an svm for a simple xor problem programmatically using libsvm to understand how the library works. The problem (i think) seems to be that i construct svm_node incorrectly; maybe i have trouble understanding the whole pointers to pointers thing. Could anybody help with this? I first construct a matrix for the xor problem then try to assign values from the matrix to svm_node (i am using 2 steps here because my real data will be in matrix format).
When testing the model i get incorrect values (always -1).
In a previous question i got help with the parameters C and gamma; these should be OK now since i got correct classifications for the xor problem using other code. Thanks again Pedrom!
I have searched in several places for answers, e.g. the Readme and in the SvmToy example; no luck however.
Here is the code that outputs incorrect classifications...
Thanks in advance!
//Parameters---------------------------------------------------------------------
svm_parameter param;
param.svm_type = C_SVC;
param.kernel_type = RBF;
param.degree = 3;
param.gamma = 0.5;
param.coef0 = 0;
param.nu = 0.5;
param.cache_size = 100;
param.C = 1;
param.eps = 1e-3;
param.p = 0.1;
param.shrinking = 1;
param.probability = 0;
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
//Problem definition-------------------------------------------------------------
svm_problem prob;
//Length, 4 examples
prob.l = 4;
//x values matrix of xor values
QVector< QVector<double> >matrix;
QVector<double>row(2);
row[0] = 1;row[1] = 1;
matrix.push_back(row);
row[0] = 1;row[1] = 0;
matrix.push_back(row);
row[0] = 0;row[1] = 1;
matrix.push_back(row);
row[0] = 0;row[1] = 0;
matrix.push_back(row);
//This part i have trouble understanding
svm_node* x_space = new svm_node[3];
svm_node** x = new svm_node *[prob.l];
//Trying to assign from matrix to svm_node training examples
for (int row = 0;row < matrix.size(); row++){
for (int col = 0;col < 2;col++){
x_space[col].index = col;
x_space[col].value = matrix[row][col];
}
x_space[2].index = -1; //Each row of properties should be terminated with a -1 according to the readme
x[row] = x_space;
}
prob.x = x;
//yvalues
prob.y = new double[prob.l];
prob.y[0] = -1;
prob.y[1] = 1;
prob.y[2] = 1;
prob.y[3] = -1;
//Train model---------------------------------------------------------------------
svm_model *model = svm_train(&prob,&param);
//Test model----------------------------------------------------------------------
svm_node* testnode = new svm_node[3];
testnode[0].index = 0;
testnode[0].value = 1;
testnode[1].index = 1;
testnode[1].value = 0;
testnode[2].index = -1;
//Should return 1 but returns -1
double retval = svm_predict(model,testnode);
qDebug()<<retval;
It seems you've been trying to get this example to work for weeks. I followed the style in svm-train.c that comes with libsvm. I used your values for C and gamma. It is working. I tried all points in the XOR example and it is giving correct results.
The summary of the problem you're having is that you're not allocating space for the 4 data points you train with, so you just over-write the data. This is a typical mistake with pointers in C. It may help you brushed up on pointers in C/C++.
Here's the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include "svm.h"
#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
struct svm_parameter param; // set by parse_command_line
struct svm_problem prob; // set by read_problem
struct svm_model *model;
struct svm_node *x_space;
int main(int argc, char **argv)
{
char input_file_name[1024];
char model_file_name[1024];
const char *error_msg;
param.svm_type = C_SVC;
param.kernel_type = RBF;
param.degree = 3;
param.gamma = 0.5;
param.coef0 = 0;
param.nu = 0.5;
param.cache_size = 100;
param.C = 1;
param.eps = 1e-3;
param.p = 0.1;
param.shrinking = 1;
param.probability = 0;
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
//Problem definition-------------------------------------------------------------
prob.l = 4;
//x values matrix of xor values
double matrix[prob.l][2];
matrix[0][0] = 1;
matrix[0][1] = 1;
matrix[1][0] = 1;
matrix[1][1] = 0;
matrix[2][0] = 0;
matrix[2][1] = 1;
matrix[3][0] = 0;
matrix[3][1] = 0;
//This part i have trouble understanding
svm_node** x = Malloc(svm_node*,prob.l);
//Trying to assign from matrix to svm_node training examples
for (int row = 0;row <prob.l; row++){
svm_node* x_space = Malloc(svm_node,3);
for (int col = 0;col < 2;col++){
x_space[col].index = col;
x_space[col].value = matrix[row][col];
}
x_space[2].index = -1; //Each row of properties should be terminated with a -1 according to the readme
x[row] = x_space;
}
prob.x = x;
//yvalues
prob.y = Malloc(double,prob.l);
prob.y[0] = -1;
prob.y[1] = 1;
prob.y[2] = 1;
prob.y[3] = -1;
//Train model---------------------------------------------------------------------
svm_model *model = svm_train(&prob,&param);
//Test model----------------------------------------------------------------------
svm_node* testnode = Malloc(svm_node,3);
testnode[0].index = 0;
testnode[0].value = 1;
testnode[1].index = 1;
testnode[1].value = 0;
testnode[2].index = -1;
//This works correctly:
double retval = svm_predict(model,testnode);
printf("retval: %f\n",retval);
svm_destroy_param(&param);
free(prob.y);
free(prob.x);
free(x_space);
return 0;
}

LIBSVM training data format (x values in svm_node for svm_problem)

I am using LIBSVM to do a simple XOR classification programmatically, trying to understand how the functions work. I have set up the problem following the instructions in the Readme as close as possible. Still i get the wrong output when using svm_predict (always 1 or -1).
In a related question somebody suggested that the problem might arise when using very few training examples. I tried increasing the number of examples to 20 but this did not help.
I suspect that the problem is somewhere in the definition of prob.x and/or prob.y but can't understand where. Could you help clarify how to define prob.x and prob.y using svm_node?
I hade searched thoroughly but cannot find an answer... E.g. Here, here, here, here, and here.
Thanks in advance!
Here is my code:
//Parameters
svm_parameter param;
param.svm_type = C_SVC;
param.kernel_type = RBF;
param.degree = 3;
param.gamma = 0;
param.coef0 = 0;
param.nu = 0.5;
param.cache_size = 100;
param.C = 0.4;
param.eps = 1e-3;
param.p = 0.1;
param.shrinking = 1;
param.probability = 0;
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
//Problem definition
svm_problem prob;
//Length
prob.l = 4; //number of training examples
//x values
svm_node** x = new svm_node *[prob.l]; //Array of pointers to pointers to arrays
svm_node* x_space1 = new svm_node[3]; //Fist training example
svm_node* x_space2 = new svm_node[3]; //Second training example
svm_node* x_space3 = new svm_node[3]; //Third training example
svm_node* x_space4 = new svm_node[3]; //Fourth training example
x_space1[0].index = 1; //Fist training example
x_space1[0].value = 1;
x_space1[1].index = 2;
x_space1[1].value = 1;
x_space1[2].index = -1;
x_space2[0].index = 1; //Second training example
x_space2[0].value = 1;
x_space2[1].index = 2;
x_space2[1].value = 0;
x_space2[2].index = -1;
x_space3[0].index = 1; //Third training example
x_space3[0].value = 0;
x_space3[1].index = 2;
x_space3[1].value = 1;
x_space3[2].index = -1;
x_space4[0].index = 1; //Fourth training example
x_space4[0].value = 0;
x_space4[1].index = 2;
x_space4[1].value = 0;
x_space4[2].index = -1;
x[0] = x_space1; //Set each training example to x
x[1] = x_space2;
x[2] = x_space3;
x[3] = x_space4;
prob.x = x; //Assign x to the struct field prob.x
//yvalues
prob.y = new double[prob.l];
prob.y[0] = -1;
prob.y[1] = 1;
prob.y[2] = 1;
prob.y[3] = -1;
//Train model
svm_model *model = svm_train(&prob,&param);
//Test model
svm_node* testnode = new svm_node[3];
testnode[0].index = 1;
testnode[0].value = 1;
testnode[1].index = 2;
testnode[1].value = 0;
testnode[2].index = -1;
double retval = svm_predict(model,testnode);
qDebug()<<retval; //Should return +1 but returns -1
It seems a problem with your parameters. For instance, param.gamma shouldn't be zero if you are using a RBF kernel.
Why is your XOR problem 3-dimensional? You do not need the third dimension in each point (in fact you define it but not use it, I am not sure what will libSVM do, but for sure it will affect the chosen gamma, as the libSVM heuristics chooses 1/number_of_dimensions)
Your C parameter looks suspucious ( 0.4 may be way to low, try 1000)