If condition is not executed when the first condition is verified SAS - if-statement

The following code runs, but the result is wrong.
When the first condition is true, the code does not work correctly:
lag1 remains missing when its value should be 3.
Thanks for your help.
/* Sample input: a group id (VAL), a one-letter category (caract) and two numeric measures. */
data VALUES;
  input VAL caract $ var1 var2;
  datalines;
1 a 12 0
1 c 0 4
1 c 3 2
2 a 3 2
2 b 15 16
2 b 4 1
3 a 12 13
3 c 12 13
4 c 14 15
5 b 14 0
6 b 14 15
7 a 12 15
7 c 12 15
8 c 14 15
9 c 14 5
10 c 13 7
;
run;
/* lag_var: writes data set &dataset&lag from &dataset (read BY VAL), adding
   lag0..lag&lag, a per-VAL row counter (count) and maxi = max of lag1..lag&lag.
   Parameters: dataset = input data set name; lag = highest lag order generated. */
%macro lag_var(dataset, lag);
data &dataset&lag;
set &dataset;
by VAL;
%do i=0 %to &lag;
/* NOTE(review): each LAGn call sits in only one branch of this IF/ELSE, so it
   executes only on rows that take that branch. The reply that follows attributes
   the unexpected missing lag1 to this conditional execution of LAG. */
if caract eq 'b' then
lag&i=lag&i(var1);
else lag&i = lag&i(var2);
%end;
/* On the first row of each VAL group: restart the counter and blank lag1..lag&lag */
if first.VAL then do;
count=0;
%do i=1 %to &lag;
lag&i=.;
%end;
end;
/* Sum statement: count is retained, so it holds the row position within the VAL group */
count+1;
/* Blank lag&i on later rows of the group whose position is still <= i */
%do i=1 %to &lag;
if (not first.VAL and count<=&i) then do;
lag&i=.;
end;
%end;
maxi = max(of lag1 - lag&lag);
run;
%mend lag_var;
%lag_var(VALUES,3);

It is surely related to conditional execution of the LAG function. Try changing to something like this using TEMP1 and TEMP2 variables to hold the lagged values.
%do i=0 %to &lag;
  /* Call both LAG functions on every row, outside any IF, so neither call is skipped */
  temp1 = lag&i(var1);
  temp2 = lag&i(var2);
  /* Only the choice between the two lagged values is conditional */
  if caract eq 'b' then lag&i = temp1;
  else lag&i = temp2;
%end;

Related

The drop sentence doesn't work with variables created with macro arrays SAS

I am trying to run the following code, that calculates the slope of each variables' timeseries.
I need to drop the variables created with an array, because I use the same logic for other functions.
Nevertheless, the output data set keeps the variables ys_new&i._: and I get the warning: The variable 'ys_new3_:'n in the DROP, KEEP, or RENAME list has never been referenced.
I think the iterator has already been evaluated to 3 by the time the %do %while block ends.
If someone can help me, I would really appreciate it.
/* Sample input: one row per ID with 13 monthly values (M0..M12) for each of two series. */
data HAVE;
  input ID N_TRX_M0-N_TRX_M12 TRANSACTION_AMT_M0-TRANSACTION_AMT_M12;
  datalines;
1 3 6 3 3 7 8 6 10 5 5 8 7 7 379866 856839 307909 239980 767545 511806 603781 948936 566114 402214 844657 2197164 817390
2 51 56 55 73 48 57 54 53 55 52 49 72 53 6439314 7367157 4614827 9465017 3776064 3661525 7870605 3971889 4919128 10024385 4660264 7748467 7339863
3 5 . . . . . . . . . . . . 232165 . . . . . . . . . . . .
;
run;
/* slope: generates DATA step statements that, for every name in the parameter
   list, compute slope_<name> from the variables <name>_M12 .. <name>_M1.
   Must be called inside a DATA step. PARMBUFF makes the whole argument list
   available in SYSPBUFF, which is split on blanks, commas and parentheses. */
%Macro slope(variables)/parmbuff;
%let i = 1;
/* Get the first Parameter */
%let parm_&i = %scan(&syspbuff,&i,%str( %(, %)));
/* Loop until %scan returns an empty string (no more parameters) */
%do %while (%str(&&parm_&i.) ne %str());
array ys&i(12) &&parm_&i.._M12 - &&parm_&i.._M1;
array ys_new&i._[12];
/* Copy the non-missing values, in order, to the front of ys_new */
k = 1;
do j = 1 to 12;
if not(missing(ys&i(j))) then do;
ys_new&i._[k] = ys&i[j];
k + 1;
end;
end;
nonmissing = n(of ys_new&i._{*});
/* x takes the values 1..nonmissing, so its mean is (nonmissing + 1)/2 */
xbar = (nonmissing + 1)/2;
/* A slope is only computed when at least two points are present */
if nonmissing ge 2 then do;
ybar = mean(of ys&i(*));
cov = 0;
varx = 0;
do m=1 to nonmissing;
cov=sum(cov, (m-xbar)*(ys_new&i._(m)-ybar));
varx=sum(varx, (m-xbar)**2);
end;
slope_&&parm_&i. = cov/varx;
end;
%let i = %eval(&i+1);
/* Get next parm */
%let parm_&i = %scan(&syspbuff ,&i, %str( %(, %)));
%end;
/* NOTE(review): i was incremented before the loop test failed, so here it is one
   past the last parameter; the ys_new prefix below names an array that was never
   created, which matches the warning reported in the question. */
drop ys_new&i._: k j m nonmissing ybar xbar cov varx;
%mend;
/* Comma-separated base names passed to the macro */
%let var_slope =
N_TRX,
TRANSACTION_AMT
;
DATA FEATURES;
SET HAVE;
%slope(&var_slope)
RUN;
The simplest solution is to generate the DROP statement before the macro has a chance to change the value of the macro variable I .
array ys&i(12) &&parm_&i.._M12 - &&parm_&i.._M1;
array ys_new&i._[12];
drop ys_new&i._: k j m nonmissing ybar xbar cov varx;
You could use a _TEMPORARY_ array instead, but then you need to remember to clear the values on each iteration of the data step.
array ys_new&i._[12] _temporary_;
call missing(of ys_new&i._[*]);
Then you can leave the DROP statement at the end if you want.
drop k j m nonmissing ybar xbar cov varx;
You are correct. &i is 3 after it exits the do loop leading to the drop statement giving a warning that ys_new3_: does not exist. Instead, consider using a temporary array to avoid the drop statement altogether:
array ys_new&i._[12] _TEMPORARY_;

Sum consecutive observations in a dataset SAS

I have a dataset that looks like:
Hour Flag
1 1
2 1
3 .
4 1
5 1
6 .
7 1
8 1
9 1
10 .
11 1
12 1
13 1
14 1
I want to have an output dataset like:
Total_Hours Count
2 2
3 1
4 1
As you can see, I want to count the number of hours included in each period with consecutive "1s". A missing value ends the consecutive sequence.
How should I go about doing this? Thanks!
You'll need to do this in two steps. First step is making sure the data is sorted properly and determining the number of hours in a consecutive period:
/* Step 1: order by hour, then emit one row per run of consecutive flag = 1 records. */
proc sort data=<your dataset>;
  by hour;
run;

data work.consecutive_hours (keep=total_hours);
  set <your dataset> end=lastrec;

  /* Sum statement: total_hours starts at 0 and is retained across rows */
  if flag = 1 then total_hours + 1;
  else do;
    /* A non-1 flag closes the current run, if there is one */
    if total_hours > 0 then output;
    total_hours = 0;
  end;

  /* A run still open on the final record has to be written as well */
  if lastrec and total_hours > 0 then output;
run;
Now a simple SQL statement:
/* Step 2: frequency of each run length. */
proc sql;
  create table work.hour_summary as
    select total_hours, count(*) as count
      from work.consecutive_hours
      group by total_hours;
quit;
You will have to do two things:
1. compute the run lengths
2. compute the frequency of the run lengths
When using the implicit DATA step loop:
Each run-length occurrence can be computed and maintained in a retained tracking variable: a missing value or the end of the data triggers the output and resets the run length, and a non-missing value increments it.
The frequencies can then be computed with PROC FREQ.
An alternative is to use an explicit loop and a hash object for the frequency counts.
Example:
/* Sample input: Flag is 1 or missing for each Hour. */
data have;
  input Hour Flag;
  datalines;
1 1
2 1
3 .
4 1
5 1
6 .
7 1
8 1
9 1
10 .
11 1
12 1
13 1
14 1
;
/* Reads HAVE in an explicit loop, tallies each run length in a hash keyed by
   length, and writes the hash to data set WANT. */
data _null_;
  declare hash counts(ordered:'a');
  counts.defineKey('length');
  counts.defineData('length', 'count');
  counts.defineDone();

  do until (done);
    set have end=done;

    /* length accumulates while the flag is present */
    if not missing(flag) then length + 1;

    /* a missing flag or the last row closes the run, if one is open */
    if (missing(flag) or done) and length > 0 then do;
      /* find() loads the stored count for this length when the key exists */
      if counts.find() ne 0 then count = 0;
      count + 1;
      counts.replace();
      length = 0;
    end;
  end;

  counts.output(dataset:'want');
run;
An alternative
/* Uses BY ... NOTSORTED so that each pass of the inner loop reads one run of
   identical Flag values; run lengths of non-missing runs are tallied in a hash. */
data _null_;
  if _N_ = 1 then do;
    dcl hash h(ordered : "a");
    h.definekey("Total_Hours");
    h.definedata("Total_Hours", "Count");
    h.definedone();
  end;

  /* When the loop exits, Total_Hours equals the number of rows in the run */
  do Total_Hours = 1 by 1 until (last.Flag);
    set have end=eof;
    by Flag notsorted;
  end;

  /* Only runs whose Flag is non-missing (and non-zero) are counted */
  if Flag then do;
    if h.find() = 0 then Count + 1;
    else Count = 1;
    h.replace();
  end;

  if eof then h.output(dataset : "want");
run;
Several weeks ago, @Richard taught me how to use a DOW-loop together with a directly addressed array. Today, I pass it on to you.
/* Counts how often each run length of consecutive non-missing Flag values occurs
   in HAVE. bin[k] holds the number of runs of length k (direct addressing);
   WANT gets one row per run length that occurred. */
data want(keep=Total_Hours Count);
  /* Capacity: run lengths above dim(bin) would still be out of range; enlarge if needed */
  array bin[99] _temporary_;

  do until(eof1);
    set have end=eof1;

    /* run_len is kept separate from the output column Count */
    if Flag then run_len + 1;

    if ^Flag or eof1 then do;
      /* Guard: with no open run (leading or consecutive missing flags) run_len
         is 0, and bin[0] would be an invalid subscript */
      if run_len > 0 then bin[run_len] + 1;
      run_len = 0;
    end;
  end;

  /* Emit only the run lengths that were actually seen */
  do i = 1 to dim(bin);
    Total_Hours = i;
    Count = bin[i];
    if Count then output;
  end;
run;
Thanks again to Richard, who also suggested this article to me.

Split value of a variable to multiple variables

I have a variable with the values (14 12 13 15 15 14). I need to create new variables and assign each value to the matching variable. Example: value 14 goes into var14 and value 12 into var12.
/* Sample input: all values sit on one data line, one observation per value. */
data a;
  /* Trailing @@ holds the input line across iterations so every value on it is read */
  input a1 @@;
  cards;
14 12 13 15 15 14
;
run;
/* m: for every distinct value v of a1 in WORK.a, creates macro variable var<v>
   holding v and writes the name var<v> to the log. */
%macro m;
  proc sql noprint;
    /* distinct values joined with '#', and how many there are */
    select distinct a1 into :vals separated by '#' from WORK.a;
    select count(distinct a1) into :n_vals from WORK.a;
  quit;

  %do idx=1 %to &n_vals;
    %let var%scan(&vals, &idx, '#')=%scan(&vals, &idx, '#');
    %put var%scan(&vals, &idx, '#');
  %end;
%mend m;
%m;
**************Log Results***************
var12
var13
var14
var15

SAS MACRO: Create many datasets - modify them - combine them into one within one MACRO without the need to output multiple datasets

My initial dataset has 14,000 distinct values of the variable STID, with 10^5 observations for each.
I would like to run some procedures BY each STID, write the modified data out per STID, and then stack all the STID results into one big dataset WITHOUT having to keep all the temporary per-STID datasets.
I start writing a MACRO:
/* Sample input: a character key (stid) and two numeric variables. */
data HAVE;
  input stid $ NumVar1 NumVar2;
  datalines;
a 5 45
b 6 2
c 5 3
r 2 5
f 4 4
j 7 3
t 89 2
e 6 1
c 3 8
kl 1 6
h 2 3
f 5 41
vc 58 4
j 5 9
ude 7 3
fc 9 11
h 6 3
kl 3 65
b 1 4
g 4 4
;
run;
/* Store every distinct value of stid in macro variables stid1..stidN,
   where N_VAR holds N, the number of distinct values. */
proc sql;
  select count(distinct stid) into :N_VAR
    from HAVE;

  select distinct stid into :stid1 - :stid%left(&N_VAR)
    from HAVE;
quit;
/* expand_by_stid: for each of the N_VAR distinct stid values, subsets HAVE,
   derives two transformed variables, then tries to stack the results into
   TOTAL and delete the intermediate data sets. Relies on the global macro
   variables N_VAR and stid1..stidN. */
%macro expand_by_stid;
/*STEP 1: create datasets by STID*/
%do i=1 %to &N_VAR.;
data stid&i;
set HAVE;
if stid="&&stid&i";
run;
/*STEP 2: from here data modifications for each STID-data (with procs and data steps, e.g.)*/
data modified_stid&i;
set stid&i;
NumVar1_trans=NumVar1**2;
NumVar2_trans=NumVar1*NumVar2;
run;
%end;
/*STEP 3: from here should be some code lines that set together all created datasets under one another and delete them afterwards*/
/* NOTE(review): the semicolon after modified_stid&n is emitted on every loop pass,
   so the SET statement ends after the first data set name. */
data total;
set %do n=1 %to &N_VAR.;
modified_stid&n;
%end;
run;
/* NOTE(review): the DELETE argument below is a placeholder, not valid syntax. */
proc datasets library=usclim;
delete <ALL DATA SETS by SPID>;
run;
%mend expand_by_stid;
%expand_by_stid;
But the last step does not work. How can I do it?
You're very close - all you need to do is remove the semicolon in the macro loop and put it after the %end in step 3, as below:
data total;
  /* The loop emits only data set names; nothing inside it ends the statement */
  set
    %do n=1 %to &N_VAR.;
      modified_stid&n
    %end;
  ;  /* this semicolon closes the generated SET statement */
run;
This then produces the statement you were after:
set modified_stid1 modified_stid2 .... ;
instead of what your macro was originally generating:
set modified_stid1; modified_stid2; ...;
Finally, you can delete all the temporary datasets using stid: in the delete statement:
/* Remove every intermediate table: the stid<n> subsets and the modified_stid<n>
   results. The prefix stid: alone does not match names starting with modified_. */
/* NOTE(review): the steps that create these tables use one-level names; confirm
   they resolve to library usclim, otherwise point this at that library instead. */
proc datasets library=usclim;
  delete stid: modified_stid: ;
run;
quit;  /* PROC DATASETS is interactive and stays active until QUIT */

SAS proc univariate for many variables by macro or array

This should be an easy question, but I couldn't figure it out...
I want to get the mean and median of many variables with proc univariate, as below. It is really time-consuming to manually add M_ for the mean and MD_ for the median to every variable. Is there a simpler approach, such as an array? Thanks a lot!
Code:
/* Sample input: five measures (A1-A5) per row, grouped by year and type. */
data old;
  input year type A1 A2 A3 A4 A5;
  datalines;
2000 1 1 2 3 4 5
2000 1 2 3 4 5 6
2000 2 3 4 5 6 7
2000 2 4 5 6 7 8
2001 1 5 6 7 8 9
2001 1 6 7 8 9 10
2001 1 7 8 9 10 11
2001 2 8 9 10 11 12
2001 2 9 10 11 12 13
2001 2 10 11 12 13 14
2002 1 11 12 13 14 15
2002 1 12 13 14 15 16
2002 1 13 14 15 16 17
2002 2 14 15 16 17 18
2002 2 15 16 17 18 19
2002 2 16 17 18 19 20
;
run;
/* Mean and median of A1-A5 per year/type, with every output name typed by hand. */
proc univariate data=old noprint;
  by year type;
  var A1 A2 A3 A4 A5;
  output out=new
         mean   = M_A1 M_A2 M_A3 M_A4 M_A5
         median = MD_A1 MD_A2 MD_A3 MD_A4 MD_A5;
run;
Expected demo Code:
/* Pseudo-code sketch of the desired shorthand; it is not valid SAS as written
   (an ARRAY statement cannot appear outside a DATA step). */
%let varlist = A1 A2 A3 A4 A5;
array vars (*) &varlist;
proc univariate data=old noprint;
var &vars(*);
by year type;
output out=new
mean=M_&vars(*)
median=MD_&vars(*);
run;
Corrected code using PROC SQL:
/* uni: builds the M_ and MD_ output-name lists from DICTIONARY.COLUMNS and
   passes them to PROC UNIVARIATE for data set OLD. */
%macro uni;
  %let vars=A1 A2 A3 A4 A5;
  %let varlist='A1','A2','A3','A4','A5';

  proc sql;
    /* names for the MEAN= output variables */
    select cats('M_',name) into :meannamelist separated by ' '
      from dictionary.columns
      where libname='WORK' and memname='OLD' and name in (&varlist);

    /* names for the MEDIAN= output variables */
    select cats('MD_',name) into :mediannamelist separated by ' '
      from dictionary.columns
      where libname='WORK' and memname='OLD' and name in (&varlist);
  quit;

  proc univariate data=old;
    var &vars;
    by year type;
    output out=new
           mean=&meannamelist
           median=&mediannamelist;
  run;
%mend uni;

options mprint;
%uni;
Another option is to create these lists in PROC SQL.
/* Build the analysis-variable list and the matching M_ / MD_ output-name lists
   from DICTIONARY.COLUMNS, then hand them to PROC UNIVARIATE. */

/* The names must be quoted: unquoted, IN (A1,A2,...) is read as a list of
   column references rather than string values. */
%let varlist='A1','A2','A3','A4','A5';

proc sql noprint;
  /* One query fills all three lists, so they share the same order */
  select name, cats('M_',name), cats('MD_',name)
    into :namelist       separated by ' ',
         :meannamelist   separated by ' ',
         :mediannamelist separated by ' '
    from dictionary.columns
    where libname='WORK' and memname='OLD' and upcase(name) in (&varlist)
    order by varnum;
quit;

proc univariate data=old noprint;
  var &namelist;
  by year type;
  output out=new
         mean=&meannamelist
         median=&mediannamelist;
run;
One way of completing your code is to loop through a list:
/* prefix_list: emits every blank-separated name in varlist with prefix prepended,
   e.g. %prefix_list(M_, A1 A2) generates: M_A1 M_A2
   The %DO %WHILE loop lives inside a macro because it cannot run in open code. */
%macro prefix_list(prefix, varlist);
  %local idx name;
  %let idx = 1;
  %let name = %scan(&varlist., &idx., %str( ));
  /* %scan returns an empty string once the list is exhausted */
  %do %while(%length(&name.) > 0);
    &prefix.&name.
    %let idx = %eval(&idx. + 1);
    %let name = %scan(&varlist., &idx., %str( ));
  %end;
%mend prefix_list;

%let varlist1 = A1 A2 A3 A4 A5;

proc univariate data=old noprint;
  var &varlist1.;
  by year type;
  output out=new
         mean=%prefix_list(M_, &varlist1.)
         median=%prefix_list(MD_, &varlist1.);
run;
If you can use PROC MEANS to obtain the needed stats, do that; it has an autoname option on the output statement that does approximately what you're asking for. Both mean and median are available in PROC MEANS.
If not, you might look to ods output to simplify things.