Looping around a whole data step in SAS - sas

So I have a following code:
%let macroVar = Var1 Var2;
Data new1;
Set old1 (keep= count &macroVar.);
Run;
Proc means data = new1 nway missing noprint;
Class var1;
Var count;
Output out= out_var1 sum=;
Proc means data = new1 nway missing noprint;
Class var2;
Var count;
Output out= out_var2 sum=;
How can I write out the two proc means in one data step, using the macro variable I set up at the beginning?
Many thanks

Why not just do it in one PROC MEANS call?
%let varlist=sex age ;
proc means data=sashelp.class missing noprint;
class &varlist;
ways 1;
var height;
output out=want sum=;
run;
Result:
Obs Sex Age _TYPE_ _FREQ_ Height
1 11 1 2 108.8
2 12 1 5 297.2
3 13 1 3 184.3
4 14 1 4 259.6
5 15 1 4 262.5
6 16 1 1 72.0
7 F . 2 9 545.3
8 M . 2 10 639.1
If you really must have separate output datasets then it will be faster to generate them from the output above.
%macro sum_counts(varlist,data=,var=count);
%local i n;
%let n=%sysfunc(countw(&varlist));
proc means data=&data missing noprint ;
class &varlist;
ways 1;
var &var;
output out=_summary_ sum=;
run;
%do i=1 %to &n;
data new&i;
set _summary_;
where _type_=2**(&n-&i);
keep %scan(&varlist,&i) &var;
run;
%end;
%mend sum_counts;
Example:
263 options mprint;
264 %sum_counts(varlist=sex age,data=sashelp.class,var=height);
MPRINT(SUM_COUNTS): proc means data=sashelp.class missing noprint ;
MPRINT(SUM_COUNTS): class sex age;
MPRINT(SUM_COUNTS): ways 1;
MPRINT(SUM_COUNTS): var height;
MPRINT(SUM_COUNTS): output out=_summary_ sum=;
MPRINT(SUM_COUNTS): run;
NOTE: There were 19 observations read from the data set SASHELP.CLASS.
NOTE: The data set WORK._SUMMARY_ has 8 observations and 5 variables.
MPRINT(SUM_COUNTS): data new1;
MPRINT(SUM_COUNTS): set _summary_;
MPRINT(SUM_COUNTS): where _type_=2**(2-1);
MPRINT(SUM_COUNTS): keep sex height;
MPRINT(SUM_COUNTS): run;
NOTE: There were 2 observations read from the data set WORK._SUMMARY_.
WHERE _type_=2;
NOTE: The data set WORK.NEW1 has 2 observations and 2 variables.
MPRINT(SUM_COUNTS): data new2;
MPRINT(SUM_COUNTS): set _summary_;
MPRINT(SUM_COUNTS): where _type_=2**(2-2);
MPRINT(SUM_COUNTS): keep age height;
MPRINT(SUM_COUNTS): run;
NOTE: There were 6 observations read from the data set WORK._SUMMARY_.
WHERE _type_=1;
NOTE: The data set WORK.NEW2 has 6 observations and 2 variables.

Related

SAS Array Variable Name Based on Another Array

I have data in the following format:
data have;
input id rtl_apples rtl_oranges rtl_berries;
datalines;
1 50 60 10
2 10 30 80
3 40 8 1
;
I'm trying to create new variables that represent the percent of the sum of the RTL variables, PCT_APPLES, PCT_ORANGES, PCT_BERRIES. The problem is I'm doing this within a macro so the names and number of RTL variables with vary with each iteration so the new variable names need to be generated dynamically.
This data step essentially gets what I need, but the new variables are in the format PCT1, PCT2, PCTn format so it's difficult to know which RTL variable the PCT corresponds too.
data want;
set have;
array rtls[*] rtl_:;
total_sales = sum(of rtl_:);
call symput("dim",dim(rtls));
array pct[&dim.];
do i=1 to dim(rtls);
pct[i] = rtls[i] / total_sales;
end;
drop i;
run;
I also tried creating the new variable name by using a macro variable, but only the last variable in the array is created. In this case, PCT_BERRIES.
data want;
set have;
array rtls[*] rtl_:;
total_sales = sum(of rtl_:);
do i=1 to dim(rtls);
var_name = compress(tranwrd(upcase(vname(rtls[i])),'RTL','PCT'));
call symput("var_name",var_name);
&var_name. = rtls[i] / total_sales;
end;
drop i var_name;
run;
I have a feeling I'm over complicating this so any help would be appreciated.
If you have the list of names in data already then use the list to create the names you need for your arrays.
proc sql noprint;
select distinct cats('RTL_',name),cats('PCT_',name)
into :rtl_list separated by ' '
, :pct_list separated by ' '
from dataset_with_names
;
quit;
data want;
set have;
array rtls &rtl_list;
array pcts &pct_list;
total_sales = sum(of rtls[*]);
do index=1 to dim(rtls);
pcts[index] = rtls[index] / total_sales;
end;
drop index ;
run;
You can't create variables while a data step is executing. This program uses PROC TRANSPOSE to create a new data using the RTL_ variables "renamed" PCT_.
data have;
input id rtl_apples rtl_oranges rtl_berries;
datalines;
1 50 60 10
2 10 30 80
3 40 8 1
;;;;
run;
proc transpose data=have(obs=0) out=names;
var rtl_:;
run;
data pct;
set names;
_name_ = transtrn(_name_,'rtl_','PCT_');
y = .;
run;
proc transpose data=pct out=pct2;
id _name_;
var y;
run;
data want;
set have;
if 0 then set pct2(drop=_name_);
array _rtl[*] rtl_:;
array _pct[*] pct_:;
call missing(of _pct[*]);
total = sum(of _rtl[*]);
do i = 1 to dim(_rtl);
_pct[i] = _rtl[i]/total*1e2;
end;
drop i;
run;
proc print;
run;
You may want to just report the row percents
proc transpose data=&data out=&data.T;
by id;
var rtl_:;
run;
proc tabulate data=&data.T;
class id _name_;
var col1;
table
id=''
, _name_='Result'*col1=''*sum=''
_name_='Percent'*col1=''*rowpctsum=''
/ nocellmerge;
run;

how can I build a loop for macro in SAS?

I want to do a simulation based on macro in SAS. I can build a function named 'fine()', the code is as follows
DATA CLASS;
INPUT NAME $ SEX $ AGE HEIGHT WEIGHT;
CARDS;
ALFRED M 14 69.0 112.5
ALICE F 13 56.5 84.0
BARBARA F 13 65.3 98.0
CAROL F 14 62.8 102.5
HENRY M 14 63.5 102.5
RUN;
PROC PRINT;
TITLE 'DATA';
RUN;
proc print data=CLASS;run;
PROC FCMP OUTLIB = work.functions.func;
function populationCalc(HEIGHT,WEIGHT,thres);
pop=HEIGHT-WEIGHT-thres;
return (pop);
ENDSUB;
options cmplib=(work.functions);
%macro fine(i);
data ex;
set CLASS;
thres=&i;
pop = populationCalc(HEIGHT,WEIGHT,thres);
if (pop>50) then score=1;
else score=0;
run;
proc iml;
USE ex;
READ all var _ALL_ into ma[colname=varNames];
CLOSE ex;
nn=nrow(ma);
total_score=sum(ma[,'thres']);
avg_score=sum(ma[,'thres'])/nn;
print total_score avg_score;
%mend fine;
%fine(10);
%fine(100);
%fine(150);
I want to build a loop for function 'fine()' ans also use macro, but the result is not as I expect. How can I fix this?
%macro ct(n);
data data_want;
%do i=1 %to &n;
x=%fine(&i);
output x;
%end;
run;
%macro ct;
%ct(10);
%fine does not generate any text that can be used in the context of a right hand side (RHS) of a DATA Step variable assignment statement.
You seem to perhaps want this data set as a result of invoking %ct
i total_score average_score
- ----------- -------------
1 5 1
2 10 2
3 15 3
etc...
Step 1. Save IML result
Add this to the bottom of IML code in %fine, replacing the print
create fine_out var {total_score avg_score};
append;
close fine_out;
quit;
Step 2. Rewrite ct macro
Invoke %fine outside a DATA step context so the DATA and IML steps can run. Append the IML output to a results data set.
%macro ct(n,out=result);
%local i;
%do i=1 %to &n;
%fine(&i)
%if &i = 1 %then %do;
data &out; set fine_out; run;
%end;
%else %do;
proc append base=&out data=fine_out; run;
%end;
%end;
%mend;
options mprint;
%ct(10)
This should be the output WORK.RESULT based on your data

Setting names to idgroup

Follow up to
SAS - transpose multiple variables in rows to columns
I have the following code:
data have;
input CX_ID 1. TYPE $1. COUNT_RATE 1. SUM_RATE 2.;
datalines;
1A110
1B220
2A120
;
run;
proc summary data = have nway;
class cx_id;
output out=want (drop = _:)
idgroup(out[2] (count_rate sum_rate)= count sum);
run;
So this table:
CX_ID TYPE COUNT_RATE SUM_RATE
1 A 1 10
1 B 2 20
2 A 1 20
becomes
CX_ID COUNT_1 COUNT_2 SUM_1 SUM_2
1 1 2 10 20
2 1 . 20 .
Which is perfect, but how do I set the names to be
Count_A Count_B Sum_A Sum_B
Or in general whatever the value in the type field of the have table ?
Thank you
A double PROC TRANSPOSE is dynamic and you can add a data step to customize the names easily.
*sample data;
data have;
input CX_ID 1. TYPE $1. COUNT 1. SUM 2.;
datalines;
1A110
1B220
2A120
;
run;
*transpose to long;
proc transpose data=have out=long;
by cx_id type;
run;
*transpose to wide;
proc transpose data=long out=wide;
by cx_id;
var col1;
id _name_ type;
run;

Count over columns in SAS

I have a data set in SAS containing individuals as rows and a variable for each period as columns. It looks something like this:
data have;
input individual t1 t2 t3;
cards;
1 112 111 123
2 112 111 123
3 111 111 123
4 112 112 111
;
run;
What I want is for SAS to count how many there is of each number for each time period. So I want to get something like it:
data want;
input count t1 t2 t3;
cards;
111 1 3 1
112 3 1 0
123 0 0 3
;
run;
I could do this with proc freq, but outputting this doesn't work very well, when I have a lot of columns.
Thanks
In general having data in the meta data is a bad idea, as here where PERIOD is coded into the Tn variables and you really want that to be a group. Having said that you can still have your cake and eat it too.
PROC SUMMARY can get the counts for each Tn quickly and then you will have smaller data set to fiddle with. Here is one approach that should work well for many time periods.
data have;
input individual t1 t2 t3;
cards;
1 112 111 123
2 112 111 123
3 111 111 123
4 112 112 111
;;;;
run;
proc print;
run;
proc summary data=have chartype;
class t:;
ways 1;
output out=want;
run;
proc print;
run;
data want;
set want;
p = findc(_type_,'1');
c = coalesce(of t1-t3);
run;
proc print;
run;
proc summary data=want nway completetypes;
class c p;
freq _freq_;
output out=final;
run;
proc print;
run;
proc transpose data=final out=morefinal(drop=_name_) prefix=t;
by c;
id p;
var _freq_;
run;
proc print;
run;
First restructure the data so that it is in more of a vertical fashion. This will be easier to work with. We also want to create a flag that we will use as a counter later on.
data have2;
set have;
array arr[*] t1-t3;
flag = 1;
do period=lbound(arr) to hbound(arr);
val = arr[period];
output;
end;
keep period val flag;
run;
Summarize the data so we have the number of times that value occurred in each of the periods.
proc sql noprint;
create table smry as
select val,
period,
sum(flag) as count
from have3
group by 1,2
order by 1,2
;
quit;
Transpose the data so we have one line per value and then the counts for each period after that:
proc transpose data=smry out=want(drop=_name_);
by val;
id period;
var count;
run;
Note that when you define the array in the first step you could use this notation which would allow for a dynamic number of periods:
array arr[*] t:;
This assumes every variable beginning with 't' in the dataset should go into the array.
If your computer memory is large enough to hold the entire output, then Hash could be a viable solution:
data have;
input individual t1 t2 t3;
cards;
1 112 111 123
2 112 111 123
3 111 111 123
4 112 112 111
;
run;
data _null_;
if _n_=1 then
do;
/*This is to construct a Hash, where count is tracked and t1-t3 is maintained*/
declare hash h(ordered:'a');
h.definekey('count');
h.definedata('count', 't1','t2','t3');
h.definedone();
call missing(count, t1,t2,t3);
end;
set have(rename=(t1-t3=_t1-_t3))
/*rename to avoid conflict between input data and Hash object*/
end=last;
array _t(*) _t:;
array t(*) t:;
/*The key is to set up two arrays, one is for input data,
another is for Hash feed, and maneuver their index variable accordingly*/
do i=1 to dim(_t);
count=_t(i);
rc=h.find(); /*search the Hash and bring back data elements if found*/
/*If there is a match, then corresponding 't' will increase by '1'*/
if rc=0 then
t(i)+1;
else
do;
/*If there is no match, then corresponding 't' will be initialized as '1',
and all of the other 't' reset to '0'*/
do j=1 to dim(t);
t(j)=0;
end;
t(i)=1;
end;
rc=h.replace(); /*Update the Hash*/
end;
if last then
rc=h.output(dataset:'want');
run;
Try this:
%macro freq(dsn);
proc sql;
select name into:name separated by ' ' from dictionary.columns where libname='WORK' and memname='HAVE' and name like 't%';
quit;
%let ncol=%sysfunc(countw(&name,%str( )));
%do i=1 %to &ncol;
%let col=%scan(&name,&i);
proc freq data=have;
table &col/out=col_&i(keep=&col count rename=(&col=count count=&col));
run;
%end;
data temp;
merge
%do i=1 %to &ncol;
col_&i
%end;
;
by count;
run;
data want;
set temp;
array vars t:;
do over vars;
if missing(vars) then vars=0;
end;
run;
%mend;
%freq(have)

Split SAS dataset

I have a SAS dataset that looks like this:
id | dept | ...
1 A
2 A
3 A
4 A
5 A
6 A
7 A
8 A
9 B
10 B
11 B
12 B
13 B
Each observation represents a person.
I would like to split the dataset into "team" datasets, each dataset can have a maximum of 3 observations.
For the example above this would mean creating 3 datasets for dept A (2 of these datasets would contain 3 observations and the third dataset would contain 2 observations). And 2 datasets for dept B (1 containing 3 observations and the other containing 2 observations).
Like so:
First dataset (deptA1):
id | dept | ...
1 A
2 A
3 A
Second dataset (deptA2)
id | dept | ...
4 A
5 A
6 A
Third dataset (deptA3)
id | dept | ...
7 A
8 A
Fourth dataset (deptB1)
id | dept | ...
9 B
10 B
11 B
Fifth dataset (deptB2)
id | dept | ...
12 B
13 B
The full dataset I'm using contains thousands of observations with over 50 depts. I can work out how many datasets per dept are required and I think a macro is the best way to go as the number of datasets required is dynamic. But I can't figure out the logic to create the datasets so that they have have a maximum of 3 observations. Any help appreciated.
Another version.
Compared to DavB version, it only processes input data once and splits it into several tables in single datastep.
Also if more complex splitting rule is required, it can be implemented in datastep view WORK.SOURCE_PREP.
data WORK.SOURCE;
infile cards;
length ID 8 dept $1;
input ID dept;
cards;
1 A
2 A
3 A
4 A
5 A
6 A
7 A
8 A
9 B
10 B
11 B
12 B
13 B
14 C
15 C
16 C
17 C
18 C
19 C
20 C
;
run;
proc sort data=WORK.SOURCE;
by dept ID;
run;
data WORK.SOURCE_PREP / view=WORK.SOURCE_PREP;
set WORK.SOURCE;
by dept;
length table_name $32;
if first.dept then do;
count = 1;
table = 1;
end;
else count + 1;
if count > 3 then do;
count = 1;
table + 1;
end;
/* variable TABLE_NAME to hold table name */
TABLE_NAME = catt('WORK.', dept, put(table, 3. -L));
run;
/* prepare list of tables */
proc sql noprint;
create table table_list as
select distinct TABLE_NAME from WORK.SOURCE_PREP where not missing(table_name)
;
%let table_cnt=&sqlobs;
select table_name into :table_list separated by ' ' from table_list;
select table_name into :tab1 - :tab&table_cnt from table_list;
quit;
%put &table_list;
%macro loop_when(cnt, var);
%do i=1 %to &cnt;
when ("&&&var.&i") output &&&var.&i;
%end;
%mend;
data &table_list;
set WORK.SOURCE_PREP;
select (TABLE_NAME);
/* generate OUTPUT statements */
%loop_when(&table_cnt, tab)
end;
run;
You could try this:
%macro split(inds=,maxobs=);
proc sql noprint;
select distinct dept into :dept1-:dept9999
from &inds.
order by dept;
select ceil(count(*)/&maxobs.) into :numds1-:numds9999
from &inds.
group by dept
order by dept;
quit;
%let numdept=&sqlobs;
data %do i=1 %to &numdept.;
%do j=1 %to &&numds&i;
dept&&dept&i&&j.
%end;
%end;;
set &inds.;
by dept;
if first.dept then counter=0;
counter+1;
%do i=1 %to &numdept.;
%if &i.=1 %then %do;
if
%end;
%else %do;
else if
%end;
dept="&&dept&i" then do;
%do k=1 %to &&numds&i.;
%if &k.=1 %then %do;
if
%end;
%else %do;
else if
%end;
counter<=&maxobs.*&k. then output dept&&dept&i&&k.;
%end;
end;
%end;
run;
%mend split;
%split(inds=YOUR_DATASET,maxobs=3);
Just replace the INDS parameter value in the %SPLIT macro call to the name of your input data set.