SAS macro with conditional loops (%if %then %else %do %end) - if-statement

I'm a beginner in SAS and I am trying to use a macro to import Excel files using a conditional loop.
The import is driven by the initial_year, final_year, initial_month and final_month values.
But it seems the %IF condition is not working. Can you help, please? Thank you.
This is my sas program:
/* Settings: root folder of the workbooks, plus the first and last year/month to import. */
%let path=\\xxxx.yy.pt\aaa$\INFO\;
%let initial_year=2019; %let initial_month=2;
%let final_year=2021; %let final_month=1;
/* import_loop: for each (year, month) pair visited by the loops below, imports
   <path><year>\Farmacias_EA_<yyyymm>.xlsx into a dataset named Farmacias_EA_<yyyymm>
   and then adds a numeric column Data_anomes holding the yyyymm value. */
%Macro import_loop;
/* Branch 1 (final_month >= initial_month): every year initial_year..final_year,
   and within each year the months initial_month..final_month. */
%if &final_month >= &initial_month %then %do;
%DO x = &initial_year %TO &final_year;
%DO i = &initial_month %TO &final_month;
/* Build the yyyymm suffix, zero-padding single-digit months. */
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
/* Rewrite the imported dataset in place, stamping every row with the yyyymm value. */
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
/* NOTE(review): at this point only the two loop %DOs have been closed; the %DO opened by
   the %IF above has no matching %END before the %ELSE below, and the "%else %do" line has
   no terminating semicolon. Either looks sufficient to stop the macro from compiling - confirm. */
/* Branch 2 (final_month < initial_month): first the years initial_year..final_year-1 with
   months initial_month..12, then final_year with months 1..final_month. */
/* NOTE(review): the month loop starts at initial_month for EVERY year in the first range,
   so for intermediate years the months before initial_month are not visited - confirm intent. */
%else %do
%DO x = &initial_year %TO &final_year-1;
%DO i = &initial_month %TO 12;
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
/* Final year only: months 1..final_month. The outer loop runs exactly once (x = final_year). */
%DO x = &final_year %TO &final_year;
%DO i = 1 %TO &final_month;
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
%end;
%mend import_loop;
%import_loop

Just treat your dates like dates and the looping will be much easier.
/*
 * import: load one Farmacias_EA_<yyyymm>.xlsx workbook per calendar month.
 *
 * Parameters
 *   from  first month to import, as yyyymm (e.g. 201902)
 *   to    last month to import, as yyyymm, inclusive (e.g. 202101)
 *
 * Reads the global macro variable PATH (root folder, with trailing backslash);
 * each workbook is expected at <path><yyyy>\Farmacias_EA_<yyyymm>.xlsx.
 * For every month a dataset Farmacias_EA_<yyyymm> is created (replacing any
 * existing one) and a numeric column Data_anomes = yyyymm is added to it.
 */
%macro import(from,to);
    %local start end offset yymm year ;

    /* Turn yyyymm into a real SAS date (first day of that month). */
    %let start=%sysfunc(inputn(&from.01,yymmdd8.));
    %let end=%sysfunc(inputn(&to.01,yymmdd8.));

    /* INPUTN yields a missing value (.) when the text is not a valid date. */
    %if "&start"="." or "&end"="." %then %do;
        %put ERROR: import expects FROM and TO as yyyymm values, got FROM=&from TO=&to..;
        %return;
    %end;

    /* A reversed range would make the loop below run zero times without any message. */
    %if &end < &start %then %do;
        %put WARNING: import called with TO=&to earlier than FROM=&from - nothing imported.;
        %return;
    %end;

    /* One iteration per month boundary between start and end, inclusive. */
    %do offset=0 %to %sysfunc(intck(month,&start,&end));
        %let yymm=%sysfunc(intnx(month,&start,&offset),yymmn6.);
        %let year=%substr(&yymm,1,4);

        proc import datafile="&path.&year.\Farmacias_EA_&yymm..xlsx"
            out=Farmacias_EA_&yymm. REPLACE dbms=xlsx
            ;
        run;

        /* Stamp every row with the month it came from. */
        data Farmacias_EA_&yymm.;
            set Farmacias_EA_&yymm.;
            Data_anomes=&yymm.;
        run;
    %end;
%mend import ;

%let path=\\xxxx.yy.pt\aaa$\INFO\;
/* February 2019 through January 2021, matching the range in the question. */
%import(201902,202101);
Do you really want the variable DATA_ANOMES to have numbers like 201,902 ? Why not either store it as a string like "201902" or an actual date value like "01FEB2019"d ?

Related

How to integrate multiple imputation into Donna Spiegelman's SAS Subtype Macro?

I am trying to analyse the HRs for different disease subtypes in a nested case-control study using the dataset with a high proportion of missing values for two variables that I need for adjusted risk model. For non-adjusted model I used Donna Spiegelman's Subtype macro https://www.hsph.harvard.edu/donna-spiegelman/software/subtype/.
Now I want to run an adjusted model using the dataset with Multiply imputed Data for categorical variables a and b. My idea was to first run Proc MI :
/* Multiple imputation of Subtype: requests 100 imputations (nimpute=100), written to ImputSP,
   with a fixed seed so the run is repeatable. */
proc mi data=Subtype nimpute=100 out=ImputSP seed=1305468 ;
/* Variables treated as categorical in the imputation model. */
class a b disease_sub infection ;
/* NOTE(review): this step contains two FCS statements (this one and the one after VAR);
   confirm that PROC MI accepts both, or merge their options into a single FCS statement. */
fcs plots=trace ( std mean );
var a b age disease_sub infection;
/* Discriminant-function imputation for a and b, with class variables allowed as covariates;
   nbiter=100 sets the number of burn-in iterations. */
fcs discrim( a b /classeffects=include) nbiter=100;
run;
And then run an edited version of the subtype macro, in which I've added a "by _imputation_" statement in three places in the macro code.
/* get list of values of outcometype */
proc sort data=newdatname; by outcometype ; run;
proc means noprint data=newdatname; var outcometype;
output out=_expblist_ mean=m_outcometype;
by outcometype;
where outcometype ne . ;
run;
data _expblist_;
set _expblist_ end=_end_;
call symput('_eb_'||trim(left(_n_)), trim(left(m_outcometype)));
if _end_ then call symput ('_neb_', trim(left(_n_)));
run;
%if &studydesign eq CACO %then %do; %let reftype=0; %end;
ods listing close;
proc logistic data=newdatname outest=LL;
by _imputation_;
model outcometype(ref="&reftype")=&exposureND &exposureD_ &unconstrvar /link=glogit covb;
%if &studydesign eq CACA %then %do;
%do j=1 %to &_nexpND;
*variables for pair-wise LRT of heterogeneity test;
%do j=1 %to &_nexpND;
%do i=1 %to &_neb_;
if outcometype=&i then _expND_&j=&&_expND_&j._&i..;
%end;
rename _expND_&j=&&_expND_&j;
%end;
%do j=1 %to &_nexpD;
%do k=1 %to &&_nexpD_&j;
%do i=1 %to &_neb_;
if outcometype=&i then _expD_&j._&k=&&_expD_&j._&k._&i..;
%end;
rename _expD_&j._&k=&&_expD_&j._&k..;
%end;
%end;
%do j=1 %to &_nexpND;
%do i=1 %to %eval(&_neb_-1);
%do k=%eval(&i+1) %to &_neb_;
_expND_&j._&i._vs_&j._&k.= _expND_&j*(outcometype=&&_eb_&i|outcometype=&&_eb_&k);
%end;
%end;
%end;
%do j=1 %to &_nexpD;
%do i=1 %to %eval(&_neb_-1);
%do k=%eval(&i+1) %to &_neb_;
%do m=1 %to &&_nexpD_&j;
_expD_&j._&m._&i._vs_&j._&m._&k.=_expD_&j._&m.*(outcometype=&&_eb_&i|outcometype=&&_eb_&k);
%end;
%end;
%end;
%end;
run;
%end;
ods listing close;
proc phreg data=newdatname outest=LL %if "&covs" eq "YES" %then %do; covs %end; nosummary;
model (entrytime,time)*censoring(0)=
%do j=1 %to &_nexpND;
%do i=1 %to &_neb_;
_expND_&j._&i
%end;
%end;
%do j=1 %to &_nexpD;
%do k=1 %to &&_nexpD_&j;
%do i=1 %to &_neb_;
_expD_&j._&k._&i
%end;
%end;
%end;
%if "&augmented" eq "NO" %then %do;
%do j=1 %to &_nunconstrvar;
%do i=1 %to &_neb_;
_ucv_&j._&i
%end;
%end;
%end;
%else %if "&augmented" eq "YES" %then %do;
&unconstrvar
%end;
&constrvar / covb %if "&studydesign" eq "MCACO" | "&studydesign" eq "CACO" %then %do; ties=discrete %end; ;
%if "&studydesign" eq "COHORT" | "&studydesign" eq "CACO" %then %do; strata outcometype &stratavar; %end;
%if "&studydesign" eq "MCACO" %then %do; strata &matchid; %end;
by _imputation_;
After running the subtypeBY_macro my intention is to run this code to obtain the final estimates:
/* Post-processing of the per-imputation estimates.
   Relies on macro variables PamEst, classmi and modelvar, none of which is defined in this excerpt. */
/* Preview the first 8 rows of the per-imputation parameter estimates. */
proc print data=&PamEst (obs=8) ;
title "Logistic Model Coefficients First 8 Obs ";
var _Imputation_ Variable Estimate StdErr;
run;
/* NOTE(review): "REFRESSION" in the title below looks like a typo for REGRESSION; it is a
   runtime string, so it is left unchanged here. */
TITLE "MULTIPLE IMPUTATION LOG REFRESSION-FCS";
/* Pool the estimates across imputations and capture the ParameterEstimates ODS table
   in the dataset mianalyze_<PamEst>. */
proc mianalyze parms (classvar=ClassVal)=&PamEst ;
class &classmi ;
MODELeffects &modelvar ;
ods output ParameterEstimates=mianalyze_&PamEst;
run;
title;
/* Exponentiate the pooled estimate and its two limit columns to obtain OR, LCL_OR and UCL_OR. */
data OR_&PamEst;
set mianalyze_&PamEst;
OR=exp (estimate);
LCL_OR=exp (LCLMean);
UCL_OR=exp(UCLMean);
/* No explicit RUN before this PROC: the PROC statement itself ends the DATA step.
   PROC PRINT has no DATA= option, so it prints the most recently created dataset. */
proc print;
var parm &classmi OR LCL_OR UCL_OR;
run;
Sadly, the program crashes at the SubtypeBY step, and I do not know how best to proceed to integrate multiple imputation into the adjusted model. Any tips would be much appreciated!

Creating SAS macro to find the start month and year for a specified window

I am creating a SAS macro that finds the beginning month and year given an end month/year and a specified window (in months). I would appreciate it if you could help me find what is wrong with the code:
/* date: derives startmonth and startyear by stepping back window months from
   endmonth/endyear. All three inputs are hard-coded in the %LET statements below.
   NOTE(review): no %MEND for this macro is visible in the excerpt. */
%macro date;
%let endmonth=12;
%let endyear=2000;
%let window=24;
/* Case A: window longer than 12 months. */
%if %eval(&window-12)>0
%then %do;
/* NOTE(review): "%eval MOD(&window,12)" - %EVAL is normally written %eval(expression), and
   MOD is a DATA step function that would presumably need %sysfunc(mod(...)) in macro code.
   Verify; this form is used three times below. */
%if %eval(&endmonth - %eval MOD(&window,12))<0
%then %do;
%let startmonth=%eval(%eval MOD(&window,12)-&endmonth);
%let startyear=%eval(&endyear-%eval(%sysevalf(&window/12,integer)-1));
%end;
%else %do;
%let startmonth=%eval(&endmonth-%eval MOD(&window,12));
%let startyear=%eval(&endyear-%sysevalf(&window/12,integer));
%end;
%end;
/* Case B: window of 12 months or fewer. */
%else %do;
%if %eval(&endmonth-&window)<0
%then %do;
%let startmonth=%eval(&window-&endmonth);
/* NOTE(review): the next %LET has no terminating semicolon. */
%let startyear=%eval(&endyear-1)
%end;
%else %do;
%let startmonth=%eval(&endmonth-&window);
/* NOTE(review): the next %LET has no terminating semicolon. */
%let startyear=&endyear
%end;
%end;
Don't reinvent the wheel. The INTNX function will compute all manner of intervals from a given date.
Example:
/*
 * compute_start: function-style macro that returns a SAS date value.
 *
 *   end_ym      end month as yyyymm (e.g. 200012)
 *   months_ago  number of months to step back from end_ym
 *
 * The returned value is the first day of the month that lies months_ago
 * months before end_ym, as an unformatted SAS date number.
 */
%macro compute_start(end_ym=, months_ago=);
    %local first_of_end_month shifted;
    /* First day of the end month as a SAS date. */
    %let first_of_end_month = %sysfunc(inputn(&end_ym.01, yymmdd8.));
    /* Step back the requested number of month intervals. */
    %let shifted = %sysfunc(intnx(MONTH,&first_of_end_month,-&months_ago));
&shifted
%mend compute_start;

/* Usage: 24 months before December 2000. */
%let start_dt = %compute_start(end_ym=200012,months_ago=24);
%put &=start_dt;
%put %sysfunc(year(&start_dt));
%put %sysfunc(month(&start_dt));

/* The same computation without a macro, as a single nested %SYSFUNC call. */
%let start_dt = %sysfunc(intnx(MONTH, %sysfunc(inputn(20001201, yymmdd8.)), -24));

Problem with CALL SYMPUT inside if then loop

I'm encountering problems when I use the CALL SYMPUT routine inside an IF-THEN block in SAS.
/* Intent: set the macro variables price and fat according to the macro variable food. */
%LET food=kebab;
DATA _NULL_;
/* NOTE(review): &food. resolves to the bare word kebab, so the next line reads
   IF kebab=pizza - a comparison of two DATA step variables, not of two pieces of text.
   Presumably both are uninitialized (missing) and therefore compare equal, which would
   explain why this first branch is the one that runs. */
IF &food.=pizza THEN DO;
CALL SYMPUT('price',12);
CALL SYMPUT('fat',5);
END;
ELSE IF &food.=kebab THEN DO;
CALL SYMPUT('price',6);
CALL SYMPUT('fat',4);
END;
RUN;
/* Echo the three macro variables to the log. */
%put &food.;
%put &price.;
%put &fat.;
The variables actually take these values :
food = kebab ; price = 12 (instead of desired value 6) ; fat = 5 (instead of 4)
Thanks in advance for any explanation.
Because you're using a data step, your IF statements need quotes. If it was %IF then your code would be closer to correct.
/* Set the macro variables price and fat according to the macro variable food. */
%LET food=kebab;
DATA _NULL_;
    /* Quoting both sides makes this a comparison of character constants;
       without the quotes the resolved words would be read as variable names. */
    IF "&food." = "pizza" THEN DO;
        /* CALL SYMPUTX converts the number itself and strips leading/trailing
           blanks, so the macro variable holds 12 with no padding and the log gets
           no numeric-to-character conversion note. */
        CALL SYMPUTX('price', 12);
        CALL SYMPUTX('fat', 5);
    END;
    ELSE IF "&food." = "kebab" THEN DO;
        CALL SYMPUTX('price', 6);
        CALL SYMPUTX('fat', 4);
    END;
RUN;
/* Echo the three macro variables to the log. */
%put &food.;
%put &price.;
%put &fat.;
Another option is full macro logic, this will work in SAS 9.4M5+
/* Open-code macro logic (requires a SAS release that supports %IF outside a macro).
   Sets price and fat according to food. */
%LET food=kebab;
/* Open-code %IF is documented as requiring a %DO block after %THEN and %ELSE and as not
   supporting nesting, so %ELSE %IF is avoided here. The two tests are mutually exclusive,
   so two independent %IF blocks give the same result. */
%IF &food.=pizza %THEN %DO;
    %let price = 12;
    %let fat=5;
%END;
%IF &food.=kebab %THEN %DO;
    %let price = 6;
    %let fat=4;
%END;
/* Echo the three macro variables to the log. */
%put &food.;
%put &price.;
%put &fat.;
EDIT: If you're not on SAS 9.4M5+ which supports open macro code you need to wrap your logic in a macro.
/*
 * create_variables: sets the GLOBAL macro variables price and fat from the
 * value of food, then writes food, price and fat to the log (one per line).
 * food is assigned inside the macro; price and fat are left untouched when
 * food is neither pizza nor kebab.
 */
%macro create_variables();
    %global price fat;
    %let food=kebab;

    %if &food.=pizza %then %do;
        %let price = 12;
        %let fat=5;
    %end;
    %else %if &food.=kebab %then %do;
        %let price = 6;
        %let fat=4;
    %end;

    /* Log the outcome. */
    %put &food.;
    %put &price.;
    %put &fat.;
%mend create_variables;

%create_variables();

Getting a WARNING: Truncated record. but not able to figure out the reason

I have a macro dt_query , which will be called with different parameters...
%let dt_start_date_sql = %dt_query(month,-1,sqlsvr);
65 %let dt_end_date_sql = %dt_query(month,0,sqlsvr);
WARNING: Truncated record.
66 %let start_date1=%dt_query(month,-1,oracle);
That macro does not create any dataset, but I still get this WARNING. Here is the code for the dt_query macro:
/*
 * dt_query: function-style macro. The text it leaves behind is the value of D, which is
 * obtained from the %dt_date macro (not shown here) using a format and quoting flag chosen
 * from the database type.
 *
 *   interval   date interval name; must start with one of YEAR QTR MONTH WEEK DAY
 *              YEARLY QUARTERLY MONTHLY WEEKLY DAILY (checked after upcasing and after
 *              dropping anything from the first "." or first digit onward)
 *   offset     passed through to %dt_date as offset=
 *   useDbtype  DB2, SQLSVR, ORACLE or TERADATA; when blank, falls back to the macro
 *              variable dbtype, and if still blank a date9. value with a trailing D is built
 *   quote=     when non-blank, overrides the quoting flag chosen from the database type
 *   date=      when blank, falls back to the macro variable dt_sas
 *   alignment= passed through to %dt_date (default B)
 *
 * On a validation failure the macro writes ERROR lines to the log, sets errormsg1
 * (and errormsg2) and jumptoexit=1, clears D and jumps to the EXIT label.
 * The MINOPERATOR option on the %MACRO statement enables the IN operator used below.
 * NOTE(review): dt_sas, dbtype, errormsg1, errormsg2 and jumptoexit are not declared here,
 * so they presumably live in an outer scope - confirm.
 */
%macro dt_query(interval,offset,useDbtype,quote=,date=,alignment=B)/minoperator;
/* NOTE(review): this %PUT and the %dt_date call near the end are the longest lines of the
   macro; worth checking first if long source lines are suspected of being cut off. */
%put Start macro dt_query(&interval,&offset,&useDbtype,quote=&quote,date=&date,alignment=&alignment);
/* NOTE(review): pos (assigned below) is not in this %LOCAL list. */
%local useFormat useQuote sasdate d interval_temp;
%if %superq(date)=%str() %then %let date=&dt_sas;
%if &useDbtype=%str() %then %let useDbtype=&dbtype;
%let useDbtype=%upcase(&useDbtype);
%let interval=%upcase(&interval);
/* Reduce the interval to its base name: keep the part before the first "." and then cut
   at the first digit, if any. */
%let interval_temp=%scan(&interval,1,%str(.));
%let pos=%sysfunc(anydigit(&interval_temp));
%if &pos %then %let interval_temp=%substr(&interval_temp,1,%eval(&pos-1));
%if %eval(&interval_temp in YEAR QTR MONTH WEEK DAY YEARLY QUARTERLY MONTHLY WEEKLY DAILY)=0 %then
%do;
%let errormsg1=&interval is not a valid date interval.;
%put ERROR: &errormsg1;
%let jumptoexit=1;
%let d=;
%goto EXIT;
%end;
/* NOTE(review): for text that is not a number INPUTN presumably returns a missing value,
   which %SYSFUNC would render as "." rather than as an empty string; if so this test can
   never be true - confirm. */
%if %sysfunc(inputn(&offset,best.))=%str() %then
%do;
%let errormsg1=&offset is not a valid date offset.;
%put ERROR: &errormsg1;
%let jumptoexit=1;
%let d=;
%goto EXIT;
%end;
%if &useDbtype=%str() %then
%do;
/* If useDbtype is missing, assume we need a sas date string */
%let useFormat=date9.;
%let useQuote=Y;
%let sasdate=1;
%end;
%else
%if %eval(&useDbtype in DB2 SQLSVR ORACLE TERADATA) %then
%do;
%if &useDbtype eq DB2 %then
%do;
/* date format is 'mm/dd/yyyy' */
%let useFormat=mmddyy10.;
%let useQuote=Y;
%end;
%else
%if &useDbtype eq SQLSVR %then
%do;
/* date format is 'mm/dd/yyyy' */
%let useFormat=mmddyy10.;
%let useQuote=Y;
%end;
%else
%if &useDbtype eq ORACLE %then
%do;
/* date format is 01-DEC-2011*/
%let useFormat=date11.;
%let useQuote=Y;
%end;
%else
%if &useDbtype eq TERADATA %then
%do;
/* date format is '2012-01-01'*/
%let useFormat=yymmddd10.;
%let useQuote=Y;
%end;
%end;
%else
%do;
%let errormsg1=Unrecognized useDbtype value &useDbtype..;
%let errormsg2=Must be one of DB2, SQLSVR, ORACLE, TERADATA.;
%put;
%put ERROR: &errormsg1;
%put ERROR: &errormsg2;
%put;
%let jumptoexit=1;
%let d=;
%goto EXIT;
%end;
/* An explicit quote= argument wins over the per-database default. */
%if &quote ne %str() %then %let useQuote=&quote;
%let d=%dt_date(date=&date,interval=&interval,format=&useFormat,offset=&offset.,alignment=&alignment,quote=&useQuote);
/* sasdate is only set in the blank-useDbtype branch; there a trailing D is appended to D. */
%if &sasdate=1 %then %let d=%superq(d)D;
%EXIT:
/* This line is the macro's result text; it is empty after a validation failure. */
%unquote(%superq(d))
%put End macro dt_query - Date Value returned is %unquote(%superq(d));
%mend dt_query;
I have seen this warning before when a line of code is really long and SAS only reads the first N characters of the line. In this case I don't see any lines that are obviously too long, but I would look in your original code and insert some breaks on the longest lines. If that doesn't work, I would probably start brute-force debugging...
Setting the linesize might solve your problem.
options LINESIZE=256;

What's the fastest way to partition a sas dataset for batch processing?

I have a large sas dataset (1.5m obs, ~250 variables) that I need to split into several smaller sas datasets of equal size for batch processing. Each dataset needs to contain all the variables but only a fraction of the obs. What is the fastest way of doing this?
You could do something like the following:
/*
 * splitds: split a dataset into SPLITNUM datasets of (nearly) equal size.
 *
 *   inlib=     libref of the input dataset (defaults to WORK when blank)
 *   inds=      member name of the input dataset
 *   splitnum=  number of output datasets (1 or more)
 *   outid=     prefix of the output datasets; they are named <outid>1 .. <outid>N
 *
 * Rows are assigned in order of appearance: output j receives the rows with
 * _n_ up to (nobs/splitnum)*j, and the last output receives the remainder.
 */
%macro splitds(inlib=,inds=,splitnum=,outid=);
    /* Keep loop counters and the row count out of the caller scope. */
    %local i j nobs;
    %if %length(&inlib)=0 %then %let inlib=WORK;

    proc sql noprint;
        select nobs into :nobs
        from sashelp.vtable
        where libname=upcase("&inlib") and memname=upcase("&inds");
    quit;

    /* INTO leaves the value blank-padded; reassigning strips the padding. */
    %let nobs=&nobs;

    /* No matching row means the dataset does not exist: stop before generating code. */
    %if %length(&nobs)=0 %then %do;
        %put ERROR: splitds could not find &inlib..&inds. in SASHELP.VTABLE.;
        %return;
    %end;

    %put Number of observations in &inlib..&inds.: &nobs;

    data
        %do i=1 %to &splitnum.;
            &outid.&i
        %end;
    ;
        /* Read from the requested library, not from the default one. */
        set &inlib..&inds.;
        %do j=1 %to %eval(&splitnum.-1);
            %if &j. > 1 %then %do; else %end;
            if _n_<=((&nobs./&splitnum.)*&j.) then output &outid.&j.;
        %end;
        /* With a single output there is no preceding IF, so no ELSE is generated. */
        %if &splitnum. > 1 %then %do; else %end;
        output &outid.&splitnum.;
    run;
%mend splitds;
An example call to split MYLIB.MYDATA into 10 data sets named NEWDATA1 - NEWDATA10 would be:
%splitds(inlib=mylib,inds=mydata,splitnum=10,outid=newdata);
Try this. I haven't tested yet, so expect a bug somewhere. You will need to edit the macro call to BATCH_PROCESS to include the names of the datasets, number of new data sets, etc.
/*
 * nobs: function-style macro returning the NOBS attribute of a dataset.
 *
 *   dsn  dataset name (one- or two-level)
 *
 * Returns 0 and writes the system message to the log when the dataset
 * cannot be opened.
 */
%macro nobs (dsn);
    %local nobs dsid rc;
    %let nobs=0;
    %let dsid = %sysfunc(open(&dsn));
    %if &dsid %then %do;
        %let nobs = %sysfunc(attrn(&dsid,NOBS));
        /* Close only a handle that was actually opened. */
        %let rc = %sysfunc(close(&dsid));
    %end;
    %else %put Open for dataset &dsn failed - %sysfunc(sysmsg());
&nobs
%mend nobs;
/*
 * batch_process: split DSN_IN into NUMBER_OF_DSNS datasets for batch processing.
 *
 *   dsn_in          input dataset
 *   dsn_out_prefix  prefix of the output datasets (<prefix>1 .. <prefix>N)
 *   number_of_dsns  number of output datasets
 *
 * Each output holds up to ceil(rows / number_of_dsns) consecutive rows, so
 * every input row lands in exactly one output; the last outputs may be
 * shorter (or empty) when the row count is not an exact multiple.
 * Uses the %nobs macro to obtain the row count.
 */
%macro batch_process(dsn_in,dsn_out_prefix,number_of_dsns);
    %local dsn_obs obs_per_dsn i;

    /* %nobs is a macro call; with an ampersand it would be read as a macro variable. */
    %let dsn_obs = %nobs(&dsn_in);

    /* Round the chunk size UP: a fractional size leaves gaps between the
       per-dataset row ranges, and rows falling in a gap would be dropped. */
    %let obs_per_dsn = %sysevalf(&dsn_obs / &number_of_dsns, ceil);

    data
        %do i = 1 %to &number_of_dsns;
            &dsn_out_prefix.&i
        %end;
    ;
        set &dsn_in;
        /* _n_ counts the rows read so far; the ELSE chain stops at the first matching chunk. */
        %do i = 1 %to &number_of_dsns;
            %if &i > 1 %then %do; else %end;
            if _n_ <= (&i * &obs_per_dsn) then output &dsn_out_prefix.&i;
        %end;
    run;
%mend batch_process;

%batch_process( dsn_in=DSN_NAME , dsn_out_prefix = PREFIX_ , number_of_dsns = 5 );