SAS check if column exist in Table - sas

I want to write a macro that checks whether all the columns in col_to_check are in a table, and I want to exit SAS if one of these columns doesn't exist.
I try this:
/* Asker's attempt: walk the space-separated names in col_to_check and end the
   SAS session depending on the result of %varexist for each name. */
%let col_to_check = ID SEG AGE;
%MACRO check(table , col_to_check);
/* count is declared and initialised but never used below */
%local count;
%let count=0;
/* NOTE(review): in macro code the quote marks in " " are literal characters,
   so they presumably act as extra delimiters for countw/%SCAN - confirm */
%DO i_=1 %TO %sysfunc(countw(&col_to_check.," "));
%LET col=%SCAN(&col_to_check.,&i_.," ");
/* NOTE(review): %varexist is not defined in this snippet; if it returns 1 when
   the column exists, this test is inverted relative to the stated goal - confirm */
%if ( %varexist(&table.,&col.) = 1) %then endsas;
%END;
%MEND check;

Use the vcolumn table of the sashelp library.
/*
  check: verify that every column listed in COL_TO_CHECK exists in LIB.TABLE.

  Parameters
    lib          - libref of the table (any case)
    table        - member name of the table (any case)
    col_to_check - blank-separated list of column names (any case)

  The macro counts how many of the requested names appear in SASHELP.VCOLUMN
  for that table. When the count differs from the number of names requested it
  jumps to the end_pg label; otherwise the "do stuff" section runs.
  Limitation: a name repeated in COL_TO_CHECK is counted once by the query but
  twice in the request, so the list should not contain duplicates.
*/
%macro check(lib, table, col_to_check);
    /* keep the working variables out of the caller's symbol table */
    %local nb nb_col col_names;

    /* Collapse runs of blanks (they would produce empty names) and upper-case
       the list so it can be compared with UPCASE(name). */
    %let col_to_check = %upcase(%sysfunc(compbl(&col_to_check.)));
    %let nb_col = %sysfunc(countw(&col_to_check., %str( )));

    /* A B C  ->  "A" "B" "C"  (operand list for the IN operator) */
    %let col_names = "%sysfunc(tranwrd(&col_to_check.,%str( )," "))";

    proc sql noprint;
        /* LIBNAME and MEMNAME are stored in upper case in the dictionary
           tables, so the caller's values are upper-cased rather than the
           columns. TRIMMED removes the leading blanks INTO would keep. */
        select count(distinct upcase(name)) into :nb trimmed
        from sashelp.vcolumn
        where upcase(name) in (&col_names.)
          and libname = "%upcase(&lib.)"
          and memname = "%upcase(&table.)";
    quit;

    %if &nb. ^= &nb_col. %then
        %goto end_pg;
    %else
        %do;
            %put do stuff;
        %end;
%end_pg:
%mend;
%check(SASHELP, CLASS, SEX WEIGHT NAME AGE);
PS: What do you mean by "exit SAS"? You could easily replace the %goto by %ABORT

Related

SAS: If condition won't recognize macro variable within datastep

In the code below, the IF statement that sends the email isn't evaluating correctly, and I am not sure why. I tried to check for a null, but that didn't work either. It always sends the first DO block of that statement. In the statement below, TABLE1 exists with no records and TABLE2 does not exist. I think it has something to do with &CNT3 being populated with a COUNT(*) in the PROC SQL statement.
/* Asker's script: count the rows of TABLE1 / TABLE2 (0 when the table does not
   exist), export each non-empty table to a sheet of REPORT1.xlsx, then send an
   email whose content is meant to depend on the two counts. */
%IF %SYSFUNC(exist(TABLE1)) %THEN %DO;
PROC SQL;
SELECT COUNT(*) INTO : CNT3 FROM TABLE1;
QUIT;
%END;
%ELSE %DO;
%LET CNT3=0;
%END;
%put &cnt3.;
%IF %SYSFUNC(exist(TABLE2)) %THEN %DO;
PROC SQL;
SELECT COUNT(*) INTO : CNT4 FROM TABLE2;
QUIT;
%END;
%ELSE %DO;
%LET CNT4=0;
%END;
%put &cnt4.;
/* NOTE(review): inside %IF the quotes in '0' are ordinary characters, so the
   second comparison is presumably against the three-character text '0' - confirm */
/* NOTE(review): DATA=TABLE1. ends with a period - looks unintended, confirm */
%IF (&CNT3 ^=0 AND &CNT3^='0') %THEN %DO;
PROC EXPORT DATA=TABLE1.
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST1";
RUN;
%END;
/* NOTE(review): the guard counts TABLE2 but the export reads
   &ENV..AUTH_ERRORLOG_&REC_DATE.; ENV and REC_DATE are not set in this snippet */
%IF (&CNT4 ^=0 AND &CNT4^='0') %THEN %DO;
PROC EXPORT DATA=&ENV..AUTH_ERRORLOG_&REC_DATE.
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST2";
RUN;
%END;
%let EMAIL_SUBJECT = "TEST EMAIL.";
FILENAME OUTBOX EMAIL 'TEST#TEST.COM';
/* Both branches below issue a FILE statement for the same fileref OUTBOX with
   different options; the asker reports the first set of options always wins. */
/* NOTE(review): the export writes to "data/REPORT1.xlsx" (relative) while the
   attachment is "/data/REPORT1.xlsx" (absolute) - confirm they are the same file */
DATA _NULL_;
IF (&CNT3 ^=0 AND &CNT3 ^='0') OR (&CNT4 ^=0 AND &CNT4^='0') THEN
DO;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.
ATTACH=("/data/REPORT1.xlsx" CONTENT_TYPE="APPLICATION/XLSX");
END;
ELSE DO;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.;
PUT"NO ERRORS FOUND";
END;
RUN;
There may be a number of things going on here, so let's try to clean this up a bit to see if it will resolve your issues.
First, let's grab the observation count from the metadata of the tables of interest instead of counting all the observations. This is a great repeatable macro that I highly recommend keeping as an always-available sasauto:
/*
  nobs: function-style macro that expands to the number of logical
  observations (NLOBS) in a SAS data set.

  Parameter
    data - name of the data set, e.g. sashelp.cars

  Expands to
    the observation count, or -1 when the data set does not exist or
    cannot be opened.
*/
%macro nobs(data);
    %local dsid nobs rc;
    %let nobs = -1;
    %if %sysfunc(exist(&data.)) %then %do;
        %let dsid = %sysfunc(open(&data.));
        /* open() returns 0 on failure (for example when the table is locked);
           only query and close a handle that was actually obtained, so the
           macro still expands to -1 instead of a missing value. */
        %if &dsid. > 0 %then %do;
            %let nobs = %sysfunc(attrn(&dsid., nlobs));
            %let rc = %sysfunc(close(&dsid.));
        %end;
    %end;
&nobs.
%mend;
This will act like a function and return the number of observations for a SAS table. If it does not exist, it returns -1. For example:
/* Example calls: %nobs expands in place to the observation count, or to -1
   when the table does not exist. */
%put The number of obs in sashelp.cars is %nobs(sashelp.cars);
%put The number of obs in a non-existent table is %nobs(doesntexist);
Output:
The number of obs in sashelp.cars is 428
The number of obs in a non-existent table is -1
Now we're guaranteeing that we're always returning a number without spaces in it. Let's replace the program logic:
/* Export each source table to its own sheet of REPORT1.xlsx, but only when
   %nobs reports at least one row (%nobs gives -1 for a missing table). */
%if(%nobs(table1) > 0) %then %do;
PROC EXPORT DATA=TABLE1
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST1";
RUN;
%end;
/* NOTE(review): the guard counts TABLE2 while the export reads
   &ENV..AUTH_ERRORLOG_&REC_DATE. - kept as posted; confirm they are the same table. */
%if(%nobs(table2) > 0) %then %do;
PROC EXPORT DATA=&ENV..AUTH_ERRORLOG_&REC_DATE.
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST2";
RUN;
%end;
%let EMAIL_SUBJECT = "TEST EMAIL.";
FILENAME OUTBOX EMAIL 'TEST#TEST.COM';
/* Choose the email at macro level so that each DATA step contains exactly one
   FILE statement. With two FILE OUTBOX statements inside a DATA-step IF/ELSE
   the asker observed that the first set of options was always used, so the
   branch is decided before the step is generated. */
/* NOTE(review): the export path is relative ("data/...") and the attachment
   path is absolute ("/data/...") - confirm both point at the same file. */
%if (%nobs(table1) > 0 OR %nobs(table2) > 0) %then %do;
DATA _NULL_;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.
ATTACH=("/data/REPORT1.xlsx" CONTENT_TYPE="APPLICATION/XLSX");
RUN;
%end;
%else %do;
DATA _NULL_;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.;
PUT"NO ERRORS FOUND";
RUN;
%end;
This test does not make any sense
&CNT3 ^=0 AND &CNT3 ^='0'
in either the macro logic or the data step logic.
If CNT3 is going to have values like 0 or 123, or even 123 padded with leading blanks, then just test whether it is zero or not:
&cnt3 ne 0

SAS macro with conditional loops (%if %then %else %do %end)

I'm a beginner in SAS and I am trying to use a macro to import excel files using a conditional loop.
The importing process is based on initial_year ; final_year; initial_month and final_month values.
But it seems the If condition is not working. Can you help please. Thank you.
This is my sas program:
/* Asker's macro: import one Excel file per year/month between
   (initial_year, initial_month) and (final_year, final_month) and stamp each
   imported table with its yyyymm value in Data_anomes. */
%let path=\\xxxx.yy.pt\aaa$\INFO\;
%let initial_year=2019; %let initial_month=2;
%let final_year=2021; %let final_month=1;
%Macro import_loop;
/* Branch 1: final_month >= initial_month - same month range for every year */
%if &final_month >= &initial_month %then %do;
%DO x = &initial_year %TO &final_year;
%DO i = &initial_month %TO &final_month;
/* anomes = yyyymm, zero-padding single-digit months */
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
/* NOTE(review): the two %end above close the i and x loops, so the %then %do
   opened at the top of this branch is still open when %else is reached; the
   %else %do below also has no terminating semicolon. */
%else %do
/* Branch 2: all years before final_year, from initial_month through December */
/* NOTE(review): every year in this loop starts at &initial_month, so the
   earlier months of the years after the first are not imported - confirm intent */
%DO x = &initial_year %TO &final_year-1;
%DO i = &initial_month %TO 12;
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
/* ... then January through final_month of final_year */
%DO x = &final_year %TO &final_year;
%DO i = 1 %TO &final_month;
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
%end;
%mend import_loop;
%import_loop
Just treat your dates like dates and the looping will be much easier.
/*
  import: import one Farmacias_EA_<yyyymm>.xlsx workbook per month in the
  inclusive range FROM..TO and add the numeric column Data_anomes = yyyymm.

  Parameters
    from - first month, as yyyymm (e.g. 201902)
    to   - last month, as yyyymm (e.g. 202101); must not be earlier than FROM,
           otherwise the loop runs zero times and nothing is imported

  Uses the macro variable PATH (root folder ending in a backslash), which must
  be assigned before the macro is called. Files are read from
  <PATH><year>\Farmacias_EA_<yyyymm>.xlsx.
*/
%macro import(from,to);
%local start end offset yymm year ;
/* turn yyyymm into the SAS date of the first day of that month */
%let start=%sysfunc(inputn(&from.01,yymmdd8.));
%let end=%sysfunc(inputn(&to.01,yymmdd8.));
/* one iteration per month boundary between start and end, inclusive */
%do offset=0 %to %sysfunc(intck(month,&start,&end));
  %let yymm=%sysfunc(intnx(month,&start,&offset),yymmn6.);
  %let year=%substr(&yymm,1,4);
proc import datafile="&path.&year.\Farmacias_EA_&yymm..xlsx"
  out=Farmacias_EA_&yymm. REPLACE dbms=xlsx
;
run;
data Farmacias_EA_&yymm.;
  set Farmacias_EA_&yymm.;
  Data_anomes=&yymm.;
run;
%end;
%mend import ;
%let path=\\xxxx.yy.pt\aaa$\INFO\;
/* February 2019 through January 2021, the range given in the question
   (the end month must not precede the start month). */
%import(201902,202101);
Do you really want the variable DATA_ANOMES to have numbers like 201,902 ? Why not either store it as a string like "201902" or an actual date value like "01FEB2019"d ?

Checking for variable type in SAS-Macro

I am trying to summarize my variables using proc sql and proc freq procedures in a macro.
Here is the code:
/* Asker's macro: for each name in VARS, print either the percentage of 1s per
   segment_final (when the variable is judged to be binary) or a
   segment_final-by-variable frequency table of INPUT. */
%macro des_freq(input= ,vars= );
%let n=%sysfunc(countw(&vars));
/* binary is set to NO once, before the loop; it is not reset per variable */
%let binary=NO;
%do i = 1 %to &n;
%let values = %scan(&vars, &i);
/* NOTE(review): %datatyp is applied to the text held in &values, which is the
   variable NAME taken from VARS, not to the column stored in &input - confirm */
%if %datatyp(&values)=NUMERIC %then %do;
proc summary data = &input;
output out=x min(&values)=minx max(&values)=maxx;
run;
/* flag the variable as binary when its minimum is 0 and its maximum is 1 */
data _null_;
set x;
if minx = 0 and maxx = 1 then call symputx('binary','YES');
run;
%if &binary = YES %then %do;
/* percentage of rows with value 1, per segment */
proc sql;
select segment_final,
(sum(case when &values = 1 then 1 else 0 end)/ count(*)) * 100 as &values._percent
from &input
group by segment_final;
quit;
%end;
%else %do;
proc freq data =&input;
tables segment_final*&values/nofreq nopercent nocol;
run;
%end;
%end;
%else %do;
proc freq data =&input;
tables segment_final*&values/nofreq nopercent nocol;
run;
%end;
%end;
%mend;
My variables can be numeric or character. If a variable is numeric, it can have two or more distinct values.
I want % of 1's in a binary variable by segments(hence proc sql) and % of all distinct variables for each segment(hence proc freq).
My first if statement is checking whether the variable if numeric or not and then if its numeric, next few steps is checking if its binary or not. If its binary then execute the proc sql else execute proc freq.
If the variable is character then just execute the proc freq.
I am not able to figure out how to check if my variable is numeric or not. I tried %SYSFUNC(Vartype), %isnum and %DATATYP. None of them seem to work. Please help!!
First you can look into sashelp.vcolumn table to check variables types:
/* Build WANT with one row per column of SASHELP.CLASS, keeping only the
   library, member, column name and column type read from SASHELP.VCOLUMN. */
data want;
    set sashelp.vcolumn(keep=libname memname name type);
    where libname = 'SASHELP'
      and memname = 'CLASS';
run;
If you don't want to use the vcolumn table, you can use the vtype() data step function, as @Tom suggests:
/* Store YES in the macro variable BINARY when &values is a numeric variable of
   &input, otherwise NO.
   - "if 0 then set" only loads the variable attributes at compile time, so no
     observation is read and the assignment still runs for an empty data set
     (with "set ... (obs=1)" the step ends before CALL SYMPUT when there are
     no rows).
   - CALL SYMPUTX strips the trailing blanks of the value returned by IFC.
   NOTE(review): the name BINARY is kept as posted, although the value only
   says whether the variable is numeric - confirm how the caller uses it. */
data _NULL_;
    if 0 then set &input;
    call symputx('binary', ifc(vtype(&values) = 'N', 'YES', 'NO'));
    stop;
run;

How to loop through a macro variable in SAS

I have an example like this:
/* Asker's query: INTO names a single macro variable with no SEPARATED BY
   clause and no range, so deal_no receives only the first dealno returned. */
proc sql;
select dealno into :deal_no
from deal_table;
Now I want to traverse the variable deal_no now containing all dealno in table deal_table but I don't know how to do it.
Another option is to add 'separated by' to the SQL code, which puts a delimiter between the values. You can then use the SCAN function in a data step, or %SCAN in a macro, to loop through the values and perform whatever task you want. Example below.
/* Collect every AGE of SASHELP.CLASS into one comma-delimited macro variable. */
proc sql noprint;
    select age
        into :age separated by ','
        from sashelp.class;
quit;
%put &age.;

/* Walk the list word by word and write one row per value to TEST. */
data test;
    i = 1;
    do while (not missing(scan("&age.", i)));
        age = scan("&age.", i);
        output;
        i = i + 1;
    end;
    drop i;
run;
If you do
%put &deal_no;
you can see that it only contains the first value of dealno, not all of them.
To avoid that you can do something like this:
/* Store every dealno in its own macro variable deal_no_1, deal_no_2, ...
   The open-ended range ":deal_no_1 -" lets PROC SQL create as many variables
   as there are rows, so no helper table is needed just to learn the row count
   beforehand. SQLOBS holds the number of rows stored once the query has run. */
proc sql noprint;
    select dealno
        into :deal_no_1 -
        from deal_table;
quit;
%let N = &sqlobs;

/* Print each stored value; &&deal_no_&i resolves to &deal_no_1, &deal_no_2, ... */
%macro loop;
    %local i;
    %do i = 1 %to &N;
        %put &&deal_no_&i;
    %end;
%mend;
%loop;
Here's another solution.
/* Gather all AGE values of ageData into one blank-delimited macro variable. */
proc sql noprint;
    select age
        into :ageVals separated by ' '
        from ageData;
quit;
%put &ageVals;

/* Print the words of &ageVals one at a time; the loop stops at the first
   position for which %scan returns nothing. */
%macro loopAgeVals;
    %let i = 1;
    %let ageVal = %scan(&ageVals, &i);
    %do %while(%length(&ageVal) > 0);
        %put &ageVal;
        %let i = %eval(&i + 1);
        %let ageVal = %scan(&ageVals, &i);
    %end;
%mend;
%loopAgeVals;

What's the fastest way to partition a sas dataset for batch processing?

I have a large sas dataset (1.5m obs, ~250 variables) that I need to split into several smaller sas datasets of equal size for batch processing. Each dataset needs to contain all the variables but only a fraction of the obs. What is the fastest way of doing this?
You could do something like the following:
/*
  splitds: split INLIB.INDS into SPLITNUM data sets of (nearly) equal size
  named <OUTID>1 .. <OUTID><SPLITNUM>.

  Parameters
    inlib    - libref of the input data set
    inds     - member name of the input data set
    splitnum - number of output data sets (1 or more)
    outid    - name prefix of the output data sets

  The row count is read from SASHELP.VTABLE. Row k goes to the first output
  data set j for which k <= (nobs / splitnum) * j; everything that is left
  goes to the last one.
*/
%macro splitds(inlib=,inds=,splitnum=,outid=);
    %local i j nobs;

    proc sql noprint;
        /* TRIMMED removes the leading blanks INTO would otherwise keep */
        select nobs into :nobs trimmed
        from sashelp.vtable
        where libname=upcase("&inlib") and memname=upcase("&inds");
    quit;

    /* no matching row in VTABLE: the boundaries below could not be generated */
    %if %length(&nobs) = 0 %then %do;
        %put ERROR: splitds could not find &inlib..&inds. in SASHELP.VTABLE.;
        %return;
    %end;

    %put Number of observations in &inlib..&inds.: &nobs;

    data
        %do i=1 %to &splitnum.;
            &outid.&i
        %end;
    ;
        /* read from the requested library, not from the default one */
        set &inlib..&inds.;
        %do j=1 %to %eval(&splitnum.-1);
            %if &j. > 1 %then %do; else %end;
            if _n_<=((&nobs./&splitnum.)*&j.) then output &outid.&j.;
        %end;
        /* with a single output data set there is no preceding IF to attach
           an ELSE to, so the keyword is emitted only when splitnum > 1 */
        %if &splitnum. > 1 %then %do; else %end;
        output &outid.&splitnum.;
    run;
%mend;
An example call to split MYLIB.MYDATA into 10 data sets named NEWDATA1 - NEWDATA10 would be:
/* Split MYLIB.MYDATA into ten data sets named NEWDATA1 - NEWDATA10. */
%splitds(inlib=mylib,inds=mydata,splitnum=10,outid=newdata);
Try this. I haven't tested yet, so expect a bug somewhere. You will need to edit the macro call to BATCH_PROCESS to include the names of the datasets, number of new data sets, etc.
/*
  nobs: function-style macro that expands to the number of observations in DSN.

  Parameter
    dsn - name of the data set to inspect

  Expands to
    the number of logical observations (NLOBS, i.e. rows not marked as
    deleted - the rows a SET statement actually reads), or 0 when the data
    set cannot be opened; in that case the open() message is written to the log.
*/
%macro nobs (dsn);
    %local nobs dsid rc;
    %let nobs=0;
    %let dsid = %sysfunc(open(&dsn));
    %if &dsid %then %do;
        %let nobs = %sysfunc(attrn(&dsid,NLOBS));
        /* close only a handle that was really opened; open() returns 0 on failure */
        %let rc = %sysfunc(close(&dsid));
    %end;
    %else %put Open for dataset &dsn failed - %sysfunc(sysmsg());
&nobs
%mend nobs;
/*
  batch_process: split DSN_IN into NUMBER_OF_DSNS consecutive chunks written to
  <DSN_OUT_PREFIX>1 .. <DSN_OUT_PREFIX><NUMBER_OF_DSNS>.

  Parameters (positional)
    dsn_in          - input data set
    dsn_out_prefix  - name prefix of the output data sets
    number_of_dsns  - number of output data sets (1 or more)

  The chunk size is the row count divided by NUMBER_OF_DSNS, rounded UP to a
  whole number. With a fractional chunk size and closed ranges, some row
  numbers fall between two ranges and are written nowhere (for 10 rows and 3
  outputs, row 4 satisfies neither 1..3.33 nor 4.33..6.67). An IF / ELSE IF
  chain on whole-number upper bounds assigns every row exactly once; the last
  data set takes whatever remains.
*/
%macro batch_process(dsn_in,dsn_out_prefix,number_of_dsns);
    %local i dsn_obs obs_per_dsn;

    /* %nobs is a macro call and needs the % trigger; &nobs would be a
       reference to a macro variable named NOBS */
    %let dsn_obs = %nobs(&dsn_in);
    %let obs_per_dsn = %sysevalf(&dsn_obs / &number_of_dsns, ceil);
    %if &obs_per_dsn < 1 %then %let obs_per_dsn = 1;

    data
        %do i = 1 %to &number_of_dsns;
            &dsn_out_prefix.&i
        %end;
    ;
        set &dsn_in;
        %do i = 1 %to %eval(&number_of_dsns - 1);
            %if &i > 1 %then %do; else %end;
            if _n_ <= %eval(&i * &obs_per_dsn) then output &dsn_out_prefix.&i;
        %end;
        /* the ELSE keyword is only valid when at least one IF precedes it */
        %if &number_of_dsns > 1 %then %do; else %end;
        output &dsn_out_prefix.&number_of_dsns;
    run;
%mend batch_process;
/* The parameters are positional, so the values are passed without name= */
%batch_process(DSN_NAME, PREFIX_, 5);