Importing an empty CSV file in SAS - sas

I'm trying to import a series of CSV files with a macro that loops through all the files in a given folder. However, some of the CSV files in the folder are empty, and I would like to exclude them from the loop.
Is there any way in SAS to find the size of a CSV file?
/* Import the CSV file named by &input into the data set named by &output. */
proc import
    datafile = "&input"
    out      = &output
    dbms     = csv
    replace;
  getnames = yes;   /* take the variable names from the first row */
  datarow  = 2;     /* data values start on the second row */
  /* GUESSINGROWS=32000; */
run;
Thanks, Sam.

Here's a way to do it in a data step:
/* Read c:\date.tmp and store the file's size and timestamps on every row. */
filename fileref 'c:\date.tmp';
data a;
  infile fileref truncover;
  /* Open the same fileref by name so that FINFO can query its attributes. */
  fid=fopen('fileref');
  Bytes=finfo(fid,'File Size (bytes)');
  crdate=finfo(fid,'Create Time');
  moddate=finfo(fid,'Last Modified');
  /* Release the handle immediately: FOPEN runs on every iteration, and for
     an empty file the INPUT statement below ends the step, so nothing placed
     after it would ever run. */
  rc=fclose(fid);
  drop rc;
  input var1 $20.;
run;

%macro FileAttribs(filename);
  /*------------------------------------------------------------------
    Report the size of an external file in the SAS log.

    filename : physical path of the file to inspect.

    Writes a banner saying whether the size is greater than zero and a
    NOTE with the byte count. A path that cannot be opened is reported
    with a WARNING instead of being queried through an invalid handle.
  ------------------------------------------------------------------*/
  %local rc fid fidc Bytes onefile;
  /* ONEFILE is blank here, so FILENAME generates a fileref and stores its
     name in ONEFILE. */
  %let rc=%sysfunc(filename(onefile,&filename));
  %let fid=%sysfunc(fopen(&onefile));
  %if &fid > 0 %then %do;
    %let Bytes=%sysfunc(finfo(&fid,File Size (bytes)));
    %if &Bytes >0 %then %do;
      %put #####Size is > 0#####;
    %end;
    %else %do;
      /* A size cannot be negative; this branch means the file is empty. */
      %put #####Size is 0#####;
    %end;
    %let fidc=%sysfunc(fclose(&fid));
    %put NOTE: File size of &filename is &Bytes bytes;
  %end;
  %else %do;
    %put WARNING: Could not open &filename - %sysfunc(sysmsg());
  %end;
  /* Deassign the generated fileref whether or not the open succeeded. */
  %let rc=%sysfunc(filename(onefile));
%mend FileAttribs;

and here's another way to do it:
/* Open-code version: write the size of one file to the log. */
/* Path of the file to inspect. */
%let filename =c:\date.tmp;
/* Associate a fileref with the path; the fileref name lives in macro variable ONEFILE. */
%let rc=%sysfunc(filename(onefile,&filename));
/* Open the file and ask for its size attribute. */
/* NOTE(review): the FOPEN result is not checked before FINFO is called - confirm the file always exists. */
%let fid=%sysfunc(fopen(&onefile));
%let Bytes=%sysfunc(finfo(&fid,File Size (bytes)));
/* Close the file handle. NOTE(review): the fileref itself is not deassigned in this snippet. */
%let fidc=%sysfunc(fclose(&fid));
%put NOTE: File size of &filename is &Bytes bytes;

Related

SAS: If condition won't recognize macro variable within datastep

In the code below, the IF statement that sends the email isn't evaluating correctly, and I am not sure why. I tried to check for a null, but that didn't work either. It always executes the first DO block of that statement. In the scenario below, TABLE1 exists but has no records, and TABLE2 does not exist. I think it has something to do with &CNT3 being populated from a COUNT(*) in the PROC SQL statement.
/* Question code: count rows in TABLE1 and TABLE2, export non-empty tables to
   REPORT1.xlsx, then send an email with or without the attachment. */
/* CNT3 = row count of TABLE1, or 0 when TABLE1 does not exist. */
%IF %SYSFUNC(exist(TABLE1)) %THEN %DO;
PROC SQL;
SELECT COUNT(*) INTO : CNT3 FROM TABLE1;
QUIT;
%END;
%ELSE %DO;
%LET CNT3=0;
%END;
%put &cnt3.;
/* CNT4 = row count of TABLE2, or 0 when TABLE2 does not exist. */
%IF %SYSFUNC(exist(TABLE2)) %THEN %DO;
PROC SQL;
SELECT COUNT(*) INTO : CNT4 FROM TABLE2;
QUIT;
%END;
%ELSE %DO;
%LET CNT4=0;
%END;
%put &cnt4.;
/* Export to sheet TEST1 when CNT3 is considered non-zero. */
/* NOTE(review): in macro logic the quote marks are ordinary characters, so the
   second comparison is against the quoted text rather than the digit 0 - confirm intent. */
/* NOTE(review): the DATA= value below ends with a period - confirm that is intended. */
%IF (&CNT3 ^=0 AND &CNT3^='0') %THEN %DO;
PROC EXPORT DATA=TABLE1.
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST1";
RUN;
%END;
/* Export to sheet TEST2 when CNT4 is considered non-zero. */
/* NOTE(review): the count comes from TABLE2 but the exported data set is built
   from ENV and REC_DATE - confirm they refer to the same table. */
%IF (&CNT4 ^=0 AND &CNT4^='0') %THEN %DO;
PROC EXPORT DATA=&ENV..AUTH_ERRORLOG_&REC_DATE.
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST2";
RUN;
%END;
/* Email step: the macro variables are substituted as text into the data step
   IF condition before the step runs. */
%let EMAIL_SUBJECT = "TEST EMAIL.";
FILENAME OUTBOX EMAIL 'TEST#TEST.COM';
DATA _NULL_;
/* NOTE(review): after substitution this compares a numeric literal with a
   character literal in data step code - confirm intent. */
/* NOTE(review): the export writes to a relative data/ path while the attachment
   uses an absolute /data/ path - confirm both point at the same file. */
IF (&CNT3 ^=0 AND &CNT3 ^='0') OR (&CNT4 ^=0 AND &CNT4^='0') THEN
DO;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.
ATTACH=("/data/REPORT1.xlsx" CONTENT_TYPE="APPLICATION/XLSX");
END;
ELSE DO;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.;
PUT"NO ERRORS FOUND";
END;
RUN;
There may be a number of things going on here, so let's try to clean this up a bit to see if it will resolve your issues.
First, let's grab the observation count from the metadata of the tables of interest instead of counting all the observations. This is a great repeatable macro that I highly recommend keeping as an always-available sasauto:
%macro nobs(data);
  /* Function-style macro: expands to the NLOBS attribute (logical observation
     count) of the given data set, or to -1 when the data set does not exist. */
  %local handle count closerc;
  %let count = -1;
  %if %sysfunc(exist(&data.)) %then %do;
    %let handle  = %sysfunc(open(&data.));
    %let count   = %sysfunc(attrn(&handle., nlobs));
    %let closerc = %sysfunc(close(&handle.));
  %end;
  &count.
%mend;
This will act like a function and return the number of observations for a SAS table. If it does not exist, it returns -1. For example:
/* Demonstrate the nobs macro on an existing table and on a table that does not exist. */
%put The number of obs in sashelp.cars is %nobs(sashelp.cars);
%put The number of obs in a non-existent table is %nobs(doesntexist);
Output:
The number of obs in sashelp.cars is 428
The number of obs in a non-existent table is -1
Now we're guaranteeing that we're always returning a number without spaces in it. Let's replace the program logic:
/* Rewritten program logic: each decision calls the nobs macro directly instead
   of relying on counts stored in macro variables. */
/* Export TABLE1 to sheet TEST1 only when it has at least one observation. */
%if(%nobs(table1) > 0) %then %do;
PROC EXPORT DATA=TABLE1
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST1";
RUN;
%end;
/* Export to sheet TEST2 only when table2 has at least one observation. */
/* NOTE(review): the test is on table2 but the exported data set is built from
   ENV and REC_DATE - confirm they refer to the same table. */
%if(%nobs(table2) > 0) %then %do;
PROC EXPORT DATA=&ENV..AUTH_ERRORLOG_&REC_DATE.
DBMS=XLSX
OUTFILE="data/REPORT1.xlsx"
REPLACE;
SHEET="TEST2";
RUN;
%end;
%let EMAIL_SUBJECT = "TEST EMAIL.";
FILENAME OUTBOX EMAIL 'TEST#TEST.COM';
DATA _NULL_;
/* The macro calls expand to plain numbers before the data step runs, so this
   is an ordinary numeric comparison. */
/* NOTE(review): the export writes to a relative data/ path while the attachment
   uses an absolute /data/ path - confirm both point at the same file. */
IF (%nobs(table1) > 0 OR %nobs(table2) > 0) then do;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.
ATTACH=("/data/REPORT1.xlsx" CONTENT_TYPE="APPLICATION/XLSX");
END;
ELSE DO;
FILE OUTBOX
TO=('TEST#TEST.COM')
SUBJECT= &EMAIL_SUBJECT.;
PUT"NO ERRORS FOUND";
END;
RUN;
This test does not make any sense
&CNT3 ^=0 AND &CNT3 ^='0'
in either the macro logic or the data step logic.
If CNT3 is going to have values like 0 or 123, or even a value padded with leading spaces such as "     123", then just test whether it is zero or not:
&cnt3 ne 0

SAS macro with conditional loops (%if %then %else %do %end)

I'm a beginner in SAS and I am trying to use a macro to import Excel files using a conditional loop.
The import process is driven by the values of initial_year, final_year, initial_month and final_month.
However, the %IF condition does not seem to be working. Can you help, please? Thank you.
This is my sas program:
/* Question code: import one workbook per year/month between the initial and
   final year/month and stamp each imported table with its yyyymm value. */
%let path=\\xxxx.yy.pt\aaa$\INFO\;
%let initial_year=2019; %let initial_month=2;
%let final_year=2021; %let final_month=1;
%Macro import_loop;
/* NOTE(review): the %do opened by the first %if has no matching %end before
   the %else - the two %end statements that follow the first data step close
   only the i and x loops. */
/* NOTE(review): the %else %do line has no terminating semicolon. */
/* Branch 1: final month is not earlier than the initial month, so the same
   month range is looped for every year. */
%if &final_month >= &initial_month %then %do;
%DO x = &initial_year %TO &final_year;
%DO i = &initial_month %TO &final_month;
/* Build yyyymm with a zero-padded month. */
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
%else %do
/* Branch 2a: every year before the final year, from the initial month to December. */
%DO x = &initial_year %TO &final_year-1;
%DO i = &initial_month %TO 12;
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
/* Branch 2b: the final year, from January to the final month. */
%DO x = &final_year %TO &final_year;
%DO i = 1 %TO &final_month;
%if &i <=9 %then %let anomes=&x.0&i;
%else %let anomes=&x&i ;
proc import datafile="&path&x\Farmacias_EA_&anomes..xlsx"
out=Farmacias_EA_&anomes REPLACE dbms=xlsx;
run;
data Farmacias_EA_&anomes;
set Farmacias_EA_&anomes;
Data_anomes=&anomes;
run;
%end;
%end;
%end;
%mend import_loop;
%import_loop
Just treat your dates like dates and the looping will be much easier.
%macro import(from,to);
  /*------------------------------------------------------------------
    Import one Farmacias_EA workbook per month for an inclusive range.

    from : first month to import, written as yyyymm (for example 201902)
    to   : last month to import, written as yyyymm (for example 202101)

    Reads the global macro variable PATH as the root folder; each
    workbook is looked up in a sub-folder named after its year.
    Both bounds are converted to SAS dates so that the loop is a simple
    month offset and year boundaries need no special handling.
  ------------------------------------------------------------------*/
  %local start end offset yymm year ;
  %let start=%sysfunc(inputn(&from.01,yymmdd8.));
  %let end=%sysfunc(inputn(&to.01,yymmdd8.));
  /* A reversed range would silently import nothing, so say so. */
  %if &end < &start %then %do;
    %put WARNING: import range is empty because &to is earlier than &from..;
    %return;
  %end;
  %do offset=0 %to %sysfunc(intck(month,&start,&end));
    %let yymm=%sysfunc(intnx(month,&start,&offset),yymmn6.);
    %let year=%substr(&yymm,1,4);
    proc import datafile="&path.&year.\Farmacias_EA_&yymm..xlsx"
      out=Farmacias_EA_&yymm. REPLACE dbms=xlsx
    ;
    run;
    /* Stamp every row with the month it was imported for. */
    data Farmacias_EA_&yymm.;
      set Farmacias_EA_&yymm.;
      Data_anomes=&yymm.;
    run;
  %end;
%mend import ;
%let path=\\xxxx.yy.pt\aaa$\INFO\;
/* February 2019 through January 2021, matching the range in the question. */
%import(201902,202101);
Do you really want the variable DATA_ANOMES to have numbers like 201,902 ? Why not either store it as a string like "201902" or an actual date value like "01FEB2019"d ?

Listing files UNIX SAS

I found some code that lists all the directories and sub-directories under a path,
but it returns only the directory and the name of each file.
Could you please help me also retrieve the owner and the size of each file?
%macro list_files(dir);
/* Recursively walk the directory passed in dir and append one row per file
   (variables dir and name) to the data set WANT. */
%local filrf rc did memcnt name i;
/* Assign a fileref to the directory and open it. */
%let rc=%sysfunc(filename(filrf,&dir));
%let did=%sysfunc(dopen(&filrf));
/* DOPEN returned 0: report the problem and leave the macro. */
%if &did eq 0 %then %do;
%put Directory &dir cannot be open or does not exist;
%return;
%end;
/* Visit every member of the directory. */
%do i = 1 %to %sysfunc(dnum(&did));
%let name=%qsysfunc(dread(&did,&i));
/* A member whose name contains a dot is recorded as a file. */
%if %index(%qscan(&name,-1,'/'),.) gt 0 %then %do;
data _tmp;
length dir $512 name $100;
dir=symget("dir");
name=symget("name");
run;
proc append base=want data=_tmp;
run;quit;
%end;
%else %if %qscan(&name,2,.) = %then %do;
/* No second dot-delimited token: the member is treated as a sub-directory and walked recursively. */
%list_files(&dir/&name)
%end;
%end;
/* Close the directory and deassign the fileref. */
%let rc=%sysfunc(dclose(&did));
%let rc=%sysfunc(filename(filrf));
%mend list_files;
DOPTNAME is your friend here.
Read SAS documentation for "Example 1: Using DOPTNAME to Retrieve Directory Attribute Information"
This example opens the directory with the fileref MYDIR, retrieves all system-dependent directory information items, writes them to the SAS log, and closes the directory:
/* Quoted documentation example: open a directory, list the name of every
   directory information item in the log, then close the directory. */
/* NOTE(review): physical-name is presumably a placeholder for the real directory path. */
%let rc=%sysfunc(filename(filrf, physical-name));
%let did=%sysfunc(dopen(&filrf));
/* Number of information items available for this directory. */
%let infocnt=%sysfunc(doptnum(&did));
%do j=1 %to &infocnt;
%let opt=%sysfunc(doptname(&did, &j));
%put Directory information=&opt;
%end;
%let rc=%sysfunc(dclose(&did));
%macro test;
  /* List the name of every directory information item that DOPTNAME reports
     for the directory assigned to fileref MYDIR. Replace physical-name with
     the path of the directory to inspect. */
  /* Keep the work variables local so that the macro cannot overwrite
     same-named macro variables belonging to the caller. */
  %local filrf rc did infocnt j opt;
  %let filrf=mydir;
  %let rc=%sysfunc(filename(filrf, physical-name));
  %let did=%sysfunc(dopen(&filrf));
  %if &did > 0 %then %do;
    %let infocnt=%sysfunc(doptnum(&did));
    %do j=1 %to &infocnt;
      %let opt=%sysfunc(doptname(&did, &j));
      %put Directory information=&opt;
    %end;
    %let rc=%sysfunc(dclose(&did));
  %end;
  %else %do;
    /* DOPEN returned 0, so there is no directory handle to query or close. */
    %put WARNING: Directory could not be opened - %sysfunc(sysmsg());
  %end;
%mend test;
%test
Use the finfo() function. You can do this all in a single data step.
Documentation
/* Macro variable to store the directory. Do not keep ending slash. */
%let directory = /my/directory;
filename mydir "&directory";

/* Build FILE_LIST with one row per directory member: name, owner, size and
   whether the member looks like a file or a directory. */
data file_list;
    /* Give each variable its own length. In a shared list, directory would
       inherit the $15 of filetype and long paths would be truncated. */
    length directory $1000
           filetype  $15
           filename  $1000
           owner     $100
           size      8
    ;

    directory = "&directory/";

    /* Open the directory */
    did = dopen("mydir");

    /* If the directory exists, loop through all files in the directory */
    if(did) then do;
        do i = 1 to dnum(did);

            /* Get the filename */
            filename = dread(did, i);

            /* Assign a fileref to the member and open it to read its attributes */
            rc  = filename('infile', cats(directory,filename));
            fid = fopen('infile');

            if fid > 0 then do;
                owner = finfo(fid, 'Owner Name');
                /* FINFO returns text; convert explicitly and quietly so that a
                   blank or non-numeric value becomes missing. */
                size  = input(finfo(fid, 'File Size (bytes)'), ?? best32.);
                rc    = fclose(fid);
            end;
            else do;
                /* Member could not be opened: do not carry over the values
                   of the previous member. */
                call missing(owner, size);
            end;

            /* Flag if it's a directory or file */
            if(missing(size)) then filetype = 'Directory';
            else filetype = 'File';

            /* Release the per-member fileref */
            rc = filename('infile');

            output;
        end;

        /* Close the directory; handles from DOPEN are released with DCLOSE,
           not CLOSE */
        rc = dclose(did);
    end;

    keep directory filename owner size filetype;
run;

Macro quoting issue in function style macro

This is a follow up question to this question.
I'm trying to simplify the way we embed images into our HTML results. The idea for this was inspired by this other question .
Basically what I am trying to do is to write a function-style macro (called %html_embed_image()) that takes an image, and converts it into a base64 format suitable for use in an HTML <img src=""> block.
Given an image such as this:
The usage would be:
/* Intended usage: the macro call sits inside the quoted PUT string, so the
   text it generates becomes the value of the src attribute. */
data _null_;
file _webout;
put "<img src=""%html_embed_image(iFileName=hi.png)"" />";
run;
And the final output would be:
<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAIAAAAC64paAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAABaSURBVDhP5YxbCsAgDAS9/6XTvJTWNUSIX3ZAYXcdGxW4QW6Khw42Axne81LG0shlRvVVLyeTI2aZ2fcPyXwPdBI8B999NK/gKTaGyxaMX8gTJRkpyREFmegBTt8lFJjOey0AAAAASUVORK5CYII=" />
The question linked above shows how to do this in regular datastep code, but I am having issues getting this working in a function style macro. I posted a simplified problem I was having earlier and Tom was able to solve that simplified issue, but it doesn't seem to be working in the greater context of the function style macro.
Here is my code so far (the line causing issues is wrapped with two put statements indicating that it is the problem):
/* Question code: function-style macro intended to return the contents of an
   image file as base64 text. The statement between the PROBLEM START and
   PROBLEM END messages is the one the question reports as failing. */
option mprint symbolgen;
%macro html_embed_image(iFileName=);
%local rc fid rc2 str str_length format_length format_mod base64_format base64_string;
/* ONLY READ IN 16K CHUNKS AS CONVERTING TO BASE64 */
/* INCREASES SIZE AND DONT WANT TO EXCEED 32K. */
%let rc = %sysfunc(filename(filrf, &iFileName, , lrecl=16000));
%let fid = %sysfunc(fopen(&filrf, i, 16000, b));
/* Only read when the file was opened successfully. */
%if &fid > 0 %then %do;
/* Load the first record; loop for as long as FREAD keeps returning 0. */
%let rc = %sysfunc(fread(&fid));
%do %while(&rc eq 0);
/* Copy up to 16000 bytes of the record into STR, then macro-quote the value. */
%let rc2 = %sysfunc(fget(&fid,str,16000));
%let str = %superq(str);
/* FORMAT LENGTH NEEDS TO BE 4n/3 ROUNDED UP TO NEAREST MULTIPLE OF 4 */
%let str_length = %length(&str);
%let format_length = %sysevalf(4*(&str_length/3));
%let format_mod = %sysfunc(mod(&format_length,4));
%if &format_mod ne 0 %then %do;
%let format_length = %sysevalf(&format_length - &format_mod + 4);
%end;
%let base64_format = %sysfunc(cats($base64x,&format_length,.));
%put &=base64_format;
/* CONVERT THE BINARY DATA TO BASE64 USING THE CALCULATED FORMAT */
%put PROBLEM START;
%let base64_string = %sysfunc(putc(&str,&base64_format));
%put PROBLEM END;
%put &=base64_string;
/*&base64_string*/ /* RETURN RESULT HERE - COMMENTED OUT UNTIL WORKING */
%let rc = %sysfunc(fread(&fid));
%end;
%end;
%else %do;
/* Open failed: write the system message to the log. */
%put %sysfunc(sysmsg());
%end;
/* NOTE(review): FCLOSE is called even when the open failed - confirm this is harmless. */
%let rc=%sysfunc(fclose(&fid));
%let rc=%sysfunc(filename(filrf));
%mend;
Test the code:
/* Call the macro in function style and write whatever text it generates to the log. */
%put %html_embed_image(iFileName=hi.png);
Results in:
ERROR: Expected close parenthesis after macro function invocation not found.
Any tips on how to fix this, or suggestions for workarounds would be great.
Just write the text using a data step.
/* Stream an image file to _WEBOUT as an inline base64 <img> tag, encoding the
   file in 60-byte pieces (60 bytes become exactly 80 base64 characters). */
%let fname=hi.png;
data _null_;
  file _webout recfm=n;
  /* Opening part of the tag, written once. */
  if _n_=1 then put '<img src="data:image/png;base64,';
  length str $60 coded $80 ;
  infile "&fname" recfm=n eof=eof;
  /* Collect up to 60 bytes; running out of input jumps to the EOF label. */
  do len=1 to 60;
    input ch $char1.;
    substr(str,len,1)=ch;
  end;
  put str $base64x80.;
  return;
eof:
  /* LEN counts one past the last byte actually read. */
  len=len-1;
  /* Nothing is left over when the file is empty or its length is an exact
     multiple of 60; skip the encode so that no zero-length SUBSTR or
     zero-width format is requested. */
  if len > 0 then do;
    clen=4*ceil(len/3);
    coded = putc(substr(str,1,len),cats('$base64x',clen,'.'));
    put coded $varying80. clen ;
  end;
  put '" />';
run;
If you really want to generate text in-line it might be best to add quotes so that you could call the macro in the middle of a PUT statement and not worry about hitting maximum string length.
%macro base64(file);
/* Function-style macro: reads the given file in pieces of up to 60 bytes and
   generates one quoted base64 string literal per piece, so the call can be
   placed between other items of a PUT statement. */
%local filerc fileref rc fid text len ;
%*----------------------------------------------------------------------
Assign fileref and open the file.
-----------------------------------------------------------------------;
%let fileref = _fread;
%let filerc = %sysfunc(filename(fileref,&file));
%let fid = %sysfunc(fopen(&fileref,s,60,b));
%*----------------------------------------------------------------------
Read file and dump as quoted BASE64 text.
-----------------------------------------------------------------------;
%if (&fid > 0) %then %do;
/* Outer loop: load records for as long as FREAD returns 0. */
%do %while(%sysfunc(fread(&fid)) = 0);
/* Inner loop: pull up to 60 bytes at a time from the loaded record for as long as FGET returns 0. */
%do %while(not %sysfunc(fget(&fid,text,60)));
/* Format width: 4 output characters for every 3 input bytes, rounded up. */
%let len = %eval(4*%sysfunc(ceil(%length(%superq(text))/3)));
/* Emit a quoted literal only when there is something to encode. */
%if (&len) %then "%sysfunc(putc(%superq(text),$base64x&len..))" ;
%end;
%end;
%let rc = %sysfunc(fclose(&fid));
%end;
%*----------------------------------------------------------------------
Clear fileref assigned by macro,
-----------------------------------------------------------------------;
/* Deassign only when the FILENAME call above returned 0. */
%if ^(&filerc) %then %let rc = %sysfunc(filename(fileref));
%mend base64;
So then your example data step becomes something like this:
/* Write an inline base64 image tag for the file named in FNAME. The quoted
   literals generated by the base64 macro are placed between the opening and
   closing pieces of the tag. */
%let fname=hi.png;
data _null_;
  file _webout recfm=n;
  put '<img src="data:image/png;base64,'
      %base64(&fname)
      '" />' ;
run;

What's the fastest way to partition a sas dataset for batch processing?

I have a large sas dataset (1.5m obs, ~250 variables) that I need to split into several smaller sas datasets of equal size for batch processing. Each dataset needs to contain all the variables but only a fraction of the obs. What is the fastest way of doing this?
You could do something like the following:
%macro splitds(inlib=,inds=,splitnum=,outid=);
  /*------------------------------------------------------------------
    Split a data set into SPLITNUM consecutive pieces of roughly equal
    size, named <outid>1 ... <outid><splitnum>.

    inlib    : library of the input data set (WORK when left blank)
    inds     : name of the input data set
    splitnum : number of output data sets
    outid    : name prefix for the output data sets
  ------------------------------------------------------------------*/
  %local i j nobs;
  %if %length(&inlib) = 0 %then %let inlib=work;

  /* Take the observation count from the table metadata. */
  proc sql noprint;
    select nobs into :nobs
    from sashelp.vtable
    where libname=upcase("&inlib") and memname=upcase("&inds");
  quit;

  /* No row found means the table does not exist; stop before generating a
     data step with an empty count in it. */
  %if %length(&nobs) = 0 %then %do;
    %put ERROR: Data set &inlib..&inds. was not found.;
    %return;
  %end;
  /* Remove the leading blanks that INTO leaves in the value. */
  %let nobs=&nobs;
  %put Number of observations in &inlib..&inds.: &nobs;

  data
    %do i=1 %to &splitnum.;
      &outid.&i
    %end;
  ;
    /* Read from the requested library, not from whatever the one-level
       name happens to resolve to. */
    set &inlib..&inds.;
    %do j=1 %to %eval(&splitnum.-1);
      %if &j. > 1 %then %do;
        else
      %end;
      if _n_<=((&nobs./&splitnum.)*&j.) then output &outid.&j.;
    %end;
    /* With a single output there is no preceding IF, so no ELSE either. */
    %if &splitnum. > 1 %then %do;
      else
    %end;
    output &outid.&splitnum.;
  run;
%mend;
An example call to split MYLIB.MYDATA into 10 data sets named NEWDATA1 - NEWDATA10 would be:
/* Split MYLIB.MYDATA into ten data sets named NEWDATA1 through NEWDATA10. */
%splitds(inlib=mylib,inds=mydata,splitnum=10,outid=newdata);
Try this. I haven't tested yet, so expect a bug somewhere. You will need to edit the macro call to BATCH_PROCESS to include the names of the datasets, number of new data sets, etc.
%macro nobs (dsn);
  /* Function-style macro: expands to the NOBS attribute of the given data
     set. When the data set cannot be opened it writes the system message to
     the log and expands to 0. */
  %local count handle closerc;
  %let count=0;
  %let handle = %sysfunc(open(&dsn));
  %if &handle %then %let count = %sysfunc(attrn(&handle,NOBS));
  %else %put Open for dataset &dsn failed - %sysfunc(sysmsg());
  %let closerc = %sysfunc(close(&handle));
  &count
%mend nobs;
%macro batch_process(dsn_in,dsn_out_prefix,number_of_dsns);
  /*------------------------------------------------------------------
    Split DSN_IN into NUMBER_OF_DSNS consecutive data sets named
    <dsn_out_prefix>1 ... <dsn_out_prefix><number_of_dsns>.

    dsn_in         : input data set
    dsn_out_prefix : name prefix for the output data sets
    number_of_dsns : number of output data sets

    Relies on the nobs macro for the observation count.
  ------------------------------------------------------------------*/
  %local dsn_obs obs_per_dsn i;
  /* nobs is a macro, so it is invoked with a percent sign; an ampersand
     would look for a macro variable of that name instead. */
  %let dsn_obs = %nobs(&dsn_in);
  /* Round the chunk size up to a whole number. With a fractional size the
     inclusive ranges below would leave gaps and rows landing in a gap would
     be written nowhere. */
  %let obs_per_dsn = %sysevalf(&dsn_obs / &number_of_dsns, ceil);
  data
    %do i = 1 %to &number_of_dsns;
      &dsn_out_prefix.&i
    %end;
  ;
    set &dsn_in;
    drop _count;
    retain _count 0;
    _count = _count + 1;
    /* One range test per output data set; the ranges are adjacent whole-number intervals. */
    %do i = 1 %to &number_of_dsns;
      if (1 + ((&i - 1) * &obs_per_dsn)) <= _count <= (&i * &obs_per_dsn) then do;
        output &dsn_out_prefix.&i;
      end;
    %end;
  run;
%mend batch_process;
%batch_process( dsn_in=DSN_NAME , dsn_out_prefix = PREFIX_ , number_of_dsns = 5 );