SAS: Dynamically determine input fields from a dataset - sas

I am trying to parse a delimited dataset with over 300 fields. Instead of listing all the input fields like
data test;
infile "delimited_filename.txt"
DSD delimiter="|" lrecl=32767 STOPOVER;
input field_A:$200.
field_B :$200.
field_C:$200.
/*continues on */
;
I am thinking I can dump all the field names into a file, read in as a sas dataset, and populate the input fields - this also gives me the dynamic control if any of the field names changes (add/remove) in the dataset. What would be some good ways to accomplish this?
Thank you very much - I just started sas, still trying to wrap my head around it.

This worked for me: basically, use the macro language to "write" the DATA step code into a macro variable, then run it as open code.
Note: my indata_header_file contains 5 columns: Variable_Name, Variable_Length, Variable_Type, Variable_Label, and Notes.
/*
  ReadDsFromFile
  Builds a DATA step INPUT statement from a header workbook and runs it.

  Parameters:
    filename_to_process  Pipe-delimited text file to read. The value is
                         spliced verbatim after INFILE, so pass a quoted
                         path or a fileref.
    indata_header_file   Excel workbook describing the fields. The value is
                         spliced verbatim after DATAFILE=, so pass it quoted.
                         Sheet "Names" must provide the columns
                         Variable_Name, Variable_Length and Variable_Type.
    out_dsname           Name of the dataset to create.

  Side effects: creates/replaces WORK.DS_HEADER and &out_dsname.
*/
%macro ReadDsFromFile(filename_to_process, indata_header_file, out_dsname);
    /* Keep every working macro variable local so nothing leaks to the caller */
    %local read_code input_in_line id NOBS i rc
           name_col length_col type_col
           VAR_NAME VAR_LENGTH VAR_TYPE;

    %put *** Processing file: &filename_to_process ...;

    /* Read in the header file */
    proc import OUT = ds_header
        DATAFILE = &indata_header_file.
        DBMS = EXCEL REPLACE; /* REPLACE flag */
        SHEET = "Names";
        GETNAMES = YES;
        MIXED = NO;
        SCANTEXT = YES;
    run;

    %let id = %sysfunc(open(ds_header));
    %if &id. = 0 %then %do;
        /* Without a valid dataset id every later SAS I/O call would fail */
        %put ERROR: ReadDsFromFile could not open WORK.DS_HEADER. %sysfunc(sysmsg());
        %return;
    %end;
    %let NOBS = %sysfunc(attrn(&id., NOBS));

    /* Column positions do not change between rows, so look them up once */
    %let name_col   = %sysfunc(varnum(&id., Variable_Name));
    %let length_col = %sysfunc(varnum(&id., Variable_Length));
    %let type_col   = %sysfunc(varnum(&id., Variable_Type));

    /*
      Opening part of the generated step:
        data <out_dsname>;
        infile <filename_to_process> DSD delimiter="|" lrecl=32767 STOPOVER;
        input
      The %LET ends at the first unmasked semicolon, i.e. right after INPUT.
    */
    %let read_code = data &out_dsname. %str(;)
        infile &filename_to_process.
        DSD delimiter=%str("|") lrecl=32767 STOPOVER %str(;)
        input ;

    /*
      One "<field_name> :<informat>." entry per header row.
      Character fields get $w., everything else gets w.
    */
    %do i = 1 %to &NOBS.;
        %let rc = %sysfunc(fetchobs(&id., &i.));
        %let VAR_NAME   = %sysfunc(getvarc(&id., &name_col.));
        %let VAR_LENGTH = %sysfunc(getvarn(&id., &length_col.));
        %let VAR_TYPE   = %sysfunc(getvarc(&id., &type_col.));

        /* The first period ends the macro variable reference; the second
           one is the period that makes the text a valid informat. */
        %if %upcase(%trim(&VAR_TYPE.)) eq CHAR %then
            %let input_in_line = &VAR_NAME. :$&VAR_LENGTH..;
        %else
            %let input_in_line = &VAR_NAME. :&VAR_LENGTH..;

        /* Append this field to the generated INPUT statement */
        %let read_code = &read_code. &input_in_line.;
    %end;

    /* Close the header dataset */
    %let rc = %sysfunc(close(&id.));

    /* Terminate the INPUT statement and the step */
    %let read_code = &read_code. %str(;) run %str(;) ;

    /* Run the generated code */
    &read_code.
%mend ReadDsFromFile;

Sounds like you want to generate code based on metadata. A data step is actually a lot easier to code and debug than a macro.
Let's assume you have metadata that describes the input data. For example let's use the metadata about the SASHELP.CARS. We can build our metadata from the existing DICTIONARY.COLUMNS metadata on the existing dataset. Let's set the INFORMAT to the FORMAT since that table does not have INFORMAT value assigned.
proc sql noprint ;
create table varlist as
select memname,varnum,name,type,length,format,format as informat,label
from dictionary.columns
where libname='SASHELP' and memname='CARS'
;
quit;
Now let's make a sample text file with the data in it.
filename mydata temp;
data _null_;
set sashelp.cars ;
file mydata dsd ;
put (_all_) (:);
run;
Now we just need to use the metadata to write a program that could read that data. All we really need to do is define the variables and then add a simple INPUT firstvar -- lastvar statement to read the data.
/*
  Write a DATA step program to the temporary fileref CODE, driven by the
  VARLIST metadata (one row per variable, processed in VARNUM order).
  For every variable an ATTRIB statement is generated; after the last one
  an "INPUT firstvar -- lastvar" statement and RUN are appended.
*/
filename code temp;
data _null_;
  set varlist end=eof ;
  by varnum ;
  file code ;
  if _n_=1 then do ;
    /* Remember the first variable name for the final INPUT range */
    firstvar=name ;
    retain firstvar ;
    put 'data ' memname ';'
      / ' infile mydata dsd truncover lrecl=1000000;'
    ;
  end;
  /* Trailing @ holds the output line so the length is appended to it */
  put ' attrib ' name 'length=' @;
  if type = 'char' then put '$' @ ;
  put length ;
  /* @10 moves to column 10; optional attributes go on their own lines */
  if informat ne ' ' then put @10 informat= ;
  if format ne ' ' then put @10 format= ;
  if label ne ' ' then put @10 label= :$quote. ;
  put ' ;' ;
  if eof then do ;
    put ' input ' firstvar '-- ' name ';' ;
    put 'run;' ;
  end;
run;
Now we can just run the generated code using %INCLUDE.
%include code / source2 ;

Related

Sas: How to use macro for infile multiple txt?

I need the macro to have one parameter which is the path of the text files.
This is what I have now. I have 6 txt with name= NY2012.txt NY2013.txt NY2014.txt.....etc.
%macro data ;
%let list= 2012 2013 2014 2015 2016 2017;
data allData;
delete; (what should I put here?)
run;
%do i=1 %to 6;
%let currItem = %scan(&list, &i);
filename f&list "/folders/myfolders/NY&list.txt"; (should this be here?)
data currentData;
x = &currItem;
infile f&list truncover;
input value $ 1-20;
retain year;
if _n_=1 then year=year_1;
run;
* Keeping adding things to the cumulative data set;
data allData;
set allData currentData;
run;
%end;
%mend data;
%data;
Then I should end up with a dataset for each year and one large dataset that included all the years. How should I fix this? Thank you.
Inside the loop you should work with the single element scanned from the list (&currItem), not with the whole list (&list). It also helps to pass the list as a macro parameter and loop over however many elements it contains. So the code will look like:
%let list=2017 2018 2019;

/*
  data(tlist)
  Reads /folders/myfolders/NY<item>.txt for every blank-separated item in
  tlist into WORK.currentData<item> and stacks all of them into WORK.allData.

  Parameter:
    tlist  blank-separated list of file suffixes (for example years).
*/
%macro data(tlist);
  %local i currItem;

  /* An empty list would make COUNTW fail, so stop early */
  %if %length(&tlist) = 0 %then %do;
    %put WARNING: macro DATA was called with an empty list - nothing to read.;
    %return;
  %end;

  /* Start from an empty cumulative dataset so the first SET below works */
  data allData;
    stop;
  run;

  %do i=1 %to %sysfunc(countw(&tlist,%str( )));
    %let currItem = %scan(&tlist, &i, %str( ));
    filename f&currItem. "/folders/myfolders/NY&currItem..txt";
    data currentData&currItem;
      x = &currItem;
      infile f&currItem truncover;
      input value $ 1-20;
      retain year;
      /* NOTE(review): year_1 is never assigned in this step, so year stays
         missing - kept from the question, confirm the intended source */
      if _n_=1 then year=year_1;
    run;
    /* Keep adding each file to the cumulative dataset */
    data allData;
      set allData currentData&currItem;
    run;
    filename f&currItem. clear;
  %end;
%mend data;
%data(&list);
A simple wildcard on the infile will suffice.
In this case ???? means any 4 characters. You could also use NY*.txt.
/*
  Read every file matching NY????.txt in one step. FILENAME= exposes the
  name of the file currently being read; it is copied to F so that it is
  kept in the output, and the digits of the file name become YEAR.
*/
data allData;
  length _f f $256.;   /* _f: FILENAME= work variable, f: stored copy */
  infile "/folders/myfolders/NY????.txt"
         truncover
         filename=_f;
  input value $ 1-20;
  f = _f;
  /* Last path segment -> keep digits only ('kd') -> numeric year */
  year = input(
           compress(scan(f, -1, '/'), , 'kd'),
           8.
         );
run;

Concatenating all variables in an observation in SAS

Is there a general purpose way of concatenating each variable in an observation into one larger variable whilst preserving the format of numeric/currency fields in terms of how it looks when you do a proc print on the dataset. (see sashelp.shoes for example)
Here is some code you can run, as you can see when looking at the log, using the catx function to produce a comma separated output removes both the $ currency sign as well as the period from the numeric variables
proc print data=sashelp.shoes (obs=10);
run;
proc sql;
select name into :varstr2 separated by ','
from dictionary.columns
where libname = "SASHELP" and
memname = "SHOES";
quit;
data stuff();
format all $5000.;
set sashelp.shoes ;
all = catx(',',&varstr2.) ;
put all;
run;
Any solution needs to be general purpose as it will run on disparate datasets with differently formatted variables.
You can manually loop over PDV variables of the data set, concatenating each formatted value retrieved with vvaluex. A hash can be used to track which variables of the data set to process. If you are comma separating values you will probably want to double quote formatted values that contain a comma.
/*
  Concatenate the formatted value of every variable of the input dataset
  into ALLCAT, comma separated. Values that contain a comma are wrapped in
  double quotes. All helper variables start with an underscore so the
  single DROP _: removes them from the output.
*/
data want;
  set sashelp.cars indsname=_data;
  if _n_ = 1 then do;
    /* Hash keyed by position, holding the variable names of the input */
    declare hash vars();
    length _varname $32;
    vars.defineKey('_n_');
    vars.defineData('_varname');
    vars.defineDone();
    _dsid = open(_data);
    do _n_ = 1 to attrn(_dsid,'NVAR');
      _rc = vars.add(key:_n_,data:varname(_dsid,_n_));
    end;
    _dsid = close(_dsid);
    call missing (of _:);
  end;
  format weight comma7.;
  length allcat $32000 _vvx $32000;
  /* _n_ is reused as the hash key; SAS resets it on the next iteration */
  do _n_ = 1 to vars.NUM_ITEMS;
    _rc = vars.find();
    /* vvaluex returns the value as formatted by the variable's format */
    _vvx = strip(vvaluex(_varname));
    if index(_vvx,",") then _vvx = quote(strip(_vvx));
    if _n_ = 1
      then allcat = _vvx;
      else allcat = cats(allcat,',',_vvx);
  end;
  drop _:;
run;
You can use import and export to csv file:
filename tem temp;
proc export data=sashelp.SHOES file=tem dbms=csv replace;
run;
data l;
length all $ 200;
infile tem truncover firstobs=2;
input all 1-200;
run;
P.S.
If you only need to concatenate the character variables, you can create an array of all CHARACTER columns in the dataset and just iterate through it:
/*
  Concatenate all character variables of SASHELP.SHOES into ALL,
  comma separated (CATX skips blank values).
*/
data l;
  length all $ 5000;
  set sashelp.SHOES;
  array ch [*] _CHARACTER_;
  do i = 1 to dim(ch);
    /* ALL is declared before the array, so it is a member of _CHARACTER_
       too; skip it explicitly so it is never appended to itself */
    if upcase(vname(ch[i])) ne 'ALL' then all=catx(',',all,ch[i]);
  end;
  drop i; /* loop index is not part of the result */
run;
The PUT statement is the easiest way to do that. You don't need to know the variables names as you can use the _all_ variable list.
put (_all_) (+0);
It will honor the formats attached to the variables, and if you have used the DSD option on the FILE statement then the result is a delimited list.
What is the ultimate goal of this exercise? If you want to create a file you can just write the file directly.
data _null_;
set sashelp.shoes(obs=3);
file 'myfile.csv' dsd ;
put (_all_) (+0);
run;
If you really do want to get that string into a dataset variable, there is no need to invent some new function. Just take advantage of the PUT statement's abilities by creating a file and then reading the lines back from the file.
filename junk temp;
data _null_;
set sashelp.shoes(obs=3);
file junk dsd ;
put (_all_) (+0);
run;
data stuff ;
set sashelp.shoes(obs=3);
infile junk truncover ;
input all $5000.;
run;
You can even do it without creating the full text file. Instead just write one line at a time and save the line into a variable using the _FILE_ automatic variable.
/*
  Build the delimited line with PUT and capture it from the output buffer
  through the _FILE_ automatic variable, one observation at a time.
*/
filename junk temp;
data stuff;
  set sashelp.shoes(obs=3);
  file junk dsd lrecl=5000 ;
  length all $5000;
  /* @1 starts at column 1 and the trailing @ holds the line, so the buffer
     can be read below. ALL is part of _all_ and is blank at this point;
     +(-2) ' ' presumably blanks out the delimiter written for it. */
  put @1 (_all_) (+0) +(-2) ' ' @;
  all = _file_;
  output;
  /* Overwrite the held line with blanks before the next observation */
  all=' ';
  put @1 all $5000. @;
run;
Solution with vvalue and concat function (||):
It is similar to the "Solution without catx" below (the last one), but it is simpler because it uses the vvalue function instead of put.
/*edit sashelp.shoes with missing values in Product as test-cases*/
proc sql noprint;
create table wocatx as
select * from SASHELP.SHOES;
update wocatx
set Product = '';
quit;
/*Macro variable for concat function (||)*/
proc sql;
select ('strip(vvalue('|| strip(name) ||'))') into :varstr4 separated by "|| ',' ||"
from dictionary.columns
where libname = "WORK" and
memname = "WOCATX";
quit;
/*Data step to concat all variables*/
data stuff2;
format all $5000.;
set work.wocatx ;
all = &varstr4. ;
put all;
run;
Solution with catx:
proc print data=SASHELP.SHOES;
run;
proc sql;
select ifc(strip(format) is missing,strip(name),ifc(type='num','put('|| strip(name) ||','|| strip(format) ||')','input('|| strip(name) ||','|| strip(format) ||')')) into :varstr2 separated by ','
from dictionary.columns
where libname = "SASHELP" and
memname = "SHOES";
quit;
data stuff();
format all $5000.;
set sashelp.shoes ;
all = catx(',',&varstr2.) ;
put all;
run;
If a column has no format in dictionary.columns, the macro variable varstr2 will contain just its name. If it has a format, the generated expression converts the value with that format when it is passed to catx: for example, put(Sales,DOLLAR12.) for a numeric variable, or the input function for a character variable. You can add any further conditions to the select ... into clause if you need them.
If there is no need of using of input function just change select:
ifc(strip(format) is missing,strip(name),'put('|| strip(name) ||','|| strip(format) ||')')
Solution without catx:
/*edit sashelp.shoes with missing values in Product as test-cases*/
proc sql noprint;
create table wocatx as
select * from SASHELP.SHOES;
update wocatx
set Product = '';
quit;
/*Macro variable for catx*/
proc sql;
select ifc(strip(format) is missing,strip(name),ifc(type='num','put('|| strip(name) ||','|| strip(format) ||')','input('|| strip(name) ||','|| strip(format) ||')')) into :varstr2 separated by ','
from dictionary.columns
where libname = "WORK" and
memname = "WOCATX";
quit;
/*data step with catx*/
data stuff;
format all $5000.;
set work.wocatx ;
all = catx(',',&varstr2.) ;
put all;
run;
/*Macro variable for concat function (||)*/
proc sql;
select ifc(strip(format) is missing,
'strip(' || strip(name) || ')',
'strip(put('|| strip(name) ||','|| strip(format) ||'))') into :varstr3 separated by "|| ',' ||"
from dictionary.columns
where libname = "WORK" and
memname = "WOCATX";
quit;
/*Data step without catx*/
data stuff1;
format all $5000.;
set work.wocatx ;
all = &varstr3. ;
put all;
run;
Result with catx and missing values:
Result without catx and with missing values:

PROC SQL within SAS Macro to list all variables of a data set - SELECT Statement causing error

I was trying to create a macro to output a list of all variables of a specific data set. In my macro, I am using PROC SQL. The code runs OK outside %macro, but I get an error message saying the SELECT statement is not valid when it is used within %MACRO.
here is an example:
proc sql noprint;
select name into :vlist separated by ' '
from dictionary.columns
where memname = upcase("&dsn");
quit;
%put &vlist;
the above works perfectly;
but
%macro getvars(dsn);
%local vlist;
proc sql noprint;
select name into :vlist separated by ' '
from dictionary.columns
where memname = upcase("&dsn");
quit;
&vlist;
%mend;
the above doesn't work when I tried to do:
%let var_list = %getvars(dataset);
it returns:
ERROR 180-322: Statement is not valid or it is used out of proper order.
underlining the SELECT statement within the PROC SQL
SAS macros are not like functions in most programming languages: they don't return values, they are actually replaced by the content of the macro.
The solution is to make your macro variable global, outside the macro. Then you don't need to assign it to a new macro variable with %let.
%global vlist;

/*
  getvars(dsn)
  Stores the blank-separated variable names of dsn in the global macro
  variable VLIST.

  Parameter:
    dsn  dataset name, either MEMBER or LIBREF.MEMBER. With a one-level
         name every library containing that member is matched (as before).
*/
%macro getvars(dsn);
  %local lib mem;

  /* DICTIONARY.COLUMNS keeps libref and member in separate, upper-case
     columns, so a two-level name has to be split before comparing */
  %if %index(&dsn, .) %then %do;
    %let lib = %upcase(%scan(&dsn, 1, .));
    %let mem = %upcase(%scan(&dsn, 2, .));
  %end;
  %else %let mem = %upcase(&dsn);

  /* INTO leaves the target untouched when no row matches, so reset it */
  %let vlist = ;

  proc sql noprint;
    select name into :vlist separated by ' '
      from dictionary.columns
      where memname = "&mem"
      %if %length(&lib) %then %do;
        and libname = "&lib"
      %end;
    ;
  quit;
%mend;
%getvars(work.class)
%put &=vlist;
[EDIT]
and then just use the list in your keep statement
data OUT (keep= &vlist. VAR_B1);
merge DATA_A (in=a) DATA_B (in=b) ;
run;
Seems like the only viable option for my use case is from the following SAS paper, under the section of "USING A MACRO LOOP"
https://support.sas.com/resources/papers/proceedings/proceedings/sugi30/028-30.pdf
To clarify, my use case need a direct output of the list itself, not a macro variable.
e.g.
data OUT (keep= %getvars(DATA_A) VAR_B1);
merge DATA_A (in=a)
DATA_B (in=b)
;
run;
The PROC SQL won't work for me. So I think I need to move over to SAS I/O Functions in Macro Loop.
Below is from the SAS Paper:
/*
  GetVars(Dset)
  Function-style macro: resolves to the blank-separated list of variable
  names of Dset, so it can be used inside a statement or dataset option.
  It generates no SAS statements of its own. Resolves to nothing when the
  dataset cannot be opened.
*/
%Macro GetVars(Dset) ;
  /* FID and I are local so the caller's macro variables are not clobbered */
  %Local VarList FID I ;
  /* open dataset */
  %Let FID = %SysFunc(Open(&Dset)) ;
  /* If accessible, process contents of dataset */
  %If &FID %Then %Do ;
    %Do I=1 %To %SysFunc(ATTRN(&FID,NVARS)) ;
      %Let VarList= &VarList %SysFunc(VarName(&FID,&I));
    %End ;
    /* close dataset when complete */
    %Let FID = %SysFunc(Close(&FID)) ;
  %End ;
  %Else %Put WARNING: GetVars could not open &Dset.. %SysFunc(SysMsg()) ;
  &VarList
%Mend ;
A macro using %SYSFUNC(DOSUBL( can run any amount of SAS code (in a separate stream) when invoked at source code parse-time.
Example:
data have_A;
do index = 1 to 10;
x = index ** 2; y = x-1; z = x+1; p = x/2; q = sqrt(x); output;
end;
run;
data have_B(keep=B1);
do index = 1 to 10;
B1 + index; output;
end;
run;
/*
  getvars(data)
  Resolves to the value of &names, which the PROC SQL step submitted
  through DOSUBL fills with the blank-separated column names of the
  dataset. The only text the macro itself emits is that list, so it can be
  used in the middle of a statement (see KEEP= in the step below).
  NOTE(review): this relies on the DOSUBL-submitted code seeing &data and
  handing &names back to this macro - confirm for your SAS release.
*/
%macro getvars(data);
%local rc lib mem names;
/* %nrstr keeps the & and % triggers unresolved at this point, so the code
   is passed to DOSUBL as text. Assigning SYSLAST presumably expands a
   one-level name to LIBREF.MEMBER so %scan can split it - TODO confirm. */
%let rc = %sysfunc(DOSUBL(%nrstr(
%let syslast = &data;
%let lib = %scan (&SYSLAST,1,.);
%let mem = %scan (&SYSLAST,2,.);
proc sql noprint;
select name into :names separated by ' ' from
dictionary.columns where
libname = "&lib." and
memname = "&mem."
;
quit;
)));
/* Emit variable name list */
&names.
%mend;
data OUT (keep=%getvars(HAVE_A) B1);
merge HAVE_A (in=a) /* 1:1 merge (no BY) */
HAVE_B (in=b)
;
run;
%let var_list = %getvars(dataset);
will resolve to:
%let var_list = proc sql noprint;
select name into :vlist separated by ' '
from dictionary.columns
where memname = upcase("dataset");
quit;
So it will store "proc sql noprint" in var_list (the %let ends at the first semicolon), and then fail because the remaining SQL statements are now outside of proc sql.

Import all columns from CSV as character?

Simple question.
PROC IMPORT OUT= braw.address
DATAFILE= "&path.\address_data.csv"
DBMS=csv REPLACE;
GETNAMES=YES;
RUN;
This statement will create the dataset columns as character or numeric depending on the values, which is smart, but not what I want.
I want to import them all as character, to make for easier regex evaluation.
Is there a simple way to do this?
I would generally just write my own input statement for the CSV, then you can make them whatever you want.
IE:
data braw.address;
infile "&path.\address_data.csv" dlm=',' dsd missover;
input
field1 $
field2 $
....
;
run;
You can use the log from the PROC IMPORT to generate this the first time and just edit it to contain $ for each variable.
If you do not want to write a SAS macro to read all the columns as character, you could try a "cheat": manually edit the file and duplicate the first row (the one containing the column headers). Since those will most likely all be character strings, SAS should import all the columns as character.
Of course, a macro to do this would not be that difficult. You can try something like this:
%macro readme(dsn,fn);
  /* Macro to read all columns of a CSV as character              */
  /* Parameters:                                                  */
  /*   DSN - The name of the SAS data set to create               */
  /*   FN  - The external file to read (quoted path or fileref)   */
  /* Example:                                                     */
  /*   %readme(want, 'c:\temp\tempfile.csv');                     */
  /* Every column is read with the $256. informat, so values      */
  /* longer than 8 characters are no longer truncated.            */
  %local inputstr;

  /* Step 1: turn the header line into "name :$256. name :$256. ..." */
  data _null_;
    infile &fn lrecl=32767 truncover;
    input;
    length headers inputstr $32767 newvar $64;
    /* Strip single and double quotes that may surround header names */
    headers = compress(_infile_, "'" || '"');
    i = 1;
    newvar = scan(headers,i,',');
    /* DO WHILE so an empty header line adds nothing to the list */
    do while (newvar ne ' ');
      inputstr = catx(' ', inputstr, newvar, ':$256.');
      i + 1;
      newvar = scan(headers,i,',');
    end;
    call symputx('inputstr',inputstr,'L');
    stop;
  run;

  /* Step 2: read the data rows using the generated INPUT list */
  data &dsn;
    infile &fn firstobs=2 dsd dlm=',' truncover lrecl=32767;
    input &inputstr.;
  run;
%mend;
%readme(want, 'c:\temp\tempfile.csv');
Here is my macro to read dlm file with all vars as char:
/*
  ImportText(file, dsn, dlm)
  Reads a delimited text file with every column as character.

  Parameters:
    file  path of the text file (unquoted; it is quoted inside the macro)
    dsn   dataset to create
    dlm   delimiter, passed verbatim to DELIMITER=, so pass it quoted

  PROC IMPORT is used only to discover the column names and widths; the
  data are then re-read with a character informat for every column.
*/
%MACRO ImportText(file,dsn,dlm);
  %LOCAL vars;

  /* Let PROC IMPORT guess variable names and lengths */
  PROC IMPORT DATAFILE="&file" OUT=work._importtext_guess DBMS=dlm REPLACE;
    DELIMITER = &dlm;
    GETNAMES = YES;
    GUESSINGROWS = 32767;
  RUN;

  /* Put "name : $w." for every variable into a macro variable */
  PROC CONTENTS DATA=work._importtext_guess OUT=work._importtext_vars NOPRINT; RUN;
  PROC SQL NOPRINT;
    /* TYPE=2 is character. A column guessed as numeric has storage length
       8, which says nothing about its text width, so allow 32 characters. */
    SELECT CATT(name,' : $',CASE WHEN type = 2 THEN length ELSE 32 END,'.')
      INTO :vars SEPARATED BY ' '
      FROM work._importtext_vars
      ORDER BY varnum;
  QUIT;

  /* Read the real data */
  DATA &dsn;
    INFILE "&file" DELIMITER=&dlm MISSOVER DSD FIRSTOBS=2 LRECL=32767;
    INPUT &vars;
  RUN;

  /* Remove the helper datasets */
  PROC DATASETS LIBRARY=work NOLIST;
    DELETE _importtext_guess _importtext_vars;
  QUIT;
%MEND;

Outputting to a dataset while in a macro do loop

Here's a macro I'm writing. I want to look at a given data set and write each of its field (column) names as an observation into an info data set.
Here's what I've got
%macro go_macro(data);
/*get the list of field names from the dictionary*/
proc sql noprint;
select distinct name into :columns separated by ' '
from dictionary.columns
where memname = "%upcase(%scan(&data, -1))" and libname = "%upcase(%scan(&data, 1))" and type = "char"
;
quit;
/*now loop through this list of fieldnames*/
%let var_no = 1;
%let var = %scan(&columns, &var_no, ' ');
%do %while (&var ne);
/*here is where I want to write the fieldname to an observation*/
fname = "&var";
output;
%let var_no = %eval(&var_no +1);
%let var = %scan(&columns, &var_no, ' ');
%end;
%mend;
/*execute the code*/
data bdqa.accounts_info;
%go_macro(braw.accounts)
run;
this gives me
[MPRINT] Parsing Base DataServer
/* 0005 */ fname = "SORT_CODE";
/* 0006 */ output;
/* 0009 */ fname = "BANK_NAME";
/* 0010 */ output;
/* 0013 */ fname = "CREDIT_CARD";
/* 0014 */ output;
/* 0017 */ fname = "PRIMARY_ACCT_HOLDER";
/* 0018 */ output;
/* 0021 */ fname = "account_close_date";
/* 0022 */ output;
/* 0023 */ run;
ERROR: Parsing exception - aborting
ERROR: DS-00274 : Could not parse base DataServer code: Encountered " <ALPHANUM> "fname "" at line 5, column 9.
Was expecting one of:
<EOF>
";" ...
"*" ...
"data" ...
"proc" ...
(and 9 more)
while
while
data mytest;
do i = 1 to 5;
fname = 'hello world';
output;
end;
keep fname;
run;
is perfectly legit.
the following code
/*
  char_freqs(data=)
  Runs PROC FREQ on every character variable of &data and writes each
  frequency table to WORK.<variable>_freq.

  Parameter:
    data  two-level dataset name (LIBREF.MEMBER)
*/
%macro char_freqs(data=);
  /* Local so a missing INTO result cannot pick up a stale outer value */
  %local columns var_no var;

  /* get the character variables of the dataset */
  proc sql noprint;
    select distinct name into :columns separated by ' '
      from dictionary.columns
      where memname = "%upcase(%scan(&data, -1))" and libname = "%upcase(%scan(&data, 1))" and type = "char"
    ;
  quit;

  /* now get the distinct values for each of the variables */
  %let var_no = 1;
  /* %str( ) makes the blank the only delimiter */
  %let var = %scan(&columns, &var_no, %str( ));
  %do %while (&var ne);
    proc freq data=&data;
      tables &var/out=&var._freq;
    run;
    %let var_no = %eval(&var_no +1);
    %let var = %scan(&columns, &var_no, %str( ));
  %end;
%mend;
%char_freqs(data=macros.businesses)
Also works - the PROC FREQ is allowed.
The problem is you have something like this:
data bdqa.accounts_info;
%go_macro(braw.accounts)
run;
->
data bdqa.accounts_info;
proc sql;
... select stuff ...;
quit;
fname = "stuff"; ...
run;
You need:
proc sql;
select stuff;
quit;
data bdqa.accounts_info;
fname = "stuff";
...
run;
You need to remove the PROC SQL from the macro - you can create a macro variable outside of a macro. Honestly you shouldn't use a macro for this at all - you can do one of two things:
a) Create the table directly from DICTIONARY.COLUMNS
proc sql;
create table bdqa.accounts_info as select name as fname from dictionary.columns where ... ;
quit;
b) Create it in a datastep
data bdqa.accounts_info;
__data = "&var_no";
do ... ;
run;
(the do loop is basically identical to the %do loop in the macro)