Create a sequence of new column names - sas

I have a hundred or so columns which I would like to rename in SAS using the following macro:
%macro rename1(oldvarlist, newvarlist);
%let k=1;
%let old = %scan(&oldvarlist, &k);
%let new = %scan(&newvarlist, &k);
%do %while(("&old" NE "") & ("&new" NE ""));
rename &old = &new;
%let k = %eval(&k + 1);
%let old = %scan(&oldvarlist, &k);
%let new = %scan(&newvarlist, &k);
%end;
%mend;
The columns are currently named C5, C7, C9, ..., C205 and I would like to rename them AR_0, AR_1, ..., AR100.
With the macro above, how can I put these new names after the comma of the following code without writing each and every one of them?
%rename1(C5--C205, # new names here #);

This is a bit of a longer solution, but it's fairly dynamic and you easy to see how things work. I'm assuming you'll use the rename statement in proc datasets. Otherwise you could just be lazy and use arrays to replace then drop the old variables, though that isn't efficient.
proc sql;
create table oldvar as
select name, varnum
from sashelp.vcolumn
where upcase(libname)='SASHELP'
and upcase(memname)='CLASS'
order by varnum;
quit;
data rename;
set oldvar;
new_var=catx("_", "AR",varnum);
run;
proc sql noprint;
select catx("=", name, new_var) into :rename_list
separated by " "
from rename;
quit;
%put rename &rename_list;
proc datasets library=work;
modify my_dataset;
rename &rename_list;
run;quit;

This will first find the old columns and rename them to AR_# and create macrovariable varlist that you can use:
proc sql noprint;
create table newvar as
select name
from sashelp.vcolumn
where libname="SASHELP" and memname="CLASS"
order by name;
quit;
data newvar;
set newvar;
name=compress("AR_"!!put(_n_,4.));
run;
proc sql noprint;
select name into :varlist separated by " "
from newvar;
quit;

Probably, something like this would do the job
%macro rename2(oldvarlist, newPrefix);
%let k=1;
%let old = %scan(&oldvarlist, &k);
%do %while(("&old" NE ""));
rename &old = &newPrefix.&k.;
%let k = %eval(&k + 1);
%let old = %scan(&oldvarlist, &k);
%end;
%mend;

Related

PROC SQL within SAS Macro to list all variables of a data set - SELECT Statement causing error

I was trying to create a macro to output a list of all variables of a specific data set. In my macro, I am using PROC SQL. The code runs OK outside %macro, but error message saying the SELECT statement is not valid when it is being used within %MACRO
here is an example:
proc sql noprint;
select name into :vlist separated by ' '
from dictionary.columns
where memname = upcase("&dsn");
quit;
%put &vlist;
the above works perfectly;
but
%macro getvars(dsn);
%local vlist;
proc sql noprint;
select name into :vlist separated by ' '
from dictionary.columns
where memname = upcase("&dsn");
quit;
&vlist;
%mend;
the above doesn't work when I tried to do:
%let var_list = %getvars(dataset);
it returns:
ERROR 180-322: Statement is not valid or it is used out of proper order.
underlining the SELECT statement within the PROC SQL
SAS macros are not like functions in most programming languages: they don't return values, they are actually replaced by the content of the macro.
The solution is to make your macro variable global, outside the macro. Then you don't need to assign it to a new macro variable with %let.
%global vlist;
%macro getvars(dsn);
proc sql noprint;
select name into :vlist separated by ' '
from dictionary.columns
where memname = upcase("&dsn");
quit;
%mend;
%getvars(work.class)
%put &=vlist;
[EDIT]
and then just use the list in your keep statement
data OUT (keep= &vlist. VAR_B1);
merge DATA_A (in=a) DATA_B (in=b) ;
run;
Seems like the only viable option for my use case is from the following SAS paper, under the section of "USING A MACRO LOOP"
https://support.sas.com/resources/papers/proceedings/proceedings/sugi30/028-30.pdf
To clarify, my use case need a direct output of the list itself, not a macro variable.
e.g.
data OUT (keep= %getvars(DATA_A) VAR_B1);
merge DATA_A (in=a)
DATA_B (in=b)
;
run;
The PROC SQL won't work for me. So I think I need to move over to SAS I/O Functions in Macro Loop.
Below is from the SAS Paper:
%Macro GetVars(Dset) ;
%Local VarList ;
/* open dataset */
%Let FID = %SysFunc(Open(&Dset)) ;
/* If accessable, process contents of dataset */
%If &FID %Then %Do ;
%Do I=1 %To %SysFunc(ATTRN(&FID,NVARS)) ;
%Let VarList= &VarList %SysFunc(VarName(&FID,&I));
%End ;
/* close dataset when complete */
%Let FID = %SysFunc(Close(&FID)) ;
%End ;
&VarList
%Mend ;
A macro using %SYSFUNC(DOSUBL( can run any amount of SAS code (in a separate stream) when invoked at source code parse-time.
Example:
data have_A;
do index = 1 to 10;
x = index ** 2; y = x-1; z = x+1; p = x/2; q = sqrt(x); output;
end;
run;
data have_B(keep=B1);
do index = 1 to 10;
B1 + index; output;
end;
run;
%macro getvars(data);
%local rc lib mem names;
%let rc = %sysfunc(DOSUBL(%nrstr(
%let syslast = &data;
%let lib = %scan (&SYSLAST,1,.);
%let mem = %scan (&SYSLAST,2,.);
proc sql noprint;
select name into :names separated by ' ' from
dictionary.columns where
libname = "&lib." and
memname = "&mem."
;
quit;
)));
/* Emit variable name list */
&names.
%mend;
data OUT (keep=%getvars(HAVE_A) B1);
merge HAVE_A (in=a) /* 1:1 merge (no BY) */
HAVE_B (in=b)
;
run;
%let var_list = %getvars(dataset);
will resolve to:
%let var_list = proc sql noprint;
select name into :vlist separated by ' '
from dictionary.columns
where memname = upcase("dataset");
quit;
So it will store "proc SQL noprint" in var_list, and then fail because you use sql satements outside of proc sql.

dynamically exclude column names in proc sql macro

I have a proc sql statement in a macro function that selects column names from dictionary.columns. I would like to exclude column names based on multiple string patterns that are passed a arguments - see below
%symdel keepnames;
%macro test(data=, col=);
%global keepnames;
%let data_lib = %sysfunc(upcase(%sysfunc(scan("&data", 1, "."))));
%let data_data = %sysfunc(upcase(%sysfunc(scan("&data", 2, "."))));
%put &data_lib;
%put &data_data;
proc sql noprint;
select name into :keepnames separated by " "
from dictionary.columns
where libname = "&data_lib" and
memname = "&data_data" and
upcase(name) not like upcase("&col.");
quit;
%mend test;
%test(data=sashelp.cars, col=mpg w)
%put &keepnames;
Ideally, the col argument would turn into %mpg%, %w% thereby excluding any column names with mpg or w in their name.
There are a couple issues I'm encountering. First, I can't quite figure out how to hide the % from the macro processor. I tried using %str() in several ways but without luck. Second, I can't easily add % symbols around the words in the col argument. Any help is appreciated!
Change the macro parameter name to be something better informing, for example
%macro fetch_names (data=, dropPattern=, resultVar=fetchedNames)
...
%mend;
Consider passing a regular expression instead of a space separated list of values that would have to be iterated over.
%let fetchedNames = ;
%fetch_names (
data = sashelp.cars
, dropPattern = mpg|w /* <------- regular expression pattern */
, resultVar = fetchedNames
)
The innard of the macro would be similar.
change into :keepnames to into :&resultVar
change upcase(name) not like upcase("&col.") to not prxmatch("/&dropPattern./i", name)
I made use of the contains operator in the following and looped through the arguments in col to generate separate tests for each exclusion. For some reason I don't have cars so I used class:
%symdel keepnames;
%macro test(data=, col=);
%global keepnames;
%let data_lib = %sysfunc(upcase(%sysfunc(scan("&data", 1, "."))));
%let data_data = %sysfunc(upcase(%sysfunc(scan("&data", 2, "."))));
%put &data_lib;
%put &data_data;
%let i = 1;
%let exclusion = %scan(&col,&i);
proc sql noprint;
select name into :keepnames separated by " "
from dictionary.columns
where libname = "&data_lib" and
memname = "&data_data"
%do %while(&exclusion ne );
and upcase(name) not contains upcase("&exclusion")
%let i = %eval(&i + 1);
%let exclusion = %scan(&col,&i);
%end;
;
quit;
%mend test;
option mprint;
%test(data=sashelp.class, col=ame x)
%put &keepnames;
%test(data=sashelp.class)
%put &keepnames;
%test(data=sashelp.class, col=e)
%put &keepnames;

Create dummy using category variables more efficiently in SAS

I have data on with state variation in U.S. Now i want to creat many dummies to control state fix effect. In stata it's an easy work while in sas it seems I have to create all dummies manually.However logit regression with fix effects runs quite slow in stata. I wonder whether there's a more efficient way to create dummy from char variables(not numerical, which I know a few methods to apply) in sas since I have too many char variables need to be created as dummies.
Cheers,
Eva
proc logistic supports the class statement. Place your variables in the class statement and you can specify the type of parameterization you'd like as well. The most common method is referential coding.
proc logistic data=sashelp.heart;
class sex bp_status/param=ref;
model status = sex ageAtStart height weight bp_status;
run;
https://support.sas.com/documentation/cdl/en/statug/63347/HTML/default/viewer.htm#statug_logistic_sect006.htm
Not all procs support the class statement, in those cases you can use proc glmmod or a variety of other method to create your dummy variables.
http://blogs.sas.com/content/iml/2016/02/22/create-dummy-variables-in-sas.html
If you absolutely need to manually create dummy variables you can use a macro like this one. You would need to call it for each variable.
%macro create_dummy(dataset=, var=);
%* Save Distinct Values and Dummy Variable Names;
proc sql noprint;
select distinct
&var,
tranwrd(tranwrd(trim(&var), " ", "_"), ".", "")
into
:value1-,
:name1-
from
&dataset
;
select
count(distinct(&var))
into
:total
from
&dataset
;
quit;
%* Create Dummy Variables;
data &dataset;
set &dataset;
%do i=1 %to &total;
if &var = "&&value&i" then &&name&i = 1; else &&name&i = 0;
%end;
run;
%mend create_dummy;
You can add a loop to the Macro if you want to call the Macro only once. Add a do loop to the top like:
%macro create_dummy(dataset=, var=);
%do l %to %sysfunc(countw(&var));
%let var1 = %scan(&var, &l);
%* Save Distinct Values and Dummy Variable Names;
proc sql noprint;
select distinct
&var1,
tranwrd(tranwrd(trim(&var1), " ", "_"), ".", "")
into
:value1-,
:name1-
from
&dataset
;
select
count(distinct(&var1))
into
:total
from
&dataset
;
quit;
%* Create Dummy Variables;
data &dataset;
set &dataset;
%do i=1 %to &total;
if &var1 = "&&value&i" then &&name&i = 1; else &&name&i = 0;
%end;
run;
%end;
%mend create_dummy;

sas how to use a list to store count distinct of all the variables in a table

I want to store the count distinct of each variable from a table in another. I wanted to use a loop for it, over the list of the variables. So first, I stored the variables names in "vars", doing this:
proc sql ;
select name
into :vars separated by ' '
from dictionary.columns
where libname eq 'HW' and
memname eq "ORDERS";
quit;
Then, I created another list with the result of the count distinct with the following code:
%macro g();
%let b=;
%do i = 1 %to 3;
%let a=%scan(&vars,&i);
proc sql;
select count(distinct &a)
into :gaby from hw.ORDERS;
quit;
%let b=&b &gaby;
%end;
%put &b;
%mend g;
%g();
After this, I wanted to add both to a table, but I can add the vars variable but not the b variable.
data a;
call symput('lista', symget('vars'));
call symput('lista1', symget('b'));
do i=1 to 3;
timept=i;
variable=scan("&vars",i);
dist=scan("&b",i);
output;
end;
run;
The table shows correctly the name of the variables but instead of showing the count distinct (that were stored in b) shows the letter "b".
Is there a way to perform this? also, is there a way to perform it easily?
Thanks!!!!!!!!!!
You're pretty close. I would just use a single SQL pass and create an output table directly. If you want it in a column form, then use PROC TRANSPOSE.
proc sql noprint;
select name
into :vars separated by ' '
from dictionary.columns
where libname eq 'SASHELP' and
memname eq "SHOES";
quit;
%put &vars;
%macro create_table();
proc sql noprint;
%local i n var;
%let n = %sysfunc(countw(&vars));
create table output as
select
%do i=1 %to %eval(&n-1);
%let var = %scan(&vars,&i);
count(distinct &var) as &var,
%end;
%let var = %scan(&vars,&n);
count(distinct &var) as &var
from sashelp.shoes;
quit;
%mend;
%create_table;
proc transpose data=output out=want(rename=(_NAME_=variable COL1=Dist));
run;

How to loop through a macro variable in SAS

I have an example like this:
proc sql;
select dealno into :deal_no
from deal_table;
Now I want to traverse the variable deal_no now containing all dealno in table deal_table but I don't know how to do it.
Another option is add 'separated by' to the sql code, which will add a delimiter to the values. You can then use the SCAN function in a data step or %SCAN in a macro to loop through the values and perform whatever task you want. Example below.
proc sql noprint;
select age into :age separated by ','
from sashelp.class;
quit;
%put &age.;
data test;
do i=1 by 1 while(scan("&age.",i) ne '');
age=scan("&age.",i);
output;
end;
drop i;
run;
If you do
%put &deal_no;
you can see that it only contains the first value of dealno, not all of them.
To avoid that you can do something like this:
proc sql;
create table counter as select dealno from deal_table;
select dealno into :deal_no_1 - :deal_no_&sqlobs
from deal_table;
quit;
%let N = &sqlobs;
%macro loop;
%do i = 1 %to &N;
%put &&deal_no_&i;
%end;
%mend;
%loop; run;
Here's another solution.
proc sql noprint;
select age into :ageVals separated by ' '
from ageData;
quit;
%put &ageVals;
%macro loopAgeVals; %let i = 1; %let ageVal = %scan(&ageVals, &i);
%do %while("&ageVal" ~= "");
%put &ageVal;
%let i = %eval(&i + 1);
%let ageVal = %scan(&ageVals, &i);
%end;
%mend;
%loopAgeVals;