How to parse text from large html in SAS - sas

I have looked everywhere online for good parsing code, but all the examples are very trivial. The following Perl regular expression works fine only for 5 bytes.
/* Compile a Perl substitution pattern that replaces a non-greedy <...> match with nothing. */
rx1=prxparse("s/<.*?>//");
My table contains a text field with strings something like this:
<meta name="generator" content="HTML Tidy, see www.w3.org" /> Test
<table style="WIDTH: 360.0pt;BORDER-COLLAPSE: collapse;" border="0"
cellspacing="0" cellpadding="0" width="480"> <tr style="HEIGHT: 15.0pt;">
<td style="BORDER-BOTTOM: rgb(236,233,216);BORDER-LEFT: rgb(236,233,216);
BACKGROUND-COLOR: transparent;WIDTH: 360.0pt;HEIGHT: 15.0pt; " width="480">
So it contains <table>, <tr>, <td>, . . . and other complex HTML structures. How can I parse this kind of HTML into plain text?

What @Joe says in the comments is true, but unfortunately it doesn't negate the fact that people often need to solve this kind of problem. Below are some macros that I use when I need to extract certain values out of XML/HTML. They're not perfect, but they've gotten the job done for everything I've needed.
The major limitation of the below macros is that they require the HTML/XML they are parsing to exist in a single field in SAS. The size limitation of a single field in SAS is 32767 chars, which means that if your HTML file is bigger than that then you will need to take just the subset of it that you need to work with.
Examples are included and the best way to figure out how it works is just to run the examples.
/*****************************************************************************
** PROGRAM: PRXEXTRACT.SAS
**
** SEARCHES THROUGH AN XML (OR HTML) FILE FOR AN ELEMENT AND EXTRACTS THE
** VALUE BETWEEN AN ELEMENTS TAGS.
**
** MUST BE CALLED FROM INSIDE A DATA STEP: THE MACRO GENERATES DATA STEP
** STATEMENTS. SIDE EFFECTS ON THE CALLING STEP:
** - LINE FEED / CARRIAGE RETURN CHARACTERS ARE REMOVED FROM iXMLField IN PLACE.
** - crLf AND EVERY VARIABLE WHOSE NAME STARTS WITH prx ARE DROPPED FROM OUTPUT.
**
** PARAMETERS:
** iElement : The element to search through the blob for.
** iField : The fieldname to save the result to. May be an array reference
**          such as arr[i]; in that case no ATTRIB statement is generated.
** iType : (N or C) for Numeric or Character.
** iLength : The length of the field to create.
** iXMLField : The name of the field that contains the XML blob to parse.
** iDelimiterType: (1, 2 or 3). Defaults to 1. 1 USES <> AS DELIMS. 2 USES [].
**                 3 USES ENCODED VALUES FOR <>.
** iSequence : Defaults to 1. WHICH OCCURRENCE OF THE ELEMENT TO EXTRACT
**             (1 = FIRST MATCH). MAY BE A DATA STEP VARIABLE.
** iAttributesField: OPTIONAL. FIELD TO SAVE THE TEXT OF THE ELEMENTS
**                   ATTRIBUTES TO. LEFT BLANK = ATTRIBUTES ARE NOT SAVED.
**
******************************************************************************
** HISTORY:
** 1.0 MODIFIED: 14-FEB-2011 BY:RP
** - CREATED.
** 1.1 MODIFIED: 16-FEB-2011 BY:RP
** - ADDED OPTION TO CHANGE DELIMITERS FROM <> TO []
** 1.1 MODIFIED: 17-FEB-2011 BY:RP
** - CORRECTED ERROR WHEN MATCH RETURNS A LENGTH OF ZERO
** - CORRECTED MISSING AMPERSAND FROM IDELIMITERTYPE CHECK.
** - ADDED ESCAPING QUOTES TO [] DELIMITER TYPE
** - CORRECTED WARNING WHEN MATCH RETURNS MISSING NUMERIC FIELD
** 1.2 MODIFIED: 25-FEB-2011 BY:RP
** - ADDED DELIMITER TYPES TO WORK WITH MASKED HTML CODES
** 1.3 MODIFIED: 11-MAR-2011 BY:RP
** - MODIFIED TO ALLOW FOR OPTIONAL ATTRIBUTES ON THE ELEMENT BEING SEARCHED FOR.
** 1.4 MODIFIED: 14-MAR-2011 BY:RP
** - CORRECTED TO REMOVE FALSE MATCHES FROM PRIOR VERSION. ADDED EXAMPLE.
** 1.5 MODIFIED: 10-APR-2012 BY:RP
** - CORRECTED PROBLEM WITH ZERO LENGTH STRING MATCHES
** 1.6 MODIFIED: 22-MAY-2012 BY:RP
** - ADDED ABILITY TO CAPTURE ATTRIBUTES
*****************************************************************************/
%macro prxExtract(iElement=, iField=, iType=, iLength=, iXMLField=, iDelimiterType=1, iSequence=1, iAttributesField=);
  %local delim_open delim_close;

  /* STRIP LINE FEED (10) AND CARRIAGE RETURN (13) CHARACTERS FROM THE BLOB */
  crLf = byte(10) || byte(13);
  &iXMLField = compress(&iXMLField,crLf,);

  /* CHOOSE THE OPENING/CLOSING TAG DELIMITERS USED TO BUILD THE REGEX */
  %if &iDelimiterType eq 1 %then %do;
    %let delim_open = <;
    %let delim_close = >;
  %end;
  %else %if &iDelimiterType eq 2 %then %do;
    %let delim_open = \[;
    %let delim_close = \];
  %end;
  %else %if &iDelimiterType eq 3 %then %do;
    %let delim_open = %nrbquote(&)lt%quote(%str(;)) ;
    %let delim_close = %nrbquote(&)gt%quote(%str(;)) ;
  %end;
  %else %do;
    %put ERR%str()ROR (prxExtract.sas): You specified an incorrect option for the iDelimiterType parameter.;
  %end;

  %if %sysfunc(index(&iField,[)) %then %do;
    /* DONT DO THIS IF ITS AN ARRAY */
  %end;
  %else %do;
    %if "%upcase(&iType)" eq "N" %then %do;
      attrib &iField length=&iLength format=best.;
    %end;
    %else %do;
      attrib &iField length=$&iLength format=$&iLength..;
    %end;
  %end;

  /*
  ** BREAKDOWN OF REGULAR EXPRESSION (EXAMPLE USES < AND > AS DELIMS AND ANI AS THE ELEMENT BEING LOOKED FOR:
  **
  ** &delim_open&iElement --> FINDS <ANI
  ** ((\s+.*?)&delim_close|&delim_close){1}? --> FINDS A SINGLE INSTANCE OF EITHER:
  ** - ONE OR MORE SPACES FOLLOWED BY ANYTHING UNTIL A > CHARACTER
  **   (THE INNER PARENTHESES CAPTURE THAT ATTRIBUTE TEXT INTO BUFFER 2)
  ** - OR JUST A > CHARACTER
  ** THE OUTER PARENTHESES ARE CAPTURE BUFFER 1, WHICH IS NOT READ BELOW.
  ** (.*?) --> FINDS WHAT WE ARE SEARCHING FOR AND CAPTURES IT INTO BUFFER 3.
  ** &delim_open --> FINDS <
  ** \/ --> FINDS THE / CHARACTER. THE FIRST SLASH ESCAPES IT SO IT KNOWS ITS NOT A SPECIAL REGEX SLASH
  ** &iElement&delim_close --> FINDS ANI>
  */
  prx_id = prxparse("/&delim_open&iElement((\s+.*?)&delim_close|&delim_close){1}?(.*?)&delim_open\/&iElement&delim_close/i");
  prx_start = 1;
  prx_stop = length(&iXMLField);
  prx_sequence = 0;

  /* WALK EVERY MATCH; ONLY THE MATCH NUMBERED iSequence IS EXTRACTED */
  call prxnext(prx_id, prx_start, prx_stop, &iXMLField, prx_pos, prx_length);
  do while (prx_pos > 0);
    prx_sequence = prx_sequence + 1;
    if prx_sequence = &iSequence then do;
      if prx_length > 0 then do;
        /* BUFFER 3 = THE TEXT BETWEEN THE OPENING AND CLOSING TAGS */
        call prxposn(prx_id, 3, prx_pos, prx_length);
        %if "%upcase(&iType)" eq "N" %then %do;
          length prx_tmp_n $200;
          /* AN EMPTY ELEMENT GIVES A ZERO LENGTH CAPTURE. DO NOT CALL SUBSTR */
          /* WITH A ZERO LENGTH (SAME GUARD AS THE CHARACTER BRANCH BELOW);   */
          /* THE NUMERIC FIELD IS SIMPLY LEFT UNASSIGNED.                     */
          if prx_length ne 0 then do;
            prx_tmp_n = substr(&iXMLField, prx_pos, prx_length);
            if cats(prx_tmp_n) ne "" then do;
              &iField = input(substr(&iXMLField, prx_pos, prx_length), ?best.);
            end;
          end;
        %end;
        %else %do;
          if prx_length ne 0 then do;
            &iField = substr(&iXMLField, prx_pos, prx_length);
          end;
          else do;
            &iField = "";
          end;
        %end;

        /* ALSO SAVE THE ATTRIBUTES TO A FIELD IF REQUESTED (BUFFER 2) */
        %if "%upcase(&iAttributesField)" ne "" %then %do;
          call prxposn(prx_id, 2, prx_pos, prx_length);
          if prx_length ne 0 then do;
            &iAttributesField = substr(&iXMLField, prx_pos, prx_length);
          end;
          else do;
            &iAttributesField = "";
          end;
        %end;
      end;
    end;
    call prxnext(prx_id, prx_start, prx_stop, &iXMLField, prx_pos, prx_length);
  end;

  drop crLf prx:;
%mend;
Example for a single element:
/* Single-element example: extracts the first XNI element's value into my_xni */
/* and the text of its attributes into my_xni_attribs.                        */
data example;
xml = "<test><ANI2Digits>00</ANI2Digits><XNI xniattrib=1>7606256091</XNI><ANI>number2</ANI><ANI x=hmm y=yay>number3</ANI></test>"; * NOTE THE XML MUST BE ALL ON ONE LINE;
%prxExtract(iElement=xni, iField=my_xni, iType=c, iLength=15, iXMLField=xml, iSequence=1, iAttributesField=my_xni_attribs);
run;
Example for repeating elements:
/* Repeating-element example: %prxCount sets prx_count to the number of ANI */
/* elements, then %prxExtract is called once per occurrence (iSequence=cnt) */
/* to fill the arrani/arrattr array slots.                                  */
data example;
xml = "<test><ANI2Digits>00</ANI2Digits><ANI>7606256091</ANI><ANI>number2</ANI><ANI x=hmm y=yay>number3</ANI></test>"; * NOTE THE XML MUST BE ALL ON ONE LINE;
%prxExtract(iElement=ani2digits, iField=ani2digits, iType=c, iLength=50, iXMLField=xml);
/* Target variables are declared up front because array references skip the macro's ATTRIB statement */
length ani1-ani6 $15;
length attr1-attr6 $100;
array arrani [1:6] $ ani1-ani6;
array arrattr [1:6] $ attr1-attr6;
%prxCount (iElement=ani, iXMLField=xml, iDelimiterType=1);
do cnt=1 to prx_count;
%prxExtract(iElement=ani, iField=arrani[cnt], iType=c, iLength=15, iXMLField=xml, iSequence=cnt, iAttributesField=arrattr[cnt]);
end;
run;
Finally - if you need the version for multiple elements you will also need the prxCount macro:
/*****************************************************************************
** PROGRAM: MACROS.PRXCOUNT.SAS
**
** RETURNS THE NUMBER OF TIMES AN ELEMENT IS FOUND IN AN HTML/XML FILE.
**
** MUST BE CALLED FROM INSIDE A DATA STEP. THE COUNT IS LEFT IN THE DATA STEP
** VARIABLE prx_count. SIDE EFFECTS ON THE CALLING STEP:
** - LINE FEED / CARRIAGE RETURN CHARACTERS ARE REMOVED FROM iXMLField IN PLACE.
** - crLf AND EVERY VARIABLE WHOSE NAME STARTS WITH prx_ (INCLUDING prx_count)
**   ARE DROPPED FROM THE OUTPUT DATASET.
**
** PARAMETERS:
** iElement : The element to search through the blob for.
** iXMLField : The name of the field that contains the XML blob to parse.
** iDelimiterType: (1/2/3). Defaults to 1. 1 USES <> AS DELIMS. 2 USES [].
** 3 USES ENCODED VALUES FOR <>.
**
******************************************************************************
** HISTORY:
** 1.0 MODIFIED: 25-FEB-2011 BY:RP
** - CREATED.
** 1.1 MODIFIED: 14-MAR-2011 BY:RP
** - MODIFIED TO ALLOW FOR OPTIONAL ATTRIBUTES ON THE ELEMENT BEING SEARCHED FOR.
*****************************************************************************/
%macro prxCount(iElement=, iXMLField=, iDelimiterType=1);
  %local delim_open delim_close;

  /* STRIP LINE FEED (10) AND CARRIAGE RETURN (13) CHARACTERS FROM THE BLOB */
  crLf = byte(10) || byte(13);
  &iXMLField = compress(&iXMLField,crLf,);

  /* CHOOSE THE OPENING/CLOSING TAG DELIMITERS USED TO BUILD THE REGEX */
  %if &iDelimiterType eq 1 %then %do;
    %let delim_open = <;
    %let delim_close = >;
  %end;
  %else %if &iDelimiterType eq 2 %then %do;
    %let delim_open = \[;
    %let delim_close = \];
  %end;
  %else %if &iDelimiterType eq 3 %then %do;
    %let delim_open = %nrbquote(&)lt%quote(%str(;)) ;
    %let delim_close = %nrbquote(&)gt%quote(%str(;)) ;
  %end;
  %else %do;
    %put ERR%str()ROR (prxCount.sas): You specified an incorrect option for the iDelimiterType parameter.;
  %end;

  prx_id = prxparse("/&delim_open&iElement(\s+.*?&delim_close|&delim_close){1}?(.*?)&delim_open\/&iElement&delim_close/i");
  prx_count = 0;
  prx_start = 1;
  prx_stop = length(&iXMLField);

  /* COUNT MATCHES. NO CAPTURE BUFFER IS NEEDED FOR COUNTING, SO THE LOOP */
  /* ONLY ADVANCES WITH PRXNEXT (WHICH RESETS prx_pos/prx_length EACH TIME). */
  call prxnext(prx_id, prx_start, prx_stop, &iXMLField, prx_pos, prx_length);
  do while (prx_pos > 0);
    prx_count = prx_count + 1;
    call prxnext(prx_id, prx_start, prx_stop, &iXMLField, prx_pos, prx_length);
  end;

  drop crLf prx_:;
%mend;

Related

SAS: lookup data inside a function/subroutine / return an array

Suppose I like to do something like the following (with exemplary variable names for better readability):
take a parameter InParameter and match it to the variable MyVar1 in a dataset MyData
return all values for the variable MyVar2 for the filtered observations
from a subroutine/function
that i can use inside proc sql/datastep
This is what I got so far (clearly not working):
/* Sketch from the question, not runnable as written: the ARRAY statement   */
/* and the RETURN of a whole array are placeholders for the desired behavior. */
proc fcmp outlib=work.funcs.MyFunction;
function MyFunction(InParameter $);
array MyArray ... ; /* Here: Create an array with something like SELECT MyVar2 FROM MyData WHERE MyVar1 = Inparameter */
return(MyArray{});
endsub;
;
quit;
/* Make the compiled function library visible to later steps */
options cmplib=work.funcs;
data MyOutput;
set Somedata;
/* Intended use: keep rows whose MyVar2 is among the values returned for "H20" */
if MyVar2 in MyFunction("H20") then output;
run;
In short:
can data in datasets be accessed from inside a function/subroutine?
can a function/subroutine return an array?
Thanks for your help!
We created a utility macro called %ds2list() that will perform your desired process. It doesn't use an array statement but it achieves the same result.
The macro simply returns values from a dataset in a list format. Here's an example of calling it:
%put %ds2list(iDs=sashelp.class, iField=name, iQuote=1);
This would return:
'Alfred','Alice','Barbara','Carol','Henry','James','Jane','Janet','Jeffrey','John','Joyce','Judy','Louise','Mary','Philip','Robert','Ronald','Thomas','William'
The default behavior for %ds2list() is to comma separate the returned values but it is very flexible. You can change the delimiter to a value of your choice (or no delimiter), you can turn the quotes on or off, or change them from single to double quotes, and you can provide any dataset options you would normally use on a set statement such as a where=() statement.
Additionally because the macro is pure macro code you can use this literally anywhere in SAS. In any proc/data/macro you like. We use it extensively for calls to ODBC passthrough when we have a large list of IDs we want to be returned.
Here's an example of how you could use it. First create a table that will contain values to compare against the list values:
/* Build the table of names that will be checked against the generated list. */
data keep;
input name $;
datalines;
Alfred
Carol
Janet
run;
Iterate over the values we want to check against the list:
/* Keep only the rows of KEEP whose name is in the quoted, comma-separated */
/* list that %ds2list generates from sashelp.class filtered to sex='F'.    */
data want;
set keep;
if name in (%ds2list(iDs=sashelp.class, iField=name, iQuote=1, iDsOptions=where=(sex='F'))) then do;
output;
end;
run;
Returns:
Obs name
=== =====
1 Carol
2 Janet
You can see Alfred was excluded from the result because he was filtered out by the where=() clause.
Here is the macro, I suggest putting it in your macro autocall library:
/***************************************************************************
** PROGRAM: MACRO.DS2LIST.SAS
**
** UTILITY PROGRAM THAT RETURNS A LIST OF FIELD VALUES FROM A
** DATASET IN DELIMITED FORMAT.
**
** THE LIST IS RETURNED AS GENERATED TEXT (FUNCTION-STYLE MACRO), SO THE CALL
** CAN BE PLACED INLINE IN ANOTHER STATEMENT.
**
** PARAMETERS:
** iDs : THE LIBNAME.DATASET NAME THAT YOU WANT TO CHECK.
** iField : THE FIELD THAT CONTAINS THE VALUES YOU WANT RETURNED IN A
** DELIMITED FORMAT.
** iDelimiter: DEFAULT IS A COMMA. THE DELIMITER TO USE FOR THE RETURNED LIST.
** iDsOptions: ANY STANDARD DATASET OPTIONS THAT YOU WOULD LIKE TO APPLY SUCH
** AS A WHERE STATEMENT.
** iQuote : (0=NO,1=YES). DEFAULT=0/NO. DETERMINES WHETHER THE RETURNED
** LIST IS QUOTED OR NOT.
** iQuoteChar: (SINGLE,DOUBLE) DEFAULT=SINGLE. SPECIFIES WHETHER SINGLE
** OR DOUBLE QUOTES ARE USED WHEN QUOTING THE RETURNED LIST
**
*****************************************************************************/
%macro ds2list(iDs=, iField=, iDsOptions=, iDelimiter=%str(,), iQuote=0, iQuoteChar=single);
%local dsid pos rc result cnt quotechar value;
%let result=;
%let cnt=0;
/* PICK THE QUOTE CHARACTER. AN UNRECOGNISED iQuoteChar FALLS BACK TO DOUBLE QUOTES WITH A WARNING. */
%if &iQuote %then %do;
%if "%upcase(&iQuoteChar)" eq "DOUBLE" %then %do;
%let quotechar = %nrstr(%");
%end;
%else %if "%upcase(&iQuoteChar)" eq "SINGLE" %then %do;
%let quotechar = %nrstr(%');
%end;
%else %do;
%let quotechar = %nrstr(%");
%put WARNING: MACRO.DS2LIST.SAS: PARAMETER IQUOTECHAR INCORRECT. DEFAULTED TO DOUBLE;
%end;
%end;
%else %do;
%let quotechar = ;
%end;
/*
** ENSURE ALL THE REQUIRED PARAMETERS WERE PASSED IN.
*/
%if "&iDs" ne "" and "&iField" ne "" %then %do;
/* OPEN THE DATASET FOR INPUT WITH ANY DATASET OPTIONS APPLIED */
%let dsid=%sysfunc(open(&iDs(&iDsOptions),i));
%if &dsid %then %do;
/* VARNUM RETURNS 0 WHEN THE FIELD DOES NOT EXIST */
%let pos=%sysfunc(varnum(&dsid,&iField));
%if &pos %then %do;
%let rc=%sysfunc(fetch(&dsid));
%do %while (&rc eq 0);
/* CHARACTER VALUES ARE READ MACRO-QUOTED AND TRIMMED; NUMERIC VALUES ARE READ AS-IS */
%if "%sysfunc(vartype(&dsid,&pos))" = "C" %then %do;
%let value = %qsysfunc(getvarc(&dsid,&pos));
%if "%trim(&value)" ne "" %then %do;
%let value = %qtrim(&value);
%end;
%end;
%else %do;
%let value = %sysfunc(getvarn(&dsid,&pos));
%end;
/* WHITESPACE/CARRIAGE RETURNS REMOVED IN THE BELOW LINE */
/* TO ENSURE NO WHITESPACE IS RETURNED IN THE OUTPUT. */
%if &cnt ne 0 %then %do;%unquote(&iDelimiter)%end;%unquote(&quotechar&value&quotechar.)
%let cnt = %eval(&cnt + 1);
%let rc = %sysfunc(fetch(&dsid));
%end;
/* THE LOOP ENDS ON THE FIRST NON-ZERO FETCH CODE; ANYTHING OTHER THAN -1 IS REPORTED */
%if &rc ne -1 %then %do;
%put WARNING: MACRO.DS2LIST.SAS: %sysfunc(sysmsg());
%end;
%end;
%else %do;
%put ERROR: MACRO.DS2LIST.SAS: FIELD &iField NOT FOUND IN DATASET %upcase(&iDs).;
%end;
%end;
%else %do;
%put ERROR: MACRO.DS2LIST.SAS: DATASET %upcase(&iDs) COULD NOT BE OPENED.;
%end;
/* NOTE(REVIEW): CLOSE IS ALSO REACHED WHEN THE OPEN ABOVE FAILED (DSID = 0) */
%let rc=%sysfunc(close(&dsid));
%end;
%else %do;
%put ERROR: MACRO.DS2LIST.SAS: YOU MUST SPECIFY BOTH THE IDS AND IFIELD PARAMETERS TO CALL THIS MACRO.;
%end;
%mend;
Not sure that a function would work with the IN operator. You might need to wrap the function call with a macro to generate the proper syntax. In which case why not just make a macro to begin with?
Here is generic macro to extract the values from a variable in a dataset.
%macro varlist
/*----------------------------------------------------------------------
Generate list of values from dataset

Function-style macro: the list is emitted as text at the point of call.
Every fetched observation contributes one value of VARIABLE, optionally
quoted, comma separated and wrapped in parentheses.
----------------------------------------------------------------------*/
(dataset /* Input dataset */
,variable /* Variable Name */
,quote=1 /* Add quotes around values? 1=Single 2=Double */
,comma=1 /* Add comma between values? */
,paren=1 /* Add parentheses around results? */
);
/* A local macro variable named after VARIABLE is declared so that */
/* %syscall set() loads each fetched value into it.                */
%local did sep &variable ;
%if &paren=1 %then (;
/* NOTE(review): the return code of OPEN is not checked before use */
%let did=%sysfunc(open(&dataset));
%syscall set(did);
%do %while(0=%sysfunc(fetch(&did)));
/* Trim trailing blanks; %superq avoids resolving any macro triggers in the value */
%let &variable=%qsysfunc(trim(%superq(&variable)));
%if &quote=1 %then &sep.%sysfunc(quote(&&&variable,%str(%')));
%else %if &quote=2 %then &sep.%sysfunc(quote(&&&variable));
%else &sep.&&&variable;
/* SEP is empty for the first value and a comma afterwards */
%if &comma=1 %then %let sep=,;
%end;
%let did=%sysfunc(close(&did));
%if &paren=1 %then );
%mend varlist;
Example calls:
%put %varlist(sashelp.class,name);
%put %varlist(sashelp.class(where=(sex='M')),age,quote=0,comma=0);
So in your case you might use it like this:
/* %varlist expands to a parenthesised, quoted, comma-separated list of the */
/* MyVar2 values from Mydata where MyVar1="H20", used as the IN list.       */
data MyOutput;
set Somedata;
where MyVar2 in %varlist(Mydata(where=(MyVar1="H20")),MyVar2) ;
run;
You are better off with a macro.
/*----------------------------------------------------------------------
Create &outdata from the rows of &indata whose myVar2 value appears in
myData on a row where myVar1 equals &inParameter.

inParameter : value compared against myData.myVar1 (character)
indata      : dataset to subset
outdata     : dataset to create
----------------------------------------------------------------------*/
%macro subset(inParameter, indata, outdata);
  proc sql noprint;
    create table &outdata as
    select * from &indata
    /* double quotes so that &inParameter resolves inside the literal */
    where myVar2 in (select distinct myVar2 from myData where myVar1 = "&inParameter");
  quit;
%mend;
%subset(H20,Somedata,MyOutput);

String comparison (not equals) syntax for SAS

I am trying to do a string comparison, between a string that will be read from a config file and a string that I mention. Is the below correct?
%if &strategy ne 'ABC' %then %do;
if ctry eq 'CAN' or ctry eq 'US' then maxpos = 0;
%end;
%else %do;
if ctry eq 'US' then maxpos = 0;
strategy is the parameter that will be read from a config file in which I will specify strategy = ABC
does ABC have to be specified in quotes?
Is the use of ne (not equal) correct?
Macro language doesn't naturally use quotes for the most part (in comparisons like this, they're treated more or less as normal characters, not as string-enclosures), so it depends on whether &strategy contains the quote character or not.
%let strategy=ABC;
%if &strategy = 'ABC' %then %put equals; %else %put not equals;
...
not equals
but
%let strategy='ABC';
%if &strategy = 'ABC' %then %put equals; %else %put not equals;
...
equals
You would generally compare %if &strategy eq ABC in most cases.
ne and eq are fine, or you can use = and ^=, up to you - I prefer ne.

macro seems stuck in infinite loop, don't know how to debug

I'm trying to define a macro function that returns unique list items from a space separated list. This macro itself uses other macros that I tested and seem to work fine by themselves (see examples below), it's all very simple code.
However for some reason the code runs indefinitely and I don't know how to debug it properly. I usually debug using %put statements but they don't print here as there's no error and I stop the code manually.
Here is the main macro, followed by my other convenience macros that I use, you can execute the whole batch to load the macros and then check out given examples.
*---------------------------------------------------------------;
* LIST_UNIQUE ;
* Return only unique items from list, ;
* in order of first appearance ;
*---------------------------------------------------------------;
/* EXAMPLE
%put %list_unique(); ** (nothing)
%put %list_unique(a); ** a
%put %list_unique(a a); ** doesn't work (should be a)
%put %list_unique(a b); ** doesn't work (should be a b)
*/
/* Version from the question, kept as posted to illustrate the problem. */
/* It walks the list and appends each item not already present in OUT.  */
%macro list_unique(data);
%local out curr_item;
/* NOTE: the loop index i is not declared %local in this macro */
%do i=1 %to %list_length(&data);
/* NOTE: this calls %extract; the helper defined in this listing is named %extract_pos */
%let curr_item = %extract(&data,&i);
%if not %list_in(&curr_item,&out) %then %let out = &out &curr_item;
%end;
&out
%mend;
*---------------------------------------------------------------;
* LIST_LENGTH ;
* Length of space separated list ;
*---------------------------------------------------------------;
/* EXAMPLES :
%put %list_length(); ** 0
%put %list_length(item1 item2 item3); ** 3
*/
/* Returns the number of blank-delimited words in DATA via COUNTW. */
%macro list_length(data);
%sysfunc(countw(&data,%str( )))
%mend;
*---------------------------------------------------------------;
* LIST_IN ;
* check if item is in list ;
*---------------------------------------------------------------;
/* EXAMPLE
%put %list_in(,a); ** 0
%put %list_in(a,); ** 0
%put %list_in(a,a); ** 1
%put %list_in(a,a a); ** 1
%put %list_in(b,a b c d); ** 1
%put %list_in(e,a b c d); ** 0
*/
/* Version from the question: returns 1 when ITEM equals some word of LIST, else 0. */
%macro list_in
(item /* item to search in list */
,list /* space separated list to quote */
);
/* exception when list has null length */
%if not %length(&list) %then 0%return;
/* general case */
/* NOTE: i is not declared %local here, so the loop reuses any i that */
/* already exists in a calling macro                                  */
%do i=1 %to %list_length(&list);
%if %extract_pos(&list,&i) = &item %then 1%return;
%end;
0
%mend;
*-------------------------------------------------------------------------------;
* EXTRACT_POS ;
* Extracts subset of values from space separated list ;
*-------------------------------------------------------------------------------;
/* EXAMPLES
%put %extract_pos(,1); ** (nothing)
%put %extract_pos(a b c d,); ** (nothing)
%put %extract_pos(a b c d,1); ** a
%put %extract_pos(a b c d,2 1:3 1); ** b a b c a
*/
/* Version from the question: returns the words of DATA found at the positions */
/* listed in IND. A token of the form start:end expands to that whole range.   */
%macro extract_pos
(data
,ind
);
%local i j token output new_ind;
/* one pass per blank-delimited token of IND */
%do i=1 %to %sysfunc(countw(&ind,%str( )));
%let token = %scan(&ind,&i,%str( ));
%if %index(&token,:) %then %do; /* if token with ':' */
%do j=%scan(&token,1,:) %to %scan(&token,2,:);
%let output = &output %scan(&data,&j,%str( ));
%end;
%end;
%else %do; /* if simple token */
%let output = &output %scan(&data,&token,%str( ));
%end;
%end;
&output
%mend;
You cannot protect macros you call from modifying your macro variables, but if the macros are designed properly they will NOT. Unless you are INTENDING to modify any existing macro variable you need to define your macro variables as local. One or more of your macros were using the macro variable I without defining it as local. So if there already existed a macro variable named I then the macro modified the existing variable's value.
Also one of your macros was calling %extract() instead of %extract_pos().
I also simplified your %list_in() macro to just be a call to an existing SAS function, like your %list_length() macro.
%macro list_unique
/*---------------------------------------------------------------
Return only unique items from list

Items are emitted in order of first appearance. The loop index and
work variables are all %local so no caller variable is modified.
---------------------------------------------------------------*/
(data /* Space delimited list of items */
);
%local i curr_item out ;
%do i=1 %to %list_length(&data);
%let curr_item = %extract_pos(&data,&i);
/* append the item only when it is not already in the output list */
%if not %list_in(&curr_item,&out) %then %let out=&out &curr_item;
%end;
&out
%mend list_unique;
/* Returns the number of blank-delimited words in DATA via COUNTW. */
%macro list_length(data);
%sysfunc(countw(&data,%str( )))
%mend list_length;
%macro list_in
/*---------------------------------------------------------------
Check if item is in list

Returns 1 when INDEXW finds ITEM as a blank-delimited word of LIST,
otherwise 0 (the INDEXW position is converted with the BOOLEAN
option of %SYSEVALF).
---------------------------------------------------------------*/
(item /* item to find in list */
,list /* space separated list to search */
);
%sysevalf(%sysfunc(indexw(&list,&item,%str( ))),boolean)
%mend list_in;
%macro extract_pos
/*-------------------------------------------------------------------------------
Extracts subset of values from space separated list

Returns the words of DATA at the requested positions, in the order the
positions are listed in IND.
-------------------------------------------------------------------------------*/
(data /* Space delimited list of values */
,ind /* Space delimited list of positions or position ranges */
);
%local i j token output;
%do i=1 %to %sysfunc(countw(&ind,%str( )));
%let token = %scan(&ind,&i,%str( ));
/* For a token without ':' the first and last ':'-delimited words are the */
/* same value, so the inner loop runs exactly once                        */
%do j=%scan(&token,1,:) %to %scan(&token,-1,:);
/* Token is single position or range in format start:end */
%let output = &output %scan(&data,&j,%str( ));
%end;
%end;
&output
%mend extract_pos;
Test
831 %put %list_unique(); %** (nothing);
832 %put %list_unique(a); %** a ;
a
833 %put %list_unique(a a); %** doesnot work (should be a);
a
834 %put %list_unique(a b); %** doesnot work (should be a b);
a b
Enable Macro Debugging by adding this line to the beginning of your code, which will resolve the macro code and variables:
Options macrogen symbolgen mlogic mprint mfile;
Run your code and check your log for details,
When done Disable Macro Debugging by replacing the options in step 1 with the options below:
Options nomacrogen NoSymbolgen nomlogic nomprint nomfile;
For more details you can check the SAS Debugging documentation
http://support.sas.com/documentation/cdl/en/mcrolref/61885/HTML/default/viewer.htm#a001066200.htm
The loop variable I is shared between the nested macros because none of them declares it %local. An easy fix is to use a different looping variable in each macro (or to declare it %local in each one).
Found some documentation on SAS logic of sharing. SAS Blogs

Get list of files in directory under NOXCMD

Can not use Xcommands in SAS EG. No access to SAS Management Console. How can I get a list of files in a directory without using Xcommands?
Tried DINFO but can only get 1 piece of info. Need a list of all files in the selected directory. Am I missing something here?
/* Lists every directory information item that DINFO can report for c:\ */
/* (one output row per item name/value pair).                            */
data a;
  /* both arguments of FILENAME must be quoted character values */
  rc=filename("mydir","c:\");
  put "rc = 0 if the directory exists: " rc=;
  did=dopen("mydir");
  put did=;
  /* number of information items available for this directory */
  numopts=doptnum(did);
  put numopts=;
  do i = 1 to numopts;
    optname = doptname(did,i);
    put i= optname=;
    optval=dinfo(did,optname);
    put optval=;
    output;
  end;
  /* release the directory handle once all items have been read */
  if did > 0 then rc=dclose(did);
run;
I've not used Enterprise Guide, but how about using a piped filename? You can then use that with the infile statement to put the result of the query into a dataset...
/* Read the output of an ls command, one file name per row, into DIRLIST. */
filename dirlist pipe "ls /<your-path>/*";
data dirlist ;
  /* TRUNCOVER with formatted input reads the whole line, so file names */
  /* containing blanks are not cut off at the first blank               */
  infile dirlist truncover ;
  format fname $300. ;
  input fname $300. ;
  /* skip the blank lines that ls prints between directory sections */
  if fname ne ' ' ;
run;
Here's a couple of macros we use to do this. The main macro is %file_list but it also requires the %isDir macro in order to run. Some usage examples:
%put %file_list(iPath=e:\blah\); * TEST AGAINST A DIR THAT DOESNT EXIST;
%put %file_list(iPath=e:\SASDev); * TEST AGAINST A DIR THAT EXISTS;
%put %file_list(iPath=e:\SASDev\,iFiles_only=1); * LIST ONLY FILES;
%put %file_list(iPath=e:\sasdev\,iFiles_only=1,iFilter=auto); * FILTER TO ONLY FILES THAT CONTAIN THE STRING AUTO;
%isDir macro definition:
/******************************************************************************
** PROGRAM: CMN_MAC.ISDIR.SAS
**
** DESCRIPTION: DETERMINES IF THE SPECIFIED PATH EXISTS OR NOT.
** RETURNS: 0 IF THE PATH DOES NOT EXIST OR COULD NOT BE OPENED.
** 1 IF THE PATH EXISTS AND CAN BE OPENED.
**
** PARAMETERS: iPath: THE FULL PATH TO EXAMINE. NOTE THAT / AND \ ARE TREATED
** THE SAME SO &SASDIR/COMMON/MACROS IS THE SAME AS
** &SASDIR\COMMON\MACROS.
** iQuiet: (0/1) DEFAULT=1. WHEN 0, A MESSAGE AND THE SYSTEM MESSAGE
** ARE WRITTEN TO THE LOG IF THE PATH CANNOT BE ASSIGNED OR OPENED.
**
** NOTES: THE MESSAGES USE A MACRO VARIABLE NAMED ERR THAT IS NOT DEFINED IN
** THIS MACRO - PRESUMABLY A SITE-WIDE GLOBAL; CONFIRM BEFORE USING
** iQuiet=0.
**
******************************************************************************/
%macro isDir(iPath=,iQuiet=1);
  /* ALL WORK VARIABLES ARE LOCAL SO A CALLER THAT USES THE SAME NAMES */
  /* (E.G. CHECK_FILE_ASSIGN) IS NOT MODIFIED                           */
  %local result dname did rc check_file_assign;
  %let result = 0;
  /* DNAME IS BLANK, SO FILENAME GENERATES A FILEREF AND STORES ITS NAME IN DNAME */
  %let check_file_assign = %sysfunc(filename(dname,&iPath));
  %put ASSIGNED FILEREF (0=yes, 1=no)? &check_file_assign &iPath;
  %if not &check_file_assign %then %do;
    %let did = %sysfunc(dopen(&dname));
    %if &did %then %do;
      %let result = 1;
      /* ONLY CLOSE A DIRECTORY THAT WAS ACTUALLY OPENED */
      %let rc = %sysfunc(dclose(&did));
    %end;
    %else %if not &iQuiet %then %do;
      %put &err: (ISDIR MACRO).;
      %put %sysfunc(sysmsg());
    %end;
    /* RELEASE THE GENERATED FILEREF SO REPEATED CALLS DO NOT ACCUMULATE FILEREFS */
    %let rc = %sysfunc(filename(dname));
  %end;
  %else %if not &iQuiet %then %do;
    %put &err: (ISDIR MACRO).;
    %put %sysfunc(sysmsg());
  %end;
  &result
%mend;
%file_list macro definition:
/******************************************************************************
** PROGRAM: MACRO.FILE_LIST.SAS
**
** DESCRIPTION: RETURNS THE LIST OF FILES IN A DIRECTORY SEPERATED BY THE
** SPECIFIED DELIMITER. RETURNS AN EMPTY STRING IF THE THE
** DIRECTORY CAN'T BE READ OR DOES NOT EXIST.
**
** PARAMETERS: iPath: THE FULL PATH TO EXAMINE. NOTE THAT / AND \ ARE TREATED
** THE SAME SO &SASDIR/COMMON/MACROS IS THE SAME AS
** &SASDIR\COMMON\MACROS. WORKS WITH BOTH UNIX AND WINDOWS.
** iFilter: OPTIONAL. ONLY MEMBERS WHOSE NAME CONTAINS THIS STRING
** (CASE INSENSITIVE) ARE RETURNED.
** iFiles_only: (0/1) DEFAULT=0. WHEN 1, MEMBERS THAT ARE THEMSELVES
** DIRECTORIES (AS REPORTED BY THE ISDIR MACRO) ARE SKIPPED.
** iDelimiter: DEFAULT IS |. THE DELIMITER PLACED BETWEEN MEMBER NAMES.
**
** REQUIRES: THE ISDIR MACRO.
**
******************************************************************************/
/*
** TODO. THERES ABOUT 100 WAYS THIS COULD BE IMPROVED SUCH AS SIMPLIFYING IF STATEMENTS FOR FILTERS...
*/
%macro file_list(iPath=, iFilter=, iFiles_only=0, iDelimiter=|);
  %local result did dname cnt num_members filename rc check_dir_exist check_file_assign;
  %let result=;
  %let check_dir_exist = %isDir(iPath=&iPath);
  /* DNAME IS BLANK, SO FILENAME GENERATES A FILEREF AND STORES ITS NAME IN DNAME */
  %let check_file_assign = %sysfunc(filename(dname,&iPath));
  %put The desired path: &iPath;
  %if &check_dir_exist and not &check_file_assign %then %do;
    %let did = %sysfunc(dopen(&dname));
    %let num_members = %sysfunc(dnum(&did));
    %do cnt=1 %to &num_members;
      %let filename = %qsysfunc(dread(&did,&cnt));
      %if "&filename" ne "" %then %do;
        /* SKIP MEMBERS THAT DO NOT CONTAIN THE FILTER STRING */
        %if "&iFilter" ne "" %then %do;
          %if %index(%lowcase(&filename),%lowcase(&iFilter)) eq 0 %then %do;
            %goto next;
          %end;
        %end;
        /* SKIP SUB-DIRECTORIES WHEN ONLY FILES WERE REQUESTED */
        %if &iFiles_only %then %do;
          %if %isDir(iPath=%nrbquote(&iPath/&filename)) %then %do;
            %goto next;
          %end;
        %end;
        /* EVERY ENTRY IS PREFIXED WITH THE DELIMITER; THE FIRST ONE IS TRIMMED AT THE END */
        %let result = &result%str(&iDelimiter)&filename;
        %next:
      %end;
      %else %do;
        %put ERROR: (CMN_MAC.FILE_LIST) FILE CANNOT BE READ.;
        %put %sysfunc(sysmsg());
      %end;
    %end;
    %let rc = %sysfunc(dclose(&did));
  %end;
  %else %do;
    %put ERROR: (CMN_MAC.FILE_LIST) PATH DOES NOT EXIST OR CANNOT BE OPENED.;
    %put %sysfunc(sysmsg());
    %put DIRECTORY EXISTS (1-yes, 0-no)? &check_dir_exist;
    %put ASSIGN FILEREF SUCCESSFUL (0-yes, 1-no)? &check_file_assign;
  %end;
  /* RELEASE THE GENERATED FILEREF IF ONE WAS ASSIGNED */
  %if not &check_file_assign %then %do;
    %let rc = %sysfunc(filename(dname));
  %end;
  /*
  ** RETURN THE RESULT. TRIM THE LEADING DELIMITER OFF THE FRONT OF THE RESULTS.
  ** THE TRIM USES THE ACTUAL DELIMITER LENGTH SO MULTI-CHARACTER (OR EMPTY)
  ** DELIMITERS DO NOT CORRUPT THE FIRST NAME.
  */
  %if "&result" ne "" %then %do;
    %qsubstr(%nrbquote(&result),%eval(%length(%superq(iDelimiter)) + 1))
  %end;
%mend;
/* Root folder; the directory actually listed is &path\Data\CSV */
%let path=C:\ETC;
filename parent "&path\Data\CSV";

/* One row per member of the PARENT directory, member name in FILE_NAME */
data files;
  length file_name $50;
  drop close_rc dir_id member_no;
  dir_id=dopen("parent");
  if dir_id <= 0 then put 'Could not open directory';
  else do;
    do member_no=1 to dnum(dir_id);
      file_name=dread(dir_id,member_no);
      output;
    end;
    close_rc=dclose(dir_id);
  end;
run;

* Some additions;
/* NAME receives the *-separated file names, COUNT2 the number of rows selected */
%global name;
%global count2;
%let name=;
%let count2=;
proc sql;
  select file_name
    into :name separated by '*'
    from work.files;
  %let count2 = &sqlobs;
quit;
This works fine. I use &name for other macro and do something with files... (load from CSV, for example).

A macro function to produce a macro variable from a data variable

data sample;
input x $;
datalines;
one
two
three
;
/* Version from the question. The macro generates a PROC SQL step followed */
/* by the resolved text of OUTLIST; the call below places all of that      */
/* generated text inside a %put statement.                                 */
%macro variable_to_macvar(variable=, dataset=);
proc sql noprint;
select &variable into : outlist separated by ' '
from &dataset;
quit;
&outlist
%mend variable_to_macvar;
%put %variable_to_macvar(variable=x, dataset=sample);
Expected output: one two three. Instead I get an error. Why? Is this fixable?
I've successfully created other macros of a very similar form, where the function "returns" a value using the &macrovariable at the end of the macro without a semicolon. For example, here is a similar type of function that works:
/* Returns the blank-separated names &prefix.&first ... &prefix.&last, */
/* e.g. freq5 freq6 ... freq15 with the defaults.                      */
%macro zcat(first=5, last=15, prefix=freq);
  /* keep the accumulator and loop index local so a caller's X or I is untouched */
  %local x i;
  %let x=;
  %do i = &first %to &last;
    %let x=&x &prefix.&i;
  %end;
  &x
%mend zcat;
%put %zcat();
You cannot execute a macro that involves running a proc or a data step in the way that you're trying to do here. You would need to use something like %sysfunc(dosubl(proc sql...)) in order for that to work (assuming you have SAS 9.3+ - see Joe's answer above). Otherwise, you can't use proc sql within a function-style macro.
More details about dosubl:
http://support.sas.com/documentation/cdl/en/lefunctionsref/67398/HTML/default/viewer.htm#p09dcftd1xxg1kn1brnjyc0q93yk.htm
It would be a bit fiddly, but if you really wanted to make this work as a function-style macro in earlier versions of SAS, you could construct it using the open, fetchobs and getvarc functions instead.
Update: Here's an example (using call set rather than getvarc, as this turned out to be simpler), in case anyone needs to do this in SAS 9.2 or earlier.
/* Returns the values of variable VAR from dataset DS as a blank-separated */
/* list (function-style macro). The list is also left in the global macro  */
/* variable OUTLIST.                                                       */
%macro variable_to_macvar(var,ds);
  /* The macro variable named after VAR is declared local so that a macro */
  /* variable of the same name in the caller is not blanked or overwritten */
  %local rc dsid i &var;
  %let &var =;
  %global outlist;
  %let outlist=;
  %let dsid = %sysfunc(open(&ds,i));
  /* link the dataset variables to same-named macro variables */
  %syscall set(dsid);
  %let rc = 0;
  %let i = 0;
  /* read observation 1, 2, ... until FETCHOBS returns a non-zero code */
  %do %while(&rc = 0);
    %let i = %eval(&i + 1);
    %let rc = %sysfunc(fetchobs(&dsid,&i));
    %if &rc = 0 %then %let outlist = &outlist &&&var;
  %end;
  %let rc = %sysfunc(close(&dsid));
  &outlist
%mend;
%put %variable_to_macvar(var=x, ds=sample);
Now works for views as well as ordinary datasets.
DOSUBL is available (but experimental) in 9.3 (at least, 9.3TS1M2, which I have). This is how you'd do it.
data sample;
input x $;
datalines;
one
two
three
;
/* DOSUBL version: the PROC SQL step is passed to DOSUBL as text and run   */
/* during the %sysfunc call, so the macro itself only generates the text   */
/* of OUTLIST and can be used inside %put.                                 */
%macro variable_to_macvar(variable=, dataset=);
%let rc=%sysfunc(dosubl(%str(
proc sql noprint;
select &variable into : outlist separated by ' '
from &dataset;
quit;
)));
&outlist
%mend variable_to_macvar;
%put %variable_to_macvar(variable=x, dataset=sample);;
If you can't use DOSUBL, or want to avoid experimental things, you can do this with PROC FCMP rather than a macro. If you like to write functions, PROC FCMP is probably for you: actually being able to write functions, rather than having to deal with the annoyances of the macro language.
Alter your code at the end to
%global outlist;
%variable_to_macvar(variable=x, dataset=sample);
%put &outlist;
The %put wants to resolve only a macro variable or a single value. It cannot call a procedure. So call your macro and then print the result.
Also, delete the &outlist from the macro definition. Sorry I missed that initially.
EDIT: Alternative.
Change your macro definition to
/* Statement-style variant: runs PROC SQL to collect the values of VARIABLE */
/* from DATASET into OUTLIST and writes the list to the log.                */
%macro variable_to_macvar(variable=, dataset=);
  proc sql noprint;
    select &variable into : outlist separated by ' '
    from &dataset;
  quit;
  /* the %put must end with a semicolon, otherwise it absorbs the %mend */
  %put &outlist;
%mend variable_to_macvar;
Just do the %put inside the macro.
%variable_to_macvar(variable=x, dataset=sample);
will print the string to the log.
We have a utility macro that is probably one of our most used pieces of code that does this for us. It is similar to the code that @user667489 provided but includes some nice features including error catching, allows both character and numeric vars, allows you to specify separators, quotes, quote characters, filters to the dataset, etc....
We just put this macro in our autocall library so that it's available to all of our programs. Some examples of running the macro:
Example 1 - Default behaviour:
%put %ds2list(iDs=samplex, iField=x);
Result 1:
one,two,three
Not quite the desired output as the default separator is a comma, this is easily changed though...
Example 2 - Specify to use a space character as a delimiter:
%put %ds2list(iDs=samplex, iField=x, iDelimiter=%str( ));
Result 2:
one two three
Example 3 - Quoting & example usage
/* Build a small table of names to use as the lookup list. */
data names;
input name $;
datalines;
John
Jim
Frankie
;
run;
/* Print the quoted, comma-separated list to the log */
%put %ds2list(iDs=names, iField=name, iQuote=1);
/* Use the generated list directly as the IN list of a WHERE clause */
proc sql noprint;
create table xx as
select *
from sashelp.class
where name in (%ds2list(iDs=names, iField=name, iQuote=1))
;
quit;
Result 3:
The below is printed to the log:
'John','Jim','Frankie'
Notice how we don't need to even save the result to a macro variable to use it in the SQL statement! Swweeet! This works just as well for SQL passthrough queries, and any other data step or proc statement that you can throw it at. In the above example, a single row is returned as 'John' is the only match found...
Anyway, that's our solution here... been using this for >10 years and works well for me. Here is the macro:
/***************************************************************************
** PROGRAM: MACRO.DS2LIST.SAS
**
** UTILITY PROGRAM THAT RETURNS A LIST OF FIELD VALUES FROM A
** DATASET IN DELIMITED FORMAT.
**
** THE LIST IS RETURNED AS GENERATED TEXT (FUNCTION-STYLE MACRO), SO THE CALL
** CAN BE PLACED INLINE IN ANOTHER STATEMENT.
**
** PARAMETERS:
** iDs : THE LIBNAME.DATASET NAME THAT YOU WANT TO CHECK.
** iField : THE FIELD THAT CONTAINS THE VALUES YOU WANT RETURNED IN A
** DELIMITED FORMAT.
** iDelimiter: DEFAULT IS A COMMA. THE DELIMITER TO USE FOR THE RETURNED LIST.
** iDsOptions: ANY STANDARD DATASET OPTIONS THAT YOU WOULD LIKE TO APPLY SUCH
** AS A WHERE STATEMENT.
** iQuote : (0=NO,1=YES). DEFAULT=0/NO. DETERMINES WHETHER THE RETURNED
** LIST IS QUOTED OR NOT.
** iQuoteChar: (SINGLE,DOUBLE) DEFAULT=SINGLE. SPECIFIES WHETHER SINGLE
** OR DOUBLE QUOTES ARE USED WHEN QUOTING THE RETURNED LIST
**
*****************************************************************************
** VERSION:
** 1.8 MODIFIED: 11-OCT-2010 BY: KN
** ALLOW BLANK CHARACTER VALUES AND ALSO REMOVED TRAILING
** ALLOW PARENTHESES IN CHARACTER VALUES
*****************************************************************************/
%macro ds2list(iDs=, iField=, iDsOptions=, iDelimiter=%str(,), iQuote=0, iQuoteChar=single);
/* VALUE IS DECLARED LOCAL SO A CALLER VARIABLE OF THE SAME NAME IS NOT OVERWRITTEN */
%local dsid pos rc result cnt quotechar value;
%let result=;
%let cnt=0;
/* PICK THE QUOTE CHARACTER. AN UNRECOGNISED iQuoteChar FALLS BACK TO DOUBLE QUOTES WITH A WARNING. */
%if &iQuote %then %do;
%if "%upcase(&iQuoteChar)" eq "DOUBLE" %then %do;
%let quotechar = %nrstr(%");
%end;
%else %if "%upcase(&iQuoteChar)" eq "SINGLE" %then %do;
%let quotechar = %nrstr(%');
%end;
%else %do;
%let quotechar = %nrstr(%");
%put WARNING: MACRO.DS2LIST.SAS: PARAMETER IQUOTECHAR INCORRECT. DEFAULTED TO DOUBLE;
%end;
%end;
%else %do;
%let quotechar = ;
%end;
/*
** ENSURE ALL THE REQUIRED PARAMETERS WERE PASSED IN.
*/
%if "&iDs" ne "" and "&iField" ne "" %then %do;
/* OPEN THE DATASET FOR INPUT WITH ANY DATASET OPTIONS APPLIED */
%let dsid=%sysfunc(open(&iDs(&iDsOptions),i));
%if &dsid %then %do;
/* VARNUM RETURNS 0 WHEN THE FIELD DOES NOT EXIST */
%let pos=%sysfunc(varnum(&dsid,&iField));
%if &pos %then %do;
%let rc=%sysfunc(fetch(&dsid));
%do %while (&rc eq 0);
/* CHARACTER VALUES ARE READ MACRO-QUOTED; NUMERIC VALUES ARE READ AS-IS */
%if "%sysfunc(vartype(&dsid,&pos))" = "C" %then %do;
%let value = %qsysfunc(getvarc(&dsid,&pos));
%if "%trim(&value)" ne "" %then %do;
%let value = %qsysfunc(cats(%nrstr(&value)));
%end;
%end;
%else %do;
%let value = %sysfunc(getvarn(&dsid,&pos));
%end;
/* WHITESPACE/CARRIAGE RETURNS REMOVED IN THE BELOW LINE */
/* TO ENSURE NO WHITESPACE IS RETURNED IN THE OUTPUT. */
%if &cnt ne 0 %then %do;%unquote(&iDelimiter)%end;%unquote(&quotechar&value&quotechar.)
%let cnt = %eval(&cnt + 1);
%let rc = %sysfunc(fetch(&dsid));
%end;
/* THE LOOP ENDS ON THE FIRST NON-ZERO FETCH CODE; ANYTHING OTHER THAN -1 IS REPORTED */
%if &rc ne -1 %then %do;
%put WARNING: MACRO.DS2LIST.SAS: %sysfunc(sysmsg());
%end;
%end;
%else %do;
%put ERROR: MACRO.DS2LIST.SAS: FIELD &iField NOT FOUND IN DATASET %upcase(&iDs).;
%end;
/* CLOSE ONLY A DATASET THAT WAS SUCCESSFULLY OPENED */
%let rc=%sysfunc(close(&dsid));
%end;
%else %do;
%put ERROR: MACRO.DS2LIST.SAS: DATASET %upcase(&iDs) COULD NOT BE OPENED.;
%end;
%end;
%else %do;
%put ERROR: MACRO.DS2LIST.SAS: YOU MUST SPECIFY BOTH THE IDS AND IFIELD PARAMETERS TO CALL THIS MACRO.;
%end;
%mend;