Categorizing variables based on conditions stored in another table SAS - sas

I would like to categorize variables from one table which looks like this:
Var1 Var2
19 0.2
30 0.1
45 0.2
using a table that stores the conditions for the categorization:
variable condition category
Var1 Var1<20 1
Var1 40>Var1>=20 2
Var1 Var1>=40 3
Var2 Var2<0.2 1
Var2 Var2>=0.2 2
And the result of that would be a new table created containing categories of variables based on first table:
Var1 Var2
1 2
2 1
3 2

This is just a duplicate of this previous question. Categorize variables basing on conditions from other data set
Code generation from data is much easier to create and debug if you just use SAS code to do it and do not add in the complications of macro code.
Here is the answer again in more detail. First let's make your example data printouts into actual SAS datasets.
/* Example observations whose values are to be replaced by category codes. */
data rawdata;
  input Var1 Var2;
datalines;
19 0.2
30 0.1
45 0.2
;
run;

/* Rule table: one row per (variable, condition) pair and the category it maps to. */
data metadata;
  input variable :$32. condition :$200. category;
datalines;
Var1 Var1<20 1
Var1 40>Var1>=20 2
Var1 Var1>=40 3
Var2 Var2<0.2 1
Var2 Var2>=0.2 2
;
run;
Now let's generate an SQL select statement with a CASE statement to generate each output variable from the metadata.
/* Write one PROC SQL "create table want as select ..." statement to a   */
/* temporary file, with one CASE expression per variable in METADATA.    */
filename code temp;
data _null_;
set metadata end=eof;
/* BY processing supplies first.variable / last.variable; METADATA must be sorted by variable */
by variable ;
file code ;
/* sep is blank before the first CASE and becomes a comma for every later one */
retain sep ' ';
if _n_=1 then put "create table want as select";
if first.variable then put sep $1. 'case ';
put ' when (' condition ') then ' category ;
/* values that match none of the conditions get a missing category */
if last.variable then put ' else . end as ' variable ;
if eof then put 'from rawdata' / ';' ;
sep=',' ;
run;
And run it.
/* Run the generated statement inside PROC SQL; SOURCE2 echoes the included lines to the log. */
proc sql;
%include code / source2 ;
quit;
Example SAS LOG:
1639 proc sql;
1640 %include code / source2 ;
NOTE: %INCLUDE (level 1) file CODE is file C:\Users\xxx\AppData\Local\Temp\1\SAS Temporary Files\_TD13724_AMRL20B7F00CGPP_\#LN00654.
1641 +create table want as select
1642 + case
1643 + when (Var1<20 ) then 1
1644 + when (40>Var1>=20 ) then 2
1645 + when (Var1>=40 ) then 3
1646 + else . end as Var1
1647 +,case
1648 + when (Var2<0.2 ) then 1
1649 + when (Var2>=0.2 ) then 2
1650 + else . end as Var2
1651 +from rawdata
1652 +;
NOTE: Table WORK.WANT created, with 3 rows and 2 columns.
Results:
Obs Var1 Var2
1 1 2
2 2 1
3 3 2
If you want to convert it to macro then just replace the hard coded input dataset names and output dataset names with macro variable references.
/* GENCAT: build OUTDATA from INDATA by replacing each variable listed in   */
/* METADATA with the category of the first condition it satisfies.          */
/*   indata=   dataset holding the raw values                               */
/*   outdata=  dataset to create                                            */
/*   metadata= dataset with columns variable, condition, category           */
/*             (default METADATA); must be sorted by variable               */
%macro gencat(indata=,outdata=,metadata=metadata);
filename code temp;
data _null_;
set &metadata end=eof;
by variable ;
file code ;
/* sep is blank before the first CASE and a comma before each later one */
retain sep ' ';
if _n_=1 then put "create table &outdata as select";
if first.variable then put sep $1. 'case ';
put ' when (' condition ') then ' category ;
/* values matching no condition get a missing category */
if last.variable then put ' else . end as ' variable ;
if eof then put "from &indata" / ';' ;
sep=',' ;
run;
/* execute the generated CREATE TABLE statement; NOSOURCE2 keeps the included lines out of the log */
proc sql;
%include code / nosource2 ;
quit;
%mend gencat;
So now the same result is gotten by calling with these values:
%gencat(indata=rawdata,outdata=want)
So the log now looks like this:
1783 %gencat(indata=rawdata,outdata=want)
MPRINT(GENCAT): filename code temp;
NOTE: PROCEDURE SQL used (Total process time):
real time 10.35 seconds
cpu time 0.20 seconds
MPRINT(GENCAT): data _null_;
MPRINT(GENCAT): set metadata end=eof;
MPRINT(GENCAT): by variable ;
MPRINT(GENCAT): file code ;
MPRINT(GENCAT): retain sep ' ';
MPRINT(GENCAT): if _n_=1 then put "create table want as select";
MPRINT(GENCAT): if first.variable then put sep $1. 'case ';
MPRINT(GENCAT): put ' when (' condition ') then ' category ;
MPRINT(GENCAT): if last.variable then put ' else . end as ' variable ;
MPRINT(GENCAT): if eof then put "from rawdata" / ';' ;
MPRINT(GENCAT): sep=',' ;
MPRINT(GENCAT): run;
NOTE: The file CODE is:
Filename=C:\Users\AppData\Local\Temp\1\SAS Temporary Files\_TD13724_AMRL20B7F00CGPP_\#LN00659,
RECFM=V,LRECL=32767,File Size (bytes)=0,
Last Modified=02Feb2018:12:36:39,
Create Time=02Feb2018:12:36:39
NOTE: 12 records were written to the file CODE.
The minimum record length was 1.
The maximum record length was 28.
NOTE: There were 5 observations read from the data set WORK.METADATA.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
MPRINT(GENCAT): proc sql;
MPRINT(GENCAT): create table want as select case when (Var1<20 ) then 1 when (40>Var1>=20 ) then 2 when (Var1>=40 ) then 3 else .
end as Var1 ,case when (Var2<0.2 ) then 1 when (Var2>=0.2 ) then 2 else . end as Var2 from rawdata ;
NOTE: Table WORK.WANT created, with 3 rows and 2 columns.
MPRINT(GENCAT): quit;

Here is a macro way to accomplish this. It assumes that the conditions in the table are in the order you want them applied and grouped by variable. If not, then sort the table appropriately.
First test data:
/* Test input: the raw values to categorize. */
data have;
input Var1 Var2;
datalines;
19 0.2
30 0.1
45 0.2
;
/* Rule table: variable name, condition text and the category assigned when the condition holds. */
/* Both character columns are read with a $32. informat.                                         */
data conditions;
informat variable condition $32.;
input variable $ condition $ category;
datalines;
Var1 Var1<20 1
Var1 40>Var1>=20 2
Var1 Var1>=40 3
Var2 Var2<0.2 1
Var2 Var2>=0.2 2
;
Now make a macro. We will read the table into macro variables and then write a datastep to apply them. We use IF/THEN/ELSE blocks for each variable.
/* APPLY_CONDITIONS: read every row of CONDITIONS into macro variables and  */
/* generate a DATA step that rewrites HAVE into WANT, replacing each        */
/* variable by its category through an IF / ELSE IF chain per variable.     */
/* The rows of CONDITIONS must be grouped by variable and listed in the     */
/* order in which the conditions are to be tested.                          */
%macro apply_conditions();
%local i j n;
proc sql noprint;
/* n = number of condition rows */
select count(*) into :n trimmed from conditions;
/* declare the per-row macro variables local before INTO creates them */
%do i=1 %to &n;
%local var&i;
%local condition&i;
%local category&i;
%end;
select variable, condition, category
into :var1 - :var&n,
:condition1 - :condition&n,
:category1 - :category&n
from conditions;
quit;
data want;
set have;
%do i=1 %to &n;
/* Emit ELSE only when this row belongs to the same variable as the previous row, */
/* so each variable starts a fresh IF chain.                                      */
%if &i>1 %then %do;
%let j=%eval(&i-1);
%if &&var&i = &&var&j %then %do;
else
%end;
%end;
/* Apply the condition: the variable is overwritten with its category. The ELSE */
/* chain keeps the new value from being tested against the later conditions.    */
if &&condition&i then
&&var&i = &&category&i;
%end;
run;
%mend;
Finally run the macro. Using MPRINT to see the code that is generated.
options mprint;
%apply_conditions;

Related

Add new empty rows to a SAS table with names from another table

Assume I have table foo which contains a (dynamic) list of new rows which I want to add to another table have, so that it yields a table want looking e.g. like this:
x y p_14 p_15
1 2 2 99
2 4 7 24
Example data for foo:
id row_name
14 p_14
15 p_15
Example data for have:
x y p Z
1 2 14 2
1 2 15 99
1 2 16 59
2 4 14 7
2 4 15 24
2 4 16 58
What I have so far is the following which is not yet in macro shape:
/* Hand-written version for ids 14 and 15: join HAVE to one filtered copy of itself per id */
/* so that each id's z value becomes its own column (p_14, p_15).                          */
/* NOTE(review): the inline view "old" selects x, y from every row of HAVE, so each (x, y) */
/* pair appears once per row of HAVE; "select distinct x, y" would presumably be needed to */
/* get one output row per pair - confirm.                                                  */
proc sql;
create table want as
select old.*, t1.p_14, t2.p_15 /* choosing non-duplicate rows */
from (select x, y from have) old
left join (select x, y, z as p_14 from have where p=14) t1
on old.x=t1.x and old.y=t1.y
left join (select x, y, z as p_15 from have where p=15) t2
on old.x=t2.x and old.y=t2.y
;
quit;
Ideally, I am aiming for a macro which takes foo as input and automatically creates all the joins from above. Also, the solution should not produce any warnings in the log. My challenge is how to dynamically choose the correct (non-duplicate) rows.
PS: This is a follow-up question of Populate SAS macro-variable using a SQL statement within another SQL statement? The important bit is that it is not a full transpose, I guess.
You can go from HAVE to WANT with PROC TRANSPOSE.
/* One output row per (x, y); each value of p becomes a column named p_<p> holding z. */
/* BY processing requires HAVE to be sorted by x y.                                   */
proc transpose data=have out=want(drop=_name_) prefix=p_ ;
by x y ;
id p ;
var z;
run;
To limit it to the values of P that occur in FOO you could use a macro variable (as long as the number of observations in FOO is small enough).
/* Collect the ids listed in FOO into one blank-separated macro variable. */
proc sql noprint ;
select id into :idlist separated by ' ' from foo ;
quit;
/* Transpose only the rows of HAVE whose p is one of those ids. */
proc transpose data=have out=want(drop=_name_) prefix=p_ ;
where p in (&idlist) ;
by x y ;
id p ;
var z;
run;
If the issue is you want variable P_17 to be in the result even if 17 does not appear in HAVE then add a little more complexity. For example add another data step that will force the creation of the empty variables. You can generate the list of variable names from the list of id's in FOO.
/* Build two parallel lists from FOO: the ids (for the WHERE clause) and the */
/* matching output variable names p_<id> (for the ARRAY statement below).    */
proc sql noprint ;
select id , cats('p_',id)
into :idlist separated by ' '
, :varlist separated by ' '
from foo
;
quit;
proc transpose data=have out=want(drop=_name_) prefix=p_ ;
where p in (&idlist) ;
by x y ;
id p ;
var z;
run;
/* Rebuild WANT so every name in &varlist exists even if its id never occurred in HAVE: */
/* the first SET defines x and y, the ARRAY statement creates any p_ variable that is   */
/* not there yet (left missing), and the second SET reads the actual transposed values. */
data want ;
set want (keep=x y);
array all &varlist ;
set want ;
run;
Results:
Obs x y p_14 p_15 p_17
1 1 2 2 99 .
2 2 4 7 24 .
If the number of values is too large to store in a single macro variable (limit 64K bytes) you could generate the WHERE statement with a data step to a file and use %INCLUDE to add the WHERE statement into the code.
/* Write a statement of the form "where p in (<id> <id> ...);" to a temporary */
/* file, taking one id from each row of FOO, so it can be %INCLUDEd into a    */
/* step when the id list is too long for a single macro variable.             */
filename where temp;
data _null_;
set foo end=eof;
file where ;
/* The trailing @ holds the output line so that the ids and the closing      */
/* parenthesis are appended to the same statement. (A bare "#" here is not   */
/* valid: # is the line-pointer control and needs a line number.)            */
if _n_=1 then put 'where p in (' @;
put id @ ;
if eof then put ');' ;
run;
proc transpose ... ;
%include where / source2;
...
Use macro program:
/* Long-format input: one row per (x, y, p) with its value Z. */
data have;
input x y p Z;
cards;
1 2 14 2
1 2 15 99
1 2 16 59
2 4 14 7
2 4 15 24
2 4 16 58
;
/* The ids (values of p) that should become columns, with their column names. */
data foo;
input id row_name $;
cards;
14 p_14
15 p_15
;
/* TEST: create WANT with one row per distinct (x, y) of HAVE and one column  */
/* p_<id> for every id listed in &dsn, by left-joining HAVE to one filtered   */
/* copy of itself per id.                                                     */
/*   dsn  dataset with a numeric column id (one row per wanted column)        */
/* If &dsn has no rows, WANT contains just the distinct (x, y) pairs.         */
%macro test(dsn);
  /* keep the working variables out of the caller's symbol table */
  %local i n value cur;
  /* NOPRINT: the two helper queries only fill macro variables */
  proc sql noprint;
    select count(*) into :n trimmed from &dsn;
    select id into :value separated by ' ' from &dsn;
    create table want as
    select distinct a.x, a.y
      /* leading commas keep the select list valid when there are no ids */
      %do i=1 %to &n;
        %let cur=%scan(&value,&i,%str( ));
        , t&i..p_&cur
      %end;
    from have a
      %do i=1 %to &n;
        %let cur=%scan(&value,&i,%str( ));
        left join have (where=(p=&cur) rename=(z=p_&cur.)) t&i.
          on a.x=t&i..x and a.y=t&i..y
      %end;
    ;
  quit;
%mend;
%test(foo);

Multiple set to macro variable

I'm trying to build an advanced system of reporting and logging, and I have a problem.
Example:
work.check_true is empty.
When this table is empty then variable like 'czy_wyslac' should have value 2.
If this table not empty, variable 'czy_wyslac' should have value from field "gen" (1 or 0) from table work.check_true.
Next step - depending on the value of variable 'czy_wyslac" processing one from three step to next macro. I do not have a problem with this.
Below is my code:
/* Set macro variable OBSCNT to 0 when work.Check_true is empty, otherwise to 1.  */
/* OBSCNT is first set to 0; if the table has no rows, the SET statement ends the */
/* step before the second CALL SYMPUT runs. Otherwise OBSCNT is overwritten with  */
/* _n_ (1 on the first iteration) and STOP ends the step after that first row.    */
data _null_;
call symput('obscnt',0);
set work.Check_true;
call symput('obscnt',_n_);
stop;
run;
/* GET_TABLE_SIZE: set the global macro variable CZY_WYSLAC.                 */
/*   - 2 when work.Check_true is empty (macro variable OBSCNT equals 0)      */
/*   - otherwise the value of column GEN from work.Check_true                */
/* Expects OBSCNT to have been set before the macro is called.               */
%macro get_table_size();
%global czy_wyslac;
/* CALL SYMPUTX is a DATA step routine and cannot be generated outside a     */
/* DATA step; a macro assignment is what is needed here.                     */
%if &obscnt=0 %then %let czy_wyslac=2;
/* %DO/%END is required so the whole query, not only its first statement,    */
/* belongs to the %ELSE branch.                                              */
%else %do;
proc sql noprint;
select
gen into: czy_wyslac
from work.Check_true
;quit;
%end;
%mend;
/* CREATE_LOG_MAIL: act on the macro variable CZY_WYSLAC.                              */
/*   = 1  send an e-mail with the comparison workbook attached and create LOG_CREATE   */
/*        with STATUS 'SENT'                                                           */
/*   = 0  create LOG_CREATE with STATUS 'NOT SENT' (no e-mail)                         */
/*   any other value (e.g. 2) generates no code at all                                 */
/* Uses macro variables that are set outside this macro: CALOSC, COUNT_RECORDS_02 and  */
/* _TIMER_START.                                                                       */
/* NOTE(review): the "#" in the e-mail addresses below looks like a stand-in for "@",  */
/* and "host=XXXemailport=XXX" looks like two options with the blank between them      */
/* lost - confirm against the original program.                                        */
%macro Create_log_mail();
%if &czy_wyslac. = 1 %then
%do;
/* one-row dataset holding the path of the attachment */
data work.maile;
zal = "T:\XXX\XXX\Risk\XXXX\XXXX\OUTPUT\Results_of_compare &calosc..xlsx";
run;
options emailsys=XXemail host=XXXemailport=XXX;
FILENAME mail EMAIL;
/* write the message body and the !EM_...! e-mail directives to the EMAIL fileref */
DATA _NULL_;
SET WORK.maile END=eof;
FILE mail ENCODING='UTF-8';
PUT '!EM_TO! "XXXXX#XXXXXX"';
PUT 'Szanowni,';
PUT 'Załącznik zawiera znalezione różnice między szablonem kalkulatora a zawartym w systemie ATP.';
PUT 'Wpis _TYPE> = PLIK - baza porównywana';
PUT 'Wpis _TYPE> = ATP - rekord wyciągnięty z ATP';
PUT;
PUT 'Baza zawiera również wynik porównania wyposażenia oraz baseline.';
PUT;
PUT 'Pozdrawiam,';
PUT 'KJ SYSTEM REPORT';
PUT '!EM_FROM! XXXSYSTEM REPORT <noreply#XXXXX.pl>';
PUT '!EM_SENDER! XXXXSYSTEM REPORT <noreply#XXXX.pl>';
PUT '!EM_BCC! ';
PUT '!EM_SUBJECT! XXXXXXXX';
PUT '!EM_ATTACH!' zal;
PUT '!EM_REPLYTO! Please do not reply for this mail - regards :)';
PUT '!EM_SEND!' / '!EM_NEWMSG!';
/* after the last row, abort the message opened by the final !EM_NEWMSG! */
IF eof THEN PUT '!EM_ABORT!';
RUN;
/* log entry for the "mail sent" case; date()*86400 turns today's date (days) */
/* into a datetime value (seconds) at midnight                                 */
proc sql;
Create table LOG_CREATE as
Select
distinct
date()*86400 format datetime20. as EXTRACT_DATE,
date()*86400 format datetime20. as REFERENCE_DATE,
'MAIL: Results_of_compare' as STAGE_NAME,
'99_02_MAIL_RESULT' as PROCES_NAME,
'02_CALCULATOR_ATP' as SCHEMA_NAME,
20 as etap_no,
'SENT' as STATUS,
&Count_records_02. as records,
'Wysłano mail' as Comments,
. as alert_records,
'' as Alert_comments,
&_timer_start format datetime20. as START_PROCESS,
datetime() format datetime20. as END_PROCESS,
datetime() - &_timer_start format time13.2 as Duration
FROM work._PRODSAVAIL
;quit;
%end;
%else %if &czy_wyslac. = 0 %then %do;
/* log entry for the "mail not sent" case: same columns, STATUS 'NOT SENT', */
/* no record count and no comment                                           */
proc sql;
Create table LOG_CREATE as
Select
distinct
date()*86400 format datetime20. as EXTRACT_DATE,
date()*86400 format datetime20. as REFERENCE_DATE,
'MAIL: Results_of_compare' as STAGE_NAME,
'99_02_MAIL_RESULT' as PROCES_NAME,
'02_CALCULATOR_ATP' as SCHEMA_NAME,
20 as etap_no,
'NOT SENT' as STATUS,
. as records,
'' as Comments,
. as alert_records,
'' as Alert_comments,
&_timer_start format datetime20. as START_PROCESS,
datetime() format datetime20. as END_PROCESS,
datetime() - &_timer_start format time13.2 as Duration
FROM work._PRODSAVAIL
;quit;
%end;
%mend;
%Create_log_mail();
If work.check_true is empty, the select gen into :czy_wyslac from work.check_true will not return anything, so &CZY_WYSLAC keeps whatever value it had before.
Therefore the below will accomplish what you want :
/* Assign the "table is empty" value first; the SELECT below overwrites it */
/* only when work.check_true actually returns a row.                        */
%LET CZY_WYSLAC = 2 ; /* default value */
proc sql noprint ;
select gen into :CZY_WYSLAC from work.check_true ;
quit ;

Populate SAS variable by repeating values

I have a SAS table with a lot of missing values. This is only a simple example.
The real table is much bigger (>1000 rows) and the numbers are not the same. What is the same is that column a has no missing values, while columns b and c each hold a sequence that is shorter than the length of a.
a b c
1 1b 1000
2 2b 2000
3 3b
4
5
6
7
What I want is to fill b and c by repeating their sequences until the columns are full. The result should look like this:
a b c
1 1b 1000
2 2b 2000
3 3b 1000
4 1b 2000
5 2b 1000
6 3b 2000
7 1b 1000
I have tried to write a macro, but it became too messy.
The hash-of-hashes solution is the most flexible here, I suspect.
/* Sample data: a is always filled; b and c are filled only in the first rows. */
data have;
infile datalines delimiter="|";
input a b $ c;
datalines;
1|1b|1000
2|2b|2000
3|3b|
4| |
5| |
6| |
7| |
;;;;
run;
/* names of the variables whose values are to be repeated */
%let vars=b c;
/* Fill the blanks of every variable in &vars by cycling through the values */
/* already seen for that variable.                                          */
data want;
set have;
/* the row number is the key of each per-variable hash */
rownum = _n_;
if _n_=1 then do;
/* hoh: hash of hashes, keyed by variable name, holding one hash object (hh) per variable */
declare hash hoh(ordered:'a');
declare hiter hih('hoh');
hoh.defineKey('varname');
hoh.defineData('varname','hh');
hoh.defineDone();
declare hash hh();
do varnum = 1 to countw("&vars.");
varname = scan("&vars",varnum);
/* per-variable hash: key rownum, data = the variable itself */
hh = _new_ hash(ordered:'a');
hh.defineKey("rownum");
hh.defineData(varname);
hh.defineDone();
hoh.replace();
end;
end;
/* Walk over all per-variable hashes; BY 0 means the loop is driven only by */
/* the return code of hih.next().                                           */
do rc=hih.next() by 0 while (rc=0);
if strip(vvaluex(varname)) in (" ",".") then do;
/* value is blank or ".": fetch the stored value whose key is this row's */
/* position within the cycle of num_items stored values                  */
num_items = hh.num_items;
rowmod = mod(_n_-1,num_items)+1;
hh.find(key:rowmod);
end;
else do;
/* value is present: store it under the current row number */
hh.replace();
end;
rc = hih.next();
end;
keep a &Vars.;
run;
Basically, one hash is built for each variable you are using. They're each added to the hash of hashes. Then we iterate over that, and search to see if the variable requested is populated. If it is then we add it to its hash. If it isn't then we retrieve the appropriate one.
Assuming that you can tell how many rows to use for each variable by counting how many non-missing values are in the column then you could use this code generation technique to generate a data step that will use the POINT= option SET statements to cycle through the first Nx observations for variable X.
First get a list of the variable names;
/* OBS=0 reads no data rows: the output has one row per variable of HAVE, with its name in _NAME_. */
proc transpose data=have(obs=0) out=names ;
var _all_;
run;
Then use those to generate a PROC SQL select statement to count the number of non-missing values for each variable.
/* Generate and run a PROC SQL statement that creates COUNTS: one row holding, */
/* for every variable named in NAMES, the number of non-missing values in HAVE. */
filename code temp ;
data _null_;
set names end=eof ;
file code ;
if _n_=1 then put 'create table counts as select ' ;
/* The trailing @ holds the line so the comma is followed by the next        */
/* expression on the same line. (A bare "#" is not valid here: # is the      */
/* line-pointer control and needs a line number.)                            */
else put ',' @;
put 'sum(not missing(' _name_ ')) as ' _name_ ;
if eof then put 'from have;' ;
run;
proc sql noprint;
%include code /source2 ;
quit;
Then transpose that so that again you have one row per variable name but this time it also has the counts in COL1.
/* Turn the single COUNTS row back into one row per variable: name in _NAME_, count in COL1. */
proc transpose data=counts out=names ;
var _all_;
run;
Now use that to generate SET statements needed for a DATA step to create the output from the input.
/* For every variable write two statements: one that computes a pointer cycling */
/* through 1..COL1 (the variable's non-missing count) and a SET ... POINT=      */
/* statement that reads just that variable from the pointed-to observation.     */
filename code temp;
data _null_;
set names ;
file code ;
length pvar $32 ;
/* pointer variable name: _point1, _point2, ... */
pvar = cats('_point',_n_);
put pvar '=mod(_n_-1,' col1 ')+1;' ;
put 'set have(keep=' _name_ ') point=' pvar ';' ;
run;
Now use the generated statements.
/* The first SET keeps no variables (DROP=_ALL_); it only reads HAVE sequentially. */
/* The variables themselves come from the generated SET ... POINT= statements.     */
data want ;
set have(drop=_all_);
%include code / source2;
run;
So for your example data file with variables A, B and C and 7 total observations the LOG for the generated data step looks like this:
1229 data want ;
1230 set have(drop=_all_);
1231 %include code / source2;
NOTE: %INCLUDE (level 1) file CODE is file .../#LN00026.
1232 +_point1 =mod(_n_-1,7 )+1;
1233 +set have(keep=a ) point=_point1 ;
1234 +_point2 =mod(_n_-1,3 )+1;
1235 +set have(keep=b ) point=_point2 ;
1236 +_point3 =mod(_n_-1,2 )+1;
1237 +set have(keep=c ) point=_point3 ;
NOTE: %INCLUDE (level 1) ending.
1238 run;
NOTE: There were 7 observations read from the data set WORK.HAVE.
NOTE: The data set WORK.WANT has 7 observations and 3 variables.
Populate a temporary array with the values, then check the row and add the appropriate value.
Setup the data
/* Sample data, "|"-delimited: a is always filled; b and c are filled only in the first rows. */
data have;
infile datalines delimiter="|";
input a b $ c;
datalines;
1|1b|1000
2|2b|2000
3|3b|
4| |
5| |
6| |
7| |
;
Get a count of the non-null values
/* Store how many rows of HAVE carry a value for b (in &n_b) and for c (in &n_c). */
proc sql noprint;
  select count(*) into :n_b
    from have
    where b ne "";

  select count(*) into :n_c
    from have
    where c ne .;
quit;
Now populate the missing values by repeating the contents of each array.
/* Fill the blank b and c values by cycling through the values found in the */
/* first &n_b (for b) and first &n_c (for c) rows.                          */
data want;
  set have;

  /* caches for the known values; temporary arrays keep their contents across rows */
  array bvals[&n_b] $ 32 _temporary_;
  array cvals[&n_c] _temporary_;

  /* b: past the known rows take the cached value for this position in the cycle, */
  /* otherwise remember the value just read                                       */
  if _n_ > &n_b then b = bvals[mod(_n_+&n_b-1,&n_b)+1];
  else bvals[_n_] = b;

  /* c: same scheme with its own cycle length */
  if _n_ > &n_c then c = cvals[mod(_n_+&n_c-1,&n_c)+1];
  else cvals[_n_] = c;
run;
/* Hard-coded fill for the sample data: b repeats 1b, 2b, 3b (period 3) and */
/* c repeats 1000, 2000 (period 2).                                         */
data want;
set have;
/* position of the row inside the three-value cycle of b (0 = third value) */
n=mod(_n_,3);
if n=0 then b='3b';
else b=cats(n,'b');
/* c has only two values, so it must follow the row number modulo 2, not   */
/* the modulo-3 counter used for b: odd rows get 1000, even rows get 2000  */
if mod(_n_,2)=1 then c=1000;
else c=2000;
drop n;
run;

Checking whether the DS has variable value if the variable has missing values then drop the column

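I am passing a dataset to a macro together with a var_core value. For every variable whose var_core in the metadata dataset equals that value, I want to check the variable in the dataset: if it contains only missing values, the column should be dropped.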
/* Sample data; dept is read from "." in every row. */
DATA details;
INPUT id name $ dept $ salary;
datalines;
01 John . 10000
02 Mary . 20000
03 Priya . 30000
05 Ram . 25000
;
/* Metadata: one row per variable of DETAILS with its var_core classification. */
DATA newdetails;
INPUT var_name $ var_core $;
DATALINES;
id perm
name perm
dept perm
salary req
;
/* CORE_CHECK: for every variable that &newds lists with var_core equal to    */
/* &var_core_val, count the rows of &inds in which the variable is not        */
/* missing and write the count to the log.                                    */
/*   inds=          dataset to inspect                                        */
/*   newds=         metadata dataset with columns var_name and var_core       */
/*   var_core_val=  var_core value selecting the variables to check           */
/* NOTE(review): the macro only reports the counts; nothing is dropped yet.   */
%macro core_check(inds=,newds=,var_core_val= );
proc sql noprint;
select var_name
into :varname separated by ' '
from &newds
where var_core="&var_core_val.";
quit;
/* number of variable names returned by the query above */
%let nvar=&sqlobs;
%put &varname;
%do i=1 %to &nvar;
%let var&i=%scan(&varname,&i);
proc sql;
select count(*)
into :nobs
from &inds where &&var&i is not missing ;
%put this = &nobs;
quit;
%end;
%mend;
%core_check(inds=work.details,newds=work.newdetails,var_core_val=perm);
Here is one way to check for empty variables using the NLEVELS output of PROC FREQ. Note that the ODS table might not create the NMissLevels or NNonMissLevels variables based on the results.
So for your problem we could make a macro that takes as its input the name of the dataset to check, the dataset to create and the dataset with the metadata about which variables are optional. First get the list of variables to check. Then check them using PROC FREQ. Then use a data step to generate a macro variable with the list of empty variables.
/* DROP_OPTIONAL: copy &inds to &newds, leaving out every variable that      */
/* &metadata marks with var_core='perm' and that has no non-missing value.   */
/*   inds=      dataset to check                                             */
/*   newds=     dataset to create                                            */
/*   metadata=  dataset with columns var_name and var_core                   */
%macro drop_optional(inds=,newds=,metadata= );
%local varlist n emptyvars ;
/* candidate variables: those flagged 'perm' in the metadata */
proc sql noprint;
select var_name
into :varlist separated by ' '
from &metadata
where var_core='perm'
;
quit;
%let n=&sqlobs;
/* run the check only when there is at least one candidate */
%if (&n) %then %do;
/* capture the NLEVELS table of PROC FREQ as dataset NLEVELS */
ods output nlevels=nlevels;
proc freq nlevels data=&inds ;
tables &varlist / noprint ;
run;
data nlevels;
/* declare the columns up front because the ODS table may lack some of them */
length TableVar $32 NLevels NMissLevels NNonMissLevels 8;
set nlevels end=eof;
/* the sum statement turns a missing NMissLevels into 0 */
nmisslevels+0;
nnonmisslevels=nlevels-nmisslevels;
length emptyvars $32767;
retain emptyvars;
/* collect the names of variables that have no non-missing level */
if nnonmisslevels=0 then emptyvars=catx(' ',emptyvars,tablevar);
if eof then call symputx('emptyvars',emptyvars);
run;
%end;
/* emptyvars stays blank when nothing was checked or nothing is empty */
data &newds;
set &inds (drop=&emptyvars);
run;
%mend drop_optional;
So let's use your sample data.
/* Sample data; dept is read from "." in every row. */
data details;
input id name $ dept $ salary;
datalines;
01 John . 10000
02 Mary . 20000
03 Priya . 30000
05 Ram . 25000
;
/* Metadata: which variables are 'perm' and which are 'req'. */
data metadata;
input var_name $ var_core $;
DATALINES;
id perm
name perm
dept perm
salary req
;
And call the macro.
%drop_optional(inds=details,newds=details_new,metadata=metadata);

Split SAS dataset

I have a SAS dataset that looks like this:
id | dept | ...
1 A
2 A
3 A
4 A
5 A
6 A
7 A
8 A
9 B
10 B
11 B
12 B
13 B
Each observation represents a person.
I would like to split the dataset into "team" datasets, each dataset can have a maximum of 3 observations.
For the example above this would mean creating 3 datasets for dept A (2 of these datasets would contain 3 observations and the third dataset would contain 2 observations). And 2 datasets for dept B (1 containing 3 observations and the other containing 2 observations).
Like so:
First dataset (deptA1):
id | dept | ...
1 A
2 A
3 A
Second dataset (deptA2)
id | dept | ...
4 A
5 A
6 A
Third dataset (deptA3)
id | dept | ...
7 A
8 A
Fourth dataset (deptB1)
id | dept | ...
9 B
10 B
11 B
Fifth dataset (deptB2)
id | dept | ...
12 B
13 B
The full dataset I'm using contains thousands of observations with over 50 depts. I can work out how many datasets per dept are required and I think a macro is the best way to go as the number of datasets required is dynamic. But I can't figure out the logic to create the datasets so that they have a maximum of 3 observations. Any help appreciated.
Another version.
Compared to DavB version, it only processes input data once and splits it into several tables in single datastep.
Also if more complex splitting rule is required, it can be implemented in datastep view WORK.SOURCE_PREP.
/* Sample data: one row per person with the department it belongs to. */
data WORK.SOURCE;
infile cards;
length ID 8 dept $1;
input ID dept;
cards;
1 A
2 A
3 A
4 A
5 A
6 A
7 A
8 A
9 B
10 B
11 B
12 B
13 B
14 C
15 C
16 C
17 C
18 C
19 C
20 C
;
run;
/* BY-group processing in the next step needs the data ordered by dept. */
proc sort data=WORK.SOURCE;
by dept ID;
run;
/* DATA step view that adds to every row the name of the dataset it belongs in: */
/* WORK.<dept><table>, where table starts at 1 for each dept and advances       */
/* after every 3 rows.                                                          */
data WORK.SOURCE_PREP / view=WORK.SOURCE_PREP;
set WORK.SOURCE;
by dept;
length table_name $32;
/* restart both counters at the first row of each dept */
if first.dept then do;
count = 1;
table = 1;
end;
else count + 1;
/* fourth row of the current table: start the next table */
if count > 3 then do;
count = 1;
table + 1;
end;
/* variable TABLE_NAME to hold table name */
TABLE_NAME = catt('WORK.', dept, put(table, 3. -L));
run;
/* prepare list of tables */
proc sql noprint;
create table table_list as
select distinct TABLE_NAME from WORK.SOURCE_PREP where not missing(table_name)
;
/* number of distinct table names created above */
%let table_cnt=&sqlobs;
/* all names in one macro variable (for the DATA statement) ... */
select table_name into :table_list separated by ' ' from table_list;
/* ... and one macro variable per name, tab1..tabN (for the WHEN clauses) */
select table_name into :tab1 - :tab&table_cnt from table_list;
quit;
%put &table_list;
/* LOOP_WHEN: generate one WHEN clause per table for use inside a SELECT group: */
/*   when ("<table name>") output <table name>;                                 */
/*   cnt  number of tables                                                      */
/*   var  prefix of the macro variables &var.1..&var.cnt holding the names      */
%macro loop_when(cnt, var);
/* keep the loop index local so a caller's &i is not overwritten */
%local i;
%do i=1 %to &cnt;
when ("&&&var.&i") output &&&var.&i;
%end;
%mend;
/* Create every dataset named in &table_list in a single pass over the data: */
/* each row is written to the dataset whose name matches its TABLE_NAME.     */
data &table_list;
set WORK.SOURCE_PREP;
select (TABLE_NAME);
/* generate OUTPUT statements */
%loop_when(&table_cnt, tab)
end;
run;
You could try this:
/* SPLIT: write the rows of &inds to datasets dept<dept><k>, each holding at */
/* most &maxobs rows of one dept (k = 1, 2, ... within the dept).            */
/*   inds=    input dataset with a column dept; the BY statement needs it    */
/*            sorted by dept                                                 */
/*   maxobs=  maximum number of rows per output dataset                      */
%macro split(inds=,maxobs=);
/* keep the loop indexes and the group count out of the caller's symbol table */
%local i j k numdept;
proc sql noprint;
/* dept1, dept2, ...: the distinct dept values in sorted order */
select distinct dept into :dept1-:dept9999
from &inds.
order by dept;
/* numds1, numds2, ...: datasets needed per dept, in the same order */
select ceil(count(*)/&maxobs.) into :numds1-:numds9999
from &inds.
group by dept
order by dept;
quit;
/* number of depts = rows returned by the last query */
%let numdept=&sqlobs;
/* DATA statement listing every output dataset: deptA1 deptA2 ... deptB1 ... */
data %do i=1 %to &numdept.;
%do j=1 %to &&numds&i;
dept&&dept&i&&j.
%end;
%end;;
set &inds.;
by dept;
/* counter = position of the row within its dept */
if first.dept then counter=0;
counter+1;
/* outer IF / ELSE IF chain picks the dept, the inner chain picks the */
/* k-th dataset from the row's position within the dept               */
%do i=1 %to &numdept.;
%if &i.=1 %then %do;
if
%end;
%else %do;
else if
%end;
dept="&&dept&i" then do;
%do k=1 %to &&numds&i.;
%if &k.=1 %then %do;
if
%end;
%else %do;
else if
%end;
counter<=&maxobs.*&k. then output dept&&dept&i&&k.;
%end;
end;
%end;
run;
%mend split;
%split(inds=YOUR_DATASET,maxobs=3);
Just replace the INDS parameter value in the %SPLIT macro call to the name of your input data set.