In T-SQL I used to be able to do the following:
-- Remove every table1 row whose rowid also appears in table2 with a different value.
DELETE t1
FROM table1 AS t1
INNER JOIN table2 AS t2
    ON t2.rowid = t1.rowid
   AND t2.value <> t1.value
I'd like to be able to do the same in SAS.
Taking the code above and wrapping it in proc sql; and quit; throws a syntax error.
Is the code below my only option?
/* Delete the table1 rows whose rowid also exists in table2 with a different
   value. A single correlated EXISTS replaces the scalar subquery plus IN test:
   - it reads table2 (the IN subquery previously named a dataset "table"),
   - it does not fail when table2 holds several rows for the same rowid. */
proc sql;
delete from table1 t1
where exists (
    select 1
    from table2 t2
    where t2.rowid = t1.rowid
      and t2.value <> t1.value
);
quit;
Thank you.
As you have probably figured out, delete is not very efficient.
If you have the disk space, I would recommend just creating a new table based on the inner join (the records you want), dropping table1, and renaming the result to table1.
/* Build two demo tables of &n rows each, keyed by rowid.
   table1.value is always rowid squared; table2.value is rowid squared for
   odd rowids and 0 for even rowids, so the even rows disagree. */
%let n=1000000;

data table1;
    do rowid = 1 to &n;
        value = rowid**2;
        output;
    end;
run;

data table2;
    do rowid = 1 to &n;
        if mod(rowid, 2) = 1 then value = rowid**2;
        else value = 0;
        output;
    end;
run;
/* Rebuild table1 without the rows the DELETE would have removed, then swap
   the rebuilt table into place.
   A plain inner join would also drop table1 rows that have no rowid match in
   table2, which the DELETE keeps; the left join plus filter retains them.
   Assumes rowid is unique and non-missing in table2 (true for the demo data);
   duplicate rowids in table2 would duplicate table1 rows here. */
proc sql noprint;
create table table1_new as
select a.*
from table1 as a
left join table2 as b
    on a.rowid = b.rowid
where b.rowid is missing      /* no counterpart in table2: keep */
   or a.value = b.value;      /* counterpart agrees: keep */
drop table table1;
quit;

/* Rename the rebuilt table so downstream code still sees work.table1. */
proc datasets lib=work nolist;
change table1_new = table1;
run;
quit;
Related
I was trying to calculate past average stock returns. I find using the following "data step" code is much better than using "proc sql" code.
The data step code:
/* %same(start=, end=)
   Adds two columns to the dataset SAME:
     same_<start>_<end> : mean of ret_dm over msf1 rows of the same stkcd and
                          the same month whose ym lies between &start and &end
                          units before the current row's ym
     sc_<start>_<end>   : number of non-missing ret_dm values in that mean
   start, end : bounds on a.ym - b.ym (units of ym are not visible here).
   The work table TEMP is created and dropped inside the macro. */
%macro same(start = ,end = );
/* Remove a leftover view named temp, if any, before temp is created as a table. */
proc sql;drop view temp;quit;
proc sql;
create table temp
as select distinct a.*, mean(b.ret_dm) as same_&start._&end, count(b.ret_dm) as sc_&start._&end
from msf1 as a left join msf1 as b
on a.stkcd = b.stkcd and &start <= a.ym - b.ym <= &end and a.month = b.month
group by a.stkcd,a.ym;
quit;
/* Attach the two new columns to SAME, matching on stkcd and ym. */
proc sql;
create table same
as select a.*, b.same_&start._&end, b.sc_&start._&end
from same as a left join temp as b
on a.stkcd = b.stkcd and a.ym = b.ym;
quit;
proc sql; drop table temp;quit;
%mend;
/* Seed SAME from msf, then add the columns for the 1..12 window.
   NOTE(review): the macro reads msf1 while SAME is seeded from msf - confirm
   that these are intended to be two different datasets. */
data same; set msf;run;
%same(start = 1, end = 12);
The proc sql code:
/* %MA_1
   For each p in 2..9 and each lag listed in the macro variable LAGLIST,
   computes per stock a column ma_<lag>_<p> (mean of prc over price&p rows
   whose count is 0..&lag behind the current row's count) and left-joins it
   onto MA_ALLPRC by stkcd and date.
   Reads: price&p, tradingdate, stock, and the macro variable LAGLIST.
   Writes: price&p (overwritten with count and ym added), msf, getname, temp,
   ma_<lag>_<p> (dropped again after the merge), ma_allprc.
   The log is sent to the PRINTTO destination "junk" around the noisy steps
   and restored before each %put. */
%macro MA_1;
%do p = 2 %to 9; *;
%put p &p;
proc printto log = junk ; run;
/* Add count and ym from tradingdate to price&p, matching on date. */
proc sql;
create table price&p
as select distinct a.*, b.count,b.ym
from price&p as a left join tradingdate as b
on a.date = b.date;
quit;
/* NOTE(review): this PROC SORT and the PROC APPEND further down are closed
   with QUIT; where RUN; is the conventional terminator - confirm intent. */
proc sort data = price&p; by stkcd ym date;quit;
/* msf keeps the last row of each stkcd/ym group of price&p. */
data msf;
set price&p;
by stkcd ym date;
if last.ym;
run;
proc printto; run;
/* Loop over the blank-separated lags in LAGLIST. */
%do j = 1 %to %sysfunc(countw(&laglist));
%let lag = %scan(&laglist,&j);
%put lag &lag;
/*********************************************/
/* Start from an empty accumulator table for this lag/p pair. */
proc sql; drop table ma_&lag._&p ;quit;
/* One pass per row of STOCK.
   NOTE(review): 2018 is presumably the number of rows in STOCK - confirm. */
%do i = 1 %to 2018; *;
proc printto log = junk ; run;
/* Copy stkcd from the &i-th row of STOCK into the macro variable stkcd. */
data getname;
set stock;
if _n_ = &i;
call symput('stkcd',stkcd);
run;
proc printto; run;
%put &i &stkcd;
proc printto log = junk ; run;
/* For this stock only: mean of prc over the rows whose count is between
   0 and &lag behind the current msf row. */
proc sql;
create table temp
as select distinct a.*, mean(b.prc) as ma_&lag._&p
from msf (where = (stkcd = "&stkcd" )) as a left join price&p (where = (stkcd = "&stkcd" )) as b
on a.stkcd = b.stkcd and 0 <= a.count - b.count <= &lag
group by a.stkcd, a.date
order by a.stkcd, a.date;
quit;
/* Stack this stock's result under the results of the previous stocks. */
proc append base = ma_&lag._&p data = temp force; quit;
proc printto; run;
%end;
/* Clear the log window between lags. */
dm "log; clear;";
/* Attach the finished ma_<lag>_<p> column to ma_allprc by stkcd and date. */
proc sql;
create table ma_allprc
as select a.*, b.ma_&lag._&p
from ma_allprc as a left join ma_&lag._&p as b
on a.stkcd = b.stkcd and a.date = b.date;
quit;
proc sql; drop table ma_&lag._&p;quit;
%end;
%end;
%mend;
/* Driver: define the lags, seed ma_allprc from msf, then run the macro.
   ma_allprc is seeded from msf as it exists BEFORE %ma_1 runs; the macro
   itself rebuilds msf for every p. */
%let laglist = 5 10 20 50 100 200 500 1000 2000; * ;
data ma_allprc; set msf;run;
%ma_1;
"Proc sql" is much slower than I thought. "Data step" takes about 3 hours, but "Proc sql" takes about 2 days.
I even have to loop over each stock when using proc sql, because otherwise it takes up too much memory. I have to say that using proc sql to calculate past averages is dumb, but currently I have no better ideas. :(
Does anybody have a solution for that?
I am working in SAS Enterprise guide and am running a proc sql query as follows:
/* Average number of apples for every (farm, size, type) combination that
   occurs in input_table. */
proc sql;
    create table average_apples as
    select
        farm,
        size,
        type,
        avg(apples) as average_apples
    from input_table
    group by farm, size, type;
quit;
For some of the data sets I am running this query on there are groups which have no observations assigned to them, so there is no entry for them in the query output.
How can I force this query to return a row for each of my groups (for example, with a value of 0 in the apples column)?
Thanks up front for the help!
I'd do this:
/* Sample input table: declares the four columns, then STOP ends the step
   before any row is written, leaving the table empty.
   Comment out the STOP statement to check the result for a non-empty
   input table. */
data input_table;
    length farm $3 size $3 type $3 apples 8;
    stop;
run;

/* Same aggregation as in the question. */
proc sql;
    create table average_apples as
    select farm, size, type, avg(apples) as average_apples
    from input_table
    group by farm, size, type;
quit;

/* Capture the row count of the query right away, before any later
   PROC SQL statement changes SQLOBS. */
%let group_rows = &SQLOBS;
%put &group_rows;
/* Build a one-row filler table when the query returned no groups, and an
   empty table otherwise, then append it to average_apples.
   Only covers the case where the whole result is empty; it does not add rows
   for individual group combinations that are absent. */
data average_apples_blank;
/* The SET statement (obs=0 reads no rows) appears before the ARRAY statement,
   so the columns of average_apples are already defined when _numeric_ is
   expanded. Keep this statement order. */
if &group_rows ne 0 then set average_apples(obs=0);
else do;
array zeros {*} _numeric_ /* or your list of variables */;
/* Set every numeric column collected above to 0; character columns stay blank. */
do i=1 to dim(zeros);
zeros[i] = 0;
end;
output; /* empty row */
end;
drop i;
run;
/* Appends zero rows when groups exist, or the single filler row when none do. */
proc append base=average_apples data=average_apples_blank force;
run;
Try this
/* Average apples for EVERY combination of the farm, size and type values that
   occur anywhere in input_table; combinations without observations get 0.
   Fixes relative to the first draft:
   - the type comparison in the ON clause was missing its "=" operator,
   - GROUP BY was missing, so the mean was not computed per combination,
   - explicit CROSS JOINs replace the comma list so that f, s and t are all
     in scope for the LEFT JOIN's ON clause. */
proc sql;
select
    f.farm,
    s.size,
    t.type,
    coalesce(mean(i.apples), 0) as average_apples
from (select distinct farm from input_table) as f
cross join (select distinct size from input_table) as s
cross join (select distinct type from input_table) as t
left join input_table as i
    on  i.farm = f.farm
    and i.size = s.size
    and i.type = t.type
group by f.farm, s.size, t.type;
quit;
I did not test it, though. If it does not work, put this in a comment and I will debug it.
I have three pieces of code. How can I combine them into one so that they look elegant? data1: pull data with some conditions; data2: left join data1 to new data; data3: read data2 and create a new variable.
/* data1: IDs present in dataA, dataB and dataC, restricted to known types and
   to Y/N decisions.
   Fixes: removed the trailing commas before FROM and WHERE, replaced the
   curly quotes around Y and N with straight quotes, and spelled the implicit
   comma joins as explicit inner joins (same rows). */
proc sql;
create table data1 as
select
    a.ID,
    b.decison_CD,
    c.type
from dataA a
inner join dataB b on a.ID = b.ID
inner join dataC c on a.ID = c.ID
where c.type not in ('Unknown')
  and b.decison_CD in ('Y', 'N')
;
quit;
/* data2: every data1 row, plus payId from datanew where the ID matches. */
proc sql;
    create table data2 as
    select
        base.*,
        pay.payId
    from data1 as base
    left join datanew as pay
        on base.ID = pay.ID;
quit;
/* data3: data2 plus a booked flag - 0 when payID is missing, 1 when payID
   is 1, left missing for any other payID. */
data data3;
    set data2;
    select;
        when (payID = .) booked = 0;
        when (payID = 1) booked = 1;
        otherwise;
    end;
run;
It looks like you can just use left joins and treat datanew as a fourth dataset:
/* Single-step version of data1 + data2 + data3.
   - dataB and dataC are INNER joined: the original WHERE-based join only kept
     IDs present in all three inputs after filtering, and a left join would
     also keep IDs without a Y/N decision or with an Unknown type.
   - datanew stays a LEFT join so IDs without a payment row are kept.
   - The filter column is spelled decison_CD, matching the SELECT list and the
     original query.
   - booked: 0 when payId is missing, 1 when payId is 1, missing otherwise. */
proc sql;
create table data1 as
select
    a.ID,
    b.decison_CD,
    c.type,
    d.payId,
    case
        when missing(d.payId) then 0
        when d.payId = 1 then 1
        else .
    end as booked
from dataA as a
inner join dataB (where = (decison_CD in ('Y','N'))) as b on a.id = b.id
inner join dataC (where = (type notin ('Unknown'))) as c on a.id = c.id
left join datanew as d on a.id = d.id;
quit;
I want to store the count distinct of each variable from a table in another. I wanted to use a loop for it, over the list of the variables. So first, I stored the variables names in "vars", doing this:
/* Collect the column names of HW.ORDERS into the macro variable VARS,
   separated by single blanks. */
proc sql;
    select name
        into :vars separated by ' '
    from dictionary.columns
    where libname = 'HW'
      and memname = "ORDERS";
quit;
Then, I created another list with the result of the count distinct with the following code:
/* %g()
   Counts the distinct values of the first three variables named in &vars
   (columns of hw.ORDERS) and stores the three counts, blank-separated, in the
   macro variable B.
   B is declared %GLOBAL: a %LET inside a macro otherwise creates a variable
   local to that macro, so &b would no longer exist once %g has finished and
   any later "&b" reference would stay unresolved. */
%macro g();
    %global b;
    %local i a;   /* loop helpers stay private to the macro */
    %let b=;
    %do i = 1 %to 3;
        %let a=%scan(&vars,&i);
        proc sql;
            select count(distinct &a)
                into :gaby from hw.ORDERS;
        quit;
        /* %LET strips the leading blanks that INTO leaves in &gaby. */
        %let b=&b &gaby;
    %end;
    %put &b;
%mend g;
%g();
After this, I wanted to add both to a table, but I can add the vars variable but not the b variable.
/* Build table A with one row per variable: position (i, timept), variable
   name, and the matching distinct count taken from &b. */
data a;
/* Run-time copies of the macro variables VARS and B into LISTA and LISTA1. */
call symput('lista', symget('vars'));
call symput('lista1', symget('b'));
do i=1 to 3;
timept=i;
/* "&vars" and "&b" are substituted when the step is compiled, so both macro
   variables must already exist in a scope visible here. If &b cannot be
   resolved, the literal text is scanned instead of the counts. */
variable=scan("&vars",i);
/* dist is a character value, because SCAN returns text. */
dist=scan("&b",i);
output;
end;
run;
The table correctly shows the names of the variables, but instead of showing the distinct counts (which were stored in b) it shows the letter "b".
Is there a way to do this? Also, is there an easier way to do it?
Thanks!!!!!!!!!!
You're pretty close. I would just use a single SQL pass and create an output table directly. If you want it in a column form, then use PROC TRANSPOSE.
/* Collect the column names of SASHELP.SHOES into the macro variable VARS,
   separated by single blanks, and echo the list to the log. */
proc sql noprint;
    select name
        into :vars separated by ' '
    from dictionary.columns
    where libname = 'SASHELP'
      and memname = "SHOES";
quit;

%put &vars;
/* %create_table
   Generates one SELECT that computes count(distinct <var>) for every variable
   named in &vars and stores the result as the one-row table OUTPUT, with one
   column per variable (column name = variable name). Reads sashelp.shoes. */
%macro create_table();
    %local i n var;
    %let n = %sysfunc(countw(&vars));

    proc sql noprint;
        create table output as
        select
        %do i = 1 %to &n;
            %let var = %scan(&vars, &i);
            /* a comma goes in front of every item except the first */
            %if &i > 1 %then ,;
            count(distinct &var) as &var
        %end;
        from sashelp.shoes;
    quit;
%mend;

%create_table;
/* Turn the one-row OUTPUT table into WANT: one row per variable, with the
   variable name in VARIABLE and its distinct count in DIST. */
proc transpose
    data=output
    out=want(rename=(_NAME_=variable COL1=Dist));
run;
I have one column of data and the column is named (Daily_Mileage). I have 15 different types of daily mileages and 250 rows. I want a separate count for each of the 15 daily mileages. I am using PROC SQL in SAS and it does not like the Cross join command. I am not really sure what I should do but this is what I started:
/* One row with two counts: A for 'Farm Utility Vehicle (Class 7)' and B for
   'Farm Truck Light (Class 35)'.
   PROC SQL accepts CROSS JOIN; the syntax error came from the unmatched ")"
   at the end of the statement, which is removed here. The inline views are
   aliased u and t so they are not confused with the columns A and B. */
PROC SQL;
select u.A, t.B
From (select count(Daily_Mileage) as A from Work.full where Daily_Mileage = 'Farm Utility Vehicle (Class 7)') as u
cross join (select count(Daily_Mileage) as B from Work.full where Daily_Mileage = 'Farm Truck Light (Class 35)') as t;
QUIT;
Use case statements to define your counts as below.
/* One-row table SUBMIT with a count per mileage class.
   Each comparison evaluates to 1 (true) or 0 (false), so summing it counts
   the matching rows of Work.full. */
proc sql;
    create table submit as
    select
        sum(Daily_Mileage = 'Farm Utility Vehicle (Class 7)') as A,
        sum(Daily_Mileage = 'Farm Truck Light (Class 35)') as B
    from Work.full;
quit;
Can't you just use a proc freq?
/* Seven-row sample: one 5-character Daily_Mileages value per data line. */
data example;
    input Daily_Mileages $5.;
    datalines;
TYPE1
TYPE1
TYPE2
TYPE3
TYPE3
TYPE3
TYPE3
;
run;
/* Print the frequency of each Daily_Mileages value. */
proc freq data=example;
    tables Daily_Mileages;
run;

/* Same tabulation, additionally written to the dataset F_EXAMPLE. */
proc freq data=example;
    tables Daily_Mileages / out=f_example;
run;
You can first create another column of ones, then SUM that column and GROUP BY Daily_Mileage. Let me know if I'm misunderstanding your question.
/* TAB1: one row per Daily_Mileage value with the number of rows in that group.
   - Sum is computed as SUM(1): a column alias defined in the same SELECT
     (Count) cannot be referenced by its bare name inside SUM().
   - Count is kept as the constant column of ones so the output columns
     are unchanged.
   - Work.full is the table named in the question; substitute your own table
     if the data lives elsewhere. */
PROC SQL;
CREATE TABLE tab1 AS
SELECT
    Daily_Mileage,
    1 AS Count,
    SUM(1) AS Sum
FROM Work.full
GROUP BY Daily_Mileage;
QUIT;