Nested While Loop in Redshift - amazon-web-services

Nested While Loop in Redshift - amazon-web-services

My Table_1 looks like this
Parent_Id Child_Id Product Prod_count
1000 1 A 1
1000 2 A+B 1
1000 3 A 1
1000 4 B+C 1
2000 1 A 1
2000 2 B+C 1
2000 3 C 1
2000 4 D 1
I am trying to write a nested loop in this procedure: the outer loop iterates over each Parent_Id, and because each parent has a different set of children, the inner loop has to read each child row for that parent.
I have tried this
-- sp_dummy: intended (per the accompanying description) to walk table_1 with a
-- nested loop: outer loop per parent_id, inner loop per child row of that parent.
-- The three IN parameters (var1, var2, var3) are not referenced anywhere in the body.
create or replace procedure sp_dummy(IN var1 int, IN var2 int, IN var3 int) as $$
Begin
-- Work table pairing a running number (rw_num) with a parent_id so the outer
-- loop can look a parent up by position.
-- NOTE(review): row_number() is computed before DISTINCT is applied, so every
-- table_1 row gets its own rw_num and DISTINCT cannot collapse repeated
-- parent_id values; rw_num ranges over all rows, not over distinct parents.
create temp table find_id as(
select distinct parent_id,row_number() over(order by 1) as rw_num
from table_1
);
declare
-- Number of distinct parents; used as the outer loop bound.
tot_cnt int := (select count(distinct parent_id) from find_id );
init_loop int := 1;
-- NOTE(review): initialised once here and never reset inside the outer loop.
in_init_loop int := 1;
in_tot_init_loop int;
-- Declared without an initial value, so it is NULL until something assigns it.
v_parent_id int;
Begin
While init_loop <= tot_cnt
Loop
Raise info 'init_loop = %', Init_loop;
-- NOTE(review): this concatenates the *value* of v_parent_id (still NULL) into
-- the statement text; NULL || text is NULL, so EXECUTE receives a NULL string.
-- This matches the reported "Cannot Execute a Null Query string" error.
Execute 'Select parent_id into ' || v_parent_id || ' from find_id where rw_num = ' || Init_loop;
-- NOTE(review): v_patient_id is not declared in this block; the declared
-- variable is v_parent_id.
Raise info 'v_patient_id = %', v_patient_id;
-- NOTE(review): same pattern as above: in_tot_init_loop has no value yet, so
-- the concatenated statement text is NULL here as well.
Execute 'Select Count(*) into ' || in_tot_init_loop || ' from Table_1 where Parent_Id = ' || v_parent_id;
While in_init_loop <= in_tot_init_loop
Loop
Raise info 'in_init_loop = %', in_init_loop;
-- NOTE(review): the next statement has no terminating semicolon.
in_init_loop = in_init_loop + 1
End loop;
init_loop = init_loop + 1;
end loop;
End;
End;
$$ language plpgsql;
When I run this, I get the error "Cannot Execute a Null Query string".
I gave up trying to understand this error!! :(

This line seems problematic:
Execute 'Select parent_id into ' || v_parent_id || ' from find_id where rw_num = ' || Init_loop;
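v_parent_id has not been assigned yet, so it is NULL; concatenating NULL with || makes the entire query string NULL, which is exactly what the error reports. In effect you are asking for: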
Select parent_id into NULL from find_id where rw_num = 1;
I think you actually wanted to write:
SELECT INTO v_parent_id
parent_id
FROM find_id
WHERE rw_num = Init_loop;
Yes, you can actually put the SQL in-line, rather than having to pass it as a string to EXECUTE. Take a look at the examples in Structure of PL/pgSQL - Amazon Redshift.

Related

AWS Redshift Stored Procedure Abortions

Stored Procedure Abortions
I've got two stored procedures which run every 4 hours. I'm seeing odd behaviour where each of these procedures is aborted exactly as many times as it runs successfully (see table below). I'm using pg_catalog.svl_stored_proc_call to get the run status of the procedures.
When I look at pg_catalog.stl_load_errors I can't see any errors for the runs.
What's the best way to investigate this behaviour?
Code for datawarehouse.p_add_missing_tbls()
DECLARE
row RECORD;
row2 RECORD;
BEGIN
FOR row IN select * from
(
select distinct
concat(concat(lower(regexp_replace(d.service,'-','_')), '_'),
lower(regexp_replace(regexp_replace(d.entity_name,'::',''),'(.)([A-Z]+)','$1_$2'))) as "tbl_name",
concat(concat(concat(concat(lower(regexp_replace(d.service,'-','_')),'_'),lower(regexp_replace(regexp_replace(d.entity_name,'::',''),'(.)([A-Z]+)','$1_$2'))),'_'), d."key") as "key" ,
d.value_type,
case d.value_type
when '0' then 'varchar(32768)'
when '1' then 'numeric(20,10)'
when '2' then 'int(2)'
when '3' then 'timestamp'
when '4' then 'varchar(32768)'
end as "data_type"
from
datawarehouse.definitions d
where
d."key" not in ('id')
)
loop
--List of keys
-- select into row2 '\'' || listagg(distinct "key",'\',\'') || '\'' as keys from datawarehouse.definitions where concat(concat(lower(service),'_'),lower(entity_name)) = row.tbl_name;
-- select into row2 '\'' || listagg(distinct "key",'\',\'') || '\'' as keys from datawarehouse.definitions where concat(concat(lower(definitions.service),'_'),lower(definitions.entity_name)) = row.tbl_name;
select into row2 '\'' || listagg(distinct (concat(concat(concat(concat(lower(regexp_replace(service,'-','_')),'_'),lower(regexp_replace(regexp_replace(entity_name,'::',''),'(.)([A-Z]+)','$1_$2'))),'_'), "key")),'\',\'') || '\'' as keys from datawarehouse.definitions where concat(concat(lower(regexp_replace(definitions.service,'-','_')),'_'),lower(regexp_replace(regexp_replace(definitions.entity_name,'::',''),'(.)([A-Z]+)','$1_$2'))) = row.tbl_name and definitions."key" not in ('id');
--Delete staging tbl
execute 'drop table if exists staging.staging_'||row.tbl_name||';';
--Create staging tbl
EXECUTE 'create table staging.staging_'||row.tbl_name||' AS
select *
from (select
attribute_values.entity_id as '||row.tbl_name||'_id,
-- definitions."key",
concat(concat(concat(concat(lower(regexp_replace(definitions.service,''-'',''_'')),''_''),lower(regexp_replace(regexp_replace(definitions.entity_name,''::'',''''),''(.)([A-Z]+)'',''$1_$2''))),''_''), definitions."key")::varchar as "key",
concat(concat(lower(regexp_replace(definitions.service,''-'',''_'')),''_''),lower(regexp_replace(regexp_replace(definitions.entity_name,''::'',''''),''(.)([A-Z]+)'',''$1_$2''))) as "tbl_name",
attribute_values.updated_at as "updated_at",
attribute_values.destroyed_upstream as "deleted_upstream",
case definitions.value_type
when ''0'' then attribute_values.string_value::varchar
when ''1'' then attribute_values.number_value::varchar
when ''2'' then attribute_values.boolean_value::varchar
when ''3'' then attribute_values.datetime_value::varchar
when ''4'' then attribute_values.array_value::varchar
end as "final_value"
from
datawarehouse.attribute_values
left join
datawarehouse.definitions
on
definitions.id = attribute_values.definition_id
where
attribute_values.updated_at>= coalesce(((select max(updated_at) from datawarehouse.'|| row.tbl_name || ' )), (select min(av.updated_at) from datawarehouse.attribute_values av ))
and
tbl_name='''||row.tbl_name||'''
and
definitions."key" not in (''id'')
order by
entity_id desc)
PIVOT (max(final_value) for "key" in ( ' || row2.keys || ' )
);';
-- Drop staging.staging_col_info_v
-- execute 'drop view staging.staging_col_info_v;';
-- Create view
execute ' create or replace view staging.staging_col_info_v AS
with staging_tbl_info as (
select
d.table_schema ,
d.table_name ,
d.column_name as "column_name"
from
pg_catalog.svv_columns d
where
d.table_schema = ''staging''
and
d.table_name like ''staging_%''
),
tbl_info as (
select
col_name,
data_type
from
datawarehouse.tbl_col_v
)
select
*
from
staging_tbl_info s
inner join
tbl_info h
on
h.col_name = s.column_name;';
END LOOP;
RETURN;

How to use string as column name in Bigquery

There is a scenario where I receive a string to the bigquery function and need to use it as a column name.
here is the function
-- SQL UDF meant to return, from WORK.temp, the value of the column named by
-- `column` for the row whose rownumber equals `row_number`.
-- NOTE(review): `column` is a STRING argument, so `SELECT column` evaluates to
-- the argument's own text (the question reports getting "TXCAMP10" back), not
-- to the table column of that name.
CREATE OR REPLACE FUNCTION METADATA.GET_VALUE(column STRING, row_number int64) AS (
(SELECT column from WORK.temp WHERE rownumber = row_number)
);
When I call this function as select METADATA.GET_VALUE("TXCAMP10",149); I get the value as TXCAMP10 so we can say that it is processed as SELECT "TXCAMP10" from WORK.temp WHERE rownumber = 149 but I need it as SELECT TXCAMP10 from WORK.temp WHERE rownumber = 149 which will return some value from temp table lets suppose the value as A
so ultimately I need value A instead of column name i.e. TXCAMP10.
I tried using execute immediate like execute immediate("SELECT" || column || "from WORK.temp WHERE rownumber =" ||row_number) from this stack overflow post to resolve this issue but turns out I can't use it in a function.
How do I achieve required result?

I don't think you can achieve this result with the help of UDF in standard SQL in BigQuery.
But it is possible to do this with stored procedures in BigQuery and EXECUTE IMMEDIATE statement. Consider this code, which simulates the situation you have:
-- Demo fixture: a two-column table holding two rows.
CREATE OR REPLACE TABLE d1.temp (
  c1 INT64,
  c2 INT64
);

INSERT INTO d1.temp (c1, c2)
VALUES (1, 1), (2, 2);

-- Returns, through the OUT parameter `result`, the value of the column named
-- by `column` from the d1.temp row whose c2 equals `row_number`.
-- The column name is spliced into the statement text unvalidated, while the
-- row filter is bound positionally through USING.
CREATE OR REPLACE PROCEDURE d1.GET_VALUE(column STRING, row_number INT64, OUT result INT64)
BEGIN
  DECLARE lookup_sql STRING;
  SET lookup_sql = 'SELECT ' || column || ' from d1.temp where c2 = ?';
  EXECUTE IMMEDIATE lookup_sql INTO result USING row_number;
END;

-- Example call: read column c1 for the row where c2 = 1.
BEGIN
  DECLARE fetched_value INT64;
  CALL d1.GET_VALUE("c1", 1, fetched_value);
  SELECT fetched_value AS result_c1;
END;

After some research and trial-error methods, I used this workaround to solve this issue. It may not be the best solution when you have too many columns but it surely works.
-- Workaround UDF: returns the value of one of a fixed set of WORK.temp columns
-- (a..e), selected by the name passed in `column`, for the row whose rownumber
-- equals `row_number`.
-- The CASE branches test the `column` argument, which is the name this
-- function actually declares. All THEN branches must share a common data type.
-- A name outside the list falls through to NULL.
CREATE OR REPLACE FUNCTION METADATA.GET_VALUE(column STRING, row_number int64) AS (
(SELECT case
when column = 'a' then a
when column = 'b' then b
when column = 'c' then c
when column = 'd' then d
when column = 'e' then e
else null
end from WORK.temp WHERE rownumber = row_number)
);
And this gives the required results.
Point to note: all the columns used in the CASE expression must be of the same data type; otherwise it won't work.

Big query analytical function not giving expected results

I am trying to write a sql in bigquery and I have a requirement to filter records based on a group by column and another column in the table
What I mean is: if the group-by column (mnt) has more than one row in its group, I want to keep only the rows where col2 (zel) = 'X'; if a group has only a single row, that row should pass through unfiltered.
So I have written a sql to do this I have used row_number as well as rank , dense rank function but I noticed the value of rank and dense rank and row number functions return same value for a group
Please see the below code
#standardsql
with t1 as (SELECT mnt,
case when rank() over (partition by ltrim(rtrim(mnt)) order by
ltrim(rtrim(mnt)) asc) >1 then 'Y' else 'N' end
as flag,
rank() over (partition by mnt order by mnt) as rn,
dense_rank() over (partition by mnt order by mnt) as drn, FROM
projectname.datasetname.tablename1),
t2 as ( SELECT
mnt,
rel,
lif,
lts,
lokez FROM projectname.datasetname.tablename2
WHERE lts <> "" AND _PARTITIONTIME = TIMESTAMP(CURRENT_DATE()) ) ,
t3 as (SELECT
lif,
lifn,
lts,
par FROM `projectname.datasetname.tablename3`)
,t4 as (SELECT rcv FROM `projectname.datasetname.tablename4` WHERE mes
= 'PRO')
select * from (
SELECT t1.mnt as mnt,
t1.flag,
t1.rn,
t1.drn
t2.rel as zel,
t2.lokez as ZLOEKZ,
t4.rcv as Zrcv
FROM t1 left join t2 on replace(t1.mnt, '00000000', '') =
REPLACE(t2.mnt, '00000000', '') AND t1.lif = t2.lif and t2.lts <> ""
and
case when t1.flag = 'Y' and t2.rel ='X' then 1
when (t1.flag ='N' and t2.rel=t2.rel) or (t1.flag ='N' and t2.rel
is null) then 1
when t1.flag = 'Y' and t2.rel <>'X' then 2
else 3
end = 1
left join t3 ON t1.lif = t3.lif AND t2.lts = t3.lts AND
t3.par = 'BA' left join t4 on t4.rcv = t3.lifn and t2.lokez is null )
where ZLOEKZ is null order by mnt
As you can see I am using a case statement and even it seems to be not working fine. I am pasting the case condition below again
case when t1.flag = 'Y' and t2.rel ='X' then 1
when (t1.flag ='N' and t2.rel=t2.rel) or (t1.flag ='N' and
t2.rel
is null) then 1
when t1.flag = 'Y' and t2.rel <>'X' then 2
else 3
end = 1
But the expected record count did not match so I added the above sql lines to see if my analytical functions were giving me result I wanted
rank() over (partition by mnt order by mnt) as rn,
dense_rank() over (partition by mnt order by mnt) as drn
Strangely, for the same mnt value the rank, dense_rank and row_number functions all assign the same value. What am I doing wrong here?
mnt flag rn drn rel lokez rcv
100 N 1 1 X abc 123
100 N 1 1 null xyz 123
100 N 1 1 null def 234
This is my output
In other words, for rows sharing the same mnt, my code sets flag to N where I expected Y, and both rank and dense_rank return 1 for all three rows instead of 1, 2, 3. (I can understand that for rank, but I did not expect it from dense_rank.)
I tried to convey the issue as efficiently as I could please let me know if there is any clarifications I can provide.
any help appreciated
thanks

-- Keeps every row of a single-row mnt group, and only the zel = 'X' rows of
-- groups with more than one row. ct is the number of rows sharing the same mnt.
SELECT * EXCEPT(ct) FROM (
SELECT *, COUNT(*) OVER(PARTITION BY mnt) AS ct
-- joined_rows is a placeholder: substitute the joined result that exposes the
-- mnt and zel columns.
FROM joined_rows
) WHERE ct=1 or zel='X'
This is the code snippet for the problem you mentioned. Use this in your code according to the logic.

Pass a list/array in DB2 stored procedure

-- Clients that have exactly two customer_client rows, every one of which has a
-- customerid in the given list.
SELECT link.clientid
FROM customer_client AS link
GROUP BY link.clientid
HAVING COUNT(*) = 2
   AND SUM(CASE WHEN link.customerid IN (4567, 5678) THEN 1 ELSE 0 END) = COUNT(*);
I'm calling this query from a Db2 stored procedure, to which I have to pass the list of customer IDs. Any working suggestions?
I've tried passing it as below in procedure
CREATE PROCEDURE Find_Client_Customers (
IN IN_CUSTIDS VARCHAR(1000),
IN IN_CUST_COUNT INT)
but this is passing the list as a string.

You may use a string tokenizer:
-- Table function: splits `source` with the XQuery tokenize() function using
-- `pattern`, and returns one row per token with its ordinal position (seq) and
-- the token as BIGINT (tok). Tokens that are not castable to xs:long map to an
-- empty sequence for tok (presumably surfaced as NULL; confirm).
CREATE FUNCTION regexp_tokenize_number (
    source  VARCHAR(1024),
    pattern VARCHAR(128)
)
RETURNS TABLE (seq INT, tok BIGINT)
CONTAINS SQL
DETERMINISTIC
NO EXTERNAL ACTION
RETURN
    SELECT parts.seq, parts.tok
    FROM XMLTABLE(
        'for $id in tokenize($s, $p) return <i>{string($id)}</i>'
        PASSING
            source  AS "s",
            pattern AS "p"
        COLUMNS
            seq FOR ORDINALITY,
            tok BIGINT PATH 'if (. castable as xs:long) then xs:long(.) else ()'
    ) AS parts;

-- Usage example: tokenize a comma-separated list of numbers.
SELECT *
FROM TABLE(regexp_tokenize_number('123, 456', ',')) AS demo;
SEQ TOK
--- ---
1 123
2 456
In your case:
-- Same client lookup, with the customer-id list supplied as a delimited string
-- and expanded through regexp_tokenize_number.
SELECT link.clientid
FROM customer_client AS link
GROUP BY link.clientid
HAVING SUM(
           CASE
               WHEN link.customerid IN (
                   SELECT ids.tok
                   FROM TABLE(regexp_tokenize_number('4567, 5678', ',')) AS ids
               ) THEN 1
               ELSE 0
           END
       ) = COUNT(*)
   AND COUNT(*) = 2;

Convert query to doctrine DQL

I have a pretty big MySQL query; for performance optimization I'm adding subqueries inside the join statements. With raw SQL it all works fine. Here is the query:
-- Per-campaign report: campaign, owner and budget fields plus impression/bid
-- statistics and the list of creative ids, taken from a pre-aggregated
-- `creatives` derived table.
-- NOTE(review): several selected columns are neither aggregated nor listed in
-- the GROUP BY clauses below; this depends on MySQL accepting that
-- (ONLY_FULL_GROUP_BY disabled, or functional dependence on the grouped key).
-- Confirm before relying on the values returned.
SELECT
campaigns.id,
campaigns.name,
CONCAT(users.id, ' ', users.email) AS usersData,
CONCAT(campaigns.cpm, ' ', currencies.currency_code) AS cpm,
CONCAT(campaign_budgets.total_spend, ' ', currencies.currency_code) AS total_spend,
creatives.impressionsCount,
creatives.bidsCount,
creatives.winsAmount,
creatives.winsPercentage,
creatives.creativeIds
FROM campaigns
INNER JOIN users ON campaigns.user_id = users.id
INNER JOIN campaign_budgets ON campaigns.id = campaign_budgets.campaign_id
INNER JOIN currencies ON campaigns.currency_type_id = currencies.id
-- Derived table: one row per campaign_id carrying the concatenated creative
-- ids and the impression/bid figures joined in by user_id.
LEFT JOIN (
SELECT
GROUP_CONCAT(creatives.id) as creativeIds,
creatives.campaign_id,
creatives.user_id,
impressions.impressionsCount,
bids.bidsCount,
bids.winsAmount,
bids.winsPercentage
from creatives
-- Impression count per user_id.
-- NOTE(review): creative_id is selected but neither grouped nor aggregated, and
-- the join matches on user_id only. Confirm that per-user (not per-creative)
-- counts are intended.
LEFT JOIN (
SELECT
count(impressions.id) as impressionsCount,
impressions.user_id,
impressions.creative_id
from impressions
GROUP BY impressions.user_id
) as impressions ON creatives.user_id = impressions.user_id
-- Bid count, number of won bids and win percentage per user_id; the same
-- creative_id / user_id caveat as for impressions applies.
LEFT JOIN (
SELECT
count(bids.id) as bidsCount,
SUM(CASE WHEN bids.status = 'won' THEN 1 ELSE 0 END) AS winsAmount,
SUM(CASE WHEN bids.status = 'won' THEN 1 ELSE 0 END) / COUNT(bids.id) * 100 AS winsPercentage,
bids.user_id,
bids.creative_id
from bids
GROUP BY bids.user_id
) as bids ON creatives.user_id = bids.user_id
GROUP BY creatives.campaign_id
) as creatives ON campaigns.id = creatives.campaign_id
GROUP BY campaigns.id
and I need to convert it to Doctrine DQL somehow, if that is possible. I've run into an issue when adding a subquery to a join statement. Here is my code:
$columns = [
'campaign.id',
'campaign.name',
'CONCAT(owner.id,\' \', owner.email) as ownerEmail',
'CONCAT(campaign.cpm,\' \', currency.currencyCode) as cpm',
'CONCAT(budget.totalSpend,\' \', currency.currencyCode) as totalSpend',
'COUNT(imp.id) as impressionsCount',
'COUNT(bid.id) as totalBidsCount',
'SUM(case when bid.status = \'won\' then 1 else 0 end) as winsAmount',
'SUM(case when bid.status = \'won\' then 1 else 0 end)/COUNT(bid.id)*100 as winsPercentage',
];
$bids = $this->_em->getRepository(Bid::class)
->createQueryBuilder('bids')
->select([
'count(bids.id) as bidsCount',
'SUM(CASE WHEN bids.status = \'won\' THEN 1 ELSE 0 END) AS winsAmount',
'SUM(CASE WHEN bids.status = \'won\' THEN 1 ELSE 0 END) / COUNT(bids.id) * 100 AS winsPercentage',
'bids.userId',
'bids.creativeId'
])->getDQL();
$impressions = $this->_em->getRepository(Impression::class)
->createQueryBuilder('imp')
->select([
'count(imp.id) as impressionsCount',
'imp.userId',
'imp.creativeId'
])->getDQL();
$creative = $this->_em->getRepository(Creative::class) ->createQueryBuilder('cr')->select('cr.id')
->select([
'GROUP_CONCAT(cr.id) as creativeIds',
'cr.campaignId',
'cr.userId',
'impressions.impressionsCount',
'bids.bidsCount',
'bids.winsAmount',
'bids.winsPercentage'
])
->leftJoin(Impression::class, sprintf('(%s) as imp', $impressions), Expr\Join::WITH, 'imp.id = cr.userId')
->leftJoin(Bid::class, sprintf('(%s) as bid', $bids), Expr\Join::WITH, 'bids.id = cr.userId')
->getDQL();
$query = $this->createQueryBuilder('campaign')
->select($columns);
$query
->join('campaign.user', 'owner')
->join('campaign.campaignBudget', 'budget')
->join('campaign.currencyType', 'currency')
->leftJoin(Creative::class, sprintf('(%s) as creative', $creative), Expr\Join::WITH, 'campaign.id = cr.campaignId');
$query->groupBy('campaign.id');
$query->setMaxResults($limit);
$query->setFirstResult($offset);
return $query->getQuery()->useQueryCache(true)->getResult();
I'm getting the error [Syntax Error] line 0, col 626: Error: Expected Doctrine\ORM\Query\Lexer::T_IDENTIFIER, got '(', issue is in adding subquery to join statement.
I would appreciate any help!!

Okay I found a solution using DBAL instead of DQL using this as a reference

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Nested While Loop in Redshift - amazon-web-services

Related

AWS Redshift Stored Procedure Abortions

How to use string as column name in Bigquery

Big query analytical function not giving expected results

Pass a list/array in DB2 stored procedure

Convert query to doctrine DQL

Categories

Resources