Extract string from a large string oracle regexp - regex

I have String as below.
select b.col1,a.col2,lower(a.col3) from table1 a inner join table2 b on a.col = b.col and a.col = b.col
inner join (select col1, col2, col3,col4 from tablename ) c on a.col1=b.col2
where
a.col = 'value'
The output needs to be table1, table2 and tablename from the above string. Please let me know the regex to get this result.

Should be a simple one :-)
-- Lists every word in STR that starts with "table" as one ' , '-separated row, in order of appearance.
SQL> WITH DATA AS(
2 select q'[select b.col1,a.col2,lower(a.col3) from table1 a inner join table2 b on
3 a.col = b.col and a.col = b.col inner join (select col1, col2, col3,col4 from tablename )
4 c on a.col1=b.col2 where a.col = 'value']' str
5 FROM DUAL)
6 SELECT LISTAGG(TABLE_NAMES, ' , ') WITHIN GROUP (
7 ORDER BY val) table_names --> order of appearance in STR
8 FROM
9 (SELECT level val, --> match number, used to order the aggregated list
10 regexp_substr(str,'table[[:alnum:]]+',1,level) table_names
11 FROM DATA
12 CONNECT BY level <= regexp_count(str,'table[[:alnum:]]+') --> one row per match of the extracted pattern
13 )
14 /
TABLE_NAMES
--------------------------------------------------------------------------------
table1 , table2 , tablename
SQL>
Brief explanation, so that OP/even others might find it useful :
REGEXP_SUBSTR looks for words that start with 'table' followed by one or
more alphanumeric characters, such as 1, 2 or name.
To find all such words, I used connect by level technique, but it
gives the output in different rows.
Finally, to put them in a single row as comma separated values, I
used LISTAGG.
Oh yes, and that q'[]' is the string literal technique.

Related

BigQuery compare all the columns(100+) from two rows in a single table

I have input table as below-
id
col1
col2
time
01
abc
001
12:00
01
def
002
12:10
Required output table-
id
col1
col2
time
diff_field
01
abc
001
12:00
null
01
def
002
12:10
col1,col2
I need to compare both rows, find all the columns whose values differ, and keep those column names in a new column, diff_field.
I need an optimized solution for this, as my table has more than 100 columns (all of which need to be compared).
You might consider below approach:
-- For each row, lists the columns whose value differs from the previous row
-- of the same id (rows ordered by time). Column names are built as 'col' || position.
WITH sample_table AS (
  SELECT '01' id, 'abc' col1, '001' col2, '12:00' time UNION ALL
  SELECT '01' id, 'def' col1, '002' col2, '12:10' time UNION ALL
  SELECT '01' id, 'def' col1, '002' col2, '12:20' time UNION ALL
  SELECT '01' id, 'ddf' col1, '002' col2, '12:30' time
)
SELECT * EXCEPT(curr, prev),
  -- Compare the two row renderings element by element, matched on position.
  -- ORDER BY offset keeps the reported names in column order.
  (SELECT STRING_AGG('col' || offset ORDER BY offset)
   FROM UNNEST(SPLIT(curr)) c WITH offset
   JOIN UNNEST(SPLIT(prev)) p WITH offset USING (offset)
   -- The last element is the time column here, which is left out of the comparison.
   WHERE c <> p AND offset < ARRAY_LENGTH(SPLIT(curr)) - 1
  ) diff_field
FROM (
  -- curr is the whole row formatted as one string; prev is the same for the preceding row.
  SELECT *, FORMAT('%t', t) AS curr, LAG(FORMAT('%t', t)) OVER w AS prev
  FROM sample_table t
  WINDOW w AS (PARTITION BY id ORDER BY time)
);
Query results
The approach below has no dependency on the actual column names or on any naming convention, other than id and time
-- Returns the top-level keys of the JSON object encoded in `input`.
create temp function extract_keys(input string) returns array<string> language js as """
return Object.keys(JSON.parse(input));
""";
-- Returns the top-level values of the JSON object encoded in `input`.
create temp function extract_values(input string) returns array<string> language js as """
return Object.values(JSON.parse(input));
""";
-- For each row, collects the names of the columns whose value differs from the
-- previous row of the same id (ordered by time); the time column itself is ignored.
select t.*,
-- Keys and values are matched up by array position (offset).
( select string_agg(col)
from unnest(extract_keys(cur)) as col with offset
join unnest(extract_values(cur)) as cur_val with offset using(offset)
join unnest(extract_values(prev)) as prev_val with offset using(offset)
where cur_val != prev_val and col != 'time'
) as diff_field
from (
-- cur/prev are JSON renderings of the current and the preceding row; the first row
-- of each id has no predecessor, so it is compared with itself and no difference is found.
select t, to_json_string(t) cur, to_json_string(ifnull(lag(t) over(win), t)) prev
from your_table t
window win as (partition by id order by time)
)
if apply to sample data in your question (or rather extended version of it that I borrowed from Jaytiger answer) - the output is

How to extract digit from string in SQL?

I have a string which has at least one digit per bracket. Now, I want to extract the digit(s). How do I do this in Redshift sql?
ColumnA ColumnB (output)
(,,,3,) 3
(2,,,) 2
(,,,1) 1
(1,,,3) 13
You could use REGEXP_REPLACE. Here's a snippet:
-- Sample data: one bracketed, comma-separated string per row.
CREATE TABLE x (col1 varchar(255));
INSERT INTO x (col1) VALUES ('(,,,3,)'),('(2,,,)'),('(,,,1)'),('(1,,,3)');
-- Remove every non-digit character so that only the digits remain.
-- NOTE(review): the trailing 'g' (replace all matches) flag is PostgreSQL syntax;
-- Amazon Redshift's REGEXP_REPLACE appears to take a position as its 4th argument
-- and to replace all matches by default -- confirm against the target engine.
select col1,
regexp_replace(col1,'[^\d]','','g') as col2
from x;
col1
col2
(,,,3,)
3
(2,,,)
2
(,,,1)
1
(1,,,3)
13
Try it in SQLFiddle
Jakob's answer would work. You can also do the same thing with REPLACE:
-- Sample data: one bracketed, comma-separated string per row.
CREATE TABLE x (col1 varchar(255));
INSERT INTO x (col1) VALUES ('(,,,3,)'),('(2,,,)'),('(,,,1)'),('(1,,,3)');
-- Strip the three punctuation characters one at a time:
-- commas first, then the closing bracket, then the opening bracket.
SELECT REPLACE(
REPLACE(
REPLACE(
col1, ',', ''
) ,')', ''
), '(', ''
) FROM x;
replace
3
2
1
13
SQLFiddle

Oracle - split the string by comma and get the last sub-str

I wanted to write an Oracle query to extract only the last sub-string of comma separated string like below:
DEST = "1,MMA SALAI,ARIANKUPAM,CITY CENTRE,G12 47H"
I am interested in only G12. How do I get in the Oracle query?
Thanks
Try
REGEXP_SUBSTR('1,MMA SALAI,ARIANKUPAM,CITY CENTRE,G12 47H', '[^,]+$')
But that will fetch G12 47H. You may consider
REGEXP_SUBSTR('1,MMA SALAI,ARIANKUPAM,CITY CENTRE,G12 47H', '([^, ]+)( +[^,]*)?$', 1,1,NULL,1)
This will give G12.
A little bit of substringing (see comments within the code):
SQL> with test (dest) as
2 (select '1,MMA SALAI,ARIANKUPAM,CITY CENTRE,G12 47H' from dual)
3 select
4 regexp_substr(dest, --> out of the DEST, give me ...
5 '\w+', --> ... the first word that begins right after ...
6 instr(dest, ',', -1) + 1 --> ... position of the last comma (searched backwards from
7 ) result --> the end of the source string); 1 when DEST has no comma
8 from test;
RESULT
--------------------
G12
SQL>
Or, by splitting the comma-separated values into rows:
SQL> with test (dest) as
2 (select '1,MMA SALAI,ARIANKUPAM,CITY CENTRE,G12 47H' from dual)
3 select regexp_substr(col, '\w+') result --> first word of the last element
4 from (select regexp_substr(dest, '(.*?)(,|$)', 1, level, null, 1) col, --> split column to rows, keeping empty elements in place
5 row_number() over (order by level desc) rn --> the last row will be RN = 1
6 from test
7 connect by level <= regexp_count(dest, ',') + 1 --> one row per element (commas + 1)
8 )
9 where rn = 1;
RESULT
--------------------
G12
SQL>

SELECT MAX PARTITION TABLE

I have a table with partition on date(transaction_time), And I have a
problem with a select MAX.
I'm trying to get the row with the highest timestamp if I get more than 1 row in the result for one ID.
Example of data:
1. ID = 1 , Transaction_time = "2018-12-10 12:00:00"
2. ID = 1 , Transaction_time = "2018-12-09 12:00:00"
3. ID = 2 , Transaction_time = "2018-12-10 12:00:00"
4. ID = 2 , Transaction_time = "2018-12-09 12:00:00"
Result that I want:
1. ID = 1 , Transaction_time = "2018-12-10 12:00:00"
2. ID = 2 , Transaction_time = "2018-12-10 12:00:00"
This is my query
SELECT ID, TRANSACTION_TIME FROM `table1` AS T1
WHERE TRANSACTION_TIME = (SELECT MAX(TRANSACTION_TIME)
FROM `table1` AS T2
WHERE T2.ID = T1.ID )
The error I receive:
Error: Cannot query over table 'table1' without a filter over
column(s) 'TRANSACTION_TIME' that can be used for partition
elimination
It looks like BigQuery does not accept the correlated subquery in the WHERE clause. I don't know how to fix your current approach, but you might be able to just use ROW_NUMBER here:
SELECT t.ID, t.TRANSACTION_TIME
FROM
(
SELECT ID, TRANSACTION_TIME,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY TRANSACTION_TIME DESC) rn
FROM table1
) t
WHERE rn = 1;
can be done this way:
SELECT id, MAX(transaction_time) FROM `table1` GROUP BY id;

I want to know how to execute CONNECT BY REGEXP in Google Big query

I have following statement in oracle sql I want to run this in Google Big Query.
CONNECT BY REGEXP_SUBSTR(VALUE, '[^,]+', 1, LEVEL) IS NOT NULL)
How can I run above code in Big query?
I am guessing here - but usually this construct is used for so called string decomposition
So, in BigQuery you can use SPLIT(value) or REGEXP_EXTRACT_ALL(value, r'[^,]+') for this as in below examples
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, '1,2,3,4,5,6,7' AS value UNION ALL
SELECT 2, 'a,b,c,d'
)
SELECT id, SPLIT(value) value
FROM `project.dataset.table`
or
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, '1,2,3,4,5,6,7' AS value UNION ALL
SELECT 2, 'a,b,c,d'
)
SELECT id, REGEXP_EXTRACT_ALL(value, r'[^,]+') value
FROM `project.dataset.table`
Both of the above queries return
Row id value
1 1 1
2
3
4
5
6
7
2 2 a
b
c
d
Here, as you can see - value in each row gets split into array of elements but still in the same row
To flatten result you can further use UNNEST() as in below examples
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, '1,2,3,4,5,6,7' AS value UNION ALL
SELECT 2, 'a,b,c,d'
)
SELECT id, value
FROM `project.dataset.table`,
UNNEST(SPLIT(value)) value
or
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, '1,2,3,4,5,6,7' AS value UNION ALL
SELECT 2, 'a,b,c,d'
)
SELECT id, value
FROM `project.dataset.table`,
UNNEST(REGEXP_EXTRACT_ALL(value, r'[^,]+')) value
both return below result (with all extracted elements in separate row)
Row id value
1 1 1
2 1 2
3 1 3
4 1 4
5 1 5
6 1 6
7 1 7
8 2 a
9 2 b
10 2 c
11 2 d