PL/SQL Regular expression not working - regex

I have the following code:
-- Anonymous block that runs a regular expression against a CLOB and reports
-- whether a match was found.
declare
l_input clob;
l_output clob;
-- Looks for a double-quoted run of zero or more 'y' characters in io_str.
--   io_str  : text to search (declared IN OUT but only read here)
--   o_found : receives the matched text when the function returns TRUE
-- Returns FALSE when the regexp_substr result tests as NULL, TRUE otherwise.
function check_this_regex(
io_str in out clob
,o_found out clob
) return boolean
is
l_match clob;
begin
dbms_output.put_line('Matching against ->' || io_str || '<-');
l_match := regexp_substr(io_str, '"((y)*)"');
-- NOTE(review): this IS NULL test is the crux of the question. With a CLOB
-- argument the no-match result is reported to be an empty CLOB rather than
-- NULL, in which case this branch is skipped -- confirm on your Oracle version.
if l_match is null then
return false;
end if;
o_found := l_match;
return true;
end;
begin
-- 'x' contains no double quote, so the pattern above cannot match it.
l_input := to_clob('x');
dbms_output.put_line('l_input->' || l_input || '<-');
if (check_this_regex(l_input, l_output)) then
dbms_output.put_line('Found: ' || l_output);
else
dbms_output.put_line('Not found');
end if;
end;
Why does this output Found?

The problem is most likely the check of a CLOB against NULL: when nothing matches, regexp_substr on a CLOB gives back an empty CLOB rather than NULL. Editing your check this way
if l_match /* is null */ = empty_clob() then
gives :
l_input->x<-
Matching against ->x<-
Not found

For a CLOB argument, regexp_substr always returns a non-null value, even when there is no match. Check this example:
-- Side-by-side demonstration: the same non-matching regexp_substr call is made
-- once with CLOB operands and once with VARCHAR2 operands, and each result is
-- tested with IS NULL. Every branch prints its own message so the output shows
-- unambiguously which of the two results was NULL.
declare
  v_clob  clob;
  v_in    clob := 'a';
  v_str   varchar2(10);
  v_st_in varchar2(10) := 'a';
begin
  -- 'xx' does not occur in 'a', so neither call finds a match.
  v_clob := regexp_substr(v_in, 'xx');
  if v_clob is null then
    dbms_output.put_line('CLOB result is null');
  else
    dbms_output.put_line('CLOB result is NOT null');
  end if;

  v_str := regexp_substr(v_st_in, 'xx');
  if v_str is null then
    dbms_output.put_line('VARCHAR2 result is null');
  else
    dbms_output.put_line('VARCHAR2 result is NOT null');
  end if;
end;

Related

issue with string construction in plsql

I am having a string like
LEAST("col1", "col2") GREATEST("col1", "col2")
and from this string I want to build a resulting string like
col1 IN LEAST(v_col1, v_col2) AND col2 IN GREATEST(v_col1, v_col2)
where v_ is appended to the column name and they represent predefined variables.
Number of columns can be dynamic.
I have tried many options but I have not been able to get the desired result.
Please suggest an approach.
These are a few of the options I am trying, but they do not seem to be moving in the right direction.
-- Prints the first double-quoted name found in txt, without the quotes.
declare
  txt    varchar2(1000);
  anstxt varchar2(1000);
begin
  txt := 'LEAST("col1","col2")';
  -- REGEXP_SUBSTR is callable directly from PL/SQL, so no SELECT ... FROM dual
  -- round trip to the SQL engine is needed.
  -- Arguments: start at position 1, take the 1st occurrence, no match
  -- modifiers, and return capture group 1 (the text between the quotes).
  anstxt := regexp_substr(txt, '"([^"]*)"', 1, 1, null, 1);
  dbms_output.put_line(anstxt);
end;
-- Reads the index expressions recorded for one table, strips the double quotes
-- from them, and prints the combined text wrapped in ' IN (' ... ');' with
-- every space replaced by ' AND '.
DECLARE
  -- Fixed text placed before and after the assembled expression.
  c_prefix CONSTANT VARCHAR2(10) := ' IN (';
  c_suffix CONSTANT VARCHAR2(10) := ');';

  CURSOR expr_cur IS
    SELECT column_expression
    FROM all_ind_expressions
    WHERE table_owner = 'schemaname'
      AND table_name = 'mytable';

  TYPE expr_tab_t IS TABLE OF expr_cur%ROWTYPE;
  exprs     expr_tab_t;
  assembled VARCHAR2(1000);
  unquoted  VARCHAR2(1000);
BEGIN
  -- Load every matching expression in a single fetch.
  OPEN expr_cur;
  FETCH expr_cur BULK COLLECT INTO exprs;
  CLOSE expr_cur;

  FOR pos IN 1 .. exprs.COUNT LOOP
    unquoted := REPLACE(exprs(pos).column_expression, '"', '');
    IF exprs.COUNT > 1 THEN
      -- Several expressions: append each one and echo the raw text.
      assembled := assembled || unquoted;
      DBMS_OUTPUT.PUT_LINE(exprs(pos).column_expression);
    ELSE
      -- Exactly one expression: it becomes the whole text.
      assembled := unquoted;
    END IF;
  END LOOP;

  assembled := c_prefix || REPLACE(assembled, ' ', ' AND ') || c_suffix;
  DBMS_OUTPUT.PUT_LINE(assembled);
END;
Regards.
After many trials, I have finally got what I was looking for.
I am sharing this for the forum's reference.
-- Builds one text of the form
--   <name> IN (<expression with v_-prefixed names>) AND ... ;
-- from the index expressions recorded for one table, and prints it.
DECLARE
-- v_Var5: accumulated output text
-- v_Var6: current expression, quotes removed and names prefixed with 'v_'
-- v_Var9: '<name> IN (' prefix for the current expression
-- v_Var7, v_Var8 and v_Counter are assigned below but never read in this block.
v_Var5 varchar2(1000);
v_Var6 varchar2(1000);
v_Var7 varchar2(10);
v_Var8 varchar2(10);
v_Var9 varchar2(1000);
v_Counter NUMBER;
CURSOR c3 IS
SELECT column_expression FROM all_ind_expressions WHERE table_owner = 'myschema' AND table_name = 'mytable';
names_t c3%ROWTYPE;
TYPE names_ntt IS TABLE OF names_t%TYPE; -- must use type
l_names names_ntt;
BEGIN
v_Counter := 1;
v_Var7 := ' IN (';
v_Var8 := ');';
-- Fetch all expressions for the table into l_names in one go.
OPEN c3;
FETCH c3 BULK COLLECT INTO l_names;
CLOSE c3;
FOR indx IN 1..l_names.COUNT
LOOP
--v_Counter := l_names.COUNT;
IF(l_names.COUNT > 1) THEN
-- Start from the current expression with its double quotes stripped.
v_Var6 := REPLACE(l_names(indx).column_expression, '"', '');
-- For each row i, take the i-th quoted name of row i's expression and
-- prefix every occurrence of it in v_Var6 with 'v_'.
-- NOTE(review): this presumably relies on row i containing at least i quoted
-- names; otherwise REGEXP_SUBSTR yields NULL -- verify against real data.
FOR i IN 1..l_names.COUNT LOOP
v_Var6 := REPLACE(v_Var6, REGEXP_SUBSTR(l_names(i).column_expression,'"([^"]*)"',1,i,'',1),'v_' || REGEXP_SUBSTR(l_names(i).column_expression,'"([^"]*)"',1,i,'',1));
END LOOP;
-- The indx-th quoted name of the current expression becomes the left-hand side.
v_Var9 := REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1) || ' IN (';
-- Join the pieces with ') AND '; the last piece is closed with ');'.
CASE
WHEN indx < l_names.COUNT THEN
v_Var5 := v_Var5 || v_Var9 || v_Var6 || ') AND ';
WHEN indx = l_names.COUNT THEN
v_Var5 := v_Var5 || v_Var9 || v_Var6 || ');';
END CASE;
ELSE
-- Single expression: same transformation, but no parentheses or terminator
-- are added around it.
v_Var6 := REPLACE(l_names(indx).column_expression, '"', '');
v_Var6 := REPLACE(v_Var6, REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1),'v_' || REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1));
v_Var9 := REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1) || ' IN ';
v_Var5 := v_Var5 || v_Var9 || v_Var6;
END IF;
END LOOP;
--DBMS_OUTPUT.PUT_LINE('v_Var9' || CHR(13) || v_Var9);
--DBMS_OUTPUT.PUT_LINE('v_Var6' || CHR(13) ||v_Var6);
-- Print a label, a carriage return (CHR(13)) and the assembled text.
DBMS_OUTPUT.PUT_LINE('v_Var5' || CHR(13) ||v_Var5);
END;

How to use REGEXP_LIKE in trigger's When condition?

-- Row-level trigger on emp that sets commission_pct before an insert or an
-- update of salary.
-- NOTE(review): this is the trigger as asked about. The WHEN clause below is
-- wrapped in backticks, has no enclosing parentheses and writes :new with a
-- colon, so it is not expected to compile as shown.
create or replace trigger emp_trig
before insert or update of salary on emp
for each row
when `REGEXP_LIKE(:new.job_id, 'ac*','i')` -- Here
BEGIN
-- Inserts get 0.20; other firings get 0.1 only when the old value was NULL.
IF inserting then
:new.commission_pct := 0.20;
elsif (:old.commission_pct is null) then
:new.commission_pct := 0.1;
END IF;
END;
-- Row-level trigger: before an insert, or an update of salary, on emp, set
-- commission_pct for rows whose job_id satisfies the WHEN condition.
-- In the WHEN clause the condition is parenthesised and the new row is
-- referenced as "new" without a leading colon.
-- The pattern 'ac*' is unanchored: it matches an 'a' followed by zero or more
-- 'c', case-insensitively because of the 'i' modifier.
CREATE OR REPLACE TRIGGER emp_trig
  BEFORE INSERT OR UPDATE OF salary ON emp
  FOR EACH ROW
  WHEN (REGEXP_LIKE(new.job_id, 'ac*','i'))
BEGIN
  CASE
    WHEN INSERTING THEN
      -- New rows always start at 0.20.
      :new.commission_pct := 0.20;
    WHEN :old.commission_pct IS NULL THEN
      -- Updates only fill in a commission that was missing.
      :new.commission_pct := 0.1;
    ELSE
      NULL;
  END CASE;
END;
/
Hey. If you are only trying to do a simple match, avoid using a regular
expression. Instead, use LIKE with your test condition. The snippet
below is a simple example that should meet your requirement. Hope
it helps.
-- Row-level trigger: before an insert, or an update of sal, on emp, set comm
-- for rows whose job contains the text TEST (plain LIKE, no regular expression).
CREATE OR REPLACE TRIGGER emp_trig
  BEFORE INSERT OR UPDATE OF sal ON emp
  FOR EACH ROW
  WHEN (new.job LIKE '%TEST%')
BEGIN
  IF NOT inserting THEN
    -- Update path: only fill in a commission that was missing.
    IF :old.comm IS NULL THEN
      :new.comm := 0.1;
    END IF;
  ELSE
    -- Insert path: new rows always start at 0.20.
    :new.comm := 0.20;
  END IF;
END;

Checking for a pure string using regexp_like

I need to check a "substring of the first 6 characters" of an input string for a pure string.
-- Tests the first six characters of p_str against a regular expression and
-- prints which of two messages applies.
declare
p_str varchar2(30) := 'ABCD1240';
l_result varchar2(20);
begin
-- The pattern '[[:alpha:]]' is unanchored and has no repetition count, so
-- REGEXP_LIKE is true as soon as ANY one of the six characters is a letter.
if REGEXP_LIKE(substr(p_str,1,6), '[[:alpha:]]') then
dbms_output.put_line('It is a pure string');
else
dbms_output.put_line('It is an alphanumeric');
end if;
end;
/
I can see that the first 6 characters of the string ABCD1240 are alphanumeric, as they contain 12.
But the output that is printed says otherwise.
Am I doing something wrong with the "alpha" class in regexp_like?
I thought alpha was supposed to match only letters and not digits.
Here, ABCD1240 should give me "alphanumeric" as output,
and ABCDXY90 should give "pure string".
Try this:
-- For each sample value, reports whether its first six characters are all
-- letters ('alpha') or not ('numeric').
declare
  l_res varchar2(100);
begin
  -- UNION ALL avoids a pointless de-duplication of three distinct literals;
  -- the explicit ORDER BY makes the output order deterministic.
  for i in (select 'abcdef123' val from dual union all
            select '123abc123' from dual union all
            select '123456abc' from dual
            order by val)
  loop
    -- '^' anchors at the start and '{6}' requires six consecutive letters.
    -- [[:alpha:]] is used rather than \D because \D ("not a digit") would also
    -- accept punctuation and spaces, which are not part of a pure string.
    if regexp_like(i.val, '^[[:alpha:]]{6}')
    then
      l_res := 'alpha';
    else
      l_res := 'numeric';
    end if;
    dbms_output.put_line(i.val || ' is ' || l_res);
  end loop;
end;
123456abc is numeric
123abc123 is numeric
abcdef123 is alpha

In Oracle what is the fastest way to limit the characters in a string?

I have quite a number of text fields I need to deal with. In order to process them the first thing I need to do is to normalize the set of characters I deal with. I need my output string to contain the following;
A-Z, 0-9 and space and I want all lower case converted to upper case.
So I use the following in pl/sql;
-- Normalises X in four steps:
--   1. inner REGEXP_REPLACE: every character other than 0-9, A-Z, a-z or a
--      space becomes a space;
--   2. outer REGEXP_REPLACE: '( )* ' matches each run of one or more spaces
--      and replaces it with a single space;
--   3. TRIM removes leading and trailing spaces;
--   4. UPPER converts the remaining letters to upper case.
X := UPPER(TRIM(REGEXP_REPLACE
(REGEXP_REPLACE(X, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
This is rather slow. What would be faster?
You can try this approach, which looks to be much faster based on some (very) loose tests. It's a function that's compiled natively:
-- clean_string: normalises free text to upper-case letters, digits and single
-- spaces.
--   in_string : text to clean
--   returns   : in_string with every character other than 0-9, A-Z, a-z
--               replaced by a space, runs of spaces collapsed to one space,
--               leading/trailing spaces trimmed and letters upper-cased.
--               A NULL or empty input yields NULL.
CREATE OR REPLACE FUNCTION clean_string(
    in_string IN VARCHAR2)
    RETURN VARCHAR2
AS
    -- Sized to the PL/SQL VARCHAR2 maximum so callers passing more than
    -- 4000 characters do not overflow the work buffer.
    out_string VARCHAR2(32767);
    in_length  NUMBER;
    cnt        NUMBER := 0;
    -- Declared with CHAR length semantics: SUBSTR returns one character, which
    -- may occupy several bytes and would not fit a byte-sized CHAR(1).
    in_char    CHAR(1 CHAR);
    out_char   CHAR(1 CHAR);
    dec_char   NUMBER;
    prev_space BOOLEAN := FALSE;
BEGIN
    -- LENGTH(NULL) is NULL, which makes the loop condition false straight away.
    in_length := LENGTH(in_string);
    WHILE cnt < in_length
    LOOP
        cnt := cnt + 1;
        in_char := SUBSTR(in_string, cnt, 1);
        dec_char := ASCII(in_char);
        -- Keep digits (48-57), upper case (65-90) and lower case (97-122);
        -- blank out everything else.
        IF (dec_char BETWEEN 48 AND 57)
           OR (dec_char BETWEEN 65 AND 90)
           OR (dec_char BETWEEN 97 AND 122)
        THEN
            out_char := in_char;
        ELSE
            out_char := ' ';
        END IF;
        -- Skip a space that directly follows another space.
        IF NOT (prev_space AND out_char = ' ') THEN
            out_string := out_string || out_char;
        END IF;
        prev_space := (out_char = ' ');
    END LOOP;
    RETURN TRIM(UPPER(out_string));
END;
/
-- Recompile the function as native code for this session's settings.
ALTER SESSION SET PLSQL_CODE_TYPE=NATIVE;
ALTER FUNCTION clean_string COMPILE;
And to test, I pulled 5 million rows from a table and cleaned some strings:
-- Timing harness: cleans six text columns of up to 5,000,000 rows in three
-- different ways and prints the elapsed time of each.
set serveroutput on
declare
-- sel_cur1 calls clean_string inside the SQL statement itself.
cursor sel_cur1 is
select name, clean_string(name) as cln_name,
address1, clean_string(address1) as cln_addr1,
address2, clean_string(address2) as cln_addr2,
city, clean_string(city) as cln_city,
state, clean_string(state) as cln_state,
postalcode, clean_string(postalcode) as cln_zip
from my_table
where rownum <= 5000000;
-- sel_cur2 fetches the raw columns only; cleaning happens in PL/SQL.
cursor sel_cur2 is
select name,
address1,
address2,
city,
state,
postalcode
from my_table
where rownum <= 5000000;
l_cnt integer := 0;
l_cln_name varchar2(100);
l_cln_addr1 varchar2(100);
l_cln_addr2 varchar2(100);
l_cln_city varchar2(100);
l_cln_state varchar2(100);
l_cln_zip varchar2(100);
l_interval interval day to second(4);
l_start timestamp;
l_end timestamp;
begin
-- Run 1: fetch raw rows and call clean_string from PL/SQL for each column.
l_start := systimestamp;
for rec in sel_cur2
loop
l_cnt := l_cnt + 1;
l_cln_name := clean_string(rec.name);
l_cln_addr1 := clean_string(rec.address1);
l_cln_addr2 := clean_string(rec.address2);
l_cln_city := clean_string(rec.city);
l_cln_state := clean_string(rec.state);
l_cln_zip := clean_string(rec.postalcode);
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Procedural approach timing: ' || l_interval);
-------------------------------------------------
-- Run 2: the query itself calls clean_string; the loop only counts rows.
l_cnt := 0;
l_start := systimestamp;
for rec in sel_cur1
loop
-- cleaning already done in SQL
l_cnt := l_cnt + 1;
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('SQL approach timing: ' || l_interval);
-------------------------------------------------
-- Run 3: fetch raw rows and apply the nested REGEXP_REPLACE expression.
l_cnt := 0;
l_start := systimestamp;
for rec in sel_cur2
loop
l_cnt := l_cnt + 1;
l_cln_name := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.name, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_addr1 := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.address1, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_addr2 := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.address2, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_city := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.city, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_state := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.state, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_zip := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.postalcode, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Existing approach timing: ' || l_interval);
end;
And the output was:
Procedural approach timing: +00 00:02:04.0320
SQL approach timing: +00 00:02:49.4326
Existing approach timing: +00 00:05:50.1607
Also, the native compilation seems to only help a procedural approach to the processing (rather than calling the function from a SQL query), but appears to be much faster than the regexp_replace solution. Hope that helps.
First, let me say that I am not really answering my own question, but I am accepting tbone's answer. The reason for providing this answer, is the comments don't let me post what I really want.
I created a function almost identical to tbone's with a couple of tweaks, got rid of the UPPER by changing how I handle the lower case range of characters, and changed numbers to binary_integers.
-- CLEAN_STRING: normalises free text to upper-case letters, digits and single
-- spaces.
--   IN_STRING : text to clean
--   returns   : IN_STRING with lower-case a-z converted to A-Z, every
--               character other than 0-9 / A-Z / a-z replaced by a space,
--               runs of spaces collapsed to one, and the result trimmed.
--               A NULL or empty input yields NULL.
FUNCTION CLEAN_STRING(IN_STRING IN VARCHAR2) RETURN VARCHAR2
AS
    OUT_STRING VARCHAR2(32767);
    IN_LENGTH  BINARY_INTEGER;
    CNT        BINARY_INTEGER := 0;
    -- CHAR length semantics: SUBSTR returns one character, which may occupy
    -- several bytes and would not fit a byte-sized CHAR(1).
    IN_CHAR    CHAR(1 CHAR);
    OUT_CHAR   CHAR(1 CHAR);
    -- NUMBER rather than BINARY_INTEGER: the ASCII() value of a multi-byte
    -- character may be larger than a 32-bit integer can hold.
    DEC_CHAR   NUMBER;
    PREV_SPACE BOOLEAN := FALSE;
BEGIN
    -- LENGTH(NULL) is NULL, which makes the loop condition false straight away.
    IN_LENGTH := LENGTH(IN_STRING);
    WHILE CNT < IN_LENGTH
    LOOP
        CNT := CNT + 1;
        IN_CHAR := SUBSTR(IN_STRING, CNT, 1);
        DEC_CHAR := ASCII(IN_CHAR);
        IF (DEC_CHAR >= 48 AND DEC_CHAR <= 57) OR
           (DEC_CHAR >= 65 AND DEC_CHAR <= 90)
        THEN
            -- Digit or upper-case letter: keep as is.
            OUT_CHAR := IN_CHAR;
        ELSIF (DEC_CHAR >= 97 AND DEC_CHAR <= 122)
        THEN
            -- Lower-case letter: the upper-case code is 32 lower.
            OUT_CHAR := CHR(DEC_CHAR - 32);
        ELSE
            -- Anything else is blanked out.
            OUT_CHAR := ' ';
        END IF;
        -- Skip a space that directly follows another space.
        IF NOT (PREV_SPACE AND OUT_CHAR = ' ')
        THEN
            OUT_STRING := OUT_STRING || OUT_CHAR;
        END IF;
        PREV_SPACE := (OUT_CHAR = ' ');
    END LOOP;
    RETURN TRIM(OUT_STRING);
END CLEAN_STRING;
I then created a simple test rig like tbone did, but I tested the three different routines against each other. First I verify that they all return the same results and then time each routine. Here is the test rig;
-- Test rig: first checks that three normalisation approaches (nested
-- REGEXP_REPLACE, CLEAN_STRING, TRANSLATE) give the same result on every row,
-- then times each approach separately over the same cursor.
set serveroutput on
DECLARE
CURSOR PATHMAST_CURS
IS
SELECT PATHMAST_TEXT_DIAGNOSIS FROM PATHMAST WHERE ROWNUM < 100000;
-- DUMMY holds the regexp result, DUMMY_1 the CLEAN_STRING result and
-- DUMMY_2 the TRANSLATE result.
DUMMY CLOB;
DUMMY_1 CLOB;
DUMMY_2 CLOB;
l_interval interval day to second(4);
l_start timestamp;
l_end timestamp;
diff_count_1 binary_integer := 0;
diff_count_2 binary_integer := 0;
BEGIN
-- Pass 1: compare results. NULL input is replaced by a single space first.
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '), '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
DUMMY_1 := pathmast_utility_3.CLEAN_STRING(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '));
-- NOTE(review): the TRANSLATE "to" string is much shorter than the "from"
-- string; characters without a counterpart are removed rather than turned
-- into a space. The "from" list also contains '#' twice and no '@' -- confirm
-- this is intended.
DUMMY_2 := regexp_replace(trim(translate(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '),'abcdefghijklmnopqrstuvwxyz`~!##$%^&*()''_+-={[}]|/\":;,.<>?µ’±€'||chr(9),'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')),'( )* ',' ');
IF DUMMY_1 != DUMMY
THEN
diff_count_1 := diff_count_1 + 1;
END IF;
IF DUMMY_2 != DUMMY
THEN
diff_count_2 := diff_count_2 + 1;
dbms_output.put_line('Regexp: ' || DUMMY);
dbms_output.put_line('Translate: ' || DUMMY_2);
END IF;
END LOOP;
dbms_output.put_line('CLEAN_STRING differences: ' || diff_count_1);
dbms_output.put_line('Translate differences: ' || diff_count_2);
-- Pass 2: time the nested REGEXP_REPLACE approach (no NVL on the input here).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(PATH_REC.PATHMAST_TEXT_DIAGNOSIS, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Regexp approach timing: ' || l_interval);
-------------------------------------------------
-- Pass 3: time the CLEAN_STRING function (no NVL on the input here).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := pathmast_utility_3.CLEAN_STRING(PATH_REC.PATHMAST_TEXT_DIAGNOSIS);
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('CLEAN_STRING approach timing: ' || l_interval);
-------------------------------------------------
-- Pass 4: time the TRANSLATE approach (input wrapped in NVL, as in pass 1).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := regexp_replace(trim(translate(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '),'abcdefghijklmnopqrstuvwxyz`~!##$%^&*()''_+-={[}]|/\":;,.<>?µ’±€'||chr(9),'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')),'( )* ',' ');
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('TRANSLATE approach timing: ' || l_interval);
-------------------------------------------------
END;
And here are the results;
anonymous block completed
CLEAN_STRING differences: 0
Translate differences: 0
Regexp approach timing: +00 00:00:52.9160
CLEAN_STRING approach timing: +00 00:00:05.5220
TRANSLATE approach timing: +00 00:00:13.4320
This is all without compiling native. So tbone is the big winner. Thank you tbone.
If for whatever reason you want/need to use the translate version, you should build the translate string programmatically in order to get all of the special characters.
Perhaps, you can use TRANSLATE instead of regex to remove special characters and convert lower case to upper case.
-- Normalises x with TRANSLATE instead of a character-class regex:
--   * a-z are mapped to A-Z;
--   * each listed punctuation character is mapped to a space;
--   * the result is trimmed and runs of two or more spaces become one.
-- Characters that are not listed (tabs, accented letters, ...) pass through
-- unchanged.
regexp_replace(
  trim(
    translate(x,
      -- 26 letters followed by 32 punctuation characters; '@' is included so
      -- that it is blanked like the others.
      'abcdefghijklmnopqrstuvwxyz`~!@#$%^&*()_+-={[}]|/\"'':;,.<>?',
      -- Padded with spaces to the same length (58): TRANSLATE removes any
      -- "from" character that has no counterpart here, which would glue
      -- neighbouring words together instead of separating them.
      rpad('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 58, ' ')
    )
  ),
  ' {2,}',
  ' '
)
I tried it on a table with 1000 rows and a column of random characters whose length varied between 1 and 4000.
It took around 35% less time. (I did not try it in PL/SQL.)

PL/SQL key-value String using Regex

I have a String stored in a table in the following key-value format: "Key1☺Value1☺Key2☺Value2☺KeyN☺ValueN☺".
Given a Key how can I extract the Value? Is regex the easiest way to handle this? I am new to PL/SQL as well as Regex.
In this case, I would use just a regular split and iterate through the resulting array.
/// <summary>
/// Returns the value that follows <paramref name="key"/> in a flat
/// "Key1{sep}Value1{sep}Key2{sep}Value2{sep}" string.
/// </summary>
/// <param name="keyValuePairedInput">Separator-delimited keys and values; one trailing separator is allowed.</param>
/// <param name="key">Key to look up (compared with ==).</param>
/// <param name="separator">Character that separates keys and values.</param>
/// <returns>The element immediately after the first matching key.</returns>
/// <exception cref="KeyWithoutValueException">The input holds an odd number of elements.</exception>
/// <exception cref="KeyNotFoundException">No key element equals <paramref name="key"/>.</exception>
public string GetValue(string keyValuePairedInput, string key, char separator)
{
    // The stored format ends with a separator. Left in place, Split would
    // append an empty element and make every well-formed input look odd-sized.
    var input = keyValuePairedInput;
    if (input.Length > 0 && input[input.Length - 1] == separator)
        input = input.Substring(0, input.Length - 1);

    var split = input.Split(separator);
    if (split.Length % 2 == 1)
        throw new KeyWithoutValueException();

    // Keys sit at even indexes, their values right after them.
    for (int i = 0; i < split.Length; i += 2)
    {
        if (split[i] == key)
            return split[i + 1];
    }
    throw new KeyNotFoundException();
}
(this was not compiled and is not pl/sql anyway, treat it as pseudocode ☺)
OK I hear your comment...
Making use of pl/sql functions, you might be able to use something like this:
-- Looks up the value stored after a key in a
-- 'Key1<sep>Value1<sep>Key2<sep>Value2<sep>' string.
--   :key : key to find
--   :sep : the one-character separator
-- Replace your_table with the table that holds keyValueStringField.
-- Limitation: a VALUE that happens to equal :key is indistinguishable from a
-- key in this purely textual search.
WITH located AS (
    -- A select list cannot refer to its own aliases, so the positions are
    -- computed here and consumed by the outer query.
    SELECT
        t.keyValueStringField,
        :key AS keyValue,
        -- Searching for <sep>key<sep> in <sep>||string matches whole keys only
        -- (Key1 does not match Key10); 0 means the key is absent.
        INSTR(:sep || t.keyValueStringField, :sep || :key || :sep) AS keyIndex,
        -- First character of the value: key start + key length + one separator.
        INSTR(:sep || t.keyValueStringField, :sep || :key || :sep)
            + LENGTH(:key) + 1 AS valueIndex
    FROM your_table t
)
SELECT
    located.keyValue,
    located.valueIndex,
    CASE
        WHEN located.keyIndex > 0 THEN
            SUBSTR(
                located.keyValueStringField,
                located.valueIndex,
                -- Appending :sep lets the last value terminate even when the
                -- string has no trailing separator.
                INSTR(located.keyValueStringField || :sep, :sep, located.valueIndex)
                    - located.valueIndex
            )
        ELSE NULL
    END AS value
FROM located
For this kind of string slicing and dicing in PL/SQL you will probably have to use regular expressions. Oracle has a number of regular expression functions you can use. The most commonly used one is REGEXP_LIKE which is very similar to the LIKE operator but does RegEx matching.
However you probably need to use REGEXP_INSTR to find the positions where the separators are then use the SUBSTR function to slice up the string at the matched positions. You could also consider using REGEXP_SUBSTR which does the RegEx matching and slicing in one step.
As an alternative to regular expressions...
Assuming you have an input such as this:
Key1,Value1|Key2,Value2|Key3,Value3
You could use some PL/SQL as shown below:
-- get_value_by_key: scans a 'key<kv>value<kvp>key<kv>value...' string one
-- character at a time and returns the value of the first pair whose key
-- equals p_key.
--   p_str           : the packed key/value text
--   p_key           : key to look up
--   p_kvp_separator : single character that ends a key/value pair
--   p_kv_separator  : single character between a key and its value
--   returns         : the value belonging to p_key
--   raises          : ORA-20001 'key not found!' when no pair has that key
--                     (including a NULL or empty p_str)
FUNCTION get_value_by_key
(
    p_str            VARCHAR2
  , p_key            VARCHAR2
  , p_kvp_separator  VARCHAR2
  , p_kv_separator   VARCHAR2
) RETURN VARCHAR2
AS
    v_key    VARCHAR2(32767);
    v_value  VARCHAR2(32767);
    -- 0 while collecting a key, 1 while collecting its value.
    v_which  NUMBER := 0;
    -- CHAR length semantics so a multi-byte character fits in the buffer.
    v_cur    VARCHAR2(1 CHAR);
BEGIN
    -- LENGTH(NULL) is NULL and a NULL loop bound raises VALUE_ERROR, so an
    -- empty input is reported as "not found" up front.
    IF p_str IS NULL
    THEN
        raise_application_error(-20001, 'key not found!');
    END IF;

    FOR i IN 1..LENGTH(p_str)
    LOOP
        v_cur := SUBSTR(p_str, i, 1);
        IF v_cur = p_kvp_separator
        THEN
            -- End of a pair: stop if it is the wanted one, else start over.
            IF v_key = p_key
            THEN
                EXIT;
            END IF;
            v_key   := '';
            v_value := '';
            v_which := 0;
        ELSIF v_cur = p_kv_separator
        THEN
            -- Key finished; following characters belong to the value.
            v_which := 1;
        ELSIF v_which = 0
        THEN
            v_key := v_key || v_cur;
        ELSE
            v_value := v_value || v_cur;
        END IF;
    END LOOP;

    -- Also covers a final pair that is not followed by a pair separator.
    IF v_key = p_key
    THEN
        RETURN v_value;
    END IF;
    raise_application_error(-20001, 'key not found!');
END;
To get the value for 'Key2' you could do this (assuming your function was in a package called test_pkg):
SELECT test_pkg.get_value_by_key('Key1,Value1|Key2,Value2|Key3,Value3','Key2','|',',') FROM dual