issue with string construction in plsql - regex

I am having a string like
LEAST("col1", "col2") GREATEST("col1", "col2")
and from this string I want to build a resulting string like
col1 IN LEAST(v_col1, v_col2) AND col2 IN GREATEST(v_col1, v_col2)
where v_ is appended to the column name and they represent predefined variables.
Number of columns can be dynamic.
I have tried too many options but I am not able to get the desired result.
Please suggest.
These are a few of the options I have tried, but they do not seem to be moving in the right direction.
-- Prints the first double-quoted identifier found in an index expression.
DECLARE
    l_expression VARCHAR2(1000);
    l_first_name VARCHAR2(1000);
BEGIN
    l_expression := 'LEAST("col1","col2")';
    -- Occurrence 1, sub-expression 1: the text between the first pair of quotes.
    l_first_name := REGEXP_SUBSTR(l_expression, '"([^"]*)"', 1, 1, '', 1);
    DBMS_OUTPUT.PUT_LINE(l_first_name);
END;
-- Concatenates the (unquoted) index expressions of one table and wraps the
-- result in " IN ( ... );", turning every space into " AND ".
DECLARE
    CURSOR c_index_exprs IS
        SELECT column_expression
        FROM all_ind_expressions
        WHERE table_owner = 'schemaname'
          AND table_name = 'mytable';

    TYPE t_expr_tab IS TABLE OF c_index_exprs%ROWTYPE;

    l_exprs   t_expr_tab;
    l_joined  VARCHAR2(1000);
    l_echo    BOOLEAN;
BEGIN
    OPEN c_index_exprs;
    FETCH c_index_exprs BULK COLLECT INTO l_exprs;
    CLOSE c_index_exprs;

    -- The raw expressions are echoed only when there is more than one of them.
    l_echo := l_exprs.COUNT > 1;

    FOR i IN 1 .. l_exprs.COUNT LOOP
        l_joined := l_joined || REPLACE(l_exprs(i).column_expression, '"', '');
        IF l_echo THEN
            DBMS_OUTPUT.PUT_LINE(l_exprs(i).column_expression);
        END IF;
    END LOOP;

    l_joined := ' IN (' || REPLACE(l_joined, ' ', ' AND ') || ');';
    DBMS_OUTPUT.PUT_LINE(l_joined);
END;
Regards.

After so much trials, I have finally got what I was looking for.
I am sharing this for the forum's reference.
-- Builds "<column> IN (<expression with v_-prefixed names>) AND ..." from the
-- function-based index expressions of one table.
--
-- For the indx-th expression the left-hand column is the indx-th quoted
-- identifier inside that expression. On the right-hand side every quoted
-- identifier "name" becomes v_name.
--
-- The prefixing is done with a single REGEXP_REPLACE on the quoted identifiers.
-- Stripping the quotes first and then using REPLACE on bare names corrupts any
-- other text that contains a column name (a column "E" would turn LEAST into
-- Lv_EAST, "col1" would match inside "col10").
DECLARE
    CURSOR c_index_exprs IS
        SELECT column_expression
        FROM all_ind_expressions
        WHERE table_owner = 'myschema'
          AND table_name = 'mytable';

    TYPE t_expr_tab IS TABLE OF c_index_exprs%ROWTYPE;

    c_quoted_name CONSTANT VARCHAR2(20) := '"([^"]*)"';

    l_exprs      t_expr_tab;
    l_expr       VARCHAR2(32767);
    l_column     VARCHAR2(32767);
    l_rhs        VARCHAR2(32767);
    l_predicate  VARCHAR2(32767);
BEGIN
    OPEN c_index_exprs;
    FETCH c_index_exprs BULK COLLECT INTO l_exprs;
    CLOSE c_index_exprs;

    FOR indx IN 1 .. l_exprs.COUNT LOOP
        l_expr   := l_exprs(indx).column_expression;
        l_column := REGEXP_SUBSTR(l_expr, c_quoted_name, 1, indx, '', 1);
        -- "name" -> v_name for every quoted identifier, quotes dropped.
        l_rhs    := REGEXP_REPLACE(l_expr, c_quoted_name, 'v_\1');

        IF l_exprs.COUNT > 1 THEN
            -- Several expressions: parenthesise each one, join with AND and
            -- terminate the last one with ';'.
            l_predicate := l_predicate || l_column || ' IN (' || l_rhs || ')'
                || CASE WHEN indx < l_exprs.COUNT THEN ' AND ' ELSE ';' END;
        ELSE
            -- Single expression: emitted without parentheses or terminator.
            l_predicate := l_predicate || l_column || ' IN ' || l_rhs;
        END IF;
    END LOOP;

    DBMS_OUTPUT.PUT_LINE('v_Var5' || CHR(13) || l_predicate);
END;

Related

multipart/form-data request pass BLOB files

I want to pass BLOB files to JIRA. How I can do that? I tried:
-- Uploads one photo to JIRA as an attachment through APEX_WEB_SERVICE.
--
-- A multipart/form-data request needs (a) a boundary parameter in the
-- Content-Type header and (b) a body in which the file bytes are wrapped in a
-- part delimited by that boundary. Sending the bare BLOB with a boundary-less
-- Content-Type is what makes the server answer
-- "no multipart boundary was found".
DECLARE
    c_crlf      CONSTANT VARCHAR2(2)   := CHR(13) || CHR(10);
    c_boundary  CONSTANT VARCHAR2(40)  := 'plsqlMultipartBoundary7d9a3f';
    c_file_name CONSTANT VARCHAR2(100) := 'hand-sanitizer.png';

    v_login     VARCHAR2(100) := 'user';
    v_password  VARCHAR2(100) := 'psw';
    l_photo     BLOB;
    l_body      BLOB;
    l_part_head RAW(1000);
    l_part_tail RAW(100);
    f_response  CLOB;
BEGIN
    SELECT PHOTO INTO l_photo FROM LEAN5S_PHOTO WHERE ID = 189;

    -- Part header: the form field must be called "file"; the file name travels
    -- in the Content-Disposition of the part, not in a request header.
    l_part_head := UTL_RAW.CAST_TO_RAW(
           '--' || c_boundary || c_crlf
        || 'Content-Disposition: form-data; name="file"; filename="' || c_file_name || '"' || c_crlf
        || 'Content-Type: application/octet-stream' || c_crlf
        || c_crlf);
    l_part_tail := UTL_RAW.CAST_TO_RAW(c_crlf || '--' || c_boundary || '--' || c_crlf);

    -- Body = part header + raw file bytes (no Base64) + closing boundary.
    DBMS_LOB.CREATETEMPORARY(l_body, TRUE);
    DBMS_LOB.WRITEAPPEND(l_body, UTL_RAW.LENGTH(l_part_head), l_part_head);
    IF DBMS_LOB.GETLENGTH(l_photo) > 0 THEN
        DBMS_LOB.APPEND(l_body, l_photo);
    END IF;
    DBMS_LOB.WRITEAPPEND(l_body, UTL_RAW.LENGTH(l_part_tail), l_part_tail);

    apex_web_service.g_request_headers.delete;
    apex_web_service.g_request_headers(1).name  := 'Content-Type';
    apex_web_service.g_request_headers(1).value := 'multipart/form-data; boundary=' || c_boundary;
    apex_web_service.g_request_headers(2).name  := 'X-Atlassian-Token';
    apex_web_service.g_request_headers(2).value := 'no-check';

    f_response := apex_web_service.make_rest_request(
        p_url         => 'http://site/rest/api/2/issue/key/attachments',
        p_http_method => 'POST',
        p_body_blob   => l_body,
        p_username    => v_login,
        p_password    => v_password
    );

    dbms_output.put_line(apex_web_service.g_status_code || ' ' || f_response);

    DBMS_LOB.FREETEMPORARY(l_body);
END;
But I'm getting a request error (500) and the file is not uploaded:
500 500org.apache.commons.fileupload.FileUploadException:
the request was rejected because no multipart boundary was
foundjava.lang.RuntimeException:
org.apache.commons.fileupload.FileUploadException: the request was
rejected because no multipart boundary was found
My colleague tried another variant:
-- Uploads one photo to JIRA as a multipart/form-data attachment via UTL_HTTP.
--
-- Fixes compared with the earlier variant:
--   * the file is sent as raw bytes inside the part (it was Base64 text while
--     the part claimed to be binary, so JIRA stored the Base64 text);
--   * nothing is appended after the closing boundary;
--   * the body is written with "offset <= length", so the last byte is no
--     longer dropped when exactly one byte remains;
--   * the whole response body is read, and the request/response handle is
--     released on every error path.
DECLARE
    c_url      CONSTANT VARCHAR2(255) := 'http://site/rest/api/2/issue/key/attachments';
    c_crlf     CONSTANT VARCHAR2(2)   := chr(13) || chr(10);
    c_boundary CONSTANT VARCHAR2(30)  := 'AaB03x';
    c_chunk    CONSTANT PLS_INTEGER   := 32767;

    l_req           utl_http.req;
    l_resp          utl_http.resp;
    l_request_open  BOOLEAN := FALSE;
    l_response_open BOOLEAN := FALSE;

    l_attachment    BLOB;
    l_file_name     VARCHAR2(255);
    l_mime_type     VARCHAR2(255);

    l_part_head     RAW(2000);
    l_part_tail     RAW(100);
    l_body          BLOB;
    l_body_length   INTEGER;
    l_offset        INTEGER := 1;
    l_amount        INTEGER;
    l_buffer        RAW(32767);

    l_header_name   VARCHAR2(256);
    l_header_value  VARCHAR2(1024);
    l_text          VARCHAR2(32767);

    -- Releases whichever HTTP handle is currently open.
    PROCEDURE close_http IS
    BEGIN
        IF l_response_open THEN
            utl_http.end_response(l_resp);
        ELSIF l_request_open THEN
            utl_http.end_request(l_req);
        END IF;
        l_response_open := FALSE;
        l_request_open  := FALSE;
    EXCEPTION
        WHEN OTHERS THEN
            NULL;  -- best-effort cleanup; the original error is what matters
    END close_http;
BEGIN
    SELECT PHOTO, FILENAME, MIMETYPE
    INTO l_attachment, l_file_name, l_mime_type
    FROM LEAN5S_PHOTO
    WHERE ID = 156;

    -- Part header and closing delimiter are plain ASCII text.
    l_part_head := utl_raw.cast_to_raw(
           '--' || c_boundary || c_crlf
        || 'Content-Disposition: form-data; name="file"; filename="' || l_file_name || '"' || c_crlf
        || 'Content-Type: ' || NVL(l_mime_type, 'application/octet-stream') || c_crlf
        || c_crlf);
    l_part_tail := utl_raw.cast_to_raw(c_crlf || '--' || c_boundary || '--' || c_crlf);

    -- Body = part header + raw file bytes + closing boundary.
    dbms_lob.createtemporary(l_body, TRUE);
    dbms_lob.writeappend(l_body, utl_raw.length(l_part_head), l_part_head);
    IF dbms_lob.getlength(l_attachment) > 0 THEN
        dbms_lob.append(l_body, l_attachment);
    END IF;
    dbms_lob.writeappend(l_body, utl_raw.length(l_part_tail), l_part_tail);
    l_body_length := dbms_lob.getlength(l_body);

    l_req := utl_http.begin_request(url          => c_url,
                                    method       => 'POST',
                                    http_version => 'HTTP/1.1');
    l_request_open := TRUE;

    utl_http.set_authentication(l_req, 'user', 'psw', 'Basic');
    utl_http.set_header(l_req, 'X-Atlassian-Token', 'no-check');
    utl_http.set_header(l_req, 'User-Agent', 'Mozilla/4.0');
    utl_http.set_header(l_req, 'Content-Type', 'multipart/form-data; boundary=' || c_boundary);
    -- The full length is known up front, so no chunked transfer is needed.
    utl_http.set_header(l_req, 'Content-Length', TO_CHAR(l_body_length));
    dbms_output.put_line(l_body_length);

    -- Stream the body in pieces of at most 32767 bytes.
    WHILE l_offset <= l_body_length LOOP
        l_amount := c_chunk;
        dbms_lob.read(l_body, l_amount, l_offset, l_buffer);
        utl_http.write_raw(l_req, l_buffer);
        l_offset := l_offset + l_amount;
    END LOOP;

    l_resp := utl_http.get_response(l_req);
    l_request_open  := FALSE;
    l_response_open := TRUE;

    dbms_output.put_line('Response> Status Code: ' || l_resp.status_code);
    FOR i IN 1 .. utl_http.get_header_count(l_resp) LOOP
        utl_http.get_header(l_resp, i, l_header_name, l_header_value);
        dbms_output.put_line('Response> ' || l_header_name || ': ' || l_header_value);
    END LOOP;

    dbms_output.put_line('Response body>');
    BEGIN
        -- READ_TEXT signals the end of the body with END_OF_BODY.
        LOOP
            utl_http.read_text(l_resp, l_text, 8000);
            dbms_output.put_line(l_text);
        END LOOP;
    EXCEPTION
        WHEN utl_http.end_of_body THEN
            NULL;
    END;

    close_http;
    dbms_lob.freetemporary(l_body);
EXCEPTION
    WHEN utl_http.too_many_requests THEN
        -- Kept from the earlier variant: this condition is not re-raised.
        close_http;
    WHEN OTHERS THEN
        close_http;
        RAISE;
END;
This variant is working, but file in JIRA is corrupted.
EDIT: It is something to do with the encoding. I tried sending a .txt file, and in JIRA the file content is in Base64 format.

Calculation in one field

I'm new to SQL and PL/SQL. To practice, I was given an assignment to make a calculator. That part works. But they also want the possibility to type the calculation in the text field, and then it needs to work. For example 4+4 (then the = button or Enter on your keyboard) or 4+6-3=.
My calculator with buttons works, but not if I type a calculation in the text field. Can anyone help me with this?
This is the code I have in my total:
-- "=" process: applies the pending operator to the two operands, then resets
-- the operator and second operand and shows the result.
DECLARE
    l_op     VARCHAR2(1) := :P3_OPERATOR;
    l_left   NUMBER      := :P3_VALUE1;
    l_right  NUMBER      := :P3_VALUE2;
    -- Without a recognised operator the result is the first operand (0 if empty).
    l_total  NUMBER      := NVL(:P3_VALUE1, 0);
BEGIN
    IF l_op = '+' THEN
        l_total := l_left + l_right;
    ELSIF l_op = '-' THEN
        l_total := l_left - l_right;
    ELSIF l_op = '*' THEN
        l_total := l_left * l_right;
    ELSIF l_op = '/' THEN
        l_total := l_left / l_right;
    END IF;

    :P3_OPERATOR    := NULL;
    :P3_VALUE2      := NULL;
    :P3_VALUE1      := l_total;
    :P3_NUMBERFIELD := l_total;
END;
with this as extra code for each of the +, -, * and / buttons:
-- Operator button (the "*" variant): store the chosen operator ...
:P12_OPERATOR := '*';
-- ... and show it in the display field.
:P12_NUMBERFIELD := :P12_OPERATOR;
and this is the code for all my number buttons:
-- Digit button (the "4" variant): appends the digit to the second operand once
-- an operator has been chosen, otherwise to the first, and shows that operand.
BEGIN
    IF :P12_OPERATOR IS NOT NULL THEN
        :P12_VALUE2      := :P12_VALUE2 || 4;
        :P12_NUMBERFIELD := :P12_VALUE2;
    ELSE
        :P12_VALUE1      := :P12_VALUE1 || 4;
        :P12_NUMBERFIELD := :P12_VALUE1;
    END IF;
END;
This is not a typical way to use SQL or PL/SQL (or APEX which it looks like you are also using)!
You could evaluate any expression typed in with code like this:
-- Evaluates the arithmetic expression typed into P3_NUMBERFIELD and stores the
-- outcome in l_result (a character variable declared by the enclosing block,
-- since it also receives the text 'Invalid input').
--
-- The input is concatenated into dynamic SQL, so it is first checked against a
-- whitelist of digits, + - * / ( ) . and spaces; anything else is rejected
-- without being executed. A trailing "=" (as in "4+6-3=") is ignored.
begin
    if regexp_like(rtrim(:P3_NUMBERFIELD, '= '), '^[-+*/(). 0-9]+$') then
        execute immediate 'select ' || rtrim(:P3_NUMBERFIELD, '= ') || ' from dual' into l_result;
    else
        l_result := 'Invalid input';
    end if;
exception
    -- Well-formed characters can still be an invalid expression ("4+*", "1/0").
    when others then
        l_result := 'Invalid input';
end;
The exception part is to stop the calculator going wrong if the user types in nonsense like "hello world" instead of an arithmetic expression. The user would need to type in an expression like 4+4 without typing the equals sign, and then press a button to invoke the process to calculate the result.

PL/SQL Regular expression not working

I have the following code:
declare
    l_subject clob;
    l_hit     clob;

    -- Prints the subject, applies the pattern "((y)*)" (including the double
    -- quotes) and returns TRUE with the match in o_found unless the match
    -- tests as NULL.
    -- NOTE(review): REGEXP_SUBSTR on a CLOB reportedly yields an empty,
    -- non-NULL CLOB when nothing matches, so the IS NULL test below never
    -- fires - confirm against the database version in use.
    function check_this_regex(
        io_str  in out clob
       ,o_found out    clob
    ) return boolean
    is
        l_match clob;
    begin
        dbms_output.put_line('Matching against ->' || io_str || '<-');
        l_match := regexp_substr(io_str, '"((y)*)"');

        if l_match is null then
            return false;
        end if;

        o_found := l_match;
        return true;
    end;
begin
    l_subject := to_clob('x');
    dbms_output.put_line('l_input->' || l_subject || '<-');

    if not check_this_regex(l_subject, l_hit) then
        dbms_output.put_line('Not found');
    else
        dbms_output.put_line('Found: ' || l_hit);
    end if;
end;
Why does this output Found?
The problem should be checking a clob against NULL; editing your check this way
if l_match /* is null */ = empty_clob() then
gives :
l_input->x<-
Matching against ->x<-
Not found
regexp_substr for a CLOB always returns a non-null value. Check this example.
-- Demonstrates how a failed REGEXP_SUBSTR match is reported for a CLOB source
-- versus a VARCHAR2 source. Each case prints its own labelled line for both
-- outcomes, so the output shows unambiguously which result was NULL.
declare
    v_in    clob         := 'a';
    v_clob  clob;
    v_st_in varchar2(10) := 'a';
    v_str   varchar2(10);
begin
    v_clob := regexp_substr(v_in, 'xx');
    if v_clob is null then
        dbms_output.put_line('CLOB result: NULL');
    else
        dbms_output.put_line('CLOB result: NOT NULL, length ' || dbms_lob.getlength(v_clob));
    end if;

    v_str := regexp_substr(v_st_in, 'xx');
    if v_str is null then
        dbms_output.put_line('VARCHAR2 result: NULL');
    else
        dbms_output.put_line('VARCHAR2 result: NOT NULL');
    end if;
end;

Search every column, of every database of every schema for a regular expression

I'm working for a company undergoing acquisition at the moment. They use Oracle 11g and have a requirement for identifying all references to the current company name in their databases and having these listed by the schema/owner, table, column and number of occurrences in that column.
I've used the following with some success, as taken from another answer.
SET SERVEROUTPUT ON SIZE 100000
-- Counts, for every character column visible outside SYS, the rows whose value
-- equals 'NAME' exactly, and prints owner, table, column and count for hits.
DECLARE
    l_hits INTEGER;
    l_sql  VARCHAR2(32767);
BEGIN
    FOR col IN (
        SELECT owner, table_name, column_name
        FROM all_tab_columns
        WHERE OWNER <> 'SYS'
          AND DATA_TYPE LIKE '%CHAR%'
    ) LOOP
        l_sql := 'SELECT COUNT(*) FROM ' || col.owner || '.' || col.table_name
              || ' WHERE ' || col.column_name || ' = :1';

        EXECUTE IMMEDIATE l_sql INTO l_hits USING 'NAME';

        IF l_hits > 0 THEN
            dbms_output.put_line(col.owner || ' ' || col.table_name || ' ' || col.column_name || ' ' || l_hits);
        END IF;
    END LOOP;
END;
/
However it only finds literal strings of NAME and I also want to find Name, Name Shops, Name Accounts, Name someOtherStringICantGuess etc. So I think i should use a regular expression. I'm fine with the regular expression part, but it's how to incorporate it into the above functionality I'm unsure of. In fact i'm uncertain whether I will be adapting the above code, or doing something completely different.
One last thing: performance and duration of the run of the script are irrelevant and subordinate to the certainty of every column being checked. There is a dedicated environment that mimics production where this script will be deployed so it won't adversely affect the company's customers.
Thanks in advance.
EDIT: Just removed some company specific code...
The simplest method is to surround your search with upper.
SET SERVEROUTPUT ON SIZE UNLIMITED
-- Case-insensitive search of every character column (outside SYS/SYSTEM) for
-- l_searchvalue; prints owner, table, column and number of matching rows.
--
-- Notes:
--   * The output buffer is unlimited and the per-column statement echo is
--     gone: a 100000-byte buffer overflows long before all columns of a real
--     database have been visited.
--   * Identifiers are quoted exactly as stored in the dictionary, so
--     mixed-case or otherwise non-standard names do not break the statement.
--   * Failures are reported with the Oracle error text, so a skipped column
--     is never silent.
DECLARE
    -- set l_wildcard_search to true if you are using wildcards ('%'),
    -- false if you want a straight match on the name
    -- Wild card searches (like) are not able to use indexes whereas '='
    -- potentially can.
    l_wildcard_search CONSTANT BOOLEAN := FALSE;
    match_count       INTEGER;
    l_searchvalue     VARCHAR2 (100) := UPPER ('NAME');
    -- Large enough for three quoted 128-byte identifiers plus the fixed text.
    l_cmd             VARCHAR2 (1000);
BEGIN
    FOR t IN (SELECT owner, table_name, column_name
              FROM all_tab_columns
              WHERE owner NOT IN ('SYS', 'SYSTEM')
                AND data_type LIKE '%CHAR%')
    LOOP
        BEGIN
            -- Identify the column first so an error while building the
            -- statement is still attributed to the right column.
            l_cmd := t.owner || '.' || t.table_name || '.' || t.column_name;

            l_cmd := 'SELECT COUNT(*) FROM '
                || DBMS_ASSERT.ENQUOTE_NAME (t.owner, FALSE)
                || '.'
                || DBMS_ASSERT.ENQUOTE_NAME (t.table_name, FALSE)
                || ' WHERE upper('
                || DBMS_ASSERT.ENQUOTE_NAME (t.column_name, FALSE)
                || ')'
                || CASE WHEN l_wildcard_search THEN ' like ' ELSE ' = ' END
                || ':1';

            EXECUTE IMMEDIATE l_cmd INTO match_count USING l_searchvalue;

            IF match_count > 0
            THEN
                DBMS_OUTPUT.put_line (t.owner || ' ' || t.table_name || ' ' || t.column_name || ' ' || match_count);
            END IF;
        EXCEPTION
            WHEN OTHERS
            THEN
                DBMS_OUTPUT.put_line ('Error executing: ' || l_cmd || ' - ' || SQLERRM);
        END;
    END LOOP;
END;
/
Here is your answer using regular expressions
SET SERVEROUTPUT ON SIZE UNLIMITED
-- Regular-expression search of every character column (outside SYS/SYSTEM);
-- prints owner, table, column and number of matching rows.
--
-- Notes:
--   * No row limit on the dictionary query: every column is checked (a
--     leftover "rownum < 10" restricted the scan to nine columns).
--   * REGEXP_LIKE is called with the 'i' match parameter, so the pattern also
--     finds "Name", "Name Shops", "name accounts", ...
--   * Unlimited output buffer, quoted identifiers and error text in the
--     failure message, so no column is skipped silently.
DECLARE
    match_count   INTEGER;
    -- Regular expression to look for; matching is case-insensitive.
    l_searchvalue VARCHAR2 (100) := 'NAME';
    -- Large enough for three quoted 128-byte identifiers plus the fixed text.
    l_cmd         VARCHAR2 (1000);
BEGIN
    FOR t IN (SELECT owner, table_name, column_name
              FROM all_tab_columns
              WHERE owner NOT IN ('SYS', 'SYSTEM')
                AND data_type LIKE '%CHAR%')
    LOOP
        BEGIN
            -- Identify the column first so an error while building the
            -- statement is still attributed to the right column.
            l_cmd := t.owner || '.' || t.table_name || '.' || t.column_name;

            l_cmd := 'SELECT COUNT(*) FROM '
                || DBMS_ASSERT.ENQUOTE_NAME (t.owner, FALSE)
                || '.'
                || DBMS_ASSERT.ENQUOTE_NAME (t.table_name, FALSE)
                || ' WHERE regexp_like('
                || DBMS_ASSERT.ENQUOTE_NAME (t.column_name, FALSE)
                || ', :1, ''i'')';

            EXECUTE IMMEDIATE l_cmd INTO match_count USING l_searchvalue;

            IF match_count > 0
            THEN
                DBMS_OUTPUT.put_line (t.owner || ' ' || t.table_name || ' ' || t.column_name || ' ' || match_count);
            END IF;
        EXCEPTION
            WHEN OTHERS
            THEN
                DBMS_OUTPUT.put_line ('Error executing: ' || l_cmd || ' - ' || SQLERRM);
        END;
    END LOOP;
END;
/

In Oracle what is the fastest way to limit the characters in a string?

I have quite a number of text fields I need to deal with. In order to process them the first thing I need to do is to normalize the set of characters I deal with. I need my output string to contain the following;
A-Z, 0-9 and space and I want all lower case converted to upper case.
So I use the following in pl/sql;
-- Inner call: every character other than 0-9, A-Z, a-z and space becomes a space.
-- Outer call: each run of one or more spaces is collapsed into a single space.
-- The result is then trimmed and upper-cased.
X := UPPER(TRIM(REGEXP_REPLACE
(REGEXP_REPLACE(X, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
This is rather slow. What would be faster?
You can try this approach, which looks to be much faster based on some (very) loose tests. Its a function that's compiled natively:
-- clean_string: normalises text to upper-case A-Z, 0-9 and single spaces.
--
-- Every character that is not an ASCII letter or digit is replaced by a space,
-- runs of spaces are collapsed to one, the result is trimmed and upper-cased.
--
--   in_string  text to normalise (NULL or empty input yields NULL)
--   returns    the normalised text
--
-- Per-character variables use character length semantics: with CHAR(1) under
-- byte semantics a multibyte character (for example the euro sign in
-- AL32UTF8) raises ORA-06502. The work buffer is 32767 bytes so PL/SQL callers
-- may pass strings longer than 4000 bytes.
CREATE OR REPLACE FUNCTION clean_string(
    in_string IN VARCHAR2)
    RETURN VARCHAR2 AS
    out_string VARCHAR2(32767);
    in_length  PLS_INTEGER;
    in_char    VARCHAR2(1 CHAR);
    out_char   VARCHAR2(1 CHAR);
    dec_char   NUMBER;  -- ASCII() of a multibyte character can exceed PLS_INTEGER
    prev_space BOOLEAN := FALSE;
BEGIN
    in_length := NVL(LENGTH(in_string), 0);

    FOR cnt IN 1 .. in_length LOOP
        in_char  := SUBSTR(in_string, cnt, 1);
        dec_char := ASCII(in_char);

        -- Keep 0-9, A-Z and a-z; blank out everything else.
        IF    (dec_char BETWEEN 48 AND 57)
           OR (dec_char BETWEEN 65 AND 90)
           OR (dec_char BETWEEN 97 AND 122)
        THEN
            out_char := in_char;
        ELSE
            out_char := ' ';
        END IF;

        -- Skip a space that directly follows another space.
        IF NOT (prev_space AND out_char = ' ') THEN
            out_string := out_string || out_char;
        END IF;

        prev_space := (out_char = ' ');
    END LOOP;

    RETURN TRIM(UPPER(out_string));
END clean_string;
/
ALTER SESSION SET PLSQL_CODE_TYPE=NATIVE;
ALTER FUNCTION clean_string COMPILE;
And to test, I pulled 5 million rows from a table and cleaned some strings:
set serveroutput on
-- Benchmark over (up to) 5,000,000 rows of my_table comparing three ways of
-- normalising six text columns:
--   1. calling clean_string from PL/SQL for every fetched row,
--   2. calling clean_string inside the SQL query itself,
--   3. the nested REGEXP_REPLACE expression evaluated in PL/SQL.
-- The elapsed time of each pass is printed; l_cnt only counts rows and is not
-- reported.
declare
-- Cursor whose query already applies clean_string to every column (pass 2).
cursor sel_cur1 is
select name, clean_string(name) as cln_name,
address1, clean_string(address1) as cln_addr1,
address2, clean_string(address2) as cln_addr2,
city, clean_string(city) as cln_city,
state, clean_string(state) as cln_state,
postalcode, clean_string(postalcode) as cln_zip
from my_table
where rownum <= 5000000;
-- Cursor returning the raw columns only (passes 1 and 3).
cursor sel_cur2 is
select name,
address1,
address2,
city,
state,
postalcode
from my_table
where rownum <= 5000000;
l_cnt integer := 0;
l_cln_name varchar2(100);
l_cln_addr1 varchar2(100);
l_cln_addr2 varchar2(100);
l_cln_city varchar2(100);
l_cln_state varchar2(100);
l_cln_zip varchar2(100);
l_interval interval day to second(4);
l_start timestamp;
l_end timestamp;
begin
-- Pass 1: clean_string called from PL/SQL, once per column per row.
l_start := systimestamp;
for rec in sel_cur2
loop
l_cnt := l_cnt + 1;
l_cln_name := clean_string(rec.name);
l_cln_addr1 := clean_string(rec.address1);
l_cln_addr2 := clean_string(rec.address2);
l_cln_city := clean_string(rec.city);
l_cln_state := clean_string(rec.state);
l_cln_zip := clean_string(rec.postalcode);
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Procedural approach timing: ' || l_interval);
-------------------------------------------------
-- Pass 2: the query itself calls clean_string; the loop only fetches.
l_cnt := 0;
l_start := systimestamp;
for rec in sel_cur1
loop
-- cleaning already done in SQL
l_cnt := l_cnt + 1;
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('SQL approach timing: ' || l_interval);
-------------------------------------------------
-- Pass 3: the nested REGEXP_REPLACE expression, evaluated in PL/SQL.
l_cnt := 0;
l_start := systimestamp;
for rec in sel_cur2
loop
l_cnt := l_cnt + 1;
l_cln_name := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.name, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_addr1 := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.address1, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_addr2 := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.address2, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_city := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.city, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_state := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.state, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_zip := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.postalcode, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Existing approach timing: ' || l_interval);
end;
And the output was:
Procedural approach timing: +00 00:02:04.0320
SQL approach timing: +00 00:02:49.4326
Existing approach timing: +00 00:05:50.1607
Also, the native compilation seems to only help a procedural approach to the processing (rather than calling the function from a SQL query), but appears to be much faster than the regexp_replace solution. Hope that helps.
First, let me say that I am not really answering my own question, but I am accepting tbone's answer. The reason for providing this answer, is the comments don't let me post what I really want.
I created a function almost identical to tbone's with a couple of tweaks, got rid of the UPPER by changing how I handle the lower case range of characters, and changed numbers to binary_integers.
-- CLEAN_STRING: normalises text to upper-case A-Z, 0-9 and single spaces.
--
-- Digits and upper-case letters are kept, lower-case letters are shifted to
-- upper case by subtracting 32 from their code, every other character becomes
-- a space, and consecutive spaces are collapsed. The result is trimmed.
--
--   IN_STRING  text to normalise (NULL or empty input yields NULL)
--   returns    the normalised text
--
-- Per-character variables use character length semantics: with CHAR(1) under
-- byte semantics a multibyte character raises ORA-06502. ASCII() of a
-- multibyte character can exceed the PLS_INTEGER range, so the code is capped
-- at 255 (any value above 122 is treated as "not alphanumeric" anyway).
FUNCTION CLEAN_STRING(IN_STRING in VARCHAR2) RETURN VARCHAR2
AS
    OUT_STRING  VARCHAR2(32767);
    IN_LENGTH   PLS_INTEGER;
    IN_CHAR     VARCHAR2(1 CHAR);
    OUT_CHAR    VARCHAR2(1 CHAR);
    DEC_CHAR    PLS_INTEGER;
    PREV_SPACE  BOOLEAN := FALSE;
BEGIN
    IN_LENGTH := NVL(LENGTH(IN_STRING), 0);

    FOR CNT IN 1 .. IN_LENGTH LOOP
        IN_CHAR  := SUBSTR(IN_STRING, CNT, 1);
        DEC_CHAR := LEAST(ASCII(IN_CHAR), 255);

        IF (DEC_CHAR BETWEEN 48 AND 57) OR (DEC_CHAR BETWEEN 65 AND 90) THEN
            -- digit or upper-case letter: keep it
            OUT_CHAR := IN_CHAR;
        ELSIF DEC_CHAR BETWEEN 97 AND 122 THEN
            -- lower-case letter: convert to upper case
            OUT_CHAR := CHR(DEC_CHAR - 32);
        ELSE
            -- blank out non alphanumerics
            OUT_CHAR := ' ';
        END IF;

        -- Skip a space that directly follows another space.
        IF NOT (PREV_SPACE AND OUT_CHAR = ' ') THEN
            OUT_STRING := OUT_STRING || OUT_CHAR;
        END IF;

        PREV_SPACE := (OUT_CHAR = ' ');
    END LOOP;

    RETURN TRIM(OUT_STRING);
END CLEAN_STRING;
I then created a simple test rig like tbone did, but I tested the three different routines against each other. First I verify that they all return the same results and then time each routine. Here is the test rig;
set serveroutput on
-- Test rig over the first rows of PATHMAST (ROWNUM < 100000):
--   1. verification pass: computes the regexp, CLEAN_STRING and TRANSLATE
--      results for each row and counts rows where CLEAN_STRING or TRANSLATE
--      differs from the regexp result (TRANSLATE mismatches are printed);
--   2. three timing passes, one per approach, each printing its elapsed time.
DECLARE
CURSOR PATHMAST_CURS
IS
SELECT PATHMAST_TEXT_DIAGNOSIS FROM PATHMAST WHERE ROWNUM < 100000;
DUMMY CLOB;
DUMMY_1 CLOB;
DUMMY_2 CLOB;
l_interval interval day to second(4);
l_start timestamp;
l_end timestamp;
diff_count_1 binary_integer := 0;
diff_count_2 binary_integer := 0;
BEGIN
-- Verification pass. NULL text is replaced by a single space before cleaning.
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '), '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
DUMMY_1 := pathmast_utility_3.CLEAN_STRING(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '));
-- NOTE(review): as written, the TRANSLATE target string has a single space
-- after the 26 letters, so only the first special character maps to a space
-- and the remaining ones are deleted. The padding spaces may have been lost
-- when the snippet was pasted - confirm. The "##" in the source string may
-- originally have been "@#" - confirm.
DUMMY_2 := regexp_replace(trim(translate(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '),'abcdefghijklmnopqrstuvwxyz`~!##$%^&*()''_+-={[}]|/\":;,.<>?µ’±€'||chr(9),'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')),'( )* ',' ');
IF DUMMY_1 != DUMMY
THEN
diff_count_1 := diff_count_1 + 1;
END IF;
IF DUMMY_2 != DUMMY
THEN
diff_count_2 := diff_count_2 + 1;
dbms_output.put_line('Regexp: ' || DUMMY);
dbms_output.put_line('Translate: ' || DUMMY_2);
END IF;
END LOOP;
dbms_output.put_line('CLEAN_STRING differences: ' || diff_count_1);
dbms_output.put_line('Translate differences: ' || diff_count_2);
-- Timing pass 1: nested REGEXP_REPLACE (no NVL on the input here).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(PATH_REC.PATHMAST_TEXT_DIAGNOSIS, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Regexp approach timing: ' || l_interval);
-------------------------------------------------
-- Timing pass 2: the packaged CLEAN_STRING function (no NVL on the input here).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := pathmast_utility_3.CLEAN_STRING(PATH_REC.PATHMAST_TEXT_DIAGNOSIS);
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('CLEAN_STRING approach timing: ' || l_interval);
-------------------------------------------------
-- Timing pass 3: TRANSLATE followed by one REGEXP_REPLACE (input wrapped in NVL).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := regexp_replace(trim(translate(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '),'abcdefghijklmnopqrstuvwxyz`~!##$%^&*()''_+-={[}]|/\":;,.<>?µ’±€'||chr(9),'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')),'( )* ',' ');
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('TRANSLATE approach timing: ' || l_interval);
-------------------------------------------------
END;
And here are the results;
anonymous block completed
CLEAN_STRING differences: 0
Translate differences: 0
Regexp approach timing: +00 00:00:52.9160
CLEAN_STRING approach timing: +00 00:00:05.5220
TRANSLATE approach timing: +00 00:00:13.4320
This is all without compiling native. So tbone is the big winner. Thank you tbone.
If for whatever reason you want/need to use the translate version, you should build the translate string programmatically in order to get all of the special characters.
Perhaps, you can use TRANSLATE instead of regex to remove special characters and convert lower case to upper case.
-- Normalises x with TRANSLATE: lower-case letters map to upper case and each of
-- the 32 listed special characters maps to a space; the result is trimmed and
-- runs of two or more spaces are collapsed to one.
--
-- TRANSLATE deletes any source character that has no counterpart in the target
-- string, so the target is padded with one space per special character
-- (RPAD(' ', 32)); with a single trailing space only the first special would
-- be blanked and the rest removed. '@' is listed in place of the duplicated '#'.
regexp_replace(
    trim(
        translate(x,
            'abcdefghijklmnopqrstuvwxyz`~!@#$%^&*()_+-={[}]|/\"'':;,.<>?',
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ' || rpad(' ', 32)
        )
    ),
    ' {2,}',
    ' '
)
Tried it on a table with 1000 rows and column with random characters from anywhere between 1 to 4000.
Resulted in around 35% less time.(Did not try in PLSQL).