I'd like to execute for~loop using the \R\ delimiter.
-- Intended to split v_text into records on the literal '\R\' delimiter and to
-- print the first five '^'-separated fields of every record via DBMS_OUTPUT.
DECLARE
v_idx NUMBER := 1; -- 1-based record counter shown in the separator line
v_text VARCHAR2(1000);
v_pattern VARCHAR2(32);
BEGIN
v_text := 'GR105^INF^191097-1^CT^test string r01\R\GR109^INF^191097-2^CR^test string r02 (1234)';
-- NOTE(review): this pattern matches only the delimiter itself, so the
-- REGEXP_SUBSTR in the cursor query returns the delimiter text rather than
-- the record that precedes it.
v_pattern := '\\R\\';
-- One cursor row per match of v_pattern in v_text; LEVEL is the match ordinal.
FOR repeat IN (SELECT trim(REGEXP_SUBSTR(v_text, v_pattern, 1, LEVEL)) item
FROM dual
CONNECT BY LEVEL <= REGEXP_COUNT(v_text, v_pattern))
LOOP
dbms_output.PUT_LINE('--------------- ' || v_idx || ' ---------------');
-- '[^^]+' is a run of characters other than '^'; the last argument selects the n-th run.
dbms_output.PUT_LINE('v_a = ' || trim(REGEXP_SUBSTR(repeat.item, '[^^]+', 1, 1)));
dbms_output.PUT_LINE('v_b = ' || trim(REGEXP_SUBSTR(repeat.item, '[^^]+', 1, 2)));
dbms_output.PUT_LINE('v_c = ' || trim(REGEXP_SUBSTR(repeat.item, '[^^]+', 1, 3)));
dbms_output.PUT_LINE('v_d = ' || trim(REGEXP_SUBSTR(repeat.item, '[^^]+', 1, 4)));
dbms_output.PUT_LINE('v_e = ' || trim(REGEXP_SUBSTR(repeat.item, '[^^]+', 1, 5)));
v_idx := v_idx + 1;
END LOOP;
END;
It works fine with '~' separator and '[^~]+' pattern.
But I don't know how to use '\R\'
You are only matching the delimiter \R\ and not the preceding sub-string. To match the preceding sub-string you can match (.*?)(\\R\\|$) and extract the contents of the first capturing group:
-- Splits v_text into records on the literal '\R\' delimiter and prints the
-- first five '^'-separated fields of each record through DBMS_OUTPUT.
DECLARE
    v_text      VARCHAR2(1000) :=
        'GR105^INF^191097-1^CT^test string r01\R\GR109^INF^191097-2^CR^test string r02 (1234)';
    -- Group 1 lazily captures the record text; group 2 is the delimiter or end of input.
    v_pattern   VARCHAR2(32)   := '(.*?)(\\R\\|$)';
    -- Suffix letters of the printed labels v_a .. v_e, one per field position.
    c_suffixes  CONSTANT VARCHAR2(5) := 'abcde';
    v_record_no NUMBER := 0;
BEGIN
    -- LEVEL stops one short of the match count: the last match is the
    -- zero-width one at end of input and carries no record.
    FOR rec IN (
        SELECT TRIM(REGEXP_SUBSTR(v_text, v_pattern, 1, LEVEL, NULL, 1)) AS item
          FROM dual
       CONNECT BY LEVEL < REGEXP_COUNT(v_text, v_pattern)
    )
    LOOP
        v_record_no := v_record_no + 1;
        dbms_output.put_line('--------------- ' || v_record_no || ' ---------------');

        -- Field n is the n-th run of characters other than '^'.
        FOR field_no IN 1 .. LENGTH(c_suffixes)
        LOOP
            dbms_output.put_line(
                'v_' || SUBSTR(c_suffixes, field_no, 1) || ' = '
                || TRIM(REGEXP_SUBSTR(rec.item, '[^^]+', 1, field_no))
            );
        END LOOP;
    END LOOP;
END;
/
Note: LEVEL <= REGEXP_COUNT(v_text, v_pattern) needs to change to LEVEL < REGEXP_COUNT(v_text, v_pattern) as there will be a final zero-width match from the pattern (on .*?$) that needs to be ignored.
Which outputs:
--------------- 1 ---------------
v_a = GR105
v_b = INF
v_c = 191097-1
v_d = CT
v_e = test string r01
--------------- 2 ---------------
v_a = GR109
v_b = INF
v_c = 191097-2
v_d = CR
v_e = test string r02 (1234)
fiddle
Related
I want to pass BLOB files to JIRA. How I can do that? I tried:
-- Reads one PHOTO BLOB from LEAN5S_PHOTO (ID = 189) and POSTs it to a JIRA
-- attachments URL with apex_web_service.make_rest_request, then prints the
-- HTTP status code and the response body.
DECLARE
l_start TIMESTAMP;
l_duration NUMERIC; -- elapsed milliseconds; computed below but never printed
l_result VARCHAR2(32767); -- declared but not used in this block
p_array APEX_APPLICATION_GLOBAL.VC_ARR2; -- declared but not used in this block
v_login VARCHAR2(100);
v_password VARCHAR2(100);
f_body_blob BLOB;
f_response CLOB;
f_body_clob CLOB; -- declared but not used in this block
BEGIN
v_login := 'user';
v_password := 'psw';
SELECT PHOTO INTO f_body_blob FROM LEAN5S_PHOTO WHERE ID = 189;
-- Start from an empty header list before adding the request headers.
apex_web_service.g_request_headers.delete;
-- NOTE(review): the Content-Type value carries no "boundary" parameter and the
-- BLOB is sent as the bare request body; the server's reported error is
-- "no multipart boundary was found".
apex_web_service.g_request_headers(1).name := 'Content-Type';
apex_web_service.g_request_headers(1).value := 'multipart/form-data';
apex_web_service.g_request_headers(2).name := 'X-Atlassian-Token';
apex_web_service.g_request_headers(2).value := 'no-check';
-- NOTE(review): the file name is sent as an HTTP header called 'file';
-- presumably it was meant to be the multipart field/file name - confirm.
apex_web_service.g_request_headers(3).name := 'file';
apex_web_service.g_request_headers(3).value := 'hand-sanitizer.png';
l_start := systimestamp;
f_response := apex_web_service.make_rest_request(p_url => 'http://site/rest/api/2/issue/key/attachments',
p_http_method => 'POST',
p_body_blob => f_body_blob,
p_username => v_login,
p_password => v_password
);
-- Only the seconds component of the interval is extracted before scaling to ms.
l_duration := round(extract(second from systimestamp - l_start) * 1000);
dbms_output.put_line(apex_web_service.g_status_code || ' ' || f_response);
END;
But I get a request error (500) and the file is not uploaded:
500 500org.apache.commons.fileupload.FileUploadException:
the request was rejected because no multipart boundary was
foundjava.lang.RuntimeException:
org.apache.commons.fileupload.FileUploadException: the request was
rejected because no multipart boundary was found
My colleague tried another variant:
-- Builds a multipart/form-data request body by hand for one LEAN5S_PHOTO row
-- (ID = 156), POSTs it to a JIRA attachments URL with UTL_HTTP and prints the
-- response status, headers and body.
DECLARE
p_url VARCHAR2(255) := 'http://site/rest/api/2/issue/key/attachments';
utl_req utl_http.req;
utl_resp utl_http.resp;
req_length BINARY_INTEGER;
response_body VARCHAR2(32767); -- only referenced by the commented-out read_raw call
p_request_body clob;
l_newline VARCHAR2(50) := chr(13) || chr(10);
lco_boundary CONSTANT VARCHAR2(30) := 'AaB03x';
buffer raw(32767);
amount number(15) := 32767;
offset number(15) := 1;
l_attachment blob;
l_file_name VARCHAR2(255);
l_mime_type VARCHAR2(255);
l_response_header_name varchar2(256);
l_response_header_value varchar2(1024);
l_response_body varchar2(32767);
lang_context integer;
warning varchar2(1000);
blb blob;
tmp_blob blob default EMPTY_BLOB(); -- declared but not used in this block
dest_offset integer := 1;
src_offset integer := 1;
BEGIN
SELECT PHOTO, FILENAME, MIMETYPE
INTO l_attachment, l_file_name, l_mime_type
FROM LEAN5S_PHOTO
WHERE ID = 156;
-- Assemble the whole multipart body as text: opening boundary, part headers,
-- payload, closing boundary.
p_request_body := l_newline || '--' || lco_boundary || l_newline ||
'Content-Disposition: form-data; name="file"; filename="' ||
l_file_name || '"' || l_newline || 'Content-Type: ' ||
l_mime_type || l_newline ||
'Content-Transfer-Encoding: binary' || l_newline ||
l_newline ||
-- NOTE(review): the payload is Base64 text although the part header above
-- declares "binary"; this matches the reported symptom that the uploaded
-- file content shows up as Base64.
apex_web_service.blob2clobbase64(l_attachment) ||
l_newline || '--' || lco_boundary || '--';
dbms_lob.createtemporary(blb, FALSE);
dest_offset := 1;
src_offset := 1;
lang_context := 0;
-- Convert the textual body into a BLOB so it can be written with write_raw.
dbms_lob.converttoblob(blb,
p_request_body,
dbms_lob.getlength(p_request_body),
dest_offset,
src_offset,
0,
lang_context,
warning);
-- NOTE(review): the raw attachment is appended AFTER the closing boundary, so
-- the file is sent a second time outside of any multipart part - confirm
-- whether this line is intended at all.
dbms_lob.append(blb, l_attachment);
req_length := dbms_lob.getlength(blb);
utl_req := utl_http.begin_request(url => p_url,
method => 'POST',
http_version => 'HTTP/1.1');
utl_http.set_authentication(utl_req, 'user', 'psw', 'Basic');
utl_http.set_header(utl_req, 'X-Atlassian-Token', 'no-check');
utl_http.set_header(utl_req, 'User-Agent', 'Mozilla/4.0');
utl_http.set_header(utl_req,
'Content-Type',
'multipart/form-data; boundary="' || lco_boundary || '"');
dbms_output.put_line(req_length);
-- Small bodies are written in one piece with an explicit Content-Length;
-- larger ones are streamed in 32767-byte reads using chunked transfer encoding.
IF req_length <= 32767 THEN
utl_http.set_header(utl_req, 'Content-Length', req_length);
utl_http.write_raw(utl_req, blb);
ELSIF req_length > 32767 THEN
utl_http.set_header(utl_req, 'Transfer-Encoding', 'chunked');
-- NOTE(review): with a strict "<" the loop stops when offset equals
-- req_length, which looks like it would skip a final single byte - confirm.
WHILE (offset < req_length) LOOP
dbms_lob.read(blb, amount, offset, buffer);
utl_http.write_raw(utl_req, buffer);
offset := offset + amount;
END LOOP;
END IF;
utl_resp := utl_http.get_response(utl_req);
dbms_output.put_line('Response> Status Code: ' || utl_resp.status_code);
for i in 1 .. utl_http.get_header_count(utl_resp) loop
utl_http.get_header(utl_resp, i, l_response_header_name, l_response_header_value);
dbms_output.put_line('Response> ' || l_response_header_name || ': ' || l_response_header_value);
end loop;
-- Reads at most 32767 characters of the response body in a single call.
utl_http.read_text(utl_resp, l_response_body, 32767);
--utl_http.read_raw(utl_resp, response_body, 32767);
dbms_output.put_line('Response body>');
dbms_output.put_line(l_response_body);
utl_http.end_response(utl_resp);
EXCEPTION
-- Only TOO_MANY_REQUESTS is handled; any other exception propagates to the caller.
WHEN UTL_HTTP.TOO_MANY_REQUESTS THEN
utl_http.END_RESPONSE(utl_resp);
END;
This variant works, but the file that arrives in JIRA is corrupted.
EDIT: It is something to do with the encoding. I tried sending a .txt file, and in JIRA the file content is in Base64 format.
I am having a string like
LEAST("col1", "col2") GREATEST("col1", "col2")
and from this string I want to build a resultant string like
col1 IN LEAST(v_col1, v_col2) AND col2 IN GREATEST(v_col1, v_col2)
where v_ is appended to the column name and they represent predefined variables.
Number of columns can be dynamic.
I have tried too many options but I am not able to get the desired result.
Please suggest.
These are a few of the options I have tried, but they do not seem to be moving in the right direction.
-- Prints the text inside the first pair of double quotes of a sample expression.
DECLARE
    l_expression VARCHAR2(1000) := 'LEAST("col1","col2")';
    l_first_name VARCHAR2(1000);
BEGIN
    -- The final argument (1) returns capture group 1, i.e. the name without its quotes.
    l_first_name := REGEXP_SUBSTR(l_expression, '"([^"]*)"', 1, 1, '', 1);
    DBMS_OUTPUT.PUT_LINE(l_first_name);
END;
-- Attempt: collects the index expressions of one table from
-- all_ind_expressions, strips the double quotes, concatenates them and wraps
-- the result in ' IN (' ... ');' before printing it.
declare
v_Var5 varchar2(50); -- only referenced by the commented-out output line below
v_Var6 varchar2(1000); -- accumulates the unquoted expressions
v_Var7 varchar2(10); -- prefix placed in front of the final text
v_Var8 varchar2(10); -- suffix placed behind the final text
CURSOR c3 IS
SELECT column_expression FROM all_ind_expressions WHERE table_owner = 'schemaname' AND table_name = 'mytable';
names_t c3%ROWTYPE;
TYPE names_ntt IS TABLE OF names_t%TYPE; -- must use type
l_names names_ntt;
BEGIN
v_Var7 := ' IN (';
v_Var8 := ');';
--FOR i in c3
-- Load every matching row into the collection in one fetch.
OPEN c3;
FETCH c3 BULK COLLECT INTO l_names;
CLOSE c3;
FOR indx IN 1..l_names.COUNT LOOP
-- With several rows the expressions are appended one after another (and
-- echoed); with a single row the expression is simply assigned.
IF(l_names.COUNT > 1) THEN
v_Var6 := v_Var6 || REPLACE(l_names(indx).column_expression, '"', '');
DBMS_OUTPUT.PUT_LINE(l_names(indx).column_expression);
ELSE
v_Var6 := REPLACE(l_names(indx).column_expression, '"', '');
END IF;
END LOOP;
-- Every space in the accumulated text is turned into ' AND '.
v_Var6 := v_Var7 || REPLACE(v_Var6, ' ', ' AND ') || v_Var8;
--DBMS_OUTPUT.PUT_LINE(v_Var5);
DBMS_OUTPUT.PUT_LINE(v_Var6);
END;
Regards.
After many trials, I have finally got what I was looking for.
I am sharing this for the forum's reference.
-- Builds one predicate string from the index expressions of a table: for each
-- expression it emits "<name> IN (<expression with v_-prefixed names>)" and
-- joins the pieces with ' AND ', then prints the result.
DECLARE
v_Var5 varchar2(1000); -- final predicate text, built up across iterations
v_Var6 varchar2(1000); -- current expression with quotes removed and names prefixed
v_Var7 varchar2(10); -- assigned below but never read
v_Var8 varchar2(10); -- assigned below but never read
v_Var9 varchar2(1000); -- "<name> IN (" lead-in for the current expression
v_Counter NUMBER; -- assigned below but never read
CURSOR c3 IS
SELECT column_expression FROM all_ind_expressions WHERE table_owner = 'myschema' AND table_name = 'mytable';
names_t c3%ROWTYPE;
TYPE names_ntt IS TABLE OF names_t%TYPE; -- must use type
l_names names_ntt;
BEGIN
v_Counter := 1;
v_Var7 := ' IN (';
v_Var8 := ');';
OPEN c3;
FETCH c3 BULK COLLECT INTO l_names;
CLOSE c3;
FOR indx IN 1..l_names.COUNT
LOOP
--v_Counter := l_names.COUNT;
IF(l_names.COUNT > 1) THEN
v_Var6 := REPLACE(l_names(indx).column_expression, '"', '');
-- NOTE(review): the i-th quoted name is taken from the i-th row's expression,
-- so this assumes there are as many rows as quoted names and that row i holds
-- at least i quoted names - confirm against the real index definitions.
FOR i IN 1..l_names.COUNT LOOP
v_Var6 := REPLACE(v_Var6, REGEXP_SUBSTR(l_names(i).column_expression,'"([^"]*)"',1,i,'',1),'v_' || REGEXP_SUBSTR(l_names(i).column_expression,'"([^"]*)"',1,i,'',1));
END LOOP;
-- The indx-th quoted name of the current expression becomes the left-hand side.
v_Var9 := REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1) || ' IN (';
-- Every piece except the last ends with ') AND '; the last one ends with ');'.
CASE
WHEN indx < l_names.COUNT THEN
v_Var5 := v_Var5 || v_Var9 || v_Var6 || ') AND ';
WHEN indx = l_names.COUNT THEN
v_Var5 := v_Var5 || v_Var9 || v_Var6 || ');';
END CASE;
ELSE
-- Single-row case: no surrounding parentheses and no trailing ');' are added.
v_Var6 := REPLACE(l_names(indx).column_expression, '"', '');
v_Var6 := REPLACE(v_Var6, REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1),'v_' || REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1));
v_Var9 := REGEXP_SUBSTR(l_names(indx).column_expression,'"([^"]*)"',1,indx,'',1) || ' IN ';
v_Var5 := v_Var5 || v_Var9 || v_Var6;
END IF;
END LOOP;
--DBMS_OUTPUT.PUT_LINE('v_Var9' || CHR(13) || v_Var9);
--DBMS_OUTPUT.PUT_LINE('v_Var6' || CHR(13) ||v_Var6);
DBMS_OUTPUT.PUT_LINE('v_Var5' || CHR(13) ||v_Var5);
END;
After getting report by sql commands in apex, I want to store data of variables sid,serial# corresponding to check boxes which are checked. After that I have to execute alter system kill session 'sid,serial#' in apex itself
Hi there I'll show you the approach used by skillbuilders on this tutorial, which is based on collections
The approach consists of four steps; the idea is to build the report over a collection rather than directly over a set of tables. I'll describe all of the steps, but I strongly suggest that you watch the video at the link above for specific details on the code.
First you need to create the collection (and name it) with the information that you
want to show on your report. To do this you build a string containing the query itself and then bind any variables that you may (or may not) need. Here's a template for your collection; keep in mind that this template needs to be adjusted to your scenario. In your case the items of most interest are #47 and #50: these are a status field (47) and a check box field (50).
-- Step 1: (re)creates the APEX collection 'REPLACE_NAME' from a query built as
-- a string with one bind variable, and appends an empty member when the page
-- request is 'ADD'.
DECLARE
l_collection_name APEX_COLLECTIONS.COLLECTION_NAME%TYPE;
l_bind_names APEX_APPLICATION_GLOBAL.VC_ARR2;
l_bind_values APEX_APPLICATION_GLOBAL.VC_ARR2;
l_query VARCHAR2(32767);
BEGIN
l_collection_name := 'REPLACE_NAME';
-- Drop any previous copy so the collection is rebuilt from scratch.
IF apex_collection.collection_exists(l_collection_name)
THEN
apex_collection.delete_collection(
p_collection_name => l_collection_name
);
END IF;
--Query to initialize collection
--Do not reorder columns later as that could break the values in the tabular form (reorder in report)
l_query :=
'SELECT col_1, ' --c001 => f01
|| ' col_2, ' --c002 => f02
|| ' col_3, ' --c003 => f03
|| ' col_4, ' --c004 => f04
|| ' col_5, ' --c005 => f05
|| ' NULL, ' --c006 => f06
|| ' NULL, ' --c007 => f07
|| ' NULL, ' --c008 => f08
|| ' NULL, ' --c009 => f09
|| ' NULL, ' --c010 => f10
|| ' NULL, ' --c011 => f11
|| ' NULL, ' --c012 => f12
|| ' NULL, ' --c013 => f13
|| ' NULL, ' --c014 => f14
|| ' NULL, ' --c015 => f15
|| ' NULL, ' --c016 => f16
|| ' NULL, ' --c017 => f17
|| ' NULL, ' --c018 => f18
|| ' NULL, ' --c019 => f19
|| ' NULL, ' --c020 => f20
|| ' NULL, ' --c021 => f21
|| ' NULL, ' --c022 => f22
|| ' NULL, ' --c023 => f23
|| ' NULL, ' --c024 => f24
|| ' NULL, ' --c025 => f25
|| ' NULL, ' --c026 => f26
|| ' NULL, ' --c027 => f27
|| ' NULL, ' --c028 => f28
|| ' NULL, ' --c029 => f29
|| ' NULL, ' --c030 => f30
|| ' NULL, ' --c031 => f31
|| ' NULL, ' --c032 => f32
|| ' NULL, ' --c033 => f33
|| ' NULL, ' --c034 => f34
|| ' NULL, ' --c035 => f35
|| ' NULL, ' --c036 => f36
|| ' NULL, ' --c037 => f37
|| ' NULL, ' --c038 => f38
|| ' NULL, ' --c039 => f39
|| ' NULL, ' --c040 => f40
|| ' NULL, ' --c041 => f41
|| ' NULL, ' --c042 => f42
|| ' NULL, ' --c043 => f43
|| ' NULL, ' --c044 => f44
|| ' NULL, ' --c045 => f45
|| ' NULL, ' --c046 => f46
|| ' ''O'', ' --c047 (for record status)
|| ' wwv_flow_item.md5(col_1, col_2, col_3, col_4, col_5) ' --c048 (for optimistic locking)
--c049 (not used in collection/reserved for seq_id array)
--c050 (not used in collection/reserved for delete checkbox array)
|| 'FROM some_table '
|| 'WHERE some_col = :PXX_ITEM_NAME';
-- The single bind variable is supplied by name with the current value of the page item.
l_bind_names(1) := 'PXX_ITEM_NAME';
l_bind_values(1) := v('PXX_ITEM_NAME');
apex_collection.create_collection_from_query_b(
p_collection_name => l_collection_name,
p_query => l_query,
p_names => l_bind_names,
p_values => l_bind_values
);
-- An 'ADD' request appends one member with no attribute values set.
IF :REQUEST = 'ADD'
THEN
apex_collection.add_member(
p_collection_name => l_collection_name
);
END IF;
END;
Step two: Once the collection has been successfully created, you can build a report on it by querying the collection directly, and by using the APEX_ITEM package you turn these fields into proper page items. Notice that the fifth item is the checkbox linked to the 50th field of the collection. Once again, the code below is just an example:
-- Step 2: report query over the 'REPLACE_NAME' collection. Each collection
-- column is rendered through an APEX_ITEM call; the first select-list column
-- concatenates four hidden items with the delete checkbox.
SELECT apex_item.hidden(47,c047,NULL,'f47_'|| '#ROWNUM#')
|| apex_item.hidden(48,c048,NULL,'f48_'|| '#ROWNUM#')
|| apex_item.hidden(49,seq_id,NULL,'f49_'|| '#ROWNUM#')
|| apex_item.hidden(1,c001,NULL,'f01_'|| '#ROWNUM#')
-- Checkbox on array index 50 whose value is the member's seq_id.
-- NOTE(review): the fourth argument is seq_id only when c047 = 'D'; presumably
-- this pre-checks rows already flagged for deletion - confirm against the
-- APEX_ITEM.CHECKBOX documentation.
|| apex_item.checkbox(
50,
seq_id,
NULL,
CASE
WHEN c047 = 'D' THEN seq_id
END,
':',
'f50_' || '#ROWNUM#'
) AS delete_checkbox,
apex_item.text(
2,
c002,
20,
50,
NULL,
'f02_' || '#ROWNUM#'
) AS col_2,
apex_item.text(
3,
c003,
20,
50,
NULL,
'f03_' || '#ROWNUM#'
) AS col_3,
-- A NULL c004 / c005 is passed to the select lists as '-1', the same literal
-- that accompanies the '- Select Value -' entry below.
apex_item.select_list_from_lov_xl(
4,
NVL(c004, '-1'),
'NAMED_LOV',
NULL,
'YES',
'-1',
'- Select Value -',
'f04_' || '#ROWNUM#'
) AS col_4,
apex_item.select_list_from_lov(
5,
NVL(c005, '-1'),
'NAMED_LOV',
NULL,
'YES',
'-1',
'- Select Value -',
'f05_' || '#ROWNUM#'
) AS col_5
FROM apex_collections
WHERE collection_name = 'REPLACE_NAME'
ORDER BY c003
Step three: So now you have a collection displaying items that can be modified on the page, but will the changes be posted to the collection immediately? No - you need to update the collection first. The code below does that; it deals with updates, row creation and row deletion.
-- Step 3: copies the posted page arrays (g_f01..g_f05, g_f48..g_f50) back into
-- the 'REPLACE_NAME' collection, marking members as new ('N'), updated ('U')
-- or, on a 'DELETE' request, deleted ('D') in attribute c047.
DECLARE
    c_collection  CONSTANT APEX_COLLECTIONS.COLLECTION_NAME%TYPE := 'REPLACE_NAME';
    l_stored_md5  VARCHAR2(32);
    l_posted_md5  VARCHAR2(32);
    l_row_status  VARCHAR2(1);   -- 'N', 'U', or NULL when the member is left untouched
BEGIN
    FOR i IN 1 .. apex_application.g_f49.count
    LOOP
        l_row_status := NULL;

        IF apex_application.g_f01(i) IS NULL
        THEN
            -- No ID posted: the row was added on the page.
            l_row_status := 'N';
        ELSE
            -- ID posted: compare the checksum stored in the collection with a
            -- checksum of the values that were just submitted.
            SELECT c048
              INTO l_stored_md5
              FROM apex_collections
             WHERE collection_name = c_collection
               AND seq_id = apex_application.g_f49(i);

            l_posted_md5 := wwv_flow_item.md5(
                apex_application.g_f01(i),
                apex_application.g_f02(i),
                apex_application.g_f03(i),
                apex_application.g_f04(i),
                apex_application.g_f05(i)
            );

            IF l_stored_md5 != l_posted_md5
            THEN
                l_row_status := 'U';
            END IF;
        END IF;

        -- New and changed rows are written identically apart from the status flag.
        IF l_row_status IS NOT NULL
        THEN
            apex_collection.update_member(
                p_collection_name => c_collection,
                p_seq             => apex_application.g_f49(i),
                p_c001            => apex_application.g_f01(i),
                p_c002            => apex_application.g_f02(i),
                p_c003            => apex_application.g_f03(i),
                p_c004            => apex_application.g_f04(i),
                p_c005            => apex_application.g_f05(i),
                p_c047            => l_row_status,
                p_c048            => apex_application.g_f48(i)
            );
        END IF;
    END LOOP;

    -- On a delete request every checked row gets status 'D'.
    IF :REQUEST = 'DELETE'
    THEN
        FOR i IN 1 .. apex_application.g_f50.count
        LOOP
            apex_collection.update_member_attribute(
                p_collection_name => c_collection,
                p_seq             => apex_application.g_f50(i),
                p_attr_number     => '47',
                p_attr_value      => 'D'
            );
        END LOOP;
    END IF;
END;
Finally Step Four Here we update the table/s based on the changes made to the collection. Using the 47th and 50th fields we know which ones should be updated, deleted, or created:
-- Step 4: applies the pending changes recorded in the 'REPLACE_NAME'
-- collection to some_table. Attribute c047 selects the action per member:
-- 'N' inserts, 'U' updates after an optimistic-lock check, 'D' deletes.
-- Afterwards the collection is dropped and a summary is stored in
-- :PX_SUCCESS_MESSAGE.
-- Raises -20001 when the row to update has changed, or no longer exists,
-- since the collection was initialised.
DECLARE
    l_table_md5       VARCHAR2(32);
    l_collection_name APEX_COLLECTIONS.COLLECTION_NAME%TYPE;
    l_del_count       PLS_INTEGER := 0;
    l_upd_count       PLS_INTEGER := 0;
    l_ins_count       PLS_INTEGER := 0;
    l_success_message VARCHAR2(32767);

    -- Locks the target row and returns its current checksum. The checksum
    -- expression must stay identical to the one used to fill c048.
    CURSOR op_lock_check_cur (
        p_id IN NUMBER
    )
    IS
        SELECT wwv_flow_item.md5(col_1, col_2, col_3, col_4, col_5)
          FROM some_table
         WHERE col_1 = op_lock_check_cur.p_id
           FOR UPDATE;
BEGIN
    l_collection_name := 'REPLACE_NAME';

    -- Only the attributes that are actually used are selected.
    FOR x IN (
        SELECT c001, c002, c003, c004, c005, c047, c048
          FROM apex_collections
         WHERE collection_name = l_collection_name
           AND c047 IN ('N','U','D')
    )
    LOOP
        IF x.c047 = 'N'
        THEN
            INSERT INTO some_table(
                col_1,
                col_2,
                col_3,
                col_4,
                col_5
            ) VALUES (
                some_table_seq.nextval,
                x.c002,
                x.c003,
                x.c004,
                x.c005
            );
            l_ins_count := l_ins_count + 1;
        ELSIF x.c047 = 'U'
        THEN
            --Optimistic lock check
            --MD5 should be identical to collection initialization proc
            OPEN op_lock_check_cur(x.c001);
            FETCH op_lock_check_cur INTO l_table_md5;

            -- A fetch that finds no row leaves l_table_md5 untouched (NULL or
            -- the previous member's checksum), so a vanished row must be
            -- detected explicitly and treated as a conflicting change.
            IF op_lock_check_cur%NOTFOUND OR l_table_md5 != x.c048
            THEN
                -- Close before raising so the cursor is not left open.
                CLOSE op_lock_check_cur;
                raise_application_error(
                    -20001,
                    'Current version of data in database has changed '
                    || 'since user initiated update process.'
                );
            END IF;

            UPDATE some_table
               SET col_2 = x.c002,
                   col_3 = x.c003,
                   col_4 = x.c004,
                   col_5 = x.c005
             WHERE CURRENT OF op_lock_check_cur;
            CLOSE op_lock_check_cur;
            l_upd_count := l_upd_count + 1;
        ELSIF x.c047 = 'D'
        THEN
            DELETE FROM some_table
             WHERE col_1 = x.c001;
            l_del_count := l_del_count + 1;
        END IF;
    END LOOP;

    apex_collection.delete_collection(p_collection_name => l_collection_name);

    l_success_message :=
        l_ins_count || ' rows inserted, ' ||
        l_upd_count || ' rows updated, ' ||
        l_del_count || ' rows deleted';
    :PX_SUCCESS_MESSAGE := l_success_message;
END;
That would be all, but once again look the tutorial from tip to toe, there's a lot of info that I can't post in here.
Disclaimer: I'm not a skillbuilders representative, I don't own any rights on the video and I'm not promoting skillbuilders web page in any way. I'm just a programmer that found this tutorial useful.
So con is simply a condition that I am matching coming from a date generator I built. All output from this function is immutable. So I had the 'awesome' task of converting these outputs to strings. Reason being I wanted to append/prepend markup to the output. This gets very cumbersome when dealing with a lot of variables. 365 days to be exact.
# Question's original approach (abbreviated with "..." - not runnable as shown):
# one numbered variable per entry of `context`, 365 in total.
con0 = str(context[0])
con1 = str(context[1])
con2 = str(context[2])
con3 = str(context[3])
con4 = str(context[4])
con5 = str(context[5])
con6 = str(context[6])
con7 = str(context[7])
con8 = str(context[8])
con9 = str(context[9])
con10 = str(context[10])
con11 = str(context[11])
con12 = str(context[12])
...
con364 = str(context[364])
# For each entry keep the first 10 characters and remove the dashes.
day0 = con0[0:10].replace("-", "");
day1 = con1[0:10].replace("-", "");
day2 = con2[0:10].replace("-", "");
day3 = con3[0:10].replace("-", "");
day4 = con4[0:10].replace("-", "");
day5 = con5[0:10].replace("-", "");
day6 = con6[0:10].replace("-", "");
day7 = con7[0:10].replace("-", "");
day8 = con8[0:10].replace("-", "");
day9 = con9[0:10].replace("-", "");
day10 = con10[0:10].replace("-", "");
day11 = con11[0:10].replace("-", "");
day12 = con12[0:10].replace("-", "");
...
day364 = con364[0:10].replace("-", "");
# One quoted "/<year0>/<day>*" pattern per day. NOTE(review): year0 is not
# defined anywhere in this snippet - presumably set earlier by the author.
exclude = [ ' "/' + year0 + "/" + day0 + "*" + '"', ' "/' + year0 + "/" + day1 + "*" + '"', ' "/' + year0 + "/" + day2 + "*" + '"', ' "/' + year0 + "/" + day3 + "*" + '"', ' "/' + year0 + "/" + day4 + "*" + '"', ' "/' + year0 + "/" + day5 + "*" + '"', ' "/' + year0 + "/" + day6 + "*" + '"', ' "/' + year0 + "/" + day7 + "*" + '"', ' "/' + year0 + "/" + day8 + "*" + '"', ' "/' + year0 + "/" + day9 + "*" + '"', ' "/' + year0 + "/" + day10 + "*" + '"', ' "/' + year0 + "/" + day11 + "*" + '"', ' "/' + year0 + "/" + day12 ... + year0 + "/" + day364 + "*" + '"' ]
d0 = ' "*%s*"\n' % (day0)
y0 = ' "/%s/*"\n' % (year0)
# Adjacent string literals are concatenated into one format string with one
# ' %s\n' slot per listed exclude entry.
w0 = ' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n'' %s\n' % (exclude[7],exclude[8],exclude[9],exclude[10],exclude[11],exclude[12]....exclude[364])
Is there a more pythonic way to make bulk string substitutions and generate lists easier than using my for i bash loops to build them for me?
When you have many variables all ending with a number, that's an excellent sign that you should be using a single list instead. You can concisely construct a list using list comprehensions.
# Build each family of values as one list instead of 365 numbered variables.
# `context` and `year0` are expected to be defined by the surrounding program.
cons = [str(context[i]) for i in range(365)]
# Keep the first 10 characters of every entry and strip the dashes.
days = [con[0:10].replace("-", "") for con in cons]
# One quoted "/<year0>/<day>*" pattern per day.
exclude = [' "/{}/{}*"'.format(year0, day) for day in days]
# One day per line, each prefixed with a single space.
w0 = "\n".join(" " + day for day in days)
I have quite a number of text fields I need to deal with. In order to process them the first thing I need to do is to normalize the set of characters I deal with. I need my output string to contain the following;
A-Z, 0-9 and space and I want all lower case converted to upper case.
So I use the following in pl/sql;
-- Normalises X in three steps: the inner REGEXP_REPLACE turns every character
-- other than 0-9, A-Z, a-z and space into a space; the outer one collapses each
-- run of spaces into a single space; TRIM and UPPER finish the job.
X := UPPER(TRIM(REGEXP_REPLACE
(REGEXP_REPLACE(X, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
This is rather slow. What would be faster?
You can try this approach, which looks to be much faster based on some (very) loose tests. It's a function that's compiled natively:
-- clean_string: normalises free text to upper-case letters, digits and single
-- spaces.
--   in_string  text to clean; NULL yields NULL.
--   returns    in_string with every character other than 0-9, A-Z, a-z
--              replaced by a space, runs of spaces collapsed to one, leading
--              and trailing spaces removed, and letters upper-cased.
CREATE OR REPLACE FUNCTION clean_string(
    in_string IN VARCHAR2)
RETURN VARCHAR2 AS
    -- Sized for the PL/SQL maximum so long PL/SQL callers do not overflow.
    out_string VARCHAR2(32767) := '';
    -- One CHARACTER, not one byte: SUBSTR can return a multi-byte character,
    -- which does not fit a byte-sized CHAR(1) buffer.
    in_char    VARCHAR2(1 CHAR);
    out_char   VARCHAR2(1 CHAR);
    dec_char   PLS_INTEGER;
    prev_space BOOLEAN := FALSE;
BEGIN
    -- LENGTH(NULL) is NULL, so NVL keeps the loop bounds valid for NULL input.
    FOR pos IN 1 .. NVL(LENGTH(in_string), 0)
    LOOP
        in_char  := SUBSTR(in_string, pos, 1);
        dec_char := ASCII(in_char);

        -- Keep ASCII digits and letters; blank out everything else.
        IF (dec_char BETWEEN 48 AND 57)
           OR (dec_char BETWEEN 65 AND 90)
           OR (dec_char BETWEEN 97 AND 122)
        THEN
            out_char := in_char;
        ELSE
            out_char := ' ';
        END IF;

        -- Skip a space that directly follows another space.
        IF NOT (prev_space AND out_char = ' ')
        THEN
            out_string := out_string || out_char;
        END IF;

        prev_space := (out_char = ' ');
    END LOOP;

    RETURN TRIM(UPPER(out_string));
END clean_string;
/
-- Recompile the function natively for the timing comparison.
ALTER SESSION SET PLSQL_CODE_TYPE=NATIVE;
ALTER FUNCTION clean_string COMPILE;
And to test, I pulled 5 million rows from a table and cleaned some strings:
-- Timing harness: cleans six columns of up to 5,000,000 rows of my_table in
-- three ways and prints the elapsed time of each pass.
set serveroutput on
declare
-- sel_cur1 calls clean_string inside the SQL statement itself.
cursor sel_cur1 is
select name, clean_string(name) as cln_name,
address1, clean_string(address1) as cln_addr1,
address2, clean_string(address2) as cln_addr2,
city, clean_string(city) as cln_city,
state, clean_string(state) as cln_state,
postalcode, clean_string(postalcode) as cln_zip
from my_table
where rownum <= 5000000;
-- sel_cur2 fetches the raw columns; the cleaning happens in the PL/SQL loop.
cursor sel_cur2 is
select name,
address1,
address2,
city,
state,
postalcode
from my_table
where rownum <= 5000000;
l_cnt integer := 0;
-- NOTE(review): the cleaned values are stored in varchar2(100) locals, which
-- assumes no cleaned column value exceeds 100 characters - confirm.
l_cln_name varchar2(100);
l_cln_addr1 varchar2(100);
l_cln_addr2 varchar2(100);
l_cln_city varchar2(100);
l_cln_state varchar2(100);
l_cln_zip varchar2(100);
l_interval interval day to second(4);
l_start timestamp;
l_end timestamp;
begin
-- Pass 1: clean_string called from PL/SQL for every column of every row.
l_start := systimestamp;
for rec in sel_cur2
loop
l_cnt := l_cnt + 1;
l_cln_name := clean_string(rec.name);
l_cln_addr1 := clean_string(rec.address1);
l_cln_addr2 := clean_string(rec.address2);
l_cln_city := clean_string(rec.city);
l_cln_state := clean_string(rec.state);
l_cln_zip := clean_string(rec.postalcode);
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Procedural approach timing: ' || l_interval);
-------------------------------------------------
-- Pass 2: clean_string called from the SQL query; the loop only counts rows.
l_cnt := 0;
l_start := systimestamp;
for rec in sel_cur1
loop
-- cleaning already done in SQL
l_cnt := l_cnt + 1;
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('SQL approach timing: ' || l_interval);
-------------------------------------------------
-- Pass 3: the original nested REGEXP_REPLACE expression evaluated in PL/SQL.
l_cnt := 0;
l_start := systimestamp;
for rec in sel_cur2
loop
l_cnt := l_cnt + 1;
l_cln_name := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.name, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_addr1 := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.address1, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_addr2 := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.address2, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_city := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.city, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_state := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.state, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
l_cln_zip := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(rec.postalcode, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
end loop;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Existing approach timing: ' || l_interval);
end;
And the output was:
Procedural approach timing: +00 00:02:04.0320
SQL approach timing: +00 00:02:49.4326
Existing approach timing: +00 00:05:50.1607
Also, the native compilation seems to only help a procedural approach to the processing (rather than calling the function from a SQL query), but appears to be much faster than the regexp_replace solution. Hope that helps.
First, let me say that I am not really answering my own question, but I am accepting tbone's answer. The reason for providing this answer, is the comments don't let me post what I really want.
I created a function almost identical to tbone's with a couple of tweaks, got rid of the UPPER by changing how I handle the lower case range of characters, and changed numbers to binary_integers.
-- CLEAN_STRING: normalises free text to upper-case letters, digits and single
-- spaces.
--   IN_STRING  text to clean; NULL yields NULL.
--   returns    IN_STRING with a-z mapped to A-Z, every character other than
--              0-9 / A-Z / a-z replaced by a space, runs of spaces collapsed
--              to one, and leading and trailing spaces removed.
FUNCTION CLEAN_STRING(IN_STRING IN VARCHAR2) RETURN VARCHAR2
AS
    OUT_STRING VARCHAR2(32767) := '';
    -- One CHARACTER, not one byte: SUBSTR can return a multi-byte character,
    -- which does not fit a byte-sized CHAR(1) buffer.
    IN_CHAR    VARCHAR2(1 CHAR);
    OUT_CHAR   VARCHAR2(1 CHAR);
    DEC_CHAR   BINARY_INTEGER;
    PREV_SPACE BOOLEAN := FALSE;
BEGIN
    -- LENGTH(NULL) is NULL, so NVL keeps the loop bounds valid for NULL input.
    FOR POS IN 1 .. NVL(LENGTH(IN_STRING), 0)
    LOOP
        IN_CHAR  := SUBSTR(IN_STRING, POS, 1);
        DEC_CHAR := ASCII(IN_CHAR);

        IF (DEC_CHAR BETWEEN 48 AND 57) OR (DEC_CHAR BETWEEN 65 AND 90)
        THEN
            -- Digit or upper-case letter: keep as is.
            OUT_CHAR := IN_CHAR;
        ELSIF DEC_CHAR BETWEEN 97 AND 122
        THEN
            -- Lower-case letter: shift into the upper-case ASCII range, which
            -- avoids a separate UPPER call on the result.
            OUT_CHAR := CHR(DEC_CHAR - 32);
        ELSE
            -- Anything else is blanked out.
            OUT_CHAR := ' ';
        END IF;

        -- Skip a space that directly follows another space.
        IF NOT (PREV_SPACE AND OUT_CHAR = ' ')
        THEN
            OUT_STRING := OUT_STRING || OUT_CHAR;
        END IF;

        PREV_SPACE := (OUT_CHAR = ' ');
    END LOOP;

    RETURN TRIM(OUT_STRING);
END CLEAN_STRING;
I then created a simple test rig like tbone did, but I tested the three different routines against each other. First I verify that they all return the same results and then time each routine. Here is the test rig;
-- Test rig: first checks that three cleaning approaches (nested REGEXP_REPLACE,
-- pathmast_utility_3.CLEAN_STRING, TRANSLATE + REGEXP_REPLACE) agree on the
-- rows returned by PATHMAST_CURS, then times each approach separately.
set serveroutput on
DECLARE
CURSOR PATHMAST_CURS
IS
SELECT PATHMAST_TEXT_DIAGNOSIS FROM PATHMAST WHERE ROWNUM < 100000;
DUMMY CLOB; -- regexp result (reference value in the comparison pass)
DUMMY_1 CLOB; -- CLEAN_STRING result
DUMMY_2 CLOB; -- TRANSLATE result
l_interval interval day to second(4);
l_start timestamp;
l_end timestamp;
diff_count_1 binary_integer := 0;
diff_count_2 binary_integer := 0;
BEGIN
-- Comparison pass: count the rows where an alternative differs from the regexp result.
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '), '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
DUMMY_1 := pathmast_utility_3.CLEAN_STRING(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '));
-- NOTE(review): the TRANSLATE to-string has 27 characters while the from-string
-- is longer; per the Oracle TRANSLATE documentation, from-string characters
-- without a counterpart are removed rather than replaced by a space - verify
-- this is the intended behaviour.
DUMMY_2 := regexp_replace(trim(translate(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '),'abcdefghijklmnopqrstuvwxyz`~!##$%^&*()''_+-={[}]|/\":;,.<>?µ’±€'||chr(9),'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')),'( )* ',' ');
IF DUMMY_1 != DUMMY
THEN
diff_count_1 := diff_count_1 + 1;
END IF;
IF DUMMY_2 != DUMMY
THEN
diff_count_2 := diff_count_2 + 1;
dbms_output.put_line('Regexp: ' || DUMMY);
dbms_output.put_line('Translate: ' || DUMMY_2);
END IF;
END LOOP;
dbms_output.put_line('CLEAN_STRING differences: ' || diff_count_1);
dbms_output.put_line('Translate differences: ' || diff_count_2);
-- Timing pass 1: nested REGEXP_REPLACE (column passed without the NVL wrapper).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := UPPER(TRIM(REGEXP_REPLACE(REGEXP_REPLACE(PATH_REC.PATHMAST_TEXT_DIAGNOSIS, '[^0-9A-Za-z ]', ' '),'( )* ',' ')));
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('Regexp approach timing: ' || l_interval);
-------------------------------------------------
-- Timing pass 2: CLEAN_STRING (column passed without the NVL wrapper).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := pathmast_utility_3.CLEAN_STRING(PATH_REC.PATHMAST_TEXT_DIAGNOSIS);
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('CLEAN_STRING approach timing: ' || l_interval);
-------------------------------------------------
-- Timing pass 3: TRANSLATE followed by one REGEXP_REPLACE (keeps the NVL wrapper).
l_start := systimestamp;
FOR PATH_REC IN PATHMAST_CURS
LOOP
DUMMY := regexp_replace(trim(translate(NVL(PATH_REC.PATHMAST_TEXT_DIAGNOSIS,' '),'abcdefghijklmnopqrstuvwxyz`~!##$%^&*()''_+-={[}]|/\":;,.<>?µ’±€'||chr(9),'ABCDEFGHIJKLMNOPQRSTUVWXYZ ')),'( )* ',' ');
END LOOP;
l_end := systimestamp;
l_interval := l_end - l_start;
dbms_output.put_line('TRANSLATE approach timing: ' || l_interval);
-------------------------------------------------
END;
And here are the results;
anonymous block completed
CLEAN_STRING differences: 0
Translate differences: 0
Regexp approach timing: +00 00:00:52.9160
CLEAN_STRING approach timing: +00 00:00:05.5220
TRANSLATE approach timing: +00 00:00:13.4320
This is all without compiling native. So tbone is the big winner. Thank you tbone.
If for whatever reason you want/need to use the translate version, you should build the translate string programmatically in order to get all of the special characters.
Perhaps, you can use TRANSLATE instead of regex to remove special characters and convert lower case to upper case.
-- Upper-cases a-z and turns the listed special characters of x into spaces
-- with one TRANSLATE call, trims the result, then collapses every run of two
-- or more spaces into a single space.
regexp_replace(
    trim(
        translate(x,
            'abcdefghijklmnopqrstuvwxyz`~!##$%^&*()_+-={[}]|/\"'':;,.<>?',
            -- TRANSLATE deletes any from-string character that has no
            -- counterpart in the to-string, so the to-string is padded with
            -- spaces to the full from-string length (26 letters + 32 specials).
            rpad('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 58, ' ')
        )
    ),
    ' {2,}',
    ' '
)
Tried it on a table with 1000 rows and column with random characters from anywhere between 1 to 4000.
Resulted in around 35% less time.(Did not try in PLSQL).