Power BI: Renaming many tables and columns using the keyboard - powerbi

Is there a way to use the keyboard to rename tables and columns in Power BI? I have hundreds (or thousands) of columns and tables whose names need to be more human-readable than what is in the database. Using right-click | rename is very slow. Tabbing to the column and hitting F2 doesn't appear to work. What is the keystroke to enter rename mode?
Or... Is there a way to open a .pbix file in a text editor so I can do the work there? (Certainly Microsoft must have chosen some open, standard, portable format for the file -- like XML? ;) ) I have unzipped the file, but the DataModel file appears to be a binary and not an archive.

Based on user12439754's answer...
(The "ease of use" for this task within Power BI is horrible.)
Since I'm using SQL Server, I was able to write a script that does much of the work.
Issues/future enhancements:
Parameterize the schema (or search for all of them).
Remove the comma at the end of the #"Renamed Columns" definition.
Usage:
Run the script.
Remove the comma at the end of #"Renamed Columns".
Move column names to #"Removed Columns" as needed.
Change the names
to what you want the users to see. Paste the result (one table at a
time) into the advanced editor.
-- Generates one Power Query (M) "let ... in" script per table/view of a schema.
-- Each script contains an empty Table.RemoveColumns step and a
-- Table.RenameColumns step pre-filled with identity pairs {"col", "col"} that
-- can then be edited by hand. The result set is one line of M per row,
-- ordered by id.
--
-- Notes:
--   * T-SQL variables use the @ sigil; the # characters inside the string
--     literals (#"Removed Columns", #"Renamed Columns") are M syntax.
--   * The last pair of each table is emitted WITHOUT a trailing comma, so the
--     generated list does not need a trailing comma removed by hand.
--   * The server/database in the Source step are still hard-coded below.

declare @schema sysname = 'dbo'   -- schema to script; change as needed

-- Ordered work list: one row per (table, column).
declare @q table (
    id  int identity(1,1) not null,
    tbl sysname not null,
    col sysname not null
)

-- sys.objects/sys.columns replace the deprecated sys.sysobjects/sys.syscolumns
-- compatibility views; the latter also lists stored-procedure parameters,
-- which are not columns. Only user tables (U) and views (V) are scripted.
insert @q (tbl, col)
select o.name
     , c.name
from sys.objects o
inner join sys.columns c on c.object_id = o.object_id
inner join sys.schemas s on s.schema_id = o.schema_id
where s.name = @schema
  and o.type in ('U', 'V')
order by o.name
       , c.column_id

-- @tbl = table currently being scripted, @t = table of the row at @i,
-- @c = bare column name (the formatted pair is built at insert time so that
-- long column names are not truncated by a too-narrow variable).
declare @tbl sysname, @t sysname, @c sysname
declare @i int = 1, @max int

select @tbl = (select top 1 tbl from @q order by id)
select @max = count(*) from @q

-- Output buffer: one generated line per row.
declare @out table (
    id int identity(1,1) not null,
    a  nvarchar(4000) not null
)

while @i <= @max
begin
    select @t = (select tbl from @q where id = @i)

    -- Script header for the table that starts at row @i.
    insert @out (a)
    values ('let')
         , (' Source = Sql.Database("FinancialDM", "FinancialDataMart"),')
         , (' ' + @schema + '_' + @t + ' = Source{[Schema="' + @schema + '",Item="' + @t + '"]}[Data],')
         , (' #"Removed Columns" = Table.RemoveColumns(' + @schema + '_' + @t + ',{}),')
         , (' #"Renamed Columns" = Table.RenameColumns(#"Removed Columns",{')

    -- One rename pair per column of the current table.
    while @tbl = @t and @i <= @max
    begin
        select @c = (select col from @q where id = @i)
        set @i = @i + 1
        -- Look ahead: NULL once @i runs past the last row, which ends the loop.
        select @t = (select tbl from @q where id = @i)

        -- Add the separator only when another column of the same table follows.
        insert @out (a)
        values (' {"' + @c + '", "' + @c + '"}' + case when @t = @tbl then ',' else '' end)
    end

    -- Script footer plus blank separator lines.
    insert @out (a)
    values (' })')
         , ('in')
         , (' #"Renamed Columns"')
         , ('')
         , ('')
         , ('')

    set @tbl = @t
end

select id, a
from @out
order by id

Sorry in advance if this only partially solves your problem-
One possible way to speed up the renaming of multiple columns within a table would be:
You will still need to change the table names manually - but this may speed up the column renaming especially if you have lots of tables with 20+ columns
Open Power Query Editor
Navigate to the table you wish to change the columns in (rename it while you're at it)
Reorder a column
Take the column names from the reorder columns step in the advanced editor
Manipulate in Excel or a text editor of your choice (this may require some work the first time but you can create something that will generate the necessary output)
Insert your string of changed column names in format: {"Column Original", "Column Changed"} to a newly inserted step #"Renamed Columns" = Table.RenameColumns(#"Last Step",{{"Column Original", "Column Changed"},{"Column Original", "Column Changed"},{"Column Original", "Column Changed"}}),

Related

Is there any way to get customer duplicate data from customers table then update all others except 1 row from duplicate data?

I am trying to get duplicate data of my customers table then after finding update isactive column of all duplicates found to 0 except 1 row of the duplicate data.
here is my script using oracle 19c:
-- Deactivates every duplicate customer row except the first one of each
-- (full name, mobile, branchid) group, where "first" is the lowest ROWID.
--
-- The source set contains only the ROWIDs of the rows to deactivate (rn > 1)
-- and the merge joins on ROWID, so each target row matches at most one source
-- row. Joining on (mobile, full name) instead lets one target row match
-- several source rows -- which raises ORA-30926 -- and would also match the
-- rn = 1 row that is supposed to stay active.
merge into customers c
using (
    select rid
    from (
        select rowid as rid,
               row_number() over (
                   partition by fn_createfullname(firstname, middlename, lastname), mobile, branchid
                   order by rowid
               ) as rn
        from customers
    )
    where rn > 1
) dup
on (c.rowid = dup.rid)
when matched then update
    set c.isactive = 0;
I am expecting to find all duplicate rows and then deactivate every row of each duplicate group except one.
Any help would be appreciated.
after running my query is displaying this error:
Error report - ORA-30926: unable to get a stable set of rows in the
source tables

AWS Redshift dynamically select column name from RECORD

I have created below procedure in AWS redshift.
In the query2 (at ????) I want to select the column from rec based on value provided in field input variable.
e.g. if field = 'Fname' then in query2 it should insert rec.Fname.
Please let me know how to select column names dynamically from RECORD in open cursor.
-- Copies (id, <field name>, <field value>, load_date) for every row of
-- source_table into target_table, one INSERT per source row.
--
-- Parameters:
--   source_table  name of the table to read (concatenated into the SQL as-is)
--   target_table  name of the table to insert into (concatenated as-is)
--   field         name of the source column whose value is copied
--
-- A RECORD field cannot be addressed by a name held in a variable, so the
-- dynamic column is aliased to the fixed name field_value in the cursor query
-- and read as rec.field_value inside the loop.
-- NOTE(review): the table and column names are spliced into the SQL unquoted;
-- only call this with trusted identifiers.
CREATE OR REPLACE PROCEDURE test3(source_table varchar(100), target_table varchar(100), field varchar(100) )
LANGUAGE plpgsql
AS $$
declare
    query1 text;
    query2 text;
    rec RECORD;
begin
    query1 := 'SELECT id, ' || field || ' AS field_value, load_date, end_date FROM ' || source_table || ' ORDER BY id, load_date';
    FOR rec IN execute query1
    loop
        query2 := 'insert into '|| target_table ||' values ('||quote_literal(rec.id)||', '||quote_literal(field)||','||quote_literal(rec.field_value)||','||quote_literal(rec.load_date)||')';
        execute query2;
    END LOOP;
    RETURN;
END;
$$
;
It is early here so let me just reference an answer I gave for a similar situation (inserting instead of selecting). This should get you started - How to join System tables or Information Schema tables with User defined tables in Redshift
The code looks like:
-- Rebuilds table fred as a row-by-row copy of pg_table_def (schema, table,
-- column and type names) for every schema other than pg_catalog.
CREATE OR REPLACE PROCEDURE rewrite_data()
AS
$$
DECLARE
    def_row RECORD;
BEGIN
    DROP TABLE IF EXISTS fred;

    CREATE TABLE fred (
        schemaname VARCHAR(256),
        tablename  VARCHAR(256),
        "column"   VARCHAR(256),
        "type"     VARCHAR(256)
    );

    -- Walk the catalog rows with a cursor loop and insert each one.
    FOR def_row IN
        SELECT "schemaname"::text, "tablename"::text, "column"::text, "type"::text
        FROM pg_table_def
        WHERE "schemaname" <> 'pg_catalog'
    LOOP
        INSERT INTO fred (schemaname, tablename, "column", "type")
        VALUES (def_row.schemaname, def_row.tablename, def_row."column", def_row."type");
    END LOOP;
END;
$$ LANGUAGE plpgsql;

CALL rewrite_data();

SELECT
    schemaname,
    tablename,
    "column",
    "type"
FROM fred;
Given that you have gotten this far on your stored procedure this should get you over the finish line.

how to write regex for list of items sharing the same roots

So I have a list of keywords:
['xxxxl','xxxl','xxl','xl','xxxxt','xxxt','xxt','xt']
In bigquery, I want to write a regex, inside the following sql code
-- Sketch of the intended query: keep only the rows whose value does NOT match
-- the pattern. `table` and `regex` are placeholders for the real table name
-- and the pattern built from the keyword list.
SELECT my_column
FROM table
WHERE NOT REGEXP_CONTAINS(LOWER(my_column), regex)
so that my output table contains only the values that don't match any of the items in keywords list.
Thanks
Below is for BigQuery Standard SQL
#standardSQL
WITH `project.dataset.lookup_table` AS (
  SELECT ['xxxxl','xxxl','xxl','xl','xxxxt','xxxt','xxt','xt'] AS keywords
),
exclusion AS (
  -- Single row: all keywords lower-cased and joined with '|' into one regex.
  SELECT STRING_AGG(LOWER(keyword), '|') AS exclude_pattern
  FROM `project.dataset.lookup_table`
  CROSS JOIN UNNEST(keywords) AS keyword
)
-- Keep only the rows whose lower-cased value contains none of the keywords.
SELECT t.my_column
FROM `project.dataset.table` AS t
CROSS JOIN exclusion AS x
WHERE NOT REGEXP_CONTAINS(LOWER(t.my_column), x.exclude_pattern)
You can test / play with above using below simplified example
#standardSQL
WITH `project.dataset.lookup_table` AS (
  SELECT ['xxxxl','xxxl','xxl','xl','xxxxt','xxxt','xxt','xt'] AS keywords
),
`project.dataset.table` AS (
  -- Two-row sample input: one value that matches a keyword, one that does not.
  SELECT 'xxxxl' AS my_column UNION ALL
  SELECT 'abc'
),
exclusion AS (
  -- Single row: all keywords lower-cased and joined with '|' into one regex.
  SELECT STRING_AGG(LOWER(keyword), '|') AS exclude_pattern
  FROM `project.dataset.lookup_table`
  CROSS JOIN UNNEST(keywords) AS keyword
)
SELECT t.my_column
FROM `project.dataset.table` AS t
CROSS JOIN exclusion AS x
WHERE NOT REGEXP_CONTAINS(LOWER(t.my_column), x.exclude_pattern)
with output
Row my_column
1 abc

Indexing on temporary tables in Azure SQL Data Warehouse

Does Azure SQL Data Warehouse support any indexing on temporary tables? No mention of the limitation is found at https://learn.microsoft.com/en-us/azure/sql-data-warehouse/sql-data-warehouse-tables-temporary.
In one attempt at such, I received:
Cannot create a non-clustered index on a temporary table.
Does that wording infer a clustered index can be placed on a temporary table?
In one of my projects, using temporary tables with indexes on several columns, and with the option of adding new columns to them, was quite critical for me (ALTER TABLE is also not supported for temp tables). A standard table was not an option, as each query run had to have its own table.
Finally, I used something that can serve as a temporary table but in fact is not one: I create a standard table with a GUID-based name and assign that name to a variable. So instead of #TempTable I use @MySemiTempTable, but that works only in generated code, so I have to run that code using sp_executesql. Example below:
-- Emulates a per-run temporary table with a regular table whose name is
-- generated at run time. Because the name lives in a variable, every statement
-- that touches the table is built as a string and run through sp_executesql.
-- T-SQL variables use the @ sigil (a leading # would denote a temp table name).
declare @MySemiTempTable NVARCHAR(MAX)
declare @sql NVARCHAR(MAX)

-- Unique table name: 'TMP_' followed by a freshly generated GUID.
set @MySemiTempTable = 'TMP_' + CAST(NEWID () AS NVARCHAR(37))

set @sql = 'CREATE TABLE ['+ @MySemiTempTable + '] (Column1 [int], Column2 NVARCHAR(50))'
EXECUTE sp_executesql @sql

set @sql = 'INSERT INTO ['+ @MySemiTempTable + '] VALUES (1, ''test1'')'
EXECUTE sp_executesql @sql

set @sql = 'create unique index column1 on ['+ @MySemiTempTable + '] (column1)'
EXECUTE sp_executesql @sql

set @sql = 'select * from ['+ @MySemiTempTable + '] '
EXECUTE sp_executesql @sql

-- The table is a regular one, so it has to be dropped explicitly.
set @sql = 'drop table ['+ @MySemiTempTable + '] '
EXECUTE sp_executesql @sql
Clustered indexes are supported on temp tables. For example:
-- Temporary table created with a clustered index on c1.
CREATE TABLE #temp_table (
    c1 INTEGER,
    c2 INTEGER
)
WITH (CLUSTERED INDEX (c1 ASC));

Redshift. Convert comma delimited values into rows

I am wondering how to convert comma-delimited values into rows in Redshift. I am afraid that my own solution isn't optimal. Please advise. I have a table in which one of the columns holds comma-separated values. For example:
I have:
user_id|user_name|user_action
-----------------------------
1 | Shone | start,stop,cancell...
I would like to see
user_id|user_name|parsed_action
-------------------------------
1 | Shone | start
1 | Shone | stop
1 | Shone | cancell
....
A slight improvement over the existing answer is to use a second "numbers" table that enumerates all of the possible list lengths and then use a cross join to make the query more compact.
Redshift does not have a straightforward method for creating a numbers table that I am aware of, but we can use a bit of a hack from https://www.periscope.io/blog/generate-series-in-redshift-and-mysql.html to create one using row numbers.
Specifically, if we assume the number of rows in cmd_logs is larger than the maximum number of commas in the user_action column, we can create a numbers table by counting rows. To start, let's assume there are at most 99 commas in the user_action column:
-- Builds table numbers (n = 1, 2, ...) by numbering rows of cmd_logs and
-- keeping at most 100 of them.
SELECT
    CAST(ROW_NUMBER() OVER (ORDER BY TRUE) AS INT) AS n
INTO numbers
FROM cmd_logs
LIMIT 100;
If we want to get fancy, we can compute the number of commas from the cmd_logs table to create a more precise set of rows in numbers:
-- Builds table numbers with n = 1 .. (largest comma count in user_action) + 1,
-- using the row numbers of cmd_logs as the number source.
SELECT
    CAST(seq.n AS INT) AS n
INTO numbers
FROM (
    SELECT ROW_NUMBER() OVER (ORDER BY TRUE) AS n
    FROM cmd_logs
) AS seq
CROSS JOIN (
    SELECT MAX(REGEXP_COUNT(user_action, '[,]')) AS max_num
    FROM cmd_logs
) AS bounds
WHERE seq.n <= bounds.max_num + 1;
Once there is a numbers table, we can do:
-- One output row per (log row, n) whose n-th comma-separated part of
-- user_action is neither NULL nor empty.
SELECT
    logs.user_id,
    logs.user_name,
    SPLIT_PART(logs.user_action, ',', nums.n) AS parsed_action
FROM cmd_logs AS logs
CROSS JOIN numbers AS nums
WHERE SPLIT_PART(logs.user_action, ',', nums.n) IS NOT NULL
  AND SPLIT_PART(logs.user_action, ',', nums.n) <> '';
Another idea is to transform your CSV string into JSON first, followed by JSON extract, along the following lines:
-- Turn the comma-separated list into a JSON array of strings, e.g.
-- start,stop -> ["start", "stop"]. The character being replaced must be the
-- list delimiter ','.
... '["' || replace( user_action, ',', '", "' ) || '"]' AS replaced
-- NOTE(review): JSON array positions are zero-based, so numbers.i presumably
-- has to start at 0 here -- confirm against the numbers table in use.
... JSON_EXTRACT_ARRAY_ELEMENT_TEXT(replaced, numbers.i) AS parsed_action
Where "numbers" is the table from the first answer. The advantage of this approach is the ability to use built-in JSON functionality.
If you know that there are not many actions in your user_action column, you use recursive sub-querying with union all and therefore avoiding the aux numbers table.
But it requires you to know the number of actions for each user, either adjust initial table or make a view or a temporary table for it.
Data preparation
Assuming you have something like this as a table:
-- Sample input: one row per user, with the actions as a comma-separated string.
CREATE TEMPORARY TABLE actions (
    user_id     VARCHAR,
    user_name   VARCHAR,
    user_action VARCHAR
);
I'll insert some values in it:
-- Three sample users.
INSERT INTO actions (user_id, user_name, user_action)
VALUES
    (1, 'Shone', 'start,stop,cancel'),
    (2, 'Gregory', 'find,diagnose,taunt'),
    (3, 'Robot', 'kill,destroy');
Here's an additional table with temporary count
-- Copy of actions with the number of list items per row precomputed
-- (comma count + 1).
CREATE TEMPORARY TABLE actions_with_counts (
    id          VARCHAR,
    name        VARCHAR,
    num_actions INTEGER,
    actions     VARCHAR
);

INSERT INTO actions_with_counts (id, name, num_actions, actions)
SELECT
    src.user_id,
    src.user_name,
    REGEXP_COUNT(src.user_action, ',') + 1 AS num_actions,
    src.user_action
FROM actions AS src;
This would be our "input table" and it looks just as you expected
SELECT
    id,
    name,
    num_actions,
    actions
FROM actions_with_counts;
id
name
num_actions
actions
2
Gregory
3
find,diagnose,taunt
3
Robot
2
kill,destroy
1
Shone
3
start,stop,cancel
Again, you can adjust initial table and therefore skipping adding counts as a separate table.
Sub-query to flatten the actions
Here's the unnesting query:
-- Emits one row per action: the anchor member takes the first item of every
-- list, and each recursive step takes the next item until idx reaches
-- num_actions for that user.
WITH RECURSIVE tmp (user_id, user_name, idx, user_action) AS (
    SELECT
        awc.id,
        awc.name,
        1 AS idx,
        SPLIT_PART(awc.actions, ',', 1) AS user_action
    FROM actions_with_counts AS awc

    UNION ALL

    SELECT
        tmp.user_id,
        tmp.user_name,
        tmp.idx + 1 AS idx,
        SPLIT_PART(awc.actions, ',', tmp.idx + 1)
    FROM actions_with_counts AS awc
    INNER JOIN tmp
        ON awc.id = tmp.user_id
    WHERE tmp.idx < awc.num_actions
)
SELECT
    user_id,
    user_name,
    user_action AS parsed_action
FROM tmp
ORDER BY user_id;
This will create a new row for each action, and the output would look like this:
user_id
user_name
parsed_action
1
Shone
start
1
Shone
stop
1
Shone
cancel
2
Gregory
find
2
Gregory
diagnose
2
Gregory
taunt
3
Robot
kill
3
Robot
destroy
Here are two ways to achieve this.
In my example, I'm assuming that I am accepting a comma separated list of values. My values look like schema.table.column.
The first involves using a recursive CTE.
-- Sample input: a single row holding the comma-separated list.
DROP TABLE IF EXISTS #dep_tbl;

CREATE TABLE #dep_tbl AS
SELECT 'schema.foobar.insert_ts,schema.baz.load_ts' AS dep;

-- level is the 1-based position of the item being extracted; to_split is the
-- number of commas in the list, so the recursion adds a row while
-- level <= to_split.
WITH RECURSIVE tmp (level, dep_split, to_split) AS (
    SELECT
        1 AS level,
        SPLIT_PART(src.dep, ',', 1) AS dep_split,
        REGEXP_COUNT(src.dep, ',') AS to_split
    FROM #dep_tbl AS src

    UNION ALL

    SELECT
        tmp.level + 1 AS level,
        SPLIT_PART(src.dep, ',', tmp.level + 1) AS dep_split,
        tmp.to_split
    FROM #dep_tbl AS src
    INNER JOIN tmp
        ON tmp.dep_split IS NOT NULL
        AND tmp.level <= tmp.to_split
)
SELECT dep_split
FROM tmp;
the above yields:
|dep_split|
|schema.foobar.insert_ts|
|schema.baz.load_ts|
The second involves a stored procedure.
-- Splits a comma-separated list into rows of temp table #dep_holder.
--
-- Parameter:
--   dependencies_csv  comma-separated values; when NULL the table is still
--                     (re)created but stays empty.
--
-- The list has its whitespace removed and is quoted as one literal; every
-- comma inside it is then replaced by '),(' so the result becomes a
-- multi-row VALUES list, which is inserted with a single dynamic INSERT.
CREATE OR REPLACE PROCEDURE so_test(dependencies_csv varchar(max))
LANGUAGE plpgsql
AS $$
DECLARE
    dependencies_csv_vals varchar(max);
BEGIN
    drop table if exists #dep_holder;
    create table #dep_holder
    (
        avoid varchar(60000)
    );
    IF dependencies_csv is not null THEN
        dependencies_csv_vals:='('||replace(quote_literal(regexp_replace(dependencies_csv,'\\s','')),',', '\'),(\'') ||')';
        execute 'insert into #dep_holder values '||dependencies_csv_vals||';';
    END IF;
END;
$$
;

-- The call needs its own terminator so that it and the following select are
-- two separate statements.
call so_test('schema.foobar.insert_ts,schema.baz.load_ts');

select
    *
from
    #dep_holder;
the above yields:
|dep_split|
|schema.foobar.insert_ts|
|schema.baz.load_ts|
in conclusion
If you only care about one single column in your input (the X delimited values), then I think the stored procedure is easier/faster.
However, if you have other columns you care about and want to keep those columns along with your comma-separated value column now transformed to rows, OR if you want to know the argument (the original list of delimited values), I think the recursive CTE is the way to go. In that case, you can just add those other columns to the columns selected in the recursive query.
You can get the expected result with the following query. I'm using "UNION ALL" to convert a column to row.
-- One branch per list position; covers the first three comma-separated parts
-- of user_action.
SELECT
    user_id,
    user_name,
    SPLIT_PART(user_action, ',', 1) AS parsed_action
FROM cmd_logs

UNION ALL

SELECT
    user_id,
    user_name,
    SPLIT_PART(user_action, ',', 2) AS parsed_action
FROM cmd_logs

UNION ALL

SELECT
    user_id,
    user_name,
    SPLIT_PART(user_action, ',', 3) AS parsed_action
FROM cmd_logs
Here's my equally-terrible answer.
I have a users table, and then an events table with a column that is just a comma-delimited string of users at said event. eg
event_id | user_ids
1 | 5,18,25,99,105
In this case, I used the LIKE and wildcard functions to build a new table that represents each event-user edge.
-- One row per (event, user) edge derived from the comma-separated user_ids.
-- Both the list and the id are wrapped in commas before matching, so an id
-- only matches a whole list item: without the delimiters, id 5 would also
-- match 105 (or 15, 25, ...).
-- Assumes the list contains no spaces around the commas, as in the example.
SELECT e.event_id, u.id AS user_id
FROM events e
LEFT JOIN users u
    ON (',' || e.user_ids || ',') LIKE ('%,' || CAST(u.id AS VARCHAR) || ',%')
It's not pretty, but I throw it in a WITH clause so that I don't have to run it more than once per query. I'll likely just build an ETL to create that table every night anyway.
Also, this only works if you have a second table that does have one row per unique possibility. If not, you could do LISTAGG to get a single cell with all your values, export that to a CSV and reupload that as a table to help.
Like I said: a terrible, no-good solution.
Late to the party but I got something working (albeit very slow though)
-- nums holds n = 1 .. (longest JSON array length) + 1, numbered from the rows
-- of a helper table; each array element is then read at zero-based position
-- n - 1, skipping empty results and positions beyond the row's own array.
WITH nums AS (
    SELECT CAST(seq.n AS INT) AS n
    FROM (
        SELECT ROW_NUMBER() OVER (ORDER BY TRUE) AS n
        FROM table_with_enough_rows_to_cover_range
    ) AS seq
    CROSS JOIN (
        SELECT MAX(JSON_ARRAY_LENGTH(json_column)) AS max_num
        FROM table_with_json_column
    ) AS bounds
    WHERE seq.n <= bounds.max_num + 1
)
SELECT
    *,
    JSON_EXTRACT_ARRAY_ELEMENT_TEXT(json_column, nums.n - 1) AS parsed_json
FROM nums
CROSS JOIN table_with_json_column
WHERE JSON_EXTRACT_ARRAY_ELEMENT_TEXT(json_column, nums.n - 1) <> ''
  AND nums.n <= JSON_ARRAY_LENGTH(json_column)
Thanks to answer by Bob Baxley for inspiration
Just improvement for the answer above https://stackoverflow.com/a/31998832/1265306
Is generating numbers table using the following SQL
https://discourse.looker.com/t/generating-a-numbers-table-in-mysql-and-redshift/482
-- Builds table numbers holding 0..99. Eight two-row derived tables act as the
-- bits of an 8-bit counter (0..255); the weighted sum is the number.
--
-- The bit weights are integer literals rather than POWER(2, k), which yields a
-- floating-point value and would make the whole column floating point.
-- UNION ALL is enough because the two branches of each bit are distinct.
-- With LIMIT 100 the ORDER BY decides which of the 256 values are kept.
SELECT
    p0.n
    + p1.n * 2
    + p2.n * 4
    + p3.n * 8
    + p4.n * 16
    + p5.n * 32
    + p6.n * 64
    + p7.n * 128
    AS number
INTO numbers
FROM
    (SELECT 0 AS n UNION ALL SELECT 1) p0,
    (SELECT 0 AS n UNION ALL SELECT 1) p1,
    (SELECT 0 AS n UNION ALL SELECT 1) p2,
    (SELECT 0 AS n UNION ALL SELECT 1) p3,
    (SELECT 0 AS n UNION ALL SELECT 1) p4,
    (SELECT 0 AS n UNION ALL SELECT 1) p5,
    (SELECT 0 AS n UNION ALL SELECT 1) p6,
    (SELECT 0 AS n UNION ALL SELECT 1) p7
ORDER BY number
LIMIT 100
"ORDER BY" is there only in case you want paste it without the INTO clause and see the results
Create a stored procedure that parses the string dynamically and populates a temp table, then select from that temp table.
here is the magic code:-
-- Splits a comma-separated string into rows of temp table split_table(part).
--
-- Parameter:
--   "string"  the comma-separated list; NULL inserts nothing.
--
-- The table is created only if it does not exist yet and is never emptied, so
-- repeated calls in one session append to the rows already there.
CREATE OR REPLACE PROCEDURE public.sp_string_split( "string" character varying )
AS $$
DECLARE
    part_count INTEGER;
    sql VARCHAR(MAX) := '';
    item character varying := '';
BEGIN
    -- Create table
    sql := 'CREATE TEMPORARY TABLE IF NOT EXISTS split_table (part VARCHAR(255)) ';
    RAISE NOTICE 'executing sql %', sql ;
    EXECUTE sql;

    -- Nothing to split; also avoids building a NULL statement below.
    IF "string" IS NULL THEN
        RETURN;
    END IF;

    -- Number of items = number of commas + 1.
    part_count := REGEXP_COUNT("string", ',') + 1;

    FOR cnt IN 1..part_count LOOP
        item := split_part("string", ',', cnt);
        RAISE NOTICE 'item %', item ;
        -- quote_literal escapes quotes inside the item, so a value such as
        -- O'Brien neither breaks nor alters the generated statement.
        sql := 'INSERT INTO split_table (part) VALUES (' || quote_literal(item) || ')';
        EXECUTE sql;
    END LOOP;
END ;
$$ LANGUAGE plpgsql;
Usage example:-
-- Split a sample list, then read the parts back.
CALL public.sp_string_split('john,smith,jones');

SELECT part
FROM split_table
You can try copy command to copy your file into redshift tables
-- Load a comma-delimited file from S3 into table_name.
COPY table_name
FROM 's3://mybucket/myfolder/my.csv'
CREDENTIALS 'aws_access_key_id=my_aws_acc_key;aws_secret_access_key=my_aws_sec_key'
DELIMITER ','
You can use delimiter ',' option.
For more details of copy command options you can visit this page
http://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html