Hi, I have the JSON below and would like to extract it in a Power BI query. My query is not able to extract the arrays inside the JSON: I am unable to extract the Properties array values, whereas I am able to extract the User values.
Any help appreciated
Edit1: Added additional column renames and achieved the result based on
@AnkUser's solution
Edit2: Below JSON
I would like the Power Query to return the following:
Workers WorkCode Place
-----------------------
Manager 134 UK
delegate 135 Europe
Authority etc
There is no relationship between these columns. However, they will be used as additional filter data for the previous Query
Sample JSON
{
"Data": [
{
"Type": "Workers",
"Values": [
"Manager",
"Delegate",
"Authority"
]
},
{
"Type": "WorkCode",
"Values": [
"134",
"135",
"140",
"141",
"142",
"143",
"150"
]
},
{
"Type": "Place",
"Values": [
"UK",
"Europe"
]
}
]
}
Below Sample power query:
let
    // Fetch and parse the JSON payload; the top level is treated as a list of records.
    Source = Json.Document(Web.Contents("http:localhost")),
    #"Converted to Table" = Table.FromList(Source, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
    // NOTE(review): in the sample data "Properties" sits inside "User", not next to it,
    // so requesting it at this level presumably yields nulls - confirm against the payload.
    #"Expanded Column1" = Table.ExpandRecordColumn(#"Converted to Table", "Column1", {"User", "Properties"}, {"Column1.User", "Column1.Properties"}),
    // Fixed: removed the trailing comma after "Type", which is a syntax error in an M list.
    #"Expanded Column1.User" = Table.ExpandRecordColumn(#"Expanded Column1", "Column1.User", {"recId", "Description", "Type"}, {"Column1.User.recId", "Column1.User.Description", "Column1.User.Type"}),
    // One row per element of the Properties list.
    #"Expanded Column1.Properties" = Table.ExpandListColumn(#"Expanded Column1.User", "Column1.Properties"),
    // Fixed: restored the missing opening quotes on the last four output column names
    // and dropped the stray ")" that followed this step.
    #"Expanded Column1.Properties1" = Table.ExpandRecordColumn(#"Expanded Column1.Properties", "Column1.Properties", {"PersonID", "HomeRef", "Designation", "EstateAgent", "Mortgage", "Broker", "Citizen"}, {"Column1.Properties.PersonID", "Column1.Properties.HomeRef", "Column1.Funds.Designation", "Column1.Properties.EstateAgent", "Column1.Properties.Mortgage", "Column1.Properties.Broker", "Column1.Properties.Citizen"})
in
    // Fixed: return the final step; returning #"Expanded Column1" discarded every later expansion.
    #"Expanded Column1.Properties1"
Sample data:
[
{
"User": {
"recId": "0154911",
"Description": "Lindsay Properties ltd",
"Type": "Organisation",
"Properties": [
{
"PersonID": 5636,
"HomeRef": 149065,
"Designation":"Owner",
"EstateAgent": {
"Code": "8533",
"Description": "Hunters-properties"
},
"Mortgage": {
"Code": "natwide",
"Description": "Bank limited"
},
"Broker": {
"Description": "Managecentre"
},
"Citizen": {
"UK": true,
"USA": false,
"Europe": false
}
},
{
"PersonID": 5636,
"HomeRef": 149066,
"Designation":"Owner",
"EstateAgent": {
"Code": "8533",
"Description": "Hunters-properties"
},
"Mortgage": {
"Code": "natwide",
"Description": "Bank limited"
},
"Broker": {
"Description": "Managecentre"
},
"Citizen": {
"UK": false,
"USA": false,
"Europe": false
}
}
]
}
},
{
"User": {
"recId": "0154912",
"Description": "Mr Mortimier properties",
"Type": "Person",
"Properties": [
{
"PersonID": 1636,
"HomeRef": 199065,
"Designation":"Owner",
"EstateAgent": {
"Code": "9533",
"Description": "Whitegates-properties"
},
"Mortgage": {
"Code": "Yoskhire society",
"Description": "society limited"
},
"Broker": {
"Description": "Managecentre"
},
"Citizen": {
"UK": true,
"USA": true,
"Europe": false
}
},
{
"PersonID": 1636,
"HomeRef": 199066,
"Designation":"Authority",
"EstateAgent": {
"Code": "9533",
"Description": "Whitegates-properties"
},
"Mortgage": {
"Code": "Yoskhire society",
"Description": "society limited"
},
"Broker": {
"Description": "Managecentre"
},
"Citizen": {
"UK": true,
"USA": true,
"Europe": false
}
}
]
}
}]
If I understand your question correctly, you want the Properties array to be expanded into columns, with one row per array element.
To Test your use case I have used your data and I tried to create rows from it. Below screenshot is the result.
If this is what you need, below is the query I got from PowerBI which gives result.
Note: You might want to clean Names of column.
let
    // Parse the JSON file; its top level is a list of records, one per user.
    RawJson = Json.Document(File.Contents("C:\Users\achikhale\Desktop\stackoverflowPowerBIJson.json")),
    AsTable = Table.FromList(RawJson, Splitter.SplitByNothing(), null, null, ExtraValues.Error),

    // Unwrap the "User" record, then pull out its scalar fields plus the Properties list.
    WithUser = Table.ExpandRecordColumn(AsTable, "Column1", {"User"}, {"Column1.User"}),
    UserFields = Table.ExpandRecordColumn(
        WithUser,
        "Column1.User",
        {"recId", "Description", "Type", "Properties"},
        {"Column1.User.recId", "Column1.User.Description", "Column1.User.Type", "Column1.User.Properties"}
    ),

    // One row per element of the Properties list, then one column per property field.
    PropertyRows = Table.ExpandListColumn(UserFields, "Column1.User.Properties"),
    PropertyFields = Table.ExpandRecordColumn(
        PropertyRows,
        "Column1.User.Properties",
        {"PersonID", "HomeRef", "Designation", "EstateAgent", "Mortgage", "Broker", "Citizen"},
        {"Column1.User.Properties.PersonID", "Column1.User.Properties.HomeRef", "Column1.User.Properties.Designation", "Column1.User.Properties.EstateAgent", "Column1.User.Properties.Mortgage", "Column1.User.Properties.Broker", "Column1.User.Properties.Citizen"}
    ),

    // Flatten each nested record (EstateAgent, Mortgage, Broker, Citizen) into its own columns.
    WithEstateAgent = Table.ExpandRecordColumn(
        PropertyFields,
        "Column1.User.Properties.EstateAgent",
        {"Code", "Description"},
        {"Column1.User.Properties.EstateAgent.Code", "Column1.User.Properties.EstateAgent.Description"}
    ),
    WithMortgage = Table.ExpandRecordColumn(
        WithEstateAgent,
        "Column1.User.Properties.Mortgage",
        {"Code", "Description"},
        {"Column1.User.Properties.Mortgage.Code", "Column1.User.Properties.Mortgage.Description"}
    ),
    WithBroker = Table.ExpandRecordColumn(
        WithMortgage,
        "Column1.User.Properties.Broker",
        {"Description"},
        {"Column1.User.Properties.Broker.Description"}
    ),
    WithCitizen = Table.ExpandRecordColumn(
        WithBroker,
        "Column1.User.Properties.Citizen",
        {"UK", "USA", "Europe"},
        {"Column1.User.Properties.Citizen.UK", "Column1.User.Properties.Citizen.USA", "Column1.User.Properties.Citizen.Europe"}
    )
in
    WithCitizen
If this is what you need I could add some more explanation (steps) on how I achieved this model of data
Edit:
New query for the "Data" JSON (note: this uses your JSON exactly as posted):
let
    // The document's top level is a record, so turn its fields into Name/Value rows first.
    RawJson = Json.Document(File.Contents("C:\Users\achikhale\Desktop\stackoverflowPowerBIJson1.json")),
    FieldRows = Record.ToTable(RawJson),
    // One row per element of the list held in "Value".
    EntryRows = Table.ExpandListColumn(FieldRows, "Value"),
    // Split each entry record into its Type and Values fields.
    EntryFields = Table.ExpandRecordColumn(
        EntryRows,
        "Value",
        {"Type", "Values"},
        {"Value.Type", "Value.Values"}
    ),
    // Finally, one row per individual value.
    ValueRows = Table.ExpandListColumn(EntryFields, "Value.Values")
in
    ValueRows
But if I edit your Json as below
[{
"Data": [{
"Type": "Workers",
"Values": [
"Manager",
"Delegate",
"Authority"
]
}, {
"Type": "WorkCode",
"Values": [
"134",
"135",
"140",
"141",
"142",
"143",
"150"
]
}, {
"Type": "Place",
"Values": [
"UK",
"Europe"
]
}
]
}
]
Then you will get a cleaner table and rows with the query below.
Note below query will only work with my edited Json mentioned above.
let
    // Here the document's top level is a list (the edited JSON), so start from Table.FromList.
    RawJson = Json.Document(File.Contents("C:\Users\achikhale\Desktop\stackoverflowPowerBIJson1.json")),
    AsTable = Table.FromList(RawJson, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
    // Pull the "Data" list out of each top-level record and give each element its own row.
    WithData = Table.ExpandRecordColumn(AsTable, "Column1", {"Data"}, {"Column1.Data"}),
    DataRows = Table.ExpandListColumn(WithData, "Column1.Data"),
    // Split each entry into Type and Values, then emit one row per value.
    DataFields = Table.ExpandRecordColumn(
        DataRows,
        "Column1.Data",
        {"Type", "Values"},
        {"Column1.Data.Type", "Column1.Data.Values"}
    ),
    ValueRows = Table.ExpandListColumn(DataFields, "Column1.Data.Values")
in
    ValueRows
Related
I'm trying to use the min and max aggregates of a quantitative column (Total bombers) grouped by another column (Country) in a rule mark of my visual. I want the rule y property to start at the minimum value of Total bombers and the rule y2 property to end at the maximum value of total bombers, but I can't seem to find the right syntax for the min and max aggregates of the rule mark. I can get the result I'm looking for by not grouping the marks but I would like to implement it for grouped marks so I can re-use it more easily.
Spec for grouped version I'm trying to use min and max aggregates in:
https://vega.github.io/editor/#/url/vega/N4IgJAzgxgFgpgWwIYgFwhgF0wBwqgegIDc4BzJAOjIEtMYBXAI0poHsDp5kTykSArJQBWENgDsQAGhAATONABONHJnaT0AQQAETJBBpRtAGxri422EkWZtcAB5IEOY3ErSQAdxqz6aAQAMATLwNGRYaABMQTI4SLKyZmT+MgZk4kjGEGgA2gC6MrJImCioOaAZCHBoIExsCExwitkyDIrGNVi4+ES0EJjUdIxMDBBNUBKYcOIDEwgkssRsbIryigQCTADMAQCcuwDssgAcBwHHCUgHx1tMTJFMuwKykXC7cABmWwAsBIpIngIACF6o1FABSSICaIAcSQOEoUAgxA8HxWyEwaFAmAAnjhqugkSjYtYxliQAAVNglYy6UFNbLocQMBpNDwATTg1hqzNZihAAF8BTJMP9xBA0YoELlsXiCSBJQgGMYUDJ9DUilMPA4cPz0GoqgAKTVwA1wY3FFmUTnWKTaACMdvtAEpnYKpLL8TUPjRjFN+TIdXq5JaENauYptAAeAC8Dt2xwCgryAoKIGgmQUMpAlXl9g8uK9+poVQ8YrI8u8vhgHlk9SQZnJmtKtXpzVRNDgxlkGuK1SFHpzTnlOILcpqpnM3Jk5floXCmJk4kMBNFDDgMgAXk02Gg1xu5PXG6hQM2anU+S0FZ3uzUqTS6ZfBcKKsOahNjCsx0WQCtEhkOhnJBxArd8+zIFZR0KI8NFPYoWwvMErx9Lse3QABhNgGBmRRRyFNMkHsLMylAFZOxmc9qUweoPAzVwanzF9f2UaZMXQVwPkXdMoEzGo8LTZBFAAa0ZcoQELeUyEULCcFRaTpRPBUkCgU1yVzGoxhYq8z3QRCGQ8KSZKYUcMKwnC8OFEBBJE7N5CUFQ1AkGoABkzAsSU7CWYwGEc8RtDYD5tGoh89OaXQcUsMzRSg8Tx3Yty5JoxSdPTJpO2ySzpgmeRyVYtlFPzRS6LzDtUN7LVLJM0Bir40rb3Qe9MkfJD3XTUU2CEglqp4+jCTYT8A2vMrTOw6LWv6aTOvQ+FyWITJ1xqaTsJ7SyJo6uAAHUfD8RS5u8gktlW9rOoAeTiKA6CqkA9oW1AAkoAQhQHU8FCgZRVHUGoAAU2DMTAIG0Dzgqa0KAeMyLRtw795QgHEGn6xKFLgkoNLSrNMvEbKupAPK9VAQrut49B8xkFD6pDCqZCumr0Bism0MpakQbbDLUhobdZvmglAksn1jA6IqevlD8v1Jm8GcwyG8NSY7Nu2mtdq5qIAiOybsZu+VTAXCAhJxJh9ufVocBNck+eMM7lMuzmDdQe1VfWi2Ltxa3bpVyyYDYUg8batXHatxWbbtp7Bzst6HM+9AACVlQZXRTU8OBpgh8ygqZ2lQu0XkWpFOKQEUGPEabeDUa01qsrYHLFIYY2+3JAnuKJkASaG8mTcq8kaZAOnxbvNPmv0yrIhdglohVmW1eHicwiwHW9YN+3Oq26tJ9QQ7x4d87-dADW0Hux6npTFMgA
Spec for non-grouped version that achieves what I want to do:
https://vega.github.io/editor/#/url/vega/N4IgJAzgxgFgpgWwIYgFwhgF0wBwqgegIDc4BzJAOjIEtMYBXAI0poHsDp5kTykSArJQBWENgDsQAGhAATONABONHJnaT0AQQAETJBBpRtAGxri422EkWZtcAB5IEOY3ErSQAdxqz6aAQAMATLwNGRYaABMQTI4SLKyZmT+MgZk4kjGEGgA2gC6MrJImCioOaAZCHBoIExsCExwitkyDIrGNVi4+ES0EJjUdIxMDBBNUBKYcOIDEwgkssRsbIryigQCTADMAQCcuwDssgAcBwHHCUgHx1tMTJFMuwKykXC7cABmWwAsBIpIngIACF6o1FABSSICaIAcSQOEoUAgxA8HxWyEwaFAmAAnjhqugkSjYtYxliQAAVNglYy6UFNbLocQMBpNDwATTg1hqzNZihAAF8BTJMP9xBA0YoELlsXiCSBJQgGMYUDJ9DUilMPA4cPz0GoqgAKTVwA1wY3FFmUTnWKTaACMdvtAEpnYKpLL8TUPjRjFN+TIdXq5JaENauYptAAeAC8Dt2xwC7s98pwNCWmJkZEUbAYOCYONyIBt-IKCpocGMshqAGFczNFIWZMRMgx5VSaXS+dlhSnvejlaqQOr0CbtfZdTUzRaptPNVaS06na7kyBcV70IrBx4R5TqZku2CWiAgxrQzkAOQAZTYxArtgAquJ1Be8toANTaecIS9PuhwWRtCvEopggV9BTyXsQEqeU4FIGZjxbYw20ZcoQAAOSceUhxLNB7SeSIZAAEQUKBlFUdQagAGRWRBtBUCAWVXTCqhqJgOQjPDdgANmCEASKUFQ1AkGoAEk8BZExaOlAUy1FJBxUVGU1zlfspW3NVGRDLVAwnYM52KU0aCNb9w1tB1lzdWTZNSKBMgUZSYJqewPHXeUzQ8MUyHlbxfBgDxZHqJAzHJTVSlqelmlRCsqzPLUhQ9aCsJqJsVI3EBTHMbkZC8+VQnCTNoMMAlRTbGQAC8mjYNBSrgQogpC1BQDCtjIuPH1K2rdAOwPOpu0FKCnMJNhjBWVzVPQFZEgyDocoU7yajsqYyBWVLAuQRrmuKcK+qPaLOtretRULIUyyQewHLKUAVgrGZWuweoPGgeznNXG7pkxdBXA+QrntcFKIJkZBFAAa1Q5rSPI4SNBAKizAsSU7CWZDoe0NgPm0B8r0PBlxvSzLqhkD4c2lJrtJ2tr3RPcQJnkckPrZMmXLJv75RcomYq67TqmFEBCxZuz-vQVKOti9A-ymQDgMMntUlFNgQbgGt4XJJC2xqHMGHEatef6HNFYAdR8PwybVgktl1+XFYAeTiKA6H50AzbQAJKAEIUEohwSKJE9AAAU2DMTAIG0RGsZxqKRQmkAIBxBoRtREnQu21r+t56ZaYJUAGb1UBmdAVnXo5g7R0MqnHZjwX5RFzmaglgCgJAhzdZoSrVdbAlAl5n1jA6AWXuG0aA3LEuQDrLXjqpvWFbgI2-Pb5CCWiS39azkBna+sIsAgEGcSYReBtaHAxzJnvjFtpB7dxBf1dQe0V5ni+r4rjeAgSjBb0ZgurbgJ+HZvgk98PaJXkN7aG1F4ahxWEjEaDBUboyAree8EdjxuRqATROj0yYtXQLtXG6caZsDpmTHO5J86VwHiAdmI8xbc3LuSQuwt9q0JvHeU0mNnwiQforZWOAAEa3rDrOWq854mydh3NAFthGPztv-U2EjUCu3dsAr2ZEhKUX9oHBCUDIysOQXgyOaV5Sx3jrNBUSdsEp1wZTAhmd6YzC-lQhhVci40K5mOXmFdGF82YVzPR7C-xcNSK3NeG8u4c17s4yhEwh6+MOhPRsU8f6iP8vIg+qBl7SMVvwzeBUd57wPu-PMJ9QBnz-tfNJt975ZN-rIip4j0lv15jAT+ucY4-3KS-BRQDPZGI1sqQm5isFbRKKnPatiiFr1IUzKJQsnHF1oR4mQXiXFMIWX4pBATOGSE8ZEWZ1c4ni2fJ
LRuMsp4hJyeE8skT+5zJiWNdZ8SGwnRqSky53DQkKIylvYOu997qyKcfMup9fTnzqV09J1T2mr06Tk12FtmmtIYR08FOSenWSAA
Any help much appreciated!
What you're trying to do isn't logically possible. You have faceted on country. That means each country gets a line (working), a symbol (working) but how would it get a rule where the start point is one country and the end point is the other. One group does not know anything about the other group so you couldn't have a rule spanning the two countries.
If you want to keep your facet, you can do the following:
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"description": "A basic line chart example.",
"width": 500,
"height": 200,
"padding": 5,
"signals": [],
"data": [
{
"name": "bombers",
"url": "https://gist.githubusercontent.com/vdvoorder/5b30997d8708dda783bb2b95d2e9ef34/raw/Bomber%2520Gap.csv",
"format": {
"type": "csv",
"parse": {"Total bombers": "number", "Year": "number"}
},
"transform": [
{
"type": "formula",
"as": "date",
"expr": "time(datetime(datum.Year, 1, 1))"
},
{"type": "filter", "expr": "datum.Year <= 1980"}
]
},
{
"name": "bombers2",
"source": "bombers",
"transform": [
{
"type": "pivot",
"field": "Country",
"value": "Total bombers",
"groupby": ["date"]
}
]
}
],
"scales": [
{
"name": "x",
"type": "time",
"range": "width",
"domain": {"data": "bombers", "field": "date"}
},
{
"name": "y",
"type": "linear",
"range": "height",
"nice": true,
"zero": true,
"domain": {"data": "bombers", "field": "Total bombers"}
},
{
"name": "color",
"type": "ordinal",
"range": "category",
"domain": {"data": "bombers", "field": "Country"}
}
],
"axes": [
{"orient": "bottom", "scale": "x"},
{"orient": "left", "scale": "y"}
],
"marks": [
{
"type": "group",
"from": {
"facet": {"name": "series", "data": "bombers", "groupby": "Country"}
},
"marks": [
{
"description": "Line for evolution of total bombers by country",
"type": "line",
"from": {"data": "series"},
"encode": {
"enter": {
"x": {"scale": "x", "field": "date"},
"y": {"scale": "y", "field": "Total bombers"},
"stroke": {"scale": "color", "field": "Country"},
"strokeCap": {"value": "round"},
"strokeWidth": {"value": 3},
"strokeOpacity": {"value": 0.5}
}
}
},
{
"description": "Points for total bombers by country",
"type": "symbol",
"from": {"data": "series"},
"encode": {
"enter": {
"x": {"scale": "x", "field": "date"},
"y": {"scale": "y", "field": "Total bombers"},
"size": {"value": 50},
"fill": {"scale": "color", "field": "Country"},
"strokeWidth": {"value": 20},
"stroke": {"value": "lightskyblue"}
},
"update": {
"fillOpacity": {"value": 1},
"strokeOpacity": {"value": 0}
},
"hover": {"strokeOpacity": {"value": 1}}
}
}
]
},
{
"description": "Rulers between country total bomber numbers",
"type": "rule",
"from": {"data": "bombers2"},
"encode": {
"update": {
"x": {"scale": "x", "field": "date"},
"y": {"scale": "y", "field": "Soviet Union"},
"y2": {"scale": "y", "field": "United States"},
"stroke": {"value": "lightskyblue"},
"strokeWidth": {"value": 3},
"strokeOpacity": {"value": 0.5}
}
}
}
]
}
I have a power query record as follows:
// Record with a single "tracker" list; each tracker item has a "foo" list of
// records (whose field sets differ from one record to the next) and a "bar" text value.
Data = [
    tracker = {
        [
            foo = {
                [field_a = "Something", field_b = "data2"],
                [field_a = "Something", field_c = "data2"]
            },
            bar = "Data"
        ],
        [
            foo = {
                [field_c = "Something", field_b = "data2"],
                [field_a = "Something", field_c = "data2"]
            },
            bar = "Data2"
        ]
    }
]
When I convert it into a JSON, I obtain this:
{
"tracker": [
{
"foo": [
{
"field_a": "Something",
"field_b": "data2"
},
{
"field_a": "Something",
"field_c": "data2"
}
],
"bar": "Data"
},
{
"foo": [
{
"field_c": "Something",
"field_b": "data2"
},
{
"field_a": "Something",
"field_c": "data2"
}
],
"bar": "Data2"
}
]
}
I need the foo array to have structured objects within it, in the sense that each object should have field_a, field_b, field_c in it. Example:
// Desired shape: every record carries field_a, field_b and field_c (null when absent).
foo = {
    [
        field_c = "Something",
        field_b = "data2",
        field_a = null
    ],
    [
        field_a = "Something",
        // Fixed: the field separator was "." instead of ",".
        field_c = "data2",
        field_b = null
    ]
}
I tried using
// Trim field_a/field_b/field_c on every record of list x; MissingField.UseNull
// makes a field that is absent from a record appear with a null value.
List.Transform(
    x,
    (rec) => Record.TransformFields(
        rec,
        {
            {"field_a", Text.Trim},
            {"field_b", Text.Trim},
            {"field_c", Text.Trim}
        },
        MissingField.UseNull
    )
)
however, I am losing reference to the bar field in the final output. Any help would be appreciated.
This seems to work for me.
let
    Data = [
        tracker = {
            [
                foo = {
                    [field_a = "Something", field_b = "data2"],
                    [field_a = "Something", field_c = "data2"]
                },
                bar = "Data"
            ],
            [
                foo = {
                    [field_c = "Something", field_b = "data2"],
                    [field_a = "Something", field_c = "data2"]
                },
                bar = "Data2"
            ]
        }
    ],

    // One row per field of Data (here just "tracker"), with the field's content in [Value].
    FieldRows = Record.ToTable(Data),

    // Trims the three expected fields of a single foo entry; absent ones come back as null.
    NormalizeEntry = (entry) => Record.TransformFields(
        entry,
        {
            {"field_a", Text.Trim},
            {"field_b", Text.Trim},
            {"field_c", Text.Trim}
        },
        MissingField.UseNull
    ),

    // Rewrites only the "foo" field of a tracker item, so the other fields (e.g. bar) are kept.
    NormalizeItem = (item) => Record.TransformFields(
        item,
        {
            {"foo", (entries) => List.Transform(entries, NormalizeEntry)}
        }
    ),

    // "Custom" holds the tracker list with every foo entry normalized.
    WithCustom = Table.AddColumn(
        FieldRows,
        "Custom",
        (row) => List.Transform(row[Value], NormalizeItem)
    )
in
    WithCustom
Try this in powerquery
It splits on } then checks for the fields defined in the first row, and if one or two of them are missing, adds them, then puts the pieces back together
let
    // Field names that every JSON object is expected to contain.
    Keywords = Table.Buffer(#table({"Keyword"}, {{"field_a"}, {"field_b"}, {"field_c"}})),

    Input = [
        tracker = {
            [
                foo = {
                    [field_a = "Something", field_b = "data2"],
                    [field_a = "Something", field_c = "data2"]
                },
                bar = "Data"
            ],
            [
                foo2 = {
                    [field_c = "Something", field_b = "data2"],
                    [field_a = "Something", field_c = "data2"]
                },
                bar = "Data2"
            ]
        }
    ],

    // Serialize the tracker list to JSON text and cut it at every closing brace.
    JsonText = Text.FromBinary(Json.FromValue(Input[tracker])),
    Pieces = Text.Split(JsonText, "}"),
    PieceRows = Table.FromList(Pieces, Splitter.SplitByNothing(), null, null, ExtraValues.Error),
    TypedRows = Table.TransformColumnTypes(PieceRows, {{"Column1", type text}}),

    // Custom: the keywords that do NOT occur in this piece of text.
    WithMissing = Table.AddColumn(
        TypedRows,
        "Custom",
        (row) => List.Select(Keywords[Keyword], (kw) => not Text.Contains(row[Column1], kw))
    ),

    // Custom.1: text to append. Nothing when every keyword is missing (the piece is not
    // one of the field objects); otherwise an empty-string entry for each missing keyword.
    WithSuffix = Table.AddColumn(
        WithMissing,
        "Custom.1",
        (row) =>
            if List.Count(row[Custom]) = Table.RowCount(Keywords)
            then ""
            else Text.Combine(List.Transform(row[Custom], (kw) => ","""&kw&""":"""""))
    ),

    // Custom.2: original piece followed by its suffix.
    WithPatched = Table.AddColumn(
        WithSuffix,
        "Custom.2",
        (row) => row[Column1] & row[Custom.1]
    ),

    // Re-join the pieces with the closing braces that the split removed.
    Result = Text.Combine(WithPatched[Custom.2], "}")
in
    Result
Before
[{"foo":[{"field_a":"Something","field_b":"data2"},{"field_a":"Something","field_c":"data2"}],"bar":"Data"},{"foo2":[{"field_c":"Something","field_b":"data2"},{"field_a":"Something","field_c":"data2"}],"bar":"Data2"}]
After
[{"foo":[{"field_a":"Something","field_b":"data2","field_c":""},{"field_a":"Something","field_c":"data2","field_b":""}],"bar":"Data"},{"foo2":[{"field_c":"Something","field_b":"data2","field_a":""},{"field_a":"Something","field_c":"data2","field_b":""}],"bar":"Data2"}]
I have data with multiple dimensions, stored in the Druid cluster. for example, Data of movies and the revenue they earned from each country where they were screened.
I'm trying to build a query that returns a table of all the movies, with the total revenue of each movie and its revenue for each country.
I succeeded to do it in Turnilo - it generated for me the following Druid query -
[
[
{
"queryType": "timeseries",
"dataSource": "movies_source",
"intervals": "2021-11-18T00:01Z/2021-11-21T00:01Z",
"granularity": "all",
"aggregations": [
{
"name": "__VALUE__",
"type": "doubleSum",
"fieldName": "revenue"
}
]
},
{
"queryType": "topN",
"dataSource": "movies_source",
"intervals": "2021-11-18T00:01Z/2021-11-21T00:01Z",
"granularity": "all",
"dimension": {
"type": "default",
"dimension": "movie_id",
"outputName": "movie_id"
},
"aggregations": [
{
"name": "revenue",
"type": "doubleSum",
"fieldName": "revenue"
}
],
"metric": "revenue",
"threshold": 50
}
],
[
{
"queryType": "topN",
"dataSource": "movies_source",
"intervals": "2021-11-18T00:01Z/2021-11-21T00:01Z",
"granularity": "all",
"filter": {
"type": "selector",
"dimension": "movie_id",
"value": "some_movie_id"
},
"dimension": {
"type": "default",
"dimension": "country",
"outputName": "country"
},
"aggregations": [
{
"name": "revenue",
"type": "doubleSum",
"fieldName": "revenue"
}
],
"metric": "revenue",
"threshold": 5
}
]
]
But it doesn't work when I'm trying to use it as a body for a Postman query - I got
{
"error": "Unknown exception",
"errorMessage": "Unexpected token (START_ARRAY), expected VALUE_STRING: need JSON String that contains type id (for subtype of org.apache.druid.query.Query)\n at [Source: (org.eclipse.jetty.server.HttpInputOverHTTP); line: 2, column: 3]",
"errorClass": "com.fasterxml.jackson.databind.exc.MismatchedInputException",
"host": null
}
How should I build the corresponding query so that it works with Postman?
I am not familiar with Turnilo but have you tried using the Druid Console to write SQL and convert to Native request with the "Explain SQL query" option under the "Run/..." menu?
Your native queries seem to be doing a Top N instead of listing all movies, so I think the SQL might be something like:
-- Revenue per (movie, country) pair over the queried interval, 50 highest totals first.
-- Fixed: the dimension is named "country" in the question's native queries, not "country_id".
SELECT movie_id, country, SUM(revenue) total_revenue
FROM movies_source
WHERE __time BETWEEN '2021-11-18 00:01:00' AND '2021-11-21 00:01:00'
GROUP BY movie_id, country
ORDER BY total_revenue DESC
LIMIT 50
I don't have the data source to test, but tested with sample wikipedia data with similar query structure:
-- Sum of sum_added per (namespace, cityName) pair within the time window,
-- skipping rows without a city; the 50 largest totals are returned.
SELECT
    namespace,
    cityName,
    SUM(sum_added) AS total
FROM "wikipedia" AS r
WHERE __time BETWEEN '2015-09-12 00:00:00' AND '2015-09-15 00:00:00'
  AND cityName IS NOT NULL
GROUP BY
    namespace,
    cityName
ORDER BY total DESC
LIMIT 50
which results in the following Native query:
{
"queryType": "groupBy",
"dataSource": {
"type": "table",
"name": "wikipedia"
},
"intervals": {
"type": "intervals",
"intervals": [
"2015-09-12T00:00:00.000Z/2015-09-15T00:00:00.001Z"
]
},
"virtualColumns": [],
"filter": {
"type": "not",
"field": {
"type": "selector",
"dimension": "cityName",
"value": null,
"extractionFn": null
}
},
"granularity": {
"type": "all"
},
"dimensions": [
{
"type": "default",
"dimension": "namespace",
"outputName": "d0",
"outputType": "STRING"
},
{
"type": "default",
"dimension": "cityName",
"outputName": "d1",
"outputType": "STRING"
}
],
"aggregations": [
{
"type": "longSum",
"name": "a0",
"fieldName": "sum_added",
"expression": null
}
],
"postAggregations": [],
"having": null,
"limitSpec": {
"type": "default",
"columns": [
{
"dimension": "a0",
"direction": "descending",
"dimensionOrder": {
"type": "numeric"
}
}
],
"limit": 50
},
"context": {
"populateCache": false,
"sqlOuterLimit": 101,
"sqlQueryId": "cd5aabed-5e08-49b7-af63-fe82c125d3ee",
"useApproximateCountDistinct": false,
"useApproximateTopN": false,
"useCache": false
},
"descending": false
}
I have inherited the following JSONField data structure:
[
{
"name": "Firstname",
"show": {
"value": true
},
"type": "text",
"uuid": "55668e45-07d1-404e-bf65-f6a3cacfaa97",
"label": {
"for": "Firstname",
"display": "First name"
},
"value": "Michael",
"options": [],
"required": true,
"component": "Input",
"placeholder": "Input text here",
"validationErrors": []
},
{
"name": "Surname",
"show": {
"value": true
},
"type": "text",
"uuid": "ce91fefa-66e3-4b08-8f1a-64d95771aa49",
"label": {
"for": "Surname",
"display": "Surname"
},
"value": "Roberts",
"options": [],
"required": true,
"component": "Input",
"placeholder": "Input text here",
"validationErrors": []
},
{
"name": "EmailAddress",
"show": {
"value": true
},
"type": "email",
"uuid": "6012a805-da62-4cee-8656-b7565b5f8756",
"label": {
"for": "Email",
"display": "Email"
},
"value": "michael#hiyield.co.uk",
"options": [],
"required": true,
"component": "Input",
"placeholder": "Input text here",
"validationErrors": []
},
{
"name": "University",
"show": {
"value": true
},
"type": "text",
"uuid": "434e3781-ab8a-4f09-9c68-5ec35188f3c7",
"label": {
"for": "University",
"display": "University/College"
},
"value": "University College London",
"options": [],
"required": true,
"component": "Input",
"placeholder": "Input text here",
"validationErrors": []
},
{
"name": "Subscribe",
"show": {
"value": true
},
"type": "checkbox",
"uuid": "79bdc29e-6357-4175-bf65-07be60776a29",
"label": {
"for": "Subscribe",
"display": "Subscribe to the KEVRI mailing list"
},
"value": true,
"options": [],
"required": true,
"component": "Checkbox",
"description": "KEVRI is committed to respecting and protecting your privacy. The data collected here will create your personalised report which we can email to you after this review if you wish. We will not share personal data with anyone else or send you any further emails.",
"placeholder": "",
"validationErrors": []
}
]
which exists on the models.JSONField called "about" for "MyModel", as follows:
class MyModel(AbstractTimestampedModel):
    """Model keyed by a UUID that stores a JSON document in ``about``."""

    # Primary key; defaults to a fresh uuid.uuid4() value and is not editable.
    uuid = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    # JSON payload (in the sample data: a list of form-field objects).
    about = models.JSONField()
I was wondering, how do I filter MyModel where the field within about is called by the "name": "EmailAddress" ... then query for that particular fields "value"?
Essentially, for the queryset MyModel.objects.all().filter() ... I want to filter out all the values where the EmailAddress is equal to some value ...
I'm not sure this is achievable within the Django ORM. However, there might be someone who could advise ...
If I am correct, you should use the PostgreSQL function jsonb_to_recordset.
Firstly you should create a custom database function since there is no function for that in Django Core.
class JSONRecordSet(Func):
    """Sub-select over PostgreSQL's ``jsonb_to_recordset`` that matches a key to a value.

    Renders ``(SELECT id FROM jsonb_to_recordset(<expression>) AS items(<key> <output_type>)
    WHERE <key> = <search>)``.

    Args:
        expression: the JSON column (or expression) holding an array of objects.
        key: name of the record column to compare; interpolated into the SQL as-is.
        output_type: SQL type of that column (e.g. ``"text"``); interpolated as-is.
        search: value to compare against; sent to the database as a bound parameter.
    """

    # "%%s" survives the template interpolation as "%s", i.e. a placeholder that the
    # database driver fills with the bound ``search`` parameter.
    template = (
        "(SELECT id FROM %(function)s(%(expressions)s) "
        "AS items(%(key)s %(output_type)s) WHERE %(key)s = %%s)"
    )
    function = "jsonb_to_recordset"

    def __init__(self, expression, key, output_type, search):
        # key/output_type are SQL identifiers and cannot be bound as parameters:
        # they must never come from untrusted input.
        super().__init__(expression, key=key, output_type=output_type)
        self.search = search

    def as_sql(self, compiler, connection, **extra_context):
        sql, params = super().as_sql(compiler, connection, **extra_context)
        # The WHERE placeholder is the last one in the template, so the search
        # value goes after any parameters contributed by the source expression.
        return sql, [*params, self.search]
Please be aware of SQL injection.
After that, you can use this function with annotate.
# Annotate every row with the id produced by JSONRecordSet for the given
# key/value pair, then keep only the rows whose own id equals that annotation.
# NOTE(review): MyModel declares "uuid" as its primary key - confirm that an
# "id" column exists before relying on this filter.
MyModel.objects.annotate(
    _id=JSONRecordSet(
        expression="about",
        key="EmailAddress",
        output_type="text",
        search="foo#bar.com",
    )
).filter(id=F("_id"))
This returns all MyModel instances that have the value "foo#bar.com" under the EmailAddress key.
Try this approach:
# "about" holds a list of objects, so a key transform such as about__name (which
# looks up a key on a top-level object) does not reach the list elements. The
# containment lookup matches rows whose list includes an object with these pairs.
# Add "value": <email> to the dict to match a specific address as well.
# Note: the contains lookup is not supported on SQLite or Oracle.
MyModel.objects.filter(about__contains=[{"name": "EmailAddress"}])
It might return the result you want.
Also, have a look at this link. It also describes how to query into nested dictionary using JSONField:
https://docs.djangoproject.com/en/3.2/topics/db/queries/#key-index-and-path-transforms
Thanks for your help,
I'm trying to transform the Array called 'Tags' hosting a list of pair of key-values into a list of columns: CustomerId, CustomerDisplayName, CustomerPath where each list contains a respective value being a store for each charge.
{
"periodFrom": "2020-11-09T00:00:00",
"periodTo": "2020-12-08T00:00:00",
"charges": [
{
"listPrice": 5.05,
"netPrice": 5.05,
"netPriceProrated": 5.05,
"subTotal": 5.05,
"currency": "CAD",
"isBilled": true,
"isProratable": true,
"deductions": [],
"fees": [],
"invoice": {
"number": "2822835",
"date": "2020-11-16T00:00:00",
"periodFrom": "2020-10-09T00:00:00",
"periodTo": "2020-11-08T00:00:00"
},
"taxes": [
{
"name": "GST",
"appliedRate": 5.0
},
{
"name": "QST",
"appliedRate": 9.975
}
],
"tags": [
{
"name": "CustomerId",
"value": "42c8edf4-365a-4068-bde6-33675832afbb"
},
{
"name": "CustomerDisplayName",
"value": "Blue Sky Group"
},
{
"name": "CustomerPath",
"value": "Devmesh Co-Branded/Blue Sky Group"
}
]
}
]
}
Here is the snippet of code I'm currently using:
let
...
    // Wrap the response record in a one-row table so its fields become columns.
    ResponseTable = Table.FromRecords({Response}),
    // One row per element of the "charges" list.
    ChargeRows = Table.ExpandListColumn(ResponseTable, "charges"),
    // Fields to pull out of each charge record.
    ChargeFields = {
        "productId", "productName", "sku",
        "chargeId", "chargeName", "chargeType",
        "periodFrom", "periodTo", "quantity",
        "listPrice", "netPrice", "netPriceProrated", "subTotal", "currency",
        "isBilled", "isProratable",
        "deductions", "fees", "invoice", "taxes", "tags"
    },
    // Each output column is named "charges." followed by the field name.
    ChargeColumns = Table.ExpandRecordColumn(
        ChargeRows,
        "charges",
        ChargeFields,
        List.Transform(ChargeFields, (fieldName) => "charges." & fieldName)
    )
in
    ChargeColumns