I have a vector of maps (result of xml/parse) which contains the following vector of nested maps (I already got rid of some parts I don't want to keep):
[
{:tag :SoapObject, :attrs nil, :content [
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1a"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2a"]}
]}
]}
{:tag :SoapObject, :attrs nil, :content [
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1b"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2b"]}
]}
]}
]
Now I want to extract only some specific data from this structure, producing a result which looks like this:
[
{"ID" "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1a",
"Attribute_2" "Value_2a"}
{"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1b",
"Attribute_2" "Value_2b"}
]
Which clojure tool could help me accomplish this?
I've found another question which is a bit similar, but whenever I tried some version of a map call the result I got was some kind of clojure.lang.LazySeq or clojure.core$map which I couldn't get to print properly to verify the result.
Usually you can start from the bottom of the structure and work your way up.
First, you would parse a single attribute item (an :ObjectData node) into a [name value] pair:
(def first-content
  "Returns the first child found under the :content key of a parsed XML node."
  (comp first :content))

(defn get-attr
  "Turns an :ObjectData node into a [field-name field-value] pair.
  The first two children of the node's :content are taken to be the
  :FieldName and :FieldValue nodes; the first content item of each is returned."
  [node]
  (let [[field-name field-value] (:content node)]
    [(first-content field-name)
     (first-content field-value)]))
user> (get-attr {:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]})
;;=> ["ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]
then you would turn every item into a map of attrs:
(defn parse-item
  "Converts a :SoapObject node into a map of field name -> field value by
  running get-attr over every child in the node's :content."
  [item]
  ;; Transducer arity of `into`: each [name value] pair is conj'ed straight
  ;; into the result map without building an intermediate sequence.
  (into {} (map get-attr) (:content item)))
(parse-item {:tag :SoapObject, :attrs nil, :content [
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1b"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2b"]}
]}
]})
;;=> {"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1", "Attribute_1" "Value_1b", "Attribute_2" "Value_2b"}
So the last thing you need to do is map over the top-level vector, producing the required result:
(mapv parse-item data)
;;=> [{"ID" "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1", "Attribute_1" "Value_1a", "Attribute_2" "Value_2a"}
;; {"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1", "Attribute_1" "Value_1b", "Attribute_2" "Value_2b"}]
You can easily solve tree-based problems using the Tupelo Forest library. You can see a video introduction from last year's Clojure Conj here.
For your problem, I'd approach it as follows. First, the data:
(dotest
(let [data-enlive
{:tag :root
:attrs nil
:content
[{:tag :SoapObject, :attrs nil,
:content
[{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1a"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2a"]}]}]}
{:tag :SoapObject, :attrs nil,
:content
[{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1b"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2b"]}]}]}]}]
and then the code
(with-debug-hid
(with-forest (new-forest)
(let [root-hid (add-tree-enlive data-enlive)
soapobj-hids (find-hids root-hid [:root :SoapObject])
objdata->map (fn [objdata-hid]
(let [fieldname-node (hid->node (find-hid objdata-hid [:ObjectData :FieldName]))
fieldvalue-node (hid->node (find-hid objdata-hid [:ObjectData :FieldValue]))]
{ (grab :value fieldname-node) (grab :value fieldvalue-node) }))
soapobj->map (fn [soapobj-hid]
(apply glue
(for [objdata-hid (hid->kids soapobj-hid)]
(objdata->map objdata-hid))))
results (mapv soapobj->map soapobj-hids)]
with intermediate results:
(is= (hid->bush root-hid)
[{:tag :root}
[{:tag :SoapObject}
[{:tag :ObjectData}
[{:tag :FieldName, :value "ID"}]
[{:tag :FieldValue, :value "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_1"}]
[{:tag :FieldValue, :value "Value_1a"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_2"}]
[{:tag :FieldValue, :value "Value_2a"}]]]
[{:tag :SoapObject}
[{:tag :ObjectData}
[{:tag :FieldName, :value "ID"}]
[{:tag :FieldValue, :value "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_1"}]
[{:tag :FieldValue, :value "Value_1b"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_2"}]
[{:tag :FieldValue, :value "Value_2b"}]]]])
(is= soapobj-hids [:0009 :0013])
and the final results:
(is= results
[{"ID" "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1a",
"Attribute_2" "Value_2a"}
{"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1b",
"Attribute_2" "Value_2b"}]))))))
Further documentation is still in progress, but you can see API docs here and a live example of your problem here.
You can also compose transducers. I was reading the other day something on JUXT blog about creating xpath like functionality with transducers.
(def children
  "Transducer: replaces every node with its :content collection."
  (map :content))

(defn tagp
  "Returns a transducer that keeps only the nodes whose :tag satisfies pred."
  [pred]
  (filter (fn [node] (pred (:tag node)))))

(defn tag=
  "Returns a transducer that keeps only the nodes whose :tag equals tag-name."
  [tag-name]
  (tagp #(= tag-name %)))

(def text
  "Transducer: flattens the :content of every node and keeps only the strings."
  (comp (mapcat :content)
        (filter string?)))

(defn fields
  "Given the children of one :SoapObject, returns a flat sequence of the
  strings found inside its :ObjectData nodes (name, value, name, value, ...)."
  [obj-datas]
  (let [xform (comp (tag= :ObjectData)
                    (mapcat :content)
                    text)]
    (sequence xform obj-datas)))

(defn clean
  "Turns a collection of parsed XML nodes into one map per :SoapObject,
  built from the alternating name/value strings returned by `fields`."
  [xml-map]
  (->> xml-map
       (sequence (comp (tag= :SoapObject)
                       children
                       (map fields)))
       (map #(apply hash-map %))))
No need for fancy tools here. You can get away with the simplest chunk of code.
(use '[plumbing.core])
;; For every element of A (expected to be the vector of :SoapObject nodes),
;; build one map from the strings nested two levels below its :content.
;; NOTE(review): `fn->>` comes from plumbing.core and is presumably shorthand for
;; (fn [x] (->> x ...)) — confirm against the Plumbing docs.
;; `...your-data...` is a placeholder to be replaced with the parsed data.
(let [A ...your-data...]
(map (fn->> :content
;; children of each child: flattens one level of nesting
(mapcat :content)
;; and one level more: the innermost :content items
(mapcat :content)
;; pair the flattened items up as key, value, key, value, ...
(apply hash-map))
A))
I have a vector of maps wherein I need to remove the maps where the value of the name key is a duplicate, keeping the one that has the highest value of age. I have a solution but I don't think it looks clean. Is there a better way to do it without breaking it up into multiple functions?
Here is my data:
(def my-maps
[{:name "jess", :age 32}
{:name "ruxpin", :age 4}
{:name "jess", :age 35}
{:name "aero", :age 33}
{:name "banner", :age 4}])
Here is my solution:
(map first (vals (group-by :name (reverse (sort-by :name my-maps)))))
Result:
({:name "ruxpin", :age 4} {:name "jess", :age 35} {:name "banner", :age 4} {:name "aero", :age 33})
Another way is to combine group-by and max-key. The advantage of this method is that you don't need to sort your collection: sorting costs O(n log n), so it should be avoided when a single pass per group is enough.
;; Group the maps by :name, then keep the map with the greatest :age in each group.
(map #(apply max-key :age %)
     (vals (group-by :name my-maps)))
;;=> ({:name "jess", :age 35}
;; {:name "ruxpin", :age 4}
;; {:name "aero", :age 33}
;; {:name "banner", :age 4})
short version
;; Sort by [:name :age] in descending order so that, within each run of equal
;; names, the entry with the highest :age comes first; then keep the first
;; entry of every run. (`my-maps` is the data vector defined in the question.)
(->> my-maps
     (sort-by (juxt :name :age) #(compare %2 %1))
     (partition-by :name)
     (map first))
a transducer version
;; Transducer: collapses every run of consecutive maps sharing the same :name
;; down to the first map of that run.
(def xf (comp (partition-by :name) (map first)))

;; Sort by [:name :age] descending so the highest :age leads each run, then
;; apply the transducer. (`my-maps` is the data vector defined in the question.)
(->> my-maps
     (sort-by (juxt :name :age) #(compare %2 %1))
     (into [] xf))
For large datasets, the transducer version should perform better, because it avoids building the intermediate lazy sequences.
Unfortunately, your original solution was actually broken. It only seemed to work because of the order of the data in my-maps. Note that you sort by :name but never by :age, so nothing guarantees that the entry with the highest age comes first within each group.
I solved this with another call to map:
;; `my-maps` is the data vector defined in the question.
(->> my-maps
     (group-by :name)
     vals
     ;; Sort each group that group-by returns by :age (ascending) ...
     (map #(sort-by :age %))
     ;; ... so the last entry of every group is the one with the highest :age.
     ;; This could also happen in the map above.
     (map last))
Note how I'm sorting each :name group by :age, then I take the last of each grouping.
I would do it a little differently, using the max function instead of sorting:
(def my-maps
[{:name "jess", :age 32}
{:name "ruxpin", :age 4}
{:name "jess", :age 35}
{:name "aero", :age 33}
{:name "banner", :age 4}])
;; NOTE(review): `dotest` is not defined in this snippet — presumably a test
;; macro from the Tupelo library; confirm before running.
(dotest
;; name -> vector of every map carrying that :name
(let [grouped-data (group-by :name my-maps)
;; one single-entry map {name max-age} per group
name-age-maps (for [[name map-list] grouped-data]
(let [max-age (apply max
(map :age map-list))
name-age-map {name max-age}]
name-age-map))
;; fold the single-entry maps into one name -> max-age map
final-result (reduce into {} name-age-maps)]
final-result))
with results:
grouped-data =>
{"jess" [{:name "jess", :age 32} {:name "jess", :age 35}],
"ruxpin" [{:name "ruxpin", :age 4}],
"aero" [{:name "aero", :age 33}],
"banner" [{:name "banner", :age 4}]}
name-age-maps =>
({"jess" 35} {"ruxpin" 4} {"aero" 33} {"banner" 4})
final-result =>
{"jess" 35, "ruxpin" 4, "aero" 33, "banner" 4}
To sort a vector of maps by several fields with different priorities and data types, compare on both fields at once. Here :size has the higher priority and is sorted descending; ties are broken by :name, sorted ascending:
(def some-vector
  "Sample data: named items with a numeric :size."
  [{:name "head" :size 3}
   {:name "mouth" :size 1}
   {:name "nose" :size 1}
   {:name "neck" :size 2}
   {:name "chest" :size 10}
   {:name "back" :size 10}
   {:name "abdomen" :size 6}])

;; Sort by :size descending, breaking ties by :name ascending.
;; The sort key is the vector [(- size) name]: negating the size reverses its
;; order, and equal-length vectors compare element by element, so :size has the
;; higher priority. `some-vector` is threaded as a value — a vector must not be
;; invoked with zero arguments.
(->> some-vector
     (sort-by (juxt (comp - :size) :name)))
I have an Atom, like x:
(def x (atom {:name "A"
:id 1
:children [{:name "B"
:id 2
:children []}
{:name "C"
:id 3
:children [{:name "D" :id 4 :children []}]}]}))
and need to update a submap like for example:
if :id is 2 , change :name to "Z"
resulting in an updated Atom:
{:name "A"
:id 1
:children [{:name "Z"
:id 2
:children []}
{:name "C"
:id 3
:children [{:name "D" :id 4 :children []}]}]}
how can this be done?
You could do it with postwalk or prewalk from the clojure.walk namespace.
(def x (atom {:name "A"
:id 1
:children [{:name "B"
:id 2
:children []}
{:name "C"
:id 3
:children [{:name "D" :id 4 :children []}]}]}))
(defn update-name
  "Walk step: when x is a map whose :id is 2, returns it with :name set to
  \"Z\"; any other value is returned unchanged."
  [x]
  (cond-> x
    (and (map? x) (= (:id x) 2)) (assoc :name "Z")))
(swap! x (partial clojure.walk/postwalk update-name))
You could also use Zippers from the clojure.zip namespace
Find a working example here: https://gist.github.com/renegr/9493967
What's the idiomatic way of merging two lists of maps in Clojure where each map entry is identified by an id key?
What's an implementation for foo so that
(foo '({:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3})
'({:id 5 :value 5}
{:id 2 :value 2}
{:id 3 :value 3}
{:id 1 :value 1}
{:id 4 :value 4})) => '({:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3}
{:id 4 :value 4}
{:id 5 :value 5})
is true?
(defn merge-by
  "Merges the elements of seqs by joining them on the return value of key-fn k.
  All elements that share a key are merged left to right (later values win),
  no matter whether they come from the same seq or from different seqs.
  Returns a lazy seq with one merged element per distinct key; the order of
  the result is that of the underlying map and is not guaranteed to be sorted.
  Example: (merge-by :id [{:id 0 :name \"George\"}{:id 1 :name \"Bernie\"}]
                         [{:id 2 :name \"Lara\"}{:id 0 :name \"Ben\"}])
  => ({:id 0 :name \"Ben\"}{:id 1 :name \"Bernie\"}{:id 2 :name \"Lara\"})"
  [k & seqs]
  (->> seqs
       ;; Concatenate first so that duplicates inside a single seq are grouped
       ;; and merged exactly like duplicates spread across several seqs.
       (apply concat)
       (group-by k)
       vals
       (map (partial apply merge))))
How about this:
(defn foo
  "Merges any number of collections of maps on their :id key.
  Maps sharing an :id are merged in the order in which they appear across
  colls (later values win). Returns a lazy seq of the merged maps sorted by
  ascending :id."
  [& colls]
  (->> (apply concat colls)
       (group-by :id)
       ;; Sort AFTER grouping: group-by returns a hash map, which does not keep
       ;; insertion order once it holds more than a handful of keys, so sorting
       ;; the input beforehand does not guarantee a sorted result.
       (sort-by key)
       (map (fn [[_ equivalent-maps]] (apply merge equivalent-maps)))))
This is generalized so that you can have an arbitrary number of input sequences, and an arbitrary grouping selector:
(def a [{:id 5 :value 5}
{:id 2 :value 2}
{:id 3 :value 3}
{:id 1 :value 1}
{:id 4 :value 4}])
(def b [{:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3}])
(def c [{:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3}
{:id 4 :value 4}
{:id 5 :value 5}])
(defn merge-vectors
  "Merges the maps of any number of sequences, joining them on the value
  returned by selector. Within a single sequence only the first element for
  each selector value is kept; elements from different sequences that share a
  selector value are merged (later sequences win). The result is sorted by
  selector."
  [selector & sequences]
  (letfn [(index-first [coll]
            ;; selector value -> first element of coll carrying that value
            (into {}
                  (map (fn [[k group]] [k (first group)]))
                  (group-by selector coll)))]
    (->> sequences
         (map index-first)
         (apply merge-with merge)
         vals
         (sort-by selector))))
;; Sanity check: returns true when merging a and b on :id with merge-vectors
;; yields exactly the expected collection c.
(defn tst
[]
(= c
(merge-vectors :id a b)))