Removing nested values with Specter in Clojure - clojure

Suppose that I have a Clojure map like this:
(def mymap {:a [1 2 3] :b {:c [] :d [1 2 3]}})
I would like a function remove-empties that produces a new map in which entries from (:b mymap) that have an empty sequence as a value are removed. So (remove-empties mymap) would give the value:
{:a [1 2 3] :b {:d [1 2 3]}}
Is there a way to write a function to do this using Specter?

Here's how to do it with Specter:
(use 'com.rpl.specter)
(setval [:b MAP-VALS empty?] NONE my-map)
=> {:a [1 2 3], :b {:d [1 2 3]}}
In English, this says "Under :b, find all the map values that are empty?. Set them to NONE, i.e. remove them."

(update my-map :b (fn [b]
(apply dissoc b
(map key (filter (comp empty? val) b)))))

This is the specter solution:
(ns myns.core
(:require
[com.rpl.specter :as spc]))
(def my-map
{:a [1 2 3]
:b {:c []
:d [1 2 3]}})
(defn my-function
[path data]
(let [pred #(and (vector? %) (empty? %))]
(spc/setval [path spc/MAP-VALS pred] spc/NONE data)))
;; (my-function [:b] my-map) => {:a [1 2 3]
;; :b {:d [1 2 3]}}

I don't know specter either, but this is pretty simple to do in plain clojure.
(defn remove-empties [m]
(reduce-kv (fn [acc k v]
(cond (map? v) (let [new-v (remove-empties v)]
(if (seq new-v)
(assoc acc k new-v)
acc))
(empty? v) acc
:else (assoc acc k v)))
(empty m), m))
Caveat: For extremely nested data structures it might stack overflow.

So far I haven't found an approach with specter's filterer, because when I test filterers they seem to receive each map entry twice (once as a map entry and once as a 2-length vector) and giving different results between those seems to cause issues. However, we shouldn't be removing empty sequences anywhere they might appear, just map entries where they're the value.
I did seem to get a clojure.walk approach working that might still interest you, though.
(ns nested-remove
(:require [com.rpl.specter :as s]
[clojure.walk :refer [postwalk]]))
(defn empty-seq-entry? [entry]
(and (map-entry? entry) (sequential? (val entry)) (empty? (val entry))))
(defn remove-empties [root]
(postwalk #(if (map? %) (into (empty %) (remove empty-seq-entry? %)) %) root))
(remove-empties mymap) ;;=> {:a [1 2 3], :b {:d [1 2 3]}}

Assuming we only need to go one level deep and not search recursively like the accepted answer:
(setval [:b MAP-VALS empty?] NONE mymap)
A fully recursive solution that removes empty values in a map at any level
(def my-complex-map {:a [1] :b {:c [] :d [1 2 3] :e {:f "foo" :g []}}})
; declare recursive path that traverses map values
(declarepath DEEP-MAP-VALS)
(providepath DEEP-MAP-VALS (if-path map? [MAP-VALS DEEP-MAP-VALS] STAY))
(setval [DEEP-MAP-VALS empty?] NONE my-complex-map)
; => {:a [1], :b {:d [1 2 3, :e {:f "foo"}}}}
Reference the wiki on using specter recursively.

While I am not very familiar with Specter, in addition to the postwalk solution, you can solve this using tupelo.forest from the Tupelo library. You do need to rearrange the data a bit into Hiccup or Enlive format, then it's easy to identify any nodes with no child nodes:
(ns tst.clj.core
(:use clj.core tupelo.test)
(:require
[tupelo.core :as t]
[tupelo.forest :as tf] ))
(t/refer-tupelo)
(defn hid->enlive [hid]
(tf/hiccup->enlive (tf/hid->hiccup hid)))
(defn empty-kids?
[path]
(let [hid (last path)
result (and (tf/node-hid? hid)
(empty? (grab :kids (tf/hid->tree hid))))]
result))
; delete any nodes without children
(dotest
(tf/with-forest (tf/new-forest)
(let [e0 {:tag :root
:attrs {}
:content [{:tag :a
:attrs {}
:content [1 2 3]}
{:tag :b
:attrs {}
:content [{:tag :c
:attrs {}
:content []}
{:tag :d
:attrs {}
:content [1 2 3]}
]}]}
root-hid (tf/add-tree-enlive e0)
empty-paths (tf/find-paths-with root-hid [:** :*] empty-kids?)
empty-hids (mapv last empty-paths)]
(is= (hid->enlive root-hid) ; This is the original tree structure (Enlive format)
{:tag :root,
:attrs {},
:content
[{:tag :a,
:attrs {},
:content
[{:tag :tupelo.forest/raw, :attrs {}, :content [1]}
{:tag :tupelo.forest/raw, :attrs {}, :content [2]}
{:tag :tupelo.forest/raw, :attrs {}, :content [3]}]}
{:tag :b,
:attrs {},
:content
[{:tag :c, :attrs {}, :content []}
{:tag :d,
:attrs {},
:content
[{:tag :tupelo.forest/raw, :attrs {}, :content [1]}
{:tag :tupelo.forest/raw, :attrs {}, :content [2]}
{:tag :tupelo.forest/raw, :attrs {}, :content [3]}]}]}]})
(apply tf/remove-hid empty-hids) ; remove the nodes with no child nodes
(is= (hid->enlive root-hid) ; this is the result (Enlive format)
{:tag :root,
:attrs {},
:content
[{:tag :a,
:attrs {},
:content
[{:tag :tupelo.forest/raw, :attrs {}, :content [1]}
{:tag :tupelo.forest/raw, :attrs {}, :content [2]}
{:tag :tupelo.forest/raw, :attrs {}, :content [3]}]}
{:tag :b,
:attrs {},
:content
[{:tag :d,
:attrs {},
:content
[{:tag :tupelo.forest/raw, :attrs {}, :content [1]}
{:tag :tupelo.forest/raw, :attrs {}, :content [2]}
{:tag :tupelo.forest/raw, :attrs {}, :content [3]}]}]}]})
(is= (tf/hid->hiccup root-hid) ; same result in Hiccup format
[:root
[:a
[:tupelo.forest/raw 1]
[:tupelo.forest/raw 2]
[:tupelo.forest/raw 3]]
[:b
[:d
[:tupelo.forest/raw 1]
[:tupelo.forest/raw 2]
[:tupelo.forest/raw 3]]]])
)))

Related

Clojure: Transform nested maps into custom map keeping only specific attributes

I have a vector of maps (result of xml/parse) which contains the following vector of nested maps (I already got rid of some parts I don't want to keep):
[
{:tag :SoapObject, :attrs nil, :content [
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1a"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2a"]}
]}
]}
{:tag :SoapObject, :attrs nil, :content [
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1b"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2b"]}
]}
]}
]
Now I want to extract only some specific data from this structure, producing a result which looks like this:
[
{"ID" "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1a",
"Attribute_2" "Value_1a"}
{"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1b",
"Attribute_2" "Value_1b"}
]
Which clojure tool could help me accomplish this?
I've found another question which is a bit similar, but whenever I tried some version of a map call the result I got was some kind of clojure.lang.LazySeq or clojure.core$map which I couldn't get to print properly to verify the result.
usually you can start from the bottom, gradually going up:
first you would like to parse the attr item:
(def first-content (comp first :content))
(defn get-attr [{[k v] :content}]
[(first-content k)
(first-content v)])
user> (get-attr {:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]})
;;=> ["ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]
then you would turn every item into a map of attrs:
(defn parse-item [item]
(into {} (map get-attr (:content item))))
(parse-item {:tag :SoapObject, :attrs nil, :content [
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1b"]}
]}
{:tag :ObjectData, :attrs nil, :content [
{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2b"]}
]}
]})
;;=> {"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1", "Attribute_1" "Value_1b", "Attribute_2" "Value_2b"}
so the last thing you need do, is to map over the top level form, producing the required result:
(mapv parse-item data)
;;=> [{"ID" "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1", "Attribute_1" "Value_1a", "Attribute_2" "Value_2a"}
;; {"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1", "Attribute_1" "Value_1b", "Attribute_2" "Value_2b"}]
You can easily solve tree-based problems using the Tupelo Forest library. You can see a video introduction from last year's Clojure Conj here.
For your problem, I'd approach it as follows. First, the data:
(dotest
(let [data-enlive
{:tag :root
:attrs nil
:content
[{:tag :SoapObject, :attrs nil,
:content
[{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1a"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2a"]}]}]}
{:tag :SoapObject, :attrs nil,
:content
[{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["ID"]}
{:tag :FieldValue, :attrs nil, :content ["90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_1"]}
{:tag :FieldValue, :attrs nil, :content ["Value_1b"]}]}
{:tag :ObjectData, :attrs nil,
:content [{:tag :FieldName, :attrs nil, :content ["Attribute_2"]}
{:tag :FieldValue, :attrs nil, :content ["Value_2b"]}]}]}]}]
and then the code
(with-debug-hid
(with-forest (new-forest)
(let [root-hid (add-tree-enlive data-enlive)
soapobj-hids (find-hids root-hid [:root :SoapObject])
objdata->map (fn [objdata-hid]
(let [fieldname-node (hid->node (find-hid objdata-hid [:ObjectData :FieldName]))
fieldvalue-node (hid->node (find-hid objdata-hid [:ObjectData :FieldValue]))]
{ (grab :value fieldname-node) (grab :value fieldvalue-node) }))
soapobj->map (fn [soapobj-hid]
(apply glue
(for [objdata-hid (hid->kids soapobj-hid)]
(objdata->map objdata-hid))))
results (mapv soapobj->map soapobj-hids)]
with intermediate results:
(is= (hid->bush root-hid)
[{:tag :root}
[{:tag :SoapObject}
[{:tag :ObjectData}
[{:tag :FieldName, :value "ID"}]
[{:tag :FieldValue, :value "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_1"}]
[{:tag :FieldValue, :value "Value_1a"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_2"}]
[{:tag :FieldValue, :value "Value_2a"}]]]
[{:tag :SoapObject}
[{:tag :ObjectData}
[{:tag :FieldName, :value "ID"}]
[{:tag :FieldValue, :value "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_1"}]
[{:tag :FieldValue, :value "Value_1b"}]]
[{:tag :ObjectData}
[{:tag :FieldName, :value "Attribute_2"}]
[{:tag :FieldValue, :value "Value_2b"}]]]])
(is= soapobj-hids [:0009 :0013])
and the final results:
(is= results
[{"ID" "8d8edbb6-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1a",
"Attribute_2" "Value_2a"}
{"ID" "90e39036-cb0f-11e8-a8d5-f2801f1b9fd1",
"Attribute_1" "Value_1b",
"Attribute_2" "Value_2b"}]))))))
Further documentation is still in progress, but you can see API docs here and a live example of your problem here.
You can also compose transducers. I was reading the other day something on JUXT blog about creating xpath like functionality with transducers.
(def children (map :content))
(defn tagp [pred]
(filter (comp pred :tag)))
(defn tag= [tag-name]
(tagp (partial = tag-name)))
(def text (comp (mapcat :content) (filter string?)))
(defn fields [obj-datas]
(sequence (comp
(tag= :ObjectData)
(mapcat :content)
text)
obj-datas))
(defn clean [xml-map]
(let [fields-list (sequence (comp
(tag= :SoapObject)
children
(map fields))
xml-map)]
(map (partial apply hash-map) fields-list)))
No need for fancy tools here. You can get away with the simplest chunk of code.
(use '[plumbing.core])
(let [A ...your-data...]
(map (fn->> :content
(mapcat :content)
(mapcat :content)
(apply hash-map))
A))

Cleaner Way to Sort and Order a Vector of Maps in Clojure?

I have a vector of maps wherein I need to remove the maps where the value of the name key is a duplicate, keeping the one that has the highest value of age. I have a solution but I don't think it looks clean. Is there a better way to do it without breaking it up into multiple functions?
Here is my data:
(def my-maps
[{:name "jess", :age 32}
{:name "ruxpin", :age 4}
{:name "jess", :age 35}
{:name "aero", :age 33}
{:name "banner", :age 4}])
Here is my solution:
(map first (vals (group-by :name (reverse (sort-by :name my-maps)))))
Result:
({:name "ruxpin", :age 4} {:name "jess", :age 35} {:name "banner", :age 4} {:name "aero", :age 33})
another way is the combination of group-by and max-key. The advantage of this method is that you don't need to sort your collection, and sort in turn has an impact on performance and if it can be avoided it should be.
(for [[_ vs] (group-by :name my-maps)]
(apply max-key :age vs))
;;=> ({:name "jess", :age 35}
;; {:name "ruxpin", :age 4}
;; {:name "aero", :age 33}
;; {:name "banner", :age 4})
short version
(->> my-set
(sort-by (juxt :name :age) #(compare %2 %1)) ; sort-by :name, :age in reverse order
(partition-by :name)
(map first))
a transducer version
(def xf (comp (partition-by :name) (map first)))
(->> my-set
(sort-by (juxt :name :age) #(compare %2 %1))
(into [] xf))
for large dataset, the transducer should be better
Your original solution was actually broken unfortunately. It just seemed to work because of the order you had the data in my-set in. Note how you never actually sort by age, so you can never guarantee what order the ages are in.
I solved this with another call to map:
(->> my-set (group-by :name)
(vals)
; Sort by age each list that group-by returns
(map #(sort-by :age %))
(map last)) ; This could also happen in the above map
Note how I'm sorting each :name group by :age, then I take the last of each grouping.
I would do it a little differently, using the max function instead of sorting:
(def my-maps
[{:name "jess", :age 32}
{:name "ruxpin", :age 4}
{:name "jess", :age 35}
{:name "aero", :age 33}
{:name "banner", :age 4}])
(dotest
(let [grouped-data (group-by :name my-maps)
name-age-maps (for [[name map-list] grouped-data]
(let [max-age (apply max
(map :age map-list))
name-age-map {name max-age}]
name-age-map))
final-result (reduce into {} name-age-maps)]
final-result))
with results:
grouped-data =>
{"jess" [{:name "jess", :age 32} {:name "jess", :age 35}],
"ruxpin" [{:name "ruxpin", :age 4}],
"aero" [{:name "aero", :age 33}],
"banner" [{:name "banner", :age 4}]}
name-age-maps =>
({"jess" 35} {"ruxpin" 4} {"aero" 33} {"banner" 4})
final-result =>
{"jess" 35, "ruxpin" 4, "aero" 33, "banner" 4}
Compare by vector fields with different weight and data type (size has more weight), size is descending, name is ascending:
(def some-vector [{:name "head" :size 3}
{:name "mouth" :size 1}
{:name "nose" :size 1}
{:name "neck" :size 2}
{:name "chest" :size 10}
{:name "back" :size 10}
{:name "abdomen" :size 6}
])
(->> (some-vector)
(sort #(compare (str (format "%3d" (:size %2)) (:name %1))
(str (format "%3d" (:size %1)) (:name %2))
)))

Vector expected in Clojure?

Currently I am getting this error.
Parameter declaration symmetrize-body-parts should be a vector,
compiling:(braveclojure/core.clj:41:49)
My core.clj file
(ns braveclojure.core
(:gen-class))
(def asym-hobbit-body-parts [{:name "head" :size 3}
{:name "left-eye" :size 1}
{:name "left-ear" :size 1}
{:name "mouth" :size 1}
{:name "nose" :size 1}
{:name "neck" :size 2}
{:name "left-shoulder" :size 3}
{:name "left-upper-arm" :size 3}
{:name "chest" :size 10}
{:name "back" :size 10}
{:name "left-forearm" :size 3}
{:name "abdomen" :size 6}
{:name "left-kidney" :size 1}
{:name "left-hand" :size 2}
{:name "left-knee" :size 2}
{:name "left-thigh" :size 4}
{:name "left-lower-leg" :size 3}
{:name "left-achilles" :size 1}
{:name "left-foot" :size 2}])
(defn needs-matching-part? [part]
(re-find #"^left-" (:name part)))
(defn make-matching-part [part] {:name (clojure.string/replace (:name part) #"^left-" "right-")
:size (:size part)})
(defn symmetrize-body-parts
"Expects to see a seq of maps which have a :name and :size"
[asym-body-parts]
(loop [remaining-asym-parts asym-body-parts
final-body-parts []]
(if (empty? remaining-asym-parts)
final-body-parts
(let [[part & remaining] remaining-asym-parts
final-body-parts (conj final-body-parts part)]
(if (needs-matching-part? part)
(recur remaining (conj final-body-parts (make-matching-part part)))
(recur remaining final-body-parts))))))
(defn -main (symmetrize-body-parts asym-hobbit-body-parts))
I use lein run in the clojure app folder but it is raising the above error. I have switched different things around in regards to the parameter brackets [] location for the symmetrize-body-parts function definition from above to below the docstring as well but to no avail. What is going on here?
Following the tutorial from here.
Try:
(defn -main []
(symmetrize-body-parts asym-hobbit-body-parts))

Update hierarchical / tree structure in Clojure

I have an Atom, like x:
(def x (atom {:name "A"
:id 1
:children [{:name "B"
:id 2
:children []}
{:name "C"
:id 3
:children [{:name "D" :id 4 :children []}]}]}))
and need to update a submap like for example:
if :id is 2 , change :name to "Z"
resulting in an updated Atom:
{:name "A"
:id 1
:children [{:name "Z"
:id 2
:children []}
{:name "C"
:id 3
:children [{:name "D" :id 4 :children []}]}]}
how can this be done?
You could do it with postwalk or prewalk from the clojure.walk namespace.
(def x (atom {:name "A"
:id 1
:children [{:name "B"
:id 2
:children []}
{:name "C"
:id 3
:children [{:name "D" :id 4 :children []}]}]}))
(defn update-name [x]
(if (and (map? x) (= (:id x) 2))
(assoc x :name "Z")
x))
(swap! x (partial clojure.walk/postwalk update-name))
You could also use Zippers from the clojure.zip namespace
Find a working example here: https://gist.github.com/renegr/9493967

Merging two lists of maps where entries are identified by id in Clojure

What's the idiomatic way of merging two lists of maps in Clojure where each map entry is identified by an id key?
What's an implementation for foo so that
(foo '({:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3})
'({:id 5 :value 5}
{:id 2 :value 2}
{:id 3 :value 3}
{:id 1 :value 1}
{:id 4 :value 4})) => '({:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3}
{:id 4 :value 4}
{:id 5 :value 5})
is true?
(defn merge-by
"Merges elems in seqs by joining them on return value of key-fn k.
Example: (merge-by :id [{:id 0 :name \"George\"}{:id 1 :name \"Bernie\"}]
[{:id 2 :name \"Lara\"}{:id 0 :name \"Ben\"}])
=> [{:id 0 :name \"Ben\"}{:id 1 :name \"Bernie\"}{:id 2 :name \"Lara\"}]"
[k & seqs]
(->> seqs
(map (partial group-by k))
(apply merge-with (comp vector
(partial apply merge)
concat))
vals
(map first)))
How about this:
(defn foo [& colls]
(map (fn [[_ equivalent-maps]] (apply merge equivalent-maps))
(group-by :id (sort-by :id (apply concat colls)))))
This is generalized so that you can have an arbitrary number of input sequences, and an arbitrary grouping selector:
(def a [{:id 5 :value 5}
{:id 2 :value 2}
{:id 3 :value 3}
{:id 1 :value 1}
{:id 4 :value 4}])
(def b [{:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3}])
(def c [{:id 1 :bar true :value 1}
{:id 2 :bar false :value 2}
{:id 3 :value 3}
{:id 4 :value 4}
{:id 5 :value 5}])
(defn merge-vectors
[selector & sequences]
(let [unpack-grouped (fn [group]
(into {} (map (fn [[k [v & _]]] [k v]) group)))
grouped (map (comp unpack-grouped (partial group-by selector))
sequences)
merged (apply merge-with merge grouped)]
(sort-by selector (vals merged))))
(defn tst
[]
(= c
(merge-vectors :id a b)))