Merge two complex data structures - clojure

I'm having trouble finding a solution to the following problem:
Let's say I have a map:
(def defaults {
:name "John"
:surname "Doe"
:info {:date-of-birth "01-01-1980"
:registered [{:type "newsletter" :name "breaking news" }]}
})
And then I pass a similar structured map but I want to conjoin the vectors and overwrite the rest of the keys:
(def new {
:name "Peter"
:info {:date-of-birth "11-01-1986"
:registered [{:type "alert" :name "mobile-alert" }]}
})
And I want this result:
{:name "Peter"
:surname "Doe"
:info {:date-of-birth "11-01-1986"
:registered [{:type "newsletter" :name "breaking news" }
{:type "alert" :name "mobile-alert" }]}}
Now I can do this easily by using static syntax like:
(reduce conj (get-in defaults [:info :registered]) (get-in new [:info :registered]))
(There is probably a better way...) But I was hoping more of a dynamic function with the following properties:
Keep all keys from both maps, without knowing the structure
Update any keys with the values from the right map
if the val of a key is a vector, then conj the vector with the vector of the right map (if the appropriate key exists of course)
Thanks for the help in advance :)

You should definitely look at merge-with function. This is possible implementation:
(defn deep-merge
  "Recursively merges map `b` into map `a`.

  For every key present in both maps:
  - if both values are maps, they are merged recursively;
  - if the right value is a vector and the left value is sequential, the
    result is a vector holding the left items followed by the right items;
  - otherwise the right value wins.

  Keys present in only one of the maps are kept unchanged."
  [a b]
  (merge-with (fn [x y]
                (cond
                  ;; Recurse only when both sides are maps; a map on the right
                  ;; simply replaces a non-map on the left.
                  (and (map? x) (map? y)) (deep-merge x y)
                  ;; `into` keeps the result a vector. `concat` would yield a
                  ;; lazy seq, silently changing the collection type.
                  (and (sequential? x) (vector? y)) (into (vec x) y)
                  :else y))
              a b))

Here's a possible implementation for this kind of functionality. It is at least a starting point, you may need some extra validations depending on the possible structure of your data (e.g. what if the overriding map's value is a vector but the value in the default map is not even a collection?).
(declare merge-maps)

(defn merge-map
  "Reducing step for `merge-maps`: merges the single entry `[k v]` into map `x`.

  - a vector `v` is appended to the existing sequential value under `k`;
  - a map `v` is merged recursively into the existing map under `k`;
  - in every other case (including a missing key) `v` replaces the old value."
  [x [k v]]
  ;; `get` instead of `(x k)` so that a nil `x` (nested map absent on the
  ;; left-hand side) does not throw a NullPointerException.
  (let [old (get x k)]
    (cond
      ;; `into` on a vector keeps insertion order; `(reduce conj nil v)` would
      ;; build a list and therefore reverse the items when the key is missing.
      (and (vector? v) (sequential? old))
      (assoc x k (into (vec old) v))

      (and (map? v) (map? old))
      (assoc x k (merge-maps old v))

      :else
      (assoc x k v))))

(defn merge-maps
  "Deep-merges map `y` into map `x`, conjoining vectors and overwriting other
  values with those from `y`."
  [x y]
  (reduce merge-map x y))
(merge-maps defaults new)
;= {:info {:date-of-birth "11-01-1986",
;= :registered [{:name "breaking news", :type "newsletter"}
;= {:name "mobile-alert", :type "alert"}]},
;= :name "Peter",
;= :surname "Doe"}

Related

How to transform a list of maps to a nested map of maps?

Getting data from the database as a list of maps (LazySeq) leaves me in need of transforming it into a map of maps.
I tried to 'assoc' and 'merge', but that didn't bring the desired result because of the nesting.
This is the form of my data:
(def data (list {:structure 1 :cat "A" :item "item1" :val 0.1}
{:structure 1 :cat "A" :item "item2" :val 0.2}
{:structure 1 :cat "B" :item "item3" :val 0.4}
{:structure 2 :cat "A" :item "item1" :val 0.3}
{:structure 2 :cat "B" :item "item3" :val 0.5}))
I would like to get it in the form
=> {1 {"A" {"item1" 0.1
            "item2" 0.2}
       "B" {"item3" 0.4}}
    2 {"A" {"item1" 0.3}
       "B" {"item3" 0.5}}}
I tried
(->> data
(map #(assoc {} (:structure %) {(:cat %) {(:item %) (:val %)}}))
(apply merge-with into))
This gives
{1 {"A" {"item2" 0.2}, "B" {"item3" 0.4}},
2 {"A" {"item1" 0.3}, "B" {"item3" 0.5}}}
By merging I lose some entries, but I can't think of any other way. Is there a simple way? I was even about to try to use specter.
Any thoughts would be appreciated.
If I'm dealing with nested maps, first stop is usually to think about update-in or assoc-in - these take a sequence of the nested keys. For a problem like this where the data is very regular, it's straightforward.
(assoc-in {} [1 "A" "item1"] 0.1)
;; =>
{1 {"A" {"item1" 0.1}}}
To consume a sequence into something else, reduce is the idiomatic choice. The reducing function is right on the edge of the complexity level I'd consider an anonymous fn for, so I'll pull it out instead for clarity.
(defn- add-val
  "Reducing step: stores the :val of `line` in the nested map `acc` under the
  key path [structure cat item] taken from the same `line`."
  [acc line]
  (let [path (mapv #(% line) [:structure :cat :item])]
    (assoc-in acc path (:val line))))
(reduce add-val {} data)
;; =>
{1 {"A" {"item1" 0.1, "item2" 0.2}, "B" {"item3" 0.4}},
2 {"A" {"item1" 0.3}, "B" {"item3" 0.5}}}
Which I think was the effect you were looking for.
Roads less travelled:
As your sequence is coming from a database, I wouldn't worry about using a transient collection to speed the aggregation up. Also, now I think about it, dealing with nested transient maps is a pain anyway.
update-in would be handy if you wanted to add up any values with the same key, for example, but the implication of your question is that structure/cat/item tuples are unique and so you just need the grouping.
juxt could be used to generate the key structure - i.e.
((juxt :structure :cat :item) (first data))
[1 "A" "item1"]
but it's not clear to me that there's any way to use this to make the add-val fn more readable.
You may continue to use your existing code. Only the final merge has to change:
(defn deep-merge
  "Merges any number of values, recursing into nested maps.

  nil arguments are ignored. When every remaining argument is a map, the maps
  are merged key by key, applying `deep-merge` again to values that share a
  key. As soon as a non-map value is involved, the last non-nil value wins."
  [& xs]
  (let [xs (remove nil? xs)]
    (if (every? map? xs)
      (apply merge-with deep-merge xs)
      ;; `merge` must not be used here: it conjs its arguments, which throws
      ;; for scalars such as numbers and nests vectors instead of replacing.
      (last xs))))
(->> data
(map #(assoc {} (:structure %) {(:cat %) {(:item %) (:val %)}}))
(apply deep-merge))
;; =>
{1
{"A" {"item1" 0.1, "item2" 0.2},
"B" {"item3" 0.4}},
2
{"A" {"item1" 0.3},
"B" {"item3" 0.5}}}
Explanation: your original (apply merge-with into) only merges one level down. deep-merge from above will recurse into all nested maps to do the merge.
Addendum: @pete23 - one use of juxt I can think of is to make the function reusable. For example, we can extract arbitrary fields with juxt, then convert them to nested maps (with yet another function ->nested) and finally do a deep-merge:
(->> data
(map (juxt :structure :cat :item :val))
(map ->nested)
(apply deep-merge))
where ->nested can be implemented like:
(defn ->nested
  "Turns a sequence [k1 k2 ... kn v] into the nested single-entry map
  {k1 {k2 ... {kn v}}}. The first item is always used as a key; the second
  item becomes its value unless further items follow."
  [path]
  (let [k (first path)
        tail (next path)]
    (if (next tail)
      ;; More than one item left after k: keep nesting.
      {k (->nested tail)}
      ;; At most one item left: it is the leaf value (nil when absent).
      {k (first tail)})))
(->nested [1 "A" "item1" 0.1])
;; => {1 {"A" {"item1" 0.1}}}
One sample application (sum val by category):
(let [ks [:cat :val]]
(->> data
(map (apply juxt ks))
(map ->nested)
(apply (partial deep-merge-with +))))
;; => {"A" 0.6000000000000001, "B" 0.9}
Note deep-merge-with is left as an exercise for our readers :)
(defn map-values
  "Returns a map with the same keys as `m` whose values are `(f v)`."
  [f m]
  (into {} (for [[k v] m] [k (f v)])))

(defn- transform-structures
  "Groups the rows `ss` by :cat and turns every group into an
  {item val} map."
  [ss]
  (let [by-cat (group-by :cat ss)
        items->vals (fn [rows]
                      (reduce (fn [acc row]
                                (assoc acc (:item row) (:val row)))
                              {}
                              rows))]
    (map-values items->vals by-cat)))

(defn transform
  "Groups `data` by :structure, then by :cat, ending in {item val} maps."
  [data]
  (->> data
       (group-by :structure)
       (map-values transform-structures)))
then
(transform data)

clojure find arbitrarily nested key

Is there an easy way in Clojure (maybe using specter) to filter collections depending on whether an arbitrarily nested key with a known name contains a given element?
Ex. :
(def coll [{:res [{:a [{:thekey [
"the value I am looking for"
...
]
}
]}
{:res ...}
{:res ...}
]}])
Knowing that :a could have a different name, and that :thekey could be nested somewhere else.
Let's say I would like to do :
#(find-nested :thekey #{"the value I am looking for"} coll) ;; returns a vector containing the first element in coll (and maybe others)
use zippers.
in repl:
user> coll
[{:res [{:a [{:thekey ["the value I am looking for"]}]} {:res 1} {:res 1}]}]
user> (require '[clojure.zip :as z])
nil
user> (def cc (z/zipper coll? seq nil coll))
#'user/cc
user> (loop [x cc]
(if (= (z/node x) :thekey)
(z/node (z/next x))
(recur (z/next x))))
["the value I am looking for"]
update:
this version is flawed, since it doesn't care about :thekey being the key in a map, or just keyword in a vector, so it would give unneeded result for coll [[:thekey [1 2 3]]]. Here is an updated version:
(defn lookup-key
  "Walks `coll` depth-first and returns the first truthy value stored under
  key `k` in any nested map, or nil when no such entry exists.

  Only maps are consulted for `k`: an occurrence of `k` as a plain element of
  a vector, list or set is ignored. `k` may be any kind of key (keyword,
  string, ...). Requires clojure.zip aliased as `z`."
  [k coll]
  (let [coll-zip (z/zipper coll? #(if (map? %) (vals %) %) nil coll)]
    (loop [x coll-zip]
      (when-not (z/end? x)
        (let [node (z/node x)]
          ;; Guard with map?: calling a keyword on a set returns the element
          ;; itself, which would be a false positive, and calling a
          ;; non-keyword key as a function would throw.
          (if-let [v (when (map? node) (get node k))]
            v
            (recur (z/next x))))))))
in repl:
user> (lookup-key :thekey coll)
["the value I am looking for"]
user> (lookup-key :absent coll)
nil
lets say we have the same keyword somewhere in a vector in a coll:
(def coll [{:res [:thekey
{:a [{:thekey ["the value I am looking for"]}]}
{:res 1} {:res 1}]}])
#'user/coll
user> (lookup-key :thekey coll)
["the value I am looking for"]
which is what we need.

Alternatives to repeated use of partial when using clojure's `comp`

Given a collection:
[{:key "key_1" :value "value_1"}, {:key "key_2" :value "value_2"}]
I would like to convert this to:
{"key_1" "value_1" "key_2" "value_2"}
A function to do this would be:
;; Builds one map from a seq of maps: takes the vals of every map in xs,
;; flattens them into a single seq and pairs adjacent items via hash-map.
;; NOTE(review): depends on vals yielding the key before the value for every
;; input map, and flatten also flattens values that are themselves sequential.
(defn long->wide [xs]
(apply hash-map (flatten (map vals xs))))
I might simplify this using the threading macro:
;; Same pipeline written with the thread-last macro: vals of each map,
;; flattened, then paired up into a hash-map.
;; NOTE(review): same reliance on the order of vals as the nested-call form.
(defn long->wide [xs]
(->> xs
(map vals)
(flatten)
(apply hash-map)))
This still requires explicit definition of the function argument which I am not doing anything with other than passing to the first function. I might then rewrite this using comp to remove this:
;; Point-free form of the same pipeline. comp applies its functions right to
;; left, so (partial map vals) runs first and (partial apply hash-map) last.
(def long->wide
(comp (partial apply hash-map) flatten (partial map vals)))
This however requires repeated use of partial which to me is a lot of noise in the function.
Is there some function in Clojure that combines comp and ->> so that I can create a higher-order function without repeated use of partial, and also without having to create a new function?
Since many of the answers here already don't answer the original question, but
suggest different approaches, I put that one back up too.
I'd go with reduce and destructuring:
(reduce
(fn [m {:keys [key value]}]
(assoc m key value))
{}
[{:key "key_1" :value "value_1"}, {:key "key_2" :value "value_2"}])
Note, that this will also work with string keys (which you mentioned in the comments) (note :strs):
(reduce
(fn [m {:strs [key value]}]
(assoc m key value))
{}
[{"key" "key_1" "value" "value_1"}, {"key" "key_2" "value" "value_2"}])
Another (point-free) version, when using keywords:
;; `into`, the target map and the transducer are passed to partial as separate
;; arguments; the resulting function only needs the collection of maps.
(partial into {} (map (juxt :key :value)))
Since you mentioned in the comments, that you are using values from a DB, there might also be the chance, that you can switch to just return value tuples. Then the whole operation is just:
(into {} [["key_1" "value_1"]["key_2" "value_2"]])
Also note, that the use of vals on a map and expecting "insertion order" is
dangerous. Small maps are ordered only by accident:
user=> (take 3 (zipmap (range 3) (range 3)))
([0 0] [1 1] [2 2])
user=> (take 3 (zipmap (range 100) (range 100)))
([0 0] [65 65] [70 70])
An other alternative to the nice answers is also:
(apply hash-map (mapcat vals [{:key "key_1" :value "value_1"}, {:key "key_2" :value "value_2"}]))
or:
((comp #(apply hash-map %) #(mapcat vals %)) [{:key "key_1" :value "value_1"}, {:key "key_2" :value "value_2"}])
which are exactly the same.
As with clojure, so many ways to solve most problems.
;; Folds a seq of maps into one map, looking up :key and :value by calling
;; each input map as a function.
(partial #(reduce (fn [r m] (assoc r (m :key) (m :value)))
                  {}
                  %))
Not sure if the creation of anonymous functions violates your condition or not but this isn't adding functions to the namespace so I thought I'd throw it out there. This also has the benefit of not requiring the keys in the input maps to be keywords as :key and :value can be replaced with values of any type since the map is in the function position. For example:
;; Same fold with string keys: the input map is in function position, so the
;; lookup keys can be values of any type.
(partial #(reduce (fn [r m] (assoc r (m "key") (m "value")))
                  {}
                  %))

clojure way to update a map inside a vector

What is the clojure way to update a map inside a vector e.g. if I have something like this, assuming each map has unique :name
(def some-vec
[{:name "foo"
....}
{:name "bar"
....}
{:name "baz"
....}])
I want to update the map in someway if it has :name equal to foo. Currently I'm using map, like this
(map (fn [{:keys [name] :as value}]
(if-not (= name "foo")
value
(do-something .....))) some-vec)
But this will loop through the entire vector even though I only update one item.
Keep the data as a map instead of a vector of map-records, keyed by :name.
(def some-data
{"foo" {:name "foo" :other :stuff}
"bar" {:name "bar" :other :stuff}
"baz" {:name "baz" :other :stuff}})
Then
(assoc-in some-data ["bar" :other] :things)
produces
{"foo" {:other :stuff, :name "foo"},
"bar" {:other :things, :name "bar"},
"baz" {:other :stuff, :name "baz"}}
in one go.
You can capture the basic manipulation in
(defn assoc-by-fn
  "Stores `datum` in the map `data` under the key computed by applying
  `keyfn` to `datum`, replacing any entry already stored under that key."
  [data keyfn datum]
  (let [k (keyfn datum)]
    (assoc data k datum)))
When, for example,
(assoc-by-fn some-data :name {:name "zip" :other :fassner})
produces
{"zip" {:other :fassner, :name "zip"},
"foo" {:other :stuff, :name "foo"},
"bar" {:other :stuff, :name "bar"},
"baz" {:other :stuff, :name "baz"}}
Given that you have a vector of maps, your code looks fine to me. Your concern about "looping through the entire vector" is a natural consequence of the fact that you're doing a linear search for the :name and the fact that vectors are immutable.
I wonder whether what you really want is a vector of maps? Why not a map of maps?
(def some-map
{"foo" {...}
"bar" {...}
"baz" {...}})
Which you could then update with update-in?
Given this shape of the input data and unless you have an index that can tell you which indices the maps with a given value of :name reside at, you will have to loop over the entire vector. You can, however, minimize the amount of work involved in producing the updated vector by only "updating" the matching maps, rather than rebuilding the entire vector:
(defn update-values-if
  "Expects `xs` to be a vector. Returns `xs` with every element for which
  `pred` returns a truthy value replaced by `(f element)`; all other
  elements are left untouched."
  [xs pred f]
  (reduce (fn [acc idx]
            (let [item (nth acc idx)]
              ;; assoc only at matching positions; everything else is shared
              ;; with the input vector.
              (if (pred item)
                (assoc acc idx (f item))
                acc)))
          xs
          (range (count xs))))
This will perform as many assoc operations as there are values in xs for which pred returns a truthy value.
Example:
(def some-vec [{:name "foo" :x 0} {:name "bar" :x 0} {:name "baz" :x 0}])
(update-values-if some-vec #(= "foo" (:name %)) #(update-in % [:x] inc))
;= [{:name "foo", :x 1} {:name "bar", :x 0} {:name "baz", :x 0}]
Of course if you're planning to transform the vector in this way with some regularity, then Thumbnail's and Paul's suggestion to use a map of maps will be a much more significant improvement. That remains the case if :name doesn't uniquely identify the maps – in that case, you could simply transform your original vector using group-by and deal with a map of vectors (of maps with a given :name).
If you're working with vector, you should know index of element that you want to change, otherwise you have to traverse it in some way.
I can propose this solution:
(defn my-update
  "Finds the first element of the vector `coll` whose :name equals `val` and
  returns `coll` with that element replaced by `(apply fnc element args)`.
  Returns nil when no element matches."
  [coll val fnc & args]
  (when-let [idx (first (keep-indexed (fn [i {x :name}]
                                        (when (= x val) i))
                                      coll))]
    (apply update-in coll [idx] fnc args)))
Where:
coll - given collection of maps;
val - value of field :name;
fnc - updating function;
args - arguments of the updating function.
Let's try it:
user> (def some-vec
[{:name "foo"}
{:name "bar"}
{:name "baz"}])
;; => #'user/some-vec
user> (my-update some-vec "foo" assoc :boo 12)
;; => [{:name "foo", :boo 12} {:name "bar"} {:name "baz"}]
user> (my-update some-vec "bar" assoc :wow "wow!")
;; => [{:name "foo"} {:name "bar", :wow "wow!"} {:name "baz"}]
I think that Thumbnail's answer may be quite useful for you. If you can keep your data as a map, these manipulations become much easier. Here is how you can transform your vector into a map:
user> (apply hash-map (interleave (map :name some-vec) some-vec))
;; => {"foo" {:name "foo"}, "bar" {:name "bar"}, "baz" {:name "baz"}}

applying function in a map to each value in a map based on its key

Suppose I have a map:
{:name "foo"
:age "bar"}
And another one
{:name (fn [val] (println val))
:age (fn [val] (= val "bar"))}
I want to apply the function stored under :name in the second map to the value stored under :name in the first map, and likewise the function stored under :age to the value stored under :age. How can I do this the Clojure way?
You can use merge-with
(def m1 {:name "foo"
:age "bar"})
(def m2 {:name (fn [val] (println val))
:age (fn [val] (= val "bar"))})
user=> (merge-with #(%1 %2) m2 m1)
foo
{:name nil, :age true}
map over one map and get corresponding function from the other one.
(def m1 {:name "foo"
:age "bar"})
(def m2 {:name (fn [val] (println val))
:age (fn [val] (= val "bar"))})
(map (fn [[k v]]
((get m2 k) v))
m1)
Each iteration over the map passes a vector to the function, in your sample:
[:name "foo"]
[:age "bar"]
So destructuring the function parameter into [[k v]] gives you each key/value separately.
(def data { :name "don knotts"
:dob "1/1/1940"
:cob "Valdosta" })
(def fxns {:name identity :dob identity :cob clojure.string/reverse})
(defn bmap
  "Applies to each value of the map `data` the function stored under the same
  key in the map `fxn`.

  Entries of `data` whose key has no counterpart in `fxn` are dropped.
  Returns nil when no key is shared between the two maps."
  [data fxn]
  ;; Look the function up directly instead of scanning every entry of `fxn`
  ;; for every entry of `data`.
  (apply merge (for [[k d] data
                     :when (contains? fxn k)]
                 {k ((get fxn k) d)})))
;=user>{:cob "atsodlaV", :dob "1/1/1940", :name "don knotts"}
I like this, if you need more resilience:
(defn fmm
  "Returns a map with the keys of `m` in which every value has been passed
  through the function stored under the same key in `fm`. Keys without a
  function in `fm` keep their value unchanged (identity)."
  [m fm]
  ;; `(get m k)` rather than `(k m)` so that keys need not be keywords.
  (let [f (fn [k] ((get fm k identity) (get m k)))
        ks (keys m)]
    (zipmap ks (map f ks))))