Clojure: Deep-merge hash maps with dot-separated string keys - clojure

I'm looking for a function to achieve the following example result:
{"foo1" "baz"
"foo2.bar" "baz"
"foo2.bar2" "baz"
"foo3_bar" "baz"}
=>
{:foo1 "baz"
:foo2 {:bar "baz"
:bar2 "baz"}
:foo3 {:bar "baz"}}
As one can see, it's a bit different from a classic deep-merge as the keys have to be keywordized first in a way that dot- and underscore postfixes are converted to hash maps (instead of the usual #[_\.]=> -).

(defn parse-keys-and-merge
  "Turns a map with string keys into a nested, keyword-keyed map.

  Every key is split on `.` and `_`; the resulting pieces, keywordized, form
  the path under which the value is stored. Values that are maps themselves
  are converted recursively."
  [hm]
  (letfn [(key-path [s]
            (map keyword (clojure.string/split s #"[\._]")))
          (convert [v]
            (if (map? v) (parse-keys-and-merge v) v))]
    (reduce-kv (fn [acc k v]
                 (assoc-in acc (key-path k) (convert v)))
               {}
               hm)))
This does not work for your hash-map because your hash-map does not clarify whether the entry for :foo should be "baz" or {:bar "baz", :bar2 "baz"}. With a fixed hash-map it works:
(parse-keys-and-merge {"foo2_bar" "baz", "foo.bar2" "baz", "foo.bar" "baz"})
;; {:foo {:bar "baz", :bar2 "baz"}, :foo2 {:bar "baz"}}

With inspiration from @lgrapenthin I came up with this solution. On the upside it is short and concise; on the downside it is expensive (which is not too bad for my use case), and the overwriting strategy is determined by Clojure's hash-map iteration order (i.e. it is undetermined from the user's point of view):
(defn- deep-merge
  "Recursively merges `maps`. While every argument is a map the merge
  descends into shared keys; as soon as a non-map is involved the last
  argument wins."
  [& maps]
  (if (every? map? maps)
    (apply merge-with deep-merge maps)
    (last maps)))

(defn- str-keys-to-map
  "Expands one `[k v]` entry into a nested single-path map. `k` is split on
  `.` and `_`, empty segments are dropped and the rest are keywordized.
  Returns nil when no usable segment remains (e.g. for \"\" or \".\")."
  [[k v]]
  (let [ks (map keyword (filter not-empty (string/split k #"[\._]")))]
    (when (seq ks)
      (assoc-in {} ks v))))

(defn deep-keywordize-keys
  "Converts a flat map with dot/underscore separated string keys into a
  nested keyword map, deep-merging entries that share a prefix.

  Entries whose key has no usable segment are skipped. `keep` is used rather
  than `map` because a nil handed to `deep-merge` makes it treat the whole
  argument list as non-maps and return only the last element. The merge is
  seeded with {} so an empty (or entirely skipped) input yields {}."
  [m]
  (->> m
       (keep str-keys-to-map)
       (apply deep-merge {})))

You could use a function like this one.
Please note that it could be optimized to do tail recursion.
(defn deep-hashmap-merge
  "Builds a nested keyword map from a flat map `m` whose string keys use `.`
  or `_` as path separators.

  When a path has to descend through a value that is not a map (for example
  a string stored earlier under the same prefix), that value is replaced by
  a fresh map. Entries are processed in the iteration order of `m`, so for
  conflicting keys the later entry wins. Keys that split into no segments at
  all (such as \".\") are skipped."
  [m]
  ;; letfn is required here: a fn bound with plain `let` cannot refer to its
  ;; own binding name, so the recursive call would not resolve.
  (letfn [(subtree [r k]
            (let [t (get r k)]
              (if (map? t) t {})))
          (put [r [k & ks] v]
            (if (nil? ks)
              (assoc r k v)
              (assoc r k (put (subtree r k) ks v))))]
    (reduce (fn [acc [k v]]
              (let [path (map keyword (clojure.string/split k #"[_\.]"))]
                (if (seq path)
                  (put acc path v)
                  acc)))
            {}
            m)))
And the output would then be :
user=> (deep-hashmap-merge
#_=> {"foo" "baz"
#_=> "foo.bar" "baz"
#_=> "foo.bar2" "baz"
#_=> "foo2_bar" "baz"})
{:foo {:bar "baz", :bar2 "baz"}, :foo2 {:bar "baz"}}

Related

Using Clojure, is there a better way to remove an item from a sequence which is the value in a map?

There is a map containing sequences. The sequences contain items.
I want to remove a given item from any sequence that contains it.
The solution I found does what it should, but I wonder if there is a better
or more elegant way to achieve the same.
my current solution:
(defn remove-item-from-map-value
  "Returns `my-map` with every occurrence of `item` removed from each of its
  sequence values. Values come back as lazy sequences.

  The result is built with `into {}` so that an empty input map yields {}
  (merging an empty list of maps would yield nil)."
  [my-map item]
  (into {}
        (for [[k v] my-map]
          [k (remove #(= item %) v)])))
The test describe the expected behaviour:
(require '[clojure.test :as t])
(def my-map {:keyOne ["itemOne"]
:keyTwo ["itemTwo" "itemThree"]
:keyThree ["itemFour" "itemFive" "itemSix"]})
(defn remove-item-from-map-value
  "Returns `my-map` with every occurrence of `item` removed from each of its
  sequence values. Values come back as lazy sequences.

  Uses `into {}` rather than merging single-entry maps, so an empty input
  map produces {} instead of nil."
  [my-map item]
  (into {}
        (map (fn [[k v]] [k (remove #(= item %) v)]))
        my-map))
(t/is (= (remove-item-from-map-value my-map "unknown-item") my-map))
(t/is (= (remove-item-from-map-value my-map "itemFive") {:keyOne ["itemOne"]
:keyTwo ["itemTwo" "itemThree"]
:keyThree ["itemFour" "itemSix"]}))
(t/is (= (remove-item-from-map-value my-map "itemThree") {:keyOne ["itemOne"]
:keyTwo ["itemTwo"]
:keyThree ["itemFour" "itemFive" "itemSix"]}))
(t/is (= (remove-item-from-map-value my-map "itemOne") {:keyOne []
:keyTwo ["itemTwo" "itemThree"]
:keyThree ["itemFour" "itemFive" "itemSix"]}))
I'm fairly new to clojure and am interested in different solutions.
So any input is welcome.
I throw in the specter
version for good measure. It keeps the vectors inside the map
and it's really compact.
(setval [MAP-VALS ALL #{"itemFive"}] NONE my-map)
Example
user=> (use 'com.rpl.specter)
nil
user=> (def my-map {:keyOne ["itemOne"]
#_=> :keyTwo ["itemTwo" "itemThree"]
#_=> :keyThree ["itemFour" "itemFive" "itemSix"]})
#_=>
#'user/my-map
user=> (setval [MAP-VALS ALL #{"itemFive"}] NONE my-map)
{:keyOne ["itemOne"],
:keyThree ["itemFour" "itemSix"],
:keyTwo ["itemTwo" "itemThree"]}
user=> (setval [MAP-VALS ALL #{"unknown"}] NONE my-map)
{:keyOne ["itemOne"],
:keyThree ["itemFour" "itemFive" "itemSix"],
:keyTwo ["itemTwo" "itemThree"]}
i would go with something like this:
user> (defn remove-item [my-map item]
(into {}
(map (fn [[k v]] [k (remove #{item} v)]))
my-map))
#'user/remove-item
user> (remove-item my-map "itemFour")
;;=> {:keyOne ("itemOne"),
;; :keyTwo ("itemTwo" "itemThree"),
;; :keyThree ("itemFive" "itemSix")}
you could also make up a handy function map-val performing mapping on map values:
(defn map-val
  "Applies `f` to every value of `data`, returning a new map with the same
  keys and the transformed values."
  [f data]
  (reduce-kv #(assoc %1 %2 (f %3)) {} data))
or shortly like this:
(defn map-val
  "Applies `f` to the value stored under each key of `data`, updating
  `data` itself key by key and returning the result."
  [f data]
  (loop [acc data
         ks (keys data)]
    (if (seq ks)
      (recur (update acc (first ks) f) (next ks))
      acc)))
user> (map-val inc {:a 1 :b 2})
;;=> {:a 2, :b 3}
(defn remove-item
  "Removes `item` from every sequence value of `my-map` by mapping over the
  map's values with `map-val`."
  [my-map item]
  (let [without-item (fn [xs] (remove #{item} xs))]
    (map-val without-item my-map)))
user> (remove-item my-map "itemFour")
;;=> {:keyOne ("itemOne"),
;; :keyTwo ("itemTwo" "itemThree"),
;; :keyThree ("itemFive" "itemSix")}
I think your solution is mostly okay, but I would try to avoid the apply merge part, as you can easily recreate a map from a sequence with into. Also, you could also use map instead of for which I think is a little bit more idiomatic in this case as you don't use any of the list comprehension features of for.
(defn remove-item-from-map-value
  "Rebuilds `m` so that each sequence value no longer contains `item`."
  [m item]
  (into {}
        (for [[k vs] m]
          {k (remove (partial = item) vs)})))
Another solution, much like @leetwinski's:
(defn remove-item
  "Pairs each key of `m` with its value stripped of every element equal
  to `i`."
  [m i]
  (let [strip (fn [v] (remove (fn [x] (= x i)) v))]
    (zipmap (keys m)
            (map strip (vals m)))))
Here's a one-liner which does this in an elegant way. The perfect function for me to use in this scenario is clojure.walk/prewalk. What this fn does is traverse all of the sub-forms of the form that you pass to it and transform them with the provided fn:
(defn remove-item-from-map-value
  "Walks `data` top-down; every map entry encountered is replaced by a
  `[key value]` vector whose value has `item` removed. All other forms are
  left untouched."
  [data item]
  (letfn [(strip-entry [form]
            (if (map-entry? form)
              [(key form) (remove #{item} (val form))]
              form))]
    (clojure.walk/prewalk strip-entry data)))
What the remove-item-from-map-value fn does is check whether the current form is a map entry and, if so, remove the specified item from its value (the second element of the map entry, which is a vector containing a key and a value, respectively).
The best thing about this approach is that it is completely extensible: you could decide to do different things for different types of forms, you can also handle nested forms, etc.
It took me some time to master this fn but once I got it I found it extremely useful!

Is there a short form for creating hash-map in Clojure?

Is there a short form/macro that allows me to do
(defn f [a b c]
{a b c})
instead of
;; The long-hand form the question wants to abbreviate: returns a map that
;; stores each argument under the keyword matching its parameter name.
(defn f [a b c]
{:a a :b b :c c})
(defmacro as-map
  "Expands to a map literal that binds each symbol in `syms` under the
  keyword of the same name, e.g. (as-map a b) => {:a a, :b b}."
  [& syms]
  (into {} (map (juxt keyword identity) syms)))
Usage:
(def a 42)
(def b :foo)
(as-map a b)
;;-> {:a 42 :b :foo}
Note that to support namespaced keywords, you'd have to drop support for ns aliases if you want to keep it as short:
(defmacro as-map
  "Expands to a map literal keyed by the (possibly namespaced) keyword form
  of each symbol in `syms`; the value is looked up through the symbol's
  bare, un-namespaced name."
  [& syms]
  (into {}
        (for [s syms]
          [(keyword s) (symbol (name s))])))
Usage:
(def a 42)
(def b :foo)
(as-map example/a foo-of/b)
;;-> {:example/a 42 :foo-of/b :foo}
Advice: Likely not a good idea, saves you a few keyboard hits at the cost of readability and expressivity and flexibility in naming local bindings.
This shows the steps. Remove the println's for actual use:
(ns clj.core
(:gen-class))
(defmacro hasher
  "Expands (hasher a b) into (hash-map :a a :b b).

  The println calls run at macro-expansion time and only illustrate the
  intermediate steps; remove them for real use."
  [& args]
  (let [keywords     (map keyword args)
        values       args
        keyvals-list (interleave keywords values)]
    (println "keywords " keywords)
    (println "values " values)
    (println "keyvals-list " keyvals-list)
    ;; ~@ (unquote-splicing) spreads the key/value list into the call
    `(hash-map ~@keyvals-list)))
(def a 1)
(def b 2)
(println \newline "result: " (hasher a b))
> lein run
keywords (:a :b)
values (a b)
keyvals-list (:a a :b b)
result: {:b 2, :a 1}
This is an old snippet of mine I've had kicking around for a while.
(declare ^:private restructure*)

(defn ^:private restructure-1
  "Folds one entry `[e k]` of a destructuring-style map form into the
  accumulator `m`:
    :strs [a b] => string keys \"a\" \"b\" mapped to the symbols a b
    :keys [a b] => keyword keys (namespace preserved) mapped to the symbols
    anything else => key `k` mapped to the restructured binding form `e`."
  [m [e k]]
  (cond
    (= :strs e) (reduce #(assoc %1 (name %2) %2) m k)
    (= :keys e) (reduce #(assoc %1 (keyword (namespace %2) (name %2)) %2) m k)
    :else       (assoc m k (restructure* e))))

(defn ^:private restructure*
  "Turns a destructuring-style map `form` into code that builds the
  corresponding map; non-map forms are returned unchanged."
  [form]
  (if-not (map? form)
    form
    (as-> {} v
      (reduce restructure-1 v form)
      ;; ~@ splices the flattened key/value pairs into the hash-map call
      `(hash-map ~@(mapcat identity v)))))

(defmacro restructure
  "Complement of destructuring: captures the named local bindings and
  constructs the data structure described by `form`."
  [form]
  (restructure* form))
The idea is that it provides the complement of clojure.core/destructure which goes from a destructuring form to bindings, this captures bindings and constructs a datastructure.
(let [x 1 y 2 z 3]
(restructure {:keys [x y z]}))
;; => {:x 1 :y 2 :z 3}

clojure find arbitrarily nested key

Is there an easy way in Clojure (maybe using specter) to filter collections depending on whether an arbitrarily nested key with a known name contains an element?
Ex. :
(def coll [{:res [{:a [{:thekey [
"the value I am looking for"
...
]
}
]}
{:res ...}
{:res ...}
]}])
Knowing that :a could have a different name, and that :thekey could be nested somewhere else.
Let's say I would like to do :
#(find-nested :thekey #{"the value I am looking for"} coll) ;; returns a vector containing the first element in coll (and maybe others)
use zippers.
in repl:
user> coll
[{:res [{:a [{:thekey ["the value I am looking for"]}]} {:res 1} {:res 1}]}]
user> (require '[clojure.zip :as z])
nil
user> (def cc (z/zipper coll? seq nil coll))
#'user/cc
user> (loop [x cc]
(if (= (z/node x) :thekey)
(z/node (z/next x))
(recur (z/next x))))
["the value I am looking for"]
update:
this version is flawed, since it doesn't care about :thekey being the key in a map, or just keyword in a vector, so it would give unneeded result for coll [[:thekey [1 2 3]]]. Here is an updated version:
(defn lookup-key
  "Walks `coll` depth-first with a zipper and returns the first truthy
  result of applying `k` to a visited node, or nil when the walk ends
  without one. Maps contribute only their values as children, so `k`
  appearing as a plain element of a vector does not count as a hit."
  [k coll]
  (let [children (fn [node] (if (map? node) (vals node) node))]
    (loop [loc (z/zipper coll? children nil coll)]
      (when-not (z/end? loc)
        (or (k (z/node loc))
            (recur (z/next loc)))))))
in repl:
user> (lookup-key :thekey coll)
["the value I am looking for"]
user> (lookup-key :absent coll)
nil
lets say we have the same keyword somewhere in a vector in a coll:
(def coll [{:res [:thekey
{:a [{:thekey ["the value I am looking for"]}]}
{:res 1} {:res 1}]}])
#'user/coll
user> (lookup-key :thekey coll)
["the value I am looking for"]
which is what we need.

changing nested map value without knowing keys

I need to change a value in a nested map where I don't know the values of keys in advance. I have come up with the following to do that.
;; input {String {String [String]}}
;; output {String {String String}}
(defn join-z
  "Given a two-level map x -> y -> [z ...], returns the same structure with
  every innermost collection of strings joined into a single string."
  [x-to-y-to-z]
  (let [join-inner (fn [y-to-z]
                     (into {}
                           (for [[y z] y-to-z]
                             {y (clojure.string/join z)})))]
    (zipmap (keys x-to-y-to-z)
            (map join-inner (vals x-to-y-to-z)))))
(def example
{"a" {"b" ["c" "d" "e"]}
"m" {"n" ["o" "p"]}})
;; (join-z example) => {"m" {"n" "op"}, "a" {"b" "cde"}}
This seems to be a hack. What is idiomatic clojure to do this? Or, is there something like Haskell's lens library to use?
UPDATE: based on user5187212 answer
(defn update-vals
  "Returns a map with the keys of `m0` whose values are `(f v)` for each
  original value `v`. Note the argument order: function first, map second."
  [f m0]
  (persistent!
   (reduce-kv (fn [acc k v] (assoc! acc k (f v)))
              (transient {})
              m0)))
;; (update-vals clojure.string/join {"b" ["c" "d" "e"]}) => {"b" "cde"}
(defn join-z
  "Joins the innermost string collections of a two-level map by applying
  `update-vals` at both levels."
  [x-to-y-to-z]
  (update-vals (fn [y-to-z] (update-vals clojure.string/join y-to-z))
               x-to-y-to-z))
;; (join-z example) => {"m" {"n" "op"}, "a" {"b" "cde"}}
This seems much more elegant. Thanks!
I would suggest reduce-kv.
For the last layer you can use something like:
(defn foo
  "Joins the collection of strings stored under every key of `x` into one
  string, returning the resulting map."
  [x]
  (letfn [(join-entry [acc k strs]
            (assoc acc k (clojure.string/join strs)))]
    (reduce-kv join-entry {} x)))
then call it as many times as you need...
(reduce-kv
(fn [m k v]
(assoc m k (foo v)))
{}
example)
An other approach could be over all nested keys and then
(reduce
(fn [m ks]
(update-in m ks clojure.string/join))
example
all-nested-keys)
The short answer is yes, that is how you do it :)
I would go for something more like this:
(into {} (for [[k v] example]
[k (into {} (for [[k2 v2] v]
[k2 (string/join v2)]))]))
Which is pretty much the same thing.
There is a library called Specter
https://github.com/nathanmarz/specter
for queries and transformations:
(ns specter.core
(:require
[clojure.string :as string]
[com.rpl.specter :as s]))
(def example
{"a" {"b" ["c" "d" "e"]}
"m" {"n" ["o" "p"]}})
(s/transform
[s/ALL s/LAST s/ALL s/LAST]
string/join
example)
Which I think is a pretty neat way to express it.

Find Value of Specific Key in Nested Map

In Clojure, how can I find the value of a key that may be deep in a nested map structure? For example:
(def m {:a {:b "b"
:c "c"
:d {:e "e"
:f "f"}}})
(find-nested m :f)
=> "f"
Clojure offers tree-seq to do a depth-first traversal of any value. This will simplify the logic needed to find your nested key:
(defn find-nested
  "Depth-first search through the nested map `m`; returns the first truthy
  result of applying `k` (a keyword or any function of a map) to a visited
  map, or nil if there is none."
  [m k]
  (let [nodes (tree-seq map? vals m)]
    (some k (filter map? nodes))))
(find-nested {:a {:b {:c 1}, :d 2}} :c)
;; => 1
Also, finding all matches becomes a matter of replacing some with keep:
(defn find-all-nested
  "Depth-first search through the nested map `m`; returns a lazy sequence of
  every non-nil result of applying `k` to a visited map."
  [m k]
  (for [node (tree-seq map? vals m)
        :when (map? node)
        :let [hit (k node)]
        :when (some? hit)]
    hit))
(find-all-nested {:a {:b {:c 1}, :c 2}} :c)
;; => [2 1]
Note that maps with nil values might require some special treatment.
Update: If you look at the code above, you can see that k can actually be a function which offers a lot more possibilities:
to find a string key:
(find-nested m #(get % "k"))
to find multiple keys:
(find-nested m #(some % [:a :b]))
to find only positive values in maps of integers:
(find-nested m #(when (some-> % :k pos?) (:k %)))
If you know the nested path then use get-in.
=> (get-in m [:a :d :f])
=> "f"
See here for details: https://clojuredocs.org/clojure.core/get-in
If you don't know the path in your nested structure you could write a function that recurses through the nested map looking for the particular key in question and either returns its value when it finds the first one or returns all the values for :f in a seq.
If you know the "path", consider using get-in:
(get-in m [:a :d :f]) ; => "f"
If the "path" is unknown you can use something like next function:
(defn find-in
  "Returns the first non-nil value stored under `k`, checking `m` itself
  before searching its child values recursively. Returns nil when `m` is
  not a map or nothing is found."
  [m k]
  (when (map? m)
    (let [here  (m k)                          ; result from the current level
          below (map #(find-in % k) (vals m))] ; results from child values
      (first (remove nil? (cons here below))))))
(find-in m :f) ; "f"
(find-in m :d) ; {:e "e", :f "f"}
Note: given function will find only the first occurrence.
Here is a version that will find the key without knowing the path to it. If there are multiple matching keys, only one will be returned:
(defn find-key
  "Searches `m` level by level for `k` and returns the first truthy value
  found. When the current level has no match, all of its map-valued
  children are merged into one map that becomes the next level. Returns nil
  when the levels run out."
  [m k]
  (loop [level m]
    (when (seq level)
      (or (get level k)
          (recur (reduce merge
                         (for [[_ child] level]
                           (when (map? child) child))))))))
If you require all values you can use:
(defn merge-map-vals
  "Merges all map-valued entries of `m` into a single map; when two of them
  share a key, their values are combined into a vector."
  [m]
  (let [combine  (partial merge-with vector)
        children (for [[_ v] m]
                   (when (map? v) v))]
    (reduce combine children)))
(defn find-key
  "Returns a lazy sequence of every non-nil value stored under `k` anywhere
  in the nested map `m`, in breadth-first order (shallower matches first).

  Each level is kept as a sequence of maps instead of being merged into one
  map. Merging siblings would turn nested maps that share a key into a
  vector, which the next level then skips, silently losing matches beneath
  it. Found values are returned as they are; sequential values are not
  flattened. Returns an empty sequence when `m` is not a map or nothing
  matches."
  [m k]
  (->> (if (map? m) [m] [])
       ;; next level = all map-valued children of the maps on this level
       (iterate (fn [level] (filter map? (mapcat vals level))))
       (take-while seq)
       (mapcat (fn [level] (keep #(get % k) level)))))