Clojure/dataset: group-by multiple columns hierarchically? - clojure

I would like to implement a function that can group-by for multiple columns hierarchically. I can illustrate my requirement by the following tentative implementation for two columns:
(defn group-by-two-columns-hierarchically
[col1 col2 table]
(let [data-by-col1 ($group-by col1 table)
data-further-by-col2 (into {} (for [[k v] data-by-col1] [k ($group-by col2 v)]))
]
data-further-by-col2
))
I'm seeking help how to generalize on arbitrary number of columns.
(I understand that Incanter supports group-by for multiple columns but it only provides a structure not hierarchy, a map of composite key of multiple columns to value of datasets.)
Thanks for your help!
Note: to make Michał's solution work for incanter dataset, only a slight modification is needed, replacing "group-by" by "incanter.core/$group-by", illustrated by the following experiment:
(defn group-by*
"Similar to group-by, but takes a collection of functions and returns
a hierarchically grouped result."
[fs coll]
(if-let [f (first fs)]
(into {} (map (fn [[k vs]]
[k (group-by* (next fs) vs)])
(incanter.core/$group-by f coll)))
coll))
(def table (incanter.core/dataset ["x1" "x2" "x3"]
[[1 2 3]
[1 2 30]
[4 5 6]
[4 5 60]
[7 8 9]
]))
(group-by* [:x1 :x2] table)
=>
{{:x1 1} {{:x2 2}
| x1 | x2 | x3 |
|----+----+----|
| 1 | 2 | 3 |
| 1 | 2 | 30 |
},
{:x1 4} {{:x2 5}
| x1 | x2 | x3 |
|----+----+----|
| 4 | 5 | 6 |
| 4 | 5 | 60 |
},
{:x1 7} {{:x2 8}
| x1 | x2 | x3 |
|----+----+----|
| 7 | 8 | 9 |
}}

(defn group-by*
"Similar to group-by, but takes a collection of functions and returns
a hierarchically grouped result."
[fs coll]
(if-let [f (first fs)]
(into {} (map (fn [[k vs]]
[k (group-by* (next fs) vs)])
(group-by f coll)))
coll))
Example:
user> (group-by* [:foo :bar :quux]
[{:foo 1 :bar 1 :quux 1 :asdf 1}
{:foo 1 :bar 1 :quux 2 :asdf 2}
{:foo 1 :bar 2 :quux 1 :asdf 3}
{:foo 1 :bar 2 :quux 2 :asdf 4}
{:foo 2 :bar 1 :quux 1 :asdf 5}
{:foo 2 :bar 1 :quux 2 :asdf 6}
{:foo 2 :bar 2 :quux 1 :asdf 7}
{:foo 2 :bar 2 :quux 2 :asdf 8}
{:foo 1 :bar 1 :quux 1 :asdf 9}
{:foo 1 :bar 1 :quux 2 :asdf 10}
{:foo 1 :bar 2 :quux 1 :asdf 11}
{:foo 1 :bar 2 :quux 2 :asdf 12}
{:foo 2 :bar 1 :quux 1 :asdf 13}
{:foo 2 :bar 1 :quux 2 :asdf 14}
{:foo 2 :bar 2 :quux 1 :asdf 15}
{:foo 2 :bar 2 :quux 2 :asdf 16}])
{1 {1 {1 [{:asdf 1, :bar 1, :foo 1, :quux 1}
{:asdf 9, :bar 1, :foo 1, :quux 1}],
2 [{:asdf 2, :bar 1, :foo 1, :quux 2}
{:asdf 10, :bar 1, :foo 1, :quux 2}]},
2 {1 [{:asdf 3, :bar 2, :foo 1, :quux 1}
{:asdf 11, :bar 2, :foo 1, :quux 1}],
2 [{:asdf 4, :bar 2, :foo 1, :quux 2}
{:asdf 12, :bar 2, :foo 1, :quux 2}]}},
2 {1 {1 [{:asdf 5, :bar 1, :foo 2, :quux 1}
{:asdf 13, :bar 1, :foo 2, :quux 1}],
2 [{:asdf 6, :bar 1, :foo 2, :quux 2}
{:asdf 14, :bar 1, :foo 2, :quux 2}]},
2 {1 [{:asdf 7, :bar 2, :foo 2, :quux 1}
{:asdf 15, :bar 2, :foo 2, :quux 1}],
2 [{:asdf 8, :bar 2, :foo 2, :quux 2}
{:asdf 16, :bar 2, :foo 2, :quux 2}]}}}

Related

In Clojure, how do I make a nested map return a map with an inner map all set to 0?

So basically how do I make a function given the input {:A 1 :B 2 :C {:X 5 :Y 5 :Z 5} :D 1} and the key :C
return {:A 1 :B 2 :C {:X 0 :Y 0 :Z 0} :D 1}? It's the same mapping but with the nested map all set to 0. Given that we know that the key :C has the nested values.
I'm very new to clojure and I'm struggling with loops and iterations so any help would be appreciated.
Thanks.
(defn with-zero-vals-at-key
[m k]
(update m k (fn [m2] (zipmap (keys m2) (repeat 0)))))
(with-zero-vals-at-key {:A 1 :B 2 :C {:X 5 :Y 5 :Z 5} :D 1} :C)
;; => {:A 1, :B 2, :C {:X 0, :Y 0, :Z 0}, :D 1}
;; OR
(defn with-zero-vals
[m]
(zipmap (keys m) (repeat 0)))
(update {:A 1 :B 2 :C {:X 5 :Y 5 :Z 5} :D 1}
:C
with-zero-vals)
;; => {:A 1, :B 2, :C {:X 0, :Y 0, :Z 0}, :D 1}

merge two lists by some map key

I'd like to merge two lists by some map key as follow:
(def list1 '({:a 2 :b 2} {:a 1 :b 1}))
(def list2 '({:a 1 :c 1} {:a 2 :c 2}))
As result I'd like something like, using sort by :a for example:
'({:a 1 :b 1 :c 1} {:a 2 :b 2 :c 2})
Any ideas?
You can use join and sort-by:
(:require '[clojure.set :as s])
(sort-by :a (s/join list1 list2 {:a :a}))
Does this do it?
(def list1 '({:a 1 :b 1} {:a 2 :b 2}))
(def list2 '({:a 1 :c 1} {:a 2 :c 2}))
(println
(map merge list1 list2)
)
;=> ({:a 1, :b 1, :c 1} {:a 2, :b 2, :c 2})
UPDATE
(def list1 [ {:a 1 :b 1} {:a 2 :b 2} ] )
(def list2 [ {:a 2 :c 2} {:a 1 :c 1} ] )
(defn sort-merge [lista listb]
(map merge (sort-by :a lista) (sort-by :a listb)))
(println
(sort-merge list1 list2))
;=> ({:a 1, :b 1, :c 1} {:a 2, :b 2, :c 2})
another way is to use list comprehension:
user> (for [x list1
y list2
:when (= (:a x) (:a y))]
(merge x y))
({:a 2, :b 2, :c 2} {:a 1, :b 1, :c 1})

Fill in missing values in a list of list of maps over a certain range in Clojure

If our input were to look something like this:
(({:a 1 :b 100} {:a 2 :b 300} {:a 4 :b 0}) ({:a 0 :b 10} {:a 4 :b 50}))
Our range that we would like to police over would be (0 1 2 3 4)
​
We would like the output to be:
(({:a 0 :b 0} {:a 1 :b 100} {:a 2 :b 300} {:a 3 :b 0} {:a 4 :b 0})
({:a 0 :b 10} {:a 1 :b 0} {:a 2 :b 0} {:a 3 :b 0} {:a 4 :b 50}))
Basically what it should do is look at the first list of maps then the second and so on and figure out what the range is for :a. We can easily do that with a min/max function. Now it creates a range and applies it to both lists. If an :a is missing on one list it adds in that :a with a :b of 0. (ie the addition of {:a 0 :b 0} or {:a 3 :b 0} in the first list. We have a function that can somewhat do it, but aren't quite there yet. Here it is:
(map
#(doseq [i (vec myRange)]
(if (some (fn [list] (= i list)) (map :a %))
nil
(println (conj % {:a i :b 0}))))
myList)
Obviously because of Clojures immutable data structures this function fails. If our input is something like:
(({:a 1, :b 1} {:a 2, :b 3} {:a 4, :b 5})
({:a 0, :b 3} {:a 4, :b 1}))
our output is:
(nil nil)
but if we println:
({:a 0, :b 0} {:a 1, :b 1} {:a 2, :b 3} {:a 4, :b 5})
({:a 3, :b 0} {:a 1, :b 1} {:a 2, :b 3} {:a 4, :b 5})
({:a 1, :b 0} {:a 0, :b 3} {:a 4, :b 1})
({:a 2, :b 0} {:a 0, :b 3} {:a 4, :b 1})
({:a 3, :b 0} {:a 0, :b 3} {:a 4, :b 1})
(nil nil)
We want the output to look like:
(({:a 0, :b 0} {:a 1, :b 1} {:a 2, :b 3} {:a 3, :b 0} {:a 4, :b 5})
({:a 0, :b 3} {:a 1, :b 0} {:a 2, :b 0} {:a 3, :b 0} {:a 4, :b 1}))
without the use of a println. Any suggestions?
the idea of working with immutable data in loop, is to pass the result of the latest iteration to the next one. You could do it with loop/recur, but in your case it is common to use reduce function (which is literally one of the cornerstones of functional programming):
(defn update-coll [range items]
(reduce (fn [items i] (if (some #(= (:a %) i) items)
items
(conj items {:a i :b 0})))
items range))
the first parameter to reduce "updates" items for every value of range (i), passing the updated value to the next iteration.
now you just have to map your input data with it:
(def input '(({:a 1 :b 100} {:a 2 :b 300} {:a 4 :b 0})
({:a 0 :b 10} {:a 4 :b 50})))
(map (comp (partial sort-by :a)
(partial update-coll [0 1 2 3 4]))
input)
output:
(({:a 0, :b 0} {:a 1, :b 100} {:a 2, :b 300}
{:a 3, :b 0} {:a 4, :b 0})
({:a 0, :b 10} {:a 1, :b 0} {:a 2, :b 0}
{:a 3, :b 0} {:a 4, :b 50}))
also you can do it without accumulation using clojure's sets:
(defn process-input [input r]
(let [r (map #(hash-map :a % :b 0) r)]
(map (fn [items] (into (apply sorted-set-by
#(compare (:a %1) (:a %2))
items)
r))
input)))
(process-input input [0 1 2 3 4])
output:
(#{{:b 0, :a 0} {:a 1, :b 100} {:a 2, :b 300}
{:b 0, :a 3} {:a 4, :b 0}}
#{{:a 0, :b 10} {:b 0, :a 1} {:b 0, :a 2}
{:b 0, :a 3} {:a 4, :b 50}})
My attempt:
(defn fill-in-missing [lists]
(let [[min max] (apply (juxt min max) (map :a (flatten lists)))]
(for [cur-list lists]
(for [i (range min (inc max))]
(merge {:a i :b 0}
(some #(when (= i (:a %)) %) cur-list))))))
To get the minimum and maximum values of :a I just collect every :a with map and flatten, then I use juxt so I can apply both the min and max functions to them at the same time.
Since we want two levels of nesting lists, I went for two for list comprehensions and tried to make an expression that would attempt to find the map in the input, or else return the default {:a i :b 0}.

How to search and replace in a Clojure script data structure?

I would like to have a search and replace on the values only inside data structures:
(def str [1 2 3
{:a 1
:b 2
1 3}])
and
(subst str 1 2)
to return
[2 2 3 {:a 2, :b 2, 1 3}]
Another example:
(def str2 {[1 2 3] x, {a 1 b 2} y} )
and
(subst str2 1 2)
to return
{[1 2 3] x, {a 1 b 2} y}
Since the 1's are keys in a map they are not replaced
One option is using of postwalk-replace:
user> (def foo [1 2 3
{:a 1
:b 2
1 3}])
;; => #'user/foo
user> (postwalk-replace {1 2} foo)
;; => [2 2 3 {2 3, :b 2, :a 2}]
Although, this method has a downside: it replaces all elements in a structure, not only values. This may be not what you want.
Maybe this will do the trick...
(defn my-replace [smap s]
(letfn [(trns [s]
(map (fn [x]
(if (coll? x)
(my-replace smap x)
(or (smap x) x)))
s))]
(if (map? s)
(zipmap (keys s) (trns (vals s)))
(trns s))))
Works with lists, vectors and maps:
user> (my-replace {1 2} foo)
;; => (2 2 3 {:a 2, :b 2, 1 3})
...Seems to work on arbitrary nested structures too:
user> (my-replace {1 2} [1 2 3 {:a [1 1 1] :b [3 2 1] 1 1}])
;; => (2 2 3 {:a (2 2 2), :b (3 2 2) 1 2})

Group(Partition) lazy sequence by key

I have a lazy sequence of maps :
{:a 1 :b "a"} {:a 1 :b "b"} {:a 2 :b "a"} {:a 3 :b "a"} {:a 3 :b "b"} ...
and I want to group it by key :a and return another lazy seq:
[{:a 1 :b "a"} {:a 1 :b "b"}] [{:a 2 :b "a"}] [{:a 3 :b "a"} {:a 3 :b "b"}] ...
What would be the best way to achieve this?
user=> (def a [{:a 1 :b "a"} {:a 1 :b "b"} {:a 2 :b "a"} {:a 3 :b "a"} {:a 3 :b "b"}])
#'user/a
user=> (group-by :a a)
{1 [{:a 1, :b "a"} {:a 1, :b "b"}], 2 [{:a 2, :b "a"}], 3 [{:a 3, :b "a"} {:a 3, :b "b"}]}
user=> (map second (group-by :a a))
([{:a 1, :b "a"} {:a 1, :b "b"}] [{:a 2, :b "a"}] [{:a 3, :b "a"} {:a 3, :b "b"}])